# coding=utf-8
import re
from collections import defaultdict

import pymysql


def read_sql_inserts(file_path):
    """Generator that yields INSERT statements from the dump file, one line at a time."""
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line.startswith('INSERT INTO'):
                yield line


def process_large_sql_file(input_file, batch_size=10000):
    table_data = defaultdict(lambda: {
        'columns': None,
        'value_rows': []
    })

    # Matches single-row statements of the form:
    #   INSERT INTO `table` (col1, col2) VALUES (v1, v2);
    insert_pattern = re.compile(
        r'INSERT\s+INTO\s+`?([a-zA-Z_][a-zA-Z0-9_]*)`?\s*\((.*?)\)\s*VALUES\s*\((.*?)\);',
        re.IGNORECASE
    )

    # Stream the file through the generator so the whole dump never sits in memory at once
    for insert_stmt in read_sql_inserts(input_file):
        match = insert_pattern.match(insert_stmt)
        if match:
            table_name = match.group(1)
            columns = match.group(2)
            values = match.group(3)

            # Remember the column list the first time we see this table
            if table_data[table_name]['columns'] is None:
                table_data[table_name]['columns'] = columns

            table_data[table_name]['value_rows'].append(values)

    # Build the batched INSERT statements, batch_size rows per statement
    batch_inserts = {}
    for table_name, data in table_data.items():
        columns = data['columns']
        value_rows = data['value_rows']

        for i in range(0, len(value_rows), batch_size):
            batch_values = value_rows[i:i + batch_size]
            batch_insert = f"INSERT INTO `{table_name}` ({columns}) VALUES\n"
            batch_insert += ",\n".join([f"({values})" for values in batch_values])
            batch_insert += ";"

            if table_name not in batch_inserts:
                batch_inserts[table_name] = []
            batch_inserts[table_name].append(batch_insert)

    return batch_inserts


def execute_batch_inserts(db_config, batch_inserts):
    """Execute the batched INSERT statements directly against the database."""
    connection = pymysql.connect(**db_config)
    try:
        with connection.cursor() as cursor:
            for table_name, inserts in batch_inserts.items():
                for index, insert_sql in enumerate(inserts):
                    cursor.execute(insert_sql)
                    print(f"Table {table_name}: batch INSERT {index + 1} of {len(inserts)} executed successfully")
        connection.commit()
    finally:
        connection.close()


# Database configuration
db_config = {
    'host': '192.168.50.235',
    'user': 'root',
    'password': 'admin123456',
    'db': 'wtlivedb_1',
    'charset': 'utf8mb4'
}

"""
Strip out the INSERT statements first; what remains is the CREATE TABLE DDL:
    cat file | grep -v 'INSERT ' > create_table.sql
The script below converts the single-row INSERTs into batch INSERTs.
"""

if __name__ == "__main__":
    input_file = "wtlivedb.sql"

    # Convert and execute
    batch_inserts = process_large_sql_file(input_file)
    execute_batch_inserts(db_config, batch_inserts)

    # Print statistics
    for table_name, inserts in batch_inserts.items():
        print(f"Table '{table_name}': {len(inserts)} batch INSERT statement(s)")
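

# A minimal dry-run sketch, not part of the original workflow (the `demo_user`
# table, its rows, and the temporary file are made up for illustration). It
# writes two hypothetical single-row INSERTs to a temporary file, runs
# process_large_sql_file() on it, and prints the generated batch statement, so
# the regex can be checked against a dump format without touching the database.
# It is not wired into __main__; call it manually if needed.
def _dry_run_example():
    import os
    import tempfile

    sample = (
        "INSERT INTO `demo_user` (`id`, `name`) VALUES (1, 'alice');\n"
        "INSERT INTO `demo_user` (`id`, `name`) VALUES (2, 'bob');\n"
    )
    with tempfile.NamedTemporaryFile('w', suffix='.sql', delete=False, encoding='utf-8') as tmp:
        tmp.write(sample)
        tmp_path = tmp.name
    try:
        # With batch_size=2, both sample rows collapse into a single batch INSERT
        for table, inserts in process_large_sql_file(tmp_path, batch_size=2).items():
            print(f"-- {table}")
            for stmt in inserts:
                print(stmt)
    finally:
        os.remove(tmp_path)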