|
@@ -0,0 +1,100 @@
|
|
|
+# coding=utf-8
|
|
|
+
|
|
|
+
|
|
|
+import re
|
|
|
+from collections import defaultdict
|
|
|
+
|
|
|
+import pymysql
|
|
|
+
|
|
|
+
|
|
|
def read_sql_inserts(file_path):
    """Yield INSERT statements from *file_path*, one stripped line at a time.

    Reads lazily (generator), so arbitrarily large dump files never need to
    fit in memory. Only lines that begin with ``INSERT INTO`` (after
    stripping surrounding whitespace) are yielded.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped.startswith('INSERT INTO'):
                continue
            yield stripped
|
|
|
+
|
|
|
+
|
|
|
def process_large_sql_file(input_file, batch_size=10000):
    """Group single-row INSERT statements from *input_file* into batched
    multi-row INSERTs.

    Parameters:
        input_file: path to a SQL dump with one ``INSERT INTO tbl (cols)
            VALUES (vals);`` statement per line.
        batch_size: maximum number of rows per generated statement.

    Returns:
        dict mapping table name -> list of batched INSERT statement strings.

    NOTE(review): this is a regex-based "parser" — it assumes each INSERT
    occupies a single line and does not understand SQL string escaping.
    Good enough for mysqldump-style single-row dumps; not a general parser.
    """
    table_data = defaultdict(lambda: {
        'columns': None,
        'value_rows': []
    })

    # Bug fix: the values group is now greedy ``(.*)`` so a literal ')' or
    # even '); ' inside a value does not truncate the capture — the match
    # runs to the statement's final ');' terminator. (The original
    # non-greedy ``(.*?)\);`` stopped at the FIRST ');' on the line.)
    insert_pattern = re.compile(
        r'INSERT\s+INTO\s+`?([a-zA-Z_][a-zA-Z0-9_]*)`?\s*\((.*?)\)\s*VALUES\s*\((.*)\);',
        re.IGNORECASE
    )

    # Stream statements through the generator — only the parsed pieces are
    # accumulated, never the raw file.
    for insert_stmt in read_sql_inserts(input_file):
        match = insert_pattern.match(insert_stmt)
        if not match:
            continue
        table_name = match.group(1)
        columns = match.group(2)
        values = match.group(3)

        # Remember the column list from the first INSERT seen for a table;
        # subsequent rows are assumed to share the same column order.
        if table_data[table_name]['columns'] is None:
            table_data[table_name]['columns'] = columns

        table_data[table_name]['value_rows'].append(values)

    # Emit batched multi-row INSERT statements, batch_size rows apiece.
    # defaultdict(list) replaces the original "if key not in dict" dance.
    batch_inserts = defaultdict(list)
    for table_name, data in table_data.items():
        columns = data['columns']
        value_rows = data['value_rows']

        for start in range(0, len(value_rows), batch_size):
            rows = value_rows[start:start + batch_size]
            statement = (
                f"INSERT INTO `{table_name}` ({columns}) VALUES\n"
                + ",\n".join(f"({row})" for row in rows)
                + ";"
            )
            batch_inserts[table_name].append(statement)

    # Plain dict preserves the original return type for callers.
    return dict(batch_inserts)
|
|
|
+
|
|
|
+
|
|
|
def execute_batch_inserts(db_config, batch_inserts):
    """Execute every batched INSERT against the database in *db_config*.

    All statements run on one connection; a single commit happens after
    everything has executed, and the connection is always closed.
    """
    connection = pymysql.connect(**db_config)
    try:
        with connection.cursor() as cursor:
            for table_name, statements in batch_inserts.items():
                total = len(statements)
                # enumerate from 1 so the progress message is 1-based.
                for position, sql in enumerate(statements, start=1):
                    cursor.execute(sql)
                    print(f"表 {table_name},共 {total} 个, 第 {position} 个批量INSERT语句执行成功")
        # One transaction for the whole load, committed only on full success.
        connection.commit()
    finally:
        connection.close()
|
|
|
+
|
|
|
+
|
|
|
# Database connection settings (passed straight to pymysql.connect).
# NOTE(review): credentials are hard-coded; consider loading them from the
# environment or a config file before sharing this script.
db_config = dict(
    host='192.168.50.235',
    user='root',
    password='admin123456',
    db='wtlivedb_1',
    charset='utf8mb4',
)
|
|
|
+
|
|
|
"""
Strip the INSERT statements out of the dump; what remains is the table DDL:
cat file | grep -v 'INSERT ' > create_table.sql
The script below converts the single-row INSERTs into batched (multi-row) INSERTs.
"""
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Source dump containing one single-row INSERT per line.
    input_file = "wtlivedb.sql"

    # Bug fix: the original passed a hard-coded "input.sql" here, silently
    # ignoring the input_file variable defined just above.
    batch_inserts = process_large_sql_file(input_file)
    execute_batch_inserts(db_config, batch_inserts)

    # Summary: batched statements produced per table.
    for table_name, inserts in batch_inserts.items():
        print(f"表 '{table_name}': {len(inserts)} 个批量INSERT语句")
|