123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 |
- # coding=utf-8
- import re
- from collections import defaultdict
- import pymysql
- def read_sql_inserts(file_path):
- """生成器函数,逐行读取INSERT语句"""
- with open(file_path, 'r', encoding='utf-8') as f:
- for line in f:
- line = line.strip()
- if line.startswith('INSERT INTO'):
- yield line
- def process_large_sql_file(input_file, batch_size=10000):
- table_data = defaultdict(lambda: {
- 'columns': None,
- 'value_rows': []
- })
- insert_pattern = re.compile(
- r'INSERT\s+INTO\s+`?([a-zA-Z_][a-zA-Z0-9_]*)`?\s*\((.*?)\)\s*VALUES\s*\((.*?)\);',
- re.IGNORECASE
- )
- # 使用生成器处理
- for insert_stmt in read_sql_inserts(input_file):
- match = insert_pattern.match(insert_stmt)
- if match:
- table_name = match.group(1)
- columns = match.group(2)
- values = match.group(3)
- if table_data[table_name]['columns'] is None:
- table_data[table_name]['columns'] = columns
- table_data[table_name]['value_rows'].append(values)
- # 生成批量INSERT语句
- batch_inserts = {}
- for table_name, data in table_data.items():
- columns = data['columns']
- value_rows = data['value_rows']
- for i in range(0, len(value_rows), batch_size):
- batch_values = value_rows[i:i + batch_size]
- batch_insert = f"INSERT INTO `{table_name}` ({columns}) VALUES\n"
- batch_insert += ",\n".join([f"({values})" for values in batch_values])
- batch_insert += ";"
- if table_name not in batch_inserts:
- batch_inserts[table_name] = []
- batch_inserts[table_name].append(batch_insert)
- return batch_inserts
- def execute_batch_inserts(db_config, batch_inserts):
- """直接执行批量INSERT到数据库"""
- connection = pymysql.connect(**db_config)
- try:
- with connection.cursor() as cursor:
- for table_name, inserts in batch_inserts.items():
- for index, insert_sql in enumerate(inserts):
- cursor.execute(insert_sql)
- print(f"表 {table_name},共 {len(inserts)} 个, 第 {index + 1} 个批量INSERT语句执行成功")
- connection.commit()
- finally:
- connection.close()
- # 数据库配置
- db_config = {
- 'host': '192.168.50.235',
- 'user': 'root',
- 'password': 'admin123456',
- 'db': 'wtlivedb_1',
- 'charset': 'utf8mb4'
- }
- """
- 移除INSERT 语句 其他的就是建表语句了
- cat file |grep -v 'INSERT ' > create_talbe.sql
- 下面是 INSERT 转化为 BATCH INSERT 的脚本
- """
- if __name__ == "__main__":
- input_file = "wtlivedb.sql"
- # 使用
- batch_inserts = process_large_sql_file("input.sql")
- execute_batch_inserts(db_config, batch_inserts)
- # 打印统计信息
- for table_name, inserts in batch_inserts.items():
- print(f"表 '{table_name}': {len(inserts)} 个批量INSERT语句")
|