整理INSERT到批量INSERT.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. # coding=utf-8
  2. import re
  3. from collections import defaultdict
  4. import pymysql
  5. def read_sql_inserts(file_path):
  6. """生成器函数,逐行读取INSERT语句"""
  7. with open(file_path, 'r', encoding='utf-8') as f:
  8. for line in f:
  9. line = line.strip()
  10. if line.startswith('INSERT INTO'):
  11. yield line
  12. def process_large_sql_file(input_file, batch_size=10000):
  13. table_data = defaultdict(lambda: {
  14. 'columns': None,
  15. 'value_rows': []
  16. })
  17. insert_pattern = re.compile(
  18. r'INSERT\s+INTO\s+`?([a-zA-Z_][a-zA-Z0-9_]*)`?\s*\((.*?)\)\s*VALUES\s*\((.*?)\);',
  19. re.IGNORECASE
  20. )
  21. # 使用生成器处理
  22. for insert_stmt in read_sql_inserts(input_file):
  23. match = insert_pattern.match(insert_stmt)
  24. if match:
  25. table_name = match.group(1)
  26. columns = match.group(2)
  27. values = match.group(3)
  28. if table_data[table_name]['columns'] is None:
  29. table_data[table_name]['columns'] = columns
  30. table_data[table_name]['value_rows'].append(values)
  31. # 生成批量INSERT语句
  32. batch_inserts = {}
  33. for table_name, data in table_data.items():
  34. columns = data['columns']
  35. value_rows = data['value_rows']
  36. for i in range(0, len(value_rows), batch_size):
  37. batch_values = value_rows[i:i + batch_size]
  38. batch_insert = f"INSERT INTO `{table_name}` ({columns}) VALUES\n"
  39. batch_insert += ",\n".join([f"({values})" for values in batch_values])
  40. batch_insert += ";"
  41. if table_name not in batch_inserts:
  42. batch_inserts[table_name] = []
  43. batch_inserts[table_name].append(batch_insert)
  44. return batch_inserts
  45. def execute_batch_inserts(db_config, batch_inserts):
  46. """直接执行批量INSERT到数据库"""
  47. connection = pymysql.connect(**db_config)
  48. try:
  49. with connection.cursor() as cursor:
  50. for table_name, inserts in batch_inserts.items():
  51. for index, insert_sql in enumerate(inserts):
  52. cursor.execute(insert_sql)
  53. print(f"表 {table_name},共 {len(inserts)} 个, 第 {index + 1} 个批量INSERT语句执行成功")
  54. connection.commit()
  55. finally:
  56. connection.close()
  57. # 数据库配置
  58. db_config = {
  59. 'host': '192.168.50.235',
  60. 'user': 'root',
  61. 'password': 'admin123456',
  62. 'db': 'wtlivedb_1',
  63. 'charset': 'utf8mb4'
  64. }
  65. """
  66. 移除INSERT 语句 其他的就是建表语句了
  67. cat file |grep -v 'INSERT ' > create_talbe.sql
  68. 下面是 INSERT 转化为 BATCH INSERT 的脚本
  69. """
  70. if __name__ == "__main__":
  71. input_file = "wtlivedb.sql"
  72. # 使用
  73. batch_inserts = process_large_sql_file("input.sql")
  74. execute_batch_inserts(db_config, batch_inserts)
  75. # 打印统计信息
  76. for table_name, inserts in batch_inserts.items():
  77. print(f"表 '{table_name}': {len(inserts)} 个批量INSERT语句")