|
@@ -15,6 +15,7 @@ import threading
|
|
|
from threading import Thread, Event
|
|
from threading import Thread, Event
|
|
|
import signal
|
|
import signal
|
|
|
import sys
|
|
import sys
|
|
|
|
|
+import os
|
|
|
|
|
|
|
|
from config import DatabaseConfig, TableConfig
|
|
from config import DatabaseConfig, TableConfig
|
|
|
from file_scanner import ParquetFileInfo
|
|
from file_scanner import ParquetFileInfo
|
|
@@ -67,12 +68,12 @@ class DatabaseManager:
|
|
|
|
|
|
|
|
# 注册信号处理,优雅关闭
|
|
# 注册信号处理,优雅关闭
|
|
|
self._setup_signal_handlers()
|
|
self._setup_signal_handlers()
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
def _setup_signal_handlers(self):
|
|
def _setup_signal_handlers(self):
|
|
|
"""设置信号处理,确保程序退出时能正确关闭资源"""
|
|
"""设置信号处理,确保程序退出时能正确关闭资源"""
|
|
|
def signal_handler(signum, frame):
|
|
def signal_handler(signum, frame):
|
|
|
logger.info(f"接收到信号 {signum},正在关闭数据库连接池...")
|
|
logger.info(f"接收到信号 {signum},正在关闭数据库连接池...")
|
|
|
- self.close_all()
|
|
|
|
|
|
|
+ self.close()
|
|
|
sys.exit(0)
|
|
sys.exit(0)
|
|
|
|
|
|
|
|
signal.signal(signal.SIGINT, signal_handler) # Ctrl+C
|
|
signal.signal(signal.SIGINT, signal_handler) # Ctrl+C
|
|
@@ -84,15 +85,15 @@ class DatabaseManager:
|
|
|
logger.info(f"正在初始化数据库连接池到 {self.config.host}:{self.config.port}/{self.config.database}")
|
|
logger.info(f"正在初始化数据库连接池到 {self.config.host}:{self.config.port}/{self.config.database}")
|
|
|
|
|
|
|
|
self.pool = PooledDB(
|
|
self.pool = PooledDB(
|
|
|
- creator=pymysql, # 使用的数据库驱动
|
|
|
|
|
- maxconnections=self.pool_size, # 连接池允许的最大连接数
|
|
|
|
|
- mincached=2, # 初始化时创建的连接数
|
|
|
|
|
- maxcached=5, # 连接池中空闲连接的最大数量
|
|
|
|
|
- maxshared=3, # 最大共享连接数
|
|
|
|
|
- blocking=True, # 连接数达到最大时是否阻塞等待
|
|
|
|
|
- maxusage=None, # 连接的最大使用次数,None表示不限制
|
|
|
|
|
- setsession=[], # 连接时执行的SQL语句列表
|
|
|
|
|
- ping=1, # 使用连接前是否ping检查连接可用性 (0=从不, 1=每次, 2=每2次请求, 4=每4次请求, 7=每次请求)
|
|
|
|
|
|
|
+ creator=pymysql,
|
|
|
|
|
+ maxconnections=self.pool_size,
|
|
|
|
|
+ mincached=2,
|
|
|
|
|
+ maxcached=5,
|
|
|
|
|
+ maxshared=3,
|
|
|
|
|
+ blocking=True,
|
|
|
|
|
+ maxusage=None,
|
|
|
|
|
+ setsession=[],
|
|
|
|
|
+ ping=1,
|
|
|
host=self.config.host,
|
|
host=self.config.host,
|
|
|
port=self.config.port,
|
|
port=self.config.port,
|
|
|
user=self.config.user,
|
|
user=self.config.user,
|
|
@@ -100,15 +101,14 @@ class DatabaseManager:
|
|
|
database=self.config.database,
|
|
database=self.config.database,
|
|
|
charset=self.config.charset,
|
|
charset=self.config.charset,
|
|
|
cursorclass=cursors.DictCursor,
|
|
cursorclass=cursors.DictCursor,
|
|
|
- autocommit=False, # 手动控制事务
|
|
|
|
|
- connect_timeout=30, # 连接超时时间
|
|
|
|
|
- read_timeout=600, # 读取超时时间
|
|
|
|
|
- write_timeout=600, # 写入超时时间
|
|
|
|
|
- client_flag=pymysql.constants.CLIENT.MULTI_STATEMENTS # 支持多语句
|
|
|
|
|
|
|
+ autocommit=False,
|
|
|
|
|
+ connect_timeout=30,
|
|
|
|
|
+ read_timeout=600,
|
|
|
|
|
+ write_timeout=600,
|
|
|
|
|
+ client_flag=pymysql.constants.CLIENT.MULTI_STATEMENTS
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
logger.info(f"数据库连接池初始化成功")
|
|
logger.info(f"数据库连接池初始化成功")
|
|
|
- logger.info(f"连接池配置: maxconnections={self.pool_size}, mincached=2, maxcached=5")
|
|
|
|
|
|
|
|
|
|
# 测试连接池
|
|
# 测试连接池
|
|
|
test_result = self._test_pool_connection()
|
|
test_result = self._test_pool_connection()
|
|
@@ -133,7 +133,7 @@ class DatabaseManager:
|
|
|
elapsed = time.time() - start_time
|
|
elapsed = time.time() - start_time
|
|
|
|
|
|
|
|
cursor.close()
|
|
cursor.close()
|
|
|
- conn.close() # 归还连接
|
|
|
|
|
|
|
+ conn.close()
|
|
|
|
|
|
|
|
logger.info(f"连接池测试成功: 响应时间={elapsed:.3f}s, 服务器时间={result['server_time']}")
|
|
logger.info(f"连接池测试成功: 响应时间={elapsed:.3f}s, 服务器时间={result['server_time']}")
|
|
|
return True
|
|
return True
|
|
@@ -149,7 +149,7 @@ class DatabaseManager:
|
|
|
self._connection_monitor_thread = Thread(
|
|
self._connection_monitor_thread = Thread(
|
|
|
target=self._connection_monitor_loop,
|
|
target=self._connection_monitor_loop,
|
|
|
name="DBConnectionMonitor",
|
|
name="DBConnectionMonitor",
|
|
|
- daemon=True # 设置为守护线程,主程序退出时自动结束
|
|
|
|
|
|
|
+ daemon=True
|
|
|
)
|
|
)
|
|
|
self._connection_monitor_thread.start()
|
|
self._connection_monitor_thread.start()
|
|
|
logger.info("数据库连接监控线程已启动")
|
|
logger.info("数据库连接监控线程已启动")
|
|
@@ -157,7 +157,7 @@ class DatabaseManager:
|
|
|
def _stop_connection_monitor(self):
    """Ask the connection-monitor thread to exit and wait briefly for it.

    Does nothing when no monitor thread exists or the thread is not alive.
    The outcome (stopped, or still running after the wait) is logged.
    """
    monitor = self._connection_monitor_thread
    if not (monitor and monitor.is_alive()):
        return

    # Despite its name, setting ``_monitor_running`` is the stop request:
    # the monitor loop keeps running only while this event is NOT set.
    self._monitor_running.set()
    monitor.join(timeout=5)

    # join() returns silently when the timeout expires, so check the thread
    # state before claiming that it has stopped.
    if monitor.is_alive():
        logger.warning("数据库连接监控线程未能在5秒内停止")
    else:
        logger.info("数据库连接监控线程已停止")
|
|
|
|
|
|
|
@@ -166,25 +166,22 @@ class DatabaseManager:
|
|
|
logger.info(f"连接监控线程开始运行,检查间隔: {self._monitor_interval}秒")
|
|
logger.info(f"连接监控线程开始运行,检查间隔: {self._monitor_interval}秒")
|
|
|
|
|
|
|
|
last_log_time = time.time()
|
|
last_log_time = time.time()
|
|
|
- log_interval = 60 # 每分钟记录一次状态
|
|
|
|
|
|
|
+ log_interval = 60
|
|
|
|
|
|
|
|
while not self._monitor_running.is_set():
|
|
while not self._monitor_running.is_set():
|
|
|
try:
|
|
try:
|
|
|
- # 执行连接检查
|
|
|
|
|
self._perform_connection_check()
|
|
self._perform_connection_check()
|
|
|
|
|
|
|
|
- # 定期记录状态
|
|
|
|
|
current_time = time.time()
|
|
current_time = time.time()
|
|
|
if current_time - last_log_time >= log_interval:
|
|
if current_time - last_log_time >= log_interval:
|
|
|
self._log_connection_status()
|
|
self._log_connection_status()
|
|
|
last_log_time = current_time
|
|
last_log_time = current_time
|
|
|
|
|
|
|
|
- # 等待下一次检查
|
|
|
|
|
time.sleep(self._monitor_interval)
|
|
time.sleep(self._monitor_interval)
|
|
|
|
|
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
logger.error(f"连接监控循环异常: {e}")
|
|
logger.error(f"连接监控循环异常: {e}")
|
|
|
- time.sleep(self._monitor_interval) # 异常后继续尝试
|
|
|
|
|
|
|
+ time.sleep(self._monitor_interval)
|
|
|
|
|
|
|
|
logger.info("连接监控循环结束")
|
|
logger.info("连接监控循环结束")
|
|
|
|
|
|
|
@@ -194,7 +191,6 @@ class DatabaseManager:
|
|
|
self._monitor_stats['total_checks'] += 1
|
|
self._monitor_stats['total_checks'] += 1
|
|
|
self._last_connection_check = dt.now()
|
|
self._last_connection_check = dt.now()
|
|
|
|
|
|
|
|
- # 检查连接池是否初始化
|
|
|
|
|
if self.pool is None:
|
|
if self.pool is None:
|
|
|
self._connection_status = "ERROR"
|
|
self._connection_status = "ERROR"
|
|
|
self._connection_error_count += 1
|
|
self._connection_error_count += 1
|
|
@@ -203,27 +199,23 @@ class DatabaseManager:
|
|
|
logger.warning("连接池未初始化")
|
|
logger.warning("连接池未初始化")
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
- # 尝试获取连接并执行简单查询
|
|
|
|
|
conn = None
|
|
conn = None
|
|
|
cursor = None
|
|
cursor = None
|
|
|
try:
|
|
try:
|
|
|
conn = self.pool.connection()
|
|
conn = self.pool.connection()
|
|
|
cursor = conn.cursor()
|
|
cursor = conn.cursor()
|
|
|
|
|
|
|
|
- # 执行一个简单的查询测试连接
|
|
|
|
|
start_time = time.time()
|
|
start_time = time.time()
|
|
|
cursor.execute("SELECT 1 as test, NOW() as server_time, "
|
|
cursor.execute("SELECT 1 as test, NOW() as server_time, "
|
|
|
"VERSION() as version, CONNECTION_ID() as connection_id")
|
|
"VERSION() as version, CONNECTION_ID() as connection_id")
|
|
|
result = cursor.fetchone()
|
|
result = cursor.fetchone()
|
|
|
elapsed = time.time() - start_time
|
|
elapsed = time.time() - start_time
|
|
|
|
|
|
|
|
- # 更新状态
|
|
|
|
|
self._connection_status = "HEALTHY"
|
|
self._connection_status = "HEALTHY"
|
|
|
self._connection_error_count = 0
|
|
self._connection_error_count = 0
|
|
|
self._monitor_stats['successful_checks'] += 1
|
|
self._monitor_stats['successful_checks'] += 1
|
|
|
self._monitor_stats['last_success'] = dt.now()
|
|
self._monitor_stats['last_success'] = dt.now()
|
|
|
|
|
|
|
|
- # 记录详细连接信息(调试级别)
|
|
|
|
|
if logger.isEnabledFor(logging.DEBUG):
|
|
if logger.isEnabledFor(logging.DEBUG):
|
|
|
logger.debug(f"连接检查成功: "
|
|
logger.debug(f"连接检查成功: "
|
|
|
f"响应时间={elapsed:.3f}s, "
|
|
f"响应时间={elapsed:.3f}s, "
|
|
@@ -237,14 +229,12 @@ class DatabaseManager:
|
|
|
self._monitor_stats['failed_checks'] += 1
|
|
self._monitor_stats['failed_checks'] += 1
|
|
|
self._monitor_stats['last_error'] = str(e)
|
|
self._monitor_stats['last_error'] = str(e)
|
|
|
|
|
|
|
|
- # 根据错误计数判断连接状态
|
|
|
|
|
if self._connection_error_count >= self._max_error_count:
|
|
if self._connection_error_count >= self._max_error_count:
|
|
|
self._connection_status = "DISCONNECTED"
|
|
self._connection_status = "DISCONNECTED"
|
|
|
logger.error(f"数据库连接失败,已连续失败 {self._connection_error_count} 次: {e}")
|
|
logger.error(f"数据库连接失败,已连续失败 {self._connection_error_count} 次: {e}")
|
|
|
else:
|
|
else:
|
|
|
logger.warning(f"数据库连接检查失败 (第{self._connection_error_count}次): {e}")
|
|
logger.warning(f"数据库连接检查失败 (第{self._connection_error_count}次): {e}")
|
|
|
|
|
|
|
|
- # 尝试自动重连
|
|
|
|
|
if self._connection_error_count >= 3:
|
|
if self._connection_error_count >= 3:
|
|
|
self._auto_reconnect()
|
|
self._auto_reconnect()
|
|
|
|
|
|
|
@@ -252,7 +242,7 @@ class DatabaseManager:
|
|
|
if cursor:
|
|
if cursor:
|
|
|
cursor.close()
|
|
cursor.close()
|
|
|
if conn:
|
|
if conn:
|
|
|
- conn.close() # 归还连接到连接池
|
|
|
|
|
|
|
+ conn.close()
|
|
|
|
|
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
logger.error(f"执行连接检查时发生异常: {e}")
|
|
logger.error(f"执行连接检查时发生异常: {e}")
|
|
@@ -264,13 +254,9 @@ class DatabaseManager:
|
|
|
try:
|
|
try:
|
|
|
logger.warning(f"检测到连接问题,正在尝试自动重连 (错误计数: {self._connection_error_count})")
|
|
logger.warning(f"检测到连接问题,正在尝试自动重连 (错误计数: {self._connection_error_count})")
|
|
|
|
|
|
|
|
- # 1. 先释放当前线程的连接
|
|
|
|
|
self.release_connection()
|
|
self.release_connection()
|
|
|
-
|
|
|
|
|
- # 2. 等待一小段时间
|
|
|
|
|
time.sleep(2)
|
|
time.sleep(2)
|
|
|
|
|
|
|
|
- # 3. 测试当前连接池
|
|
|
|
|
if self.pool:
|
|
if self.pool:
|
|
|
test_result = self._test_pool_connection()
|
|
test_result = self._test_pool_connection()
|
|
|
if test_result:
|
|
if test_result:
|
|
@@ -282,7 +268,6 @@ class DatabaseManager:
|
|
|
else:
|
|
else:
|
|
|
logger.warning("连接池测试失败,尝试重新初始化")
|
|
logger.warning("连接池测试失败,尝试重新初始化")
|
|
|
|
|
|
|
|
- # 4. 重新初始化连接池
|
|
|
|
|
old_pool = self.pool
|
|
old_pool = self.pool
|
|
|
try:
|
|
try:
|
|
|
self.pool = None
|
|
self.pool = None
|
|
@@ -368,18 +353,15 @@ class DatabaseManager:
|
|
|
while time.time() - start_time < timeout:
|
|
while time.time() - start_time < timeout:
|
|
|
attempts += 1
|
|
attempts += 1
|
|
|
|
|
|
|
|
- # 检查连接状态
|
|
|
|
|
if self._connection_status == "HEALTHY":
|
|
if self._connection_status == "HEALTHY":
|
|
|
logger.info(f"数据库连接已恢复,等待时间: {time.time() - start_time:.1f}秒")
|
|
logger.info(f"数据库连接已恢复,等待时间: {time.time() - start_time:.1f}秒")
|
|
|
return True
|
|
return True
|
|
|
|
|
|
|
|
- # 如果连接断开,尝试立即重连
|
|
|
|
|
if self._connection_status == "DISCONNECTED":
|
|
if self._connection_status == "DISCONNECTED":
|
|
|
logger.info(f"尝试重连 (第{attempts}次)")
|
|
logger.info(f"尝试重连 (第{attempts}次)")
|
|
|
if self._auto_reconnect():
|
|
if self._auto_reconnect():
|
|
|
return True
|
|
return True
|
|
|
|
|
|
|
|
- # 等待下一次检查
|
|
|
|
|
logger.info(f"等待连接恢复... ({attempts}/{int(timeout/check_interval)})")
|
|
logger.info(f"等待连接恢复... ({attempts}/{int(timeout/check_interval)})")
|
|
|
time.sleep(check_interval)
|
|
time.sleep(check_interval)
|
|
|
|
|
|
|
@@ -389,15 +371,12 @@ class DatabaseManager:
|
|
|
def get_connection(self) -> Connection:
|
|
def get_connection(self) -> Connection:
|
|
|
"""从连接池获取数据库连接"""
|
|
"""从连接池获取数据库连接"""
|
|
|
try:
|
|
try:
|
|
|
- # 检查当前连接状态
|
|
|
|
|
if self._connection_status in ["ERROR", "DISCONNECTED"]:
|
|
if self._connection_status in ["ERROR", "DISCONNECTED"]:
|
|
|
logger.warning(f"获取连接时检测到连接状态为 {self._connection_status},尝试自动重连")
|
|
logger.warning(f"获取连接时检测到连接状态为 {self._connection_status},尝试自动重连")
|
|
|
if not self.wait_for_connection(timeout=10):
|
|
if not self.wait_for_connection(timeout=10):
|
|
|
raise ConnectionError(f"数据库连接不可用,当前状态: {self._connection_status}")
|
|
raise ConnectionError(f"数据库连接不可用,当前状态: {self._connection_status}")
|
|
|
|
|
|
|
|
- # 检查线程局部存储中是否有连接
|
|
|
|
|
if not hasattr(self._thread_local, 'connection') or self._thread_local.connection is None:
|
|
if not hasattr(self._thread_local, 'connection') or self._thread_local.connection is None:
|
|
|
- # 从连接池获取新连接
|
|
|
|
|
conn = self.pool.connection()
|
|
conn = self.pool.connection()
|
|
|
self._thread_local.connection = conn
|
|
self._thread_local.connection = conn
|
|
|
logger.debug(f"从连接池获取新连接,当前线程: {threading.current_thread().name}")
|
|
logger.debug(f"从连接池获取新连接,当前线程: {threading.current_thread().name}")
|
|
@@ -407,12 +386,10 @@ class DatabaseManager:
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
logger.error(f"从连接池获取连接失败: {e}")
|
|
logger.error(f"从连接池获取连接失败: {e}")
|
|
|
|
|
|
|
|
- # 更新连接状态
|
|
|
|
|
self._connection_status = "ERROR"
|
|
self._connection_status = "ERROR"
|
|
|
self._connection_error_count += 1
|
|
self._connection_error_count += 1
|
|
|
self._monitor_stats['last_error'] = str(e)
|
|
self._monitor_stats['last_error'] = str(e)
|
|
|
|
|
|
|
|
- # 尝试自动重连
|
|
|
|
|
self._auto_reconnect()
|
|
self._auto_reconnect()
|
|
|
raise
|
|
raise
|
|
|
|
|
|
|
@@ -422,17 +399,13 @@ class DatabaseManager:
|
|
|
if hasattr(self._thread_local, 'connection') and self._thread_local.connection is not None:
|
|
if hasattr(self._thread_local, 'connection') and self._thread_local.connection is not None:
|
|
|
conn = self._thread_local.connection
|
|
conn = self._thread_local.connection
|
|
|
try:
|
|
try:
|
|
|
- # 修复:SteadyDBConnection 需要访问 _con 属性获取原始连接
|
|
|
|
|
- # 检查原始连接的 autocommit 状态
|
|
|
|
|
if hasattr(conn, '_con') and isinstance(conn._con, Connection):
|
|
if hasattr(conn, '_con') and isinstance(conn._con, Connection):
|
|
|
original_conn = conn._con
|
|
original_conn = conn._con
|
|
|
if not original_conn.get_autocommit():
|
|
if not original_conn.get_autocommit():
|
|
|
original_conn.rollback()
|
|
original_conn.rollback()
|
|
|
else:
|
|
else:
|
|
|
- # 备选方案:直接尝试回滚,不检查 autocommit
|
|
|
|
|
conn.rollback()
|
|
conn.rollback()
|
|
|
|
|
|
|
|
- # 关闭连接(实际上是归还到连接池)
|
|
|
|
|
conn.close()
|
|
conn.close()
|
|
|
logger.debug(f"连接已归还到连接池,当前线程: {threading.current_thread().name}")
|
|
logger.debug(f"连接已归还到连接池,当前线程: {threading.current_thread().name}")
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
@@ -447,7 +420,6 @@ class DatabaseManager:
|
|
|
try:
|
|
try:
|
|
|
conn = self.get_connection()
|
|
conn = self.get_connection()
|
|
|
|
|
|
|
|
- # 执行一个简单的查询来检查连接
|
|
|
|
|
cursor = None
|
|
cursor = None
|
|
|
try:
|
|
try:
|
|
|
cursor = conn.cursor()
|
|
cursor = conn.cursor()
|
|
@@ -459,7 +431,6 @@ class DatabaseManager:
|
|
|
cursor.close()
|
|
cursor.close()
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
logger.warning(f"连接检查失败: {e}")
|
|
logger.warning(f"连接检查失败: {e}")
|
|
|
- # 释放无效连接
|
|
|
|
|
self.release_connection()
|
|
self.release_connection()
|
|
|
return False
|
|
return False
|
|
|
|
|
|
|
@@ -467,27 +438,23 @@ class DatabaseManager:
|
|
|
"""重新连接数据库(连接池会自动处理)"""
|
|
"""重新连接数据库(连接池会自动处理)"""
|
|
|
try:
|
|
try:
|
|
|
logger.info("正在重新连接数据库...")
|
|
logger.info("正在重新连接数据库...")
|
|
|
- # 释放当前连接,下次获取时会自动创建新连接
|
|
|
|
|
self.release_connection()
|
|
self.release_connection()
|
|
|
- time.sleep(1) # 等待1秒
|
|
|
|
|
- # 获取新连接
|
|
|
|
|
|
|
+ time.sleep(1)
|
|
|
self.get_connection()
|
|
self.get_connection()
|
|
|
logger.info("数据库重新连接成功")
|
|
logger.info("数据库重新连接成功")
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
logger.error(f"数据库重新连接失败: {e}")
|
|
logger.error(f"数据库重新连接失败: {e}")
|
|
|
raise
|
|
raise
|
|
|
|
|
|
|
|
|
|
def close(self):
    """Close the database connection pool.

    Shorthand for ``close_pool()``; nothing else is shut down here.
    """
    self.close_pool()
|
|
|
|
|
+
|
|
|
def close_all(self):
    """Shut down every database resource owned by this manager.

    Runs three steps in order: stop the monitor thread, close the
    connection pool, then log the final connection status.
    """
    logger.info("正在关闭所有数据库资源...")

    # Stop the monitor thread before closing the pool.
    self._stop_connection_monitor()

    # Close the pool itself.
    self.close_pool()

    # Record the final state for the log.
    self._log_connection_status()

    logger.info("所有数据库资源已关闭")
|
|
|
|
|
|
|
@@ -495,10 +462,7 @@ class DatabaseManager:
|
|
|
"""关闭整个连接池"""
|
|
"""关闭整个连接池"""
|
|
|
try:
|
|
try:
|
|
|
if self.pool:
|
|
if self.pool:
|
|
|
- # 释放当前线程的连接
|
|
|
|
|
self.release_connection()
|
|
self.release_connection()
|
|
|
-
|
|
|
|
|
- # 关闭连接池
|
|
|
|
|
self.pool.close()
|
|
self.pool.close()
|
|
|
self.pool = None
|
|
self.pool = None
|
|
|
self._connection_status = "DISCONNECTED"
|
|
self._connection_status = "DISCONNECTED"
|
|
@@ -506,11 +470,36 @@ class DatabaseManager:
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
logger.error(f"关闭连接池时出错: {e}")
|
|
logger.error(f"关闭连接池时出错: {e}")
|
|
|
|
|
|
|
|
- # 以下为原有的业务方法,保持不变
|
|
|
|
|
|
|
+ # ============ 数据库操作相关方法 ============
|
|
|
|
|
+
|
|
|
|
|
def check_table_exists(self, table_name: str) -> bool:
    """Return whether a table named exactly ``table_name`` exists.

    Args:
        table_name: Name of the table to look up.

    Returns:
        True when the table is found. False when it is missing or when the
        lookup itself fails; the failure is logged, not raised.
    """
    cursor = None

    try:
        # SHOW TABLES LIKE takes a pattern in which "_" and "%" are
        # wildcards. Escape them (and the escape character itself) so that
        # e.g. "a_b" cannot be reported as existing because "axb" does.
        pattern = (
            table_name.replace("\\", "\\\\")
            .replace("_", "\\_")
            .replace("%", "\\%")
        )

        conn = self.get_connection()
        cursor = conn.cursor()

        try:
            cursor.execute("SHOW TABLES LIKE %s", (pattern,))
            exists = cursor.fetchone() is not None
            logger.info(f"🔍 检查表 '{table_name}' 存在: {exists}")
            return exists
        finally:
            if cursor:
                cursor.close()

    except Exception as e:
        logger.error(f"❌ 检查表存在失败: {e}")
        return False
    finally:
        # Always hand the current thread's connection back.
        self.release_connection()
|
|
|
|
|
+
|
|
|
def create_table_with_unique_key(self, table_name: str, columns: List[str],
|
|
def create_table_with_unique_key(self, table_name: str, columns: List[str],
|
|
|
unique_keys: List[str]) -> bool:
|
|
unique_keys: List[str]) -> bool:
|
|
|
"""根据列定义创建表,包含三字段唯一键,数据字段使用DOUBLE类型"""
|
|
"""根据列定义创建表,包含三字段唯一键,数据字段使用DOUBLE类型"""
|
|
|
- # 检查连接状态
|
|
|
|
|
if not self.wait_for_connection():
|
|
if not self.wait_for_connection():
|
|
|
logger.error("创建表失败:数据库连接不可用")
|
|
logger.error("创建表失败:数据库连接不可用")
|
|
|
return False
|
|
return False
|
|
@@ -523,15 +512,11 @@ class DatabaseManager:
|
|
|
cursor = conn.cursor()
|
|
cursor = conn.cursor()
|
|
|
|
|
|
|
|
try:
|
|
try:
|
|
|
- # 删除已存在的表
|
|
|
|
|
drop_sql = f"DROP TABLE IF EXISTS `{table_name}`"
|
|
drop_sql = f"DROP TABLE IF EXISTS `{table_name}`"
|
|
|
cursor.execute(drop_sql)
|
|
cursor.execute(drop_sql)
|
|
|
logger.info(f"已删除旧表: {table_name}")
|
|
logger.info(f"已删除旧表: {table_name}")
|
|
|
|
|
|
|
|
- # 构建创建表的SQL
|
|
|
|
|
columns_sql = ",\n ".join(columns)
|
|
columns_sql = ",\n ".join(columns)
|
|
|
-
|
|
|
|
|
- # 添加三字段唯一键约束
|
|
|
|
|
unique_keys_str = ', '.join([f'`{key}`' for key in unique_keys])
|
|
unique_keys_str = ', '.join([f'`{key}`' for key in unique_keys])
|
|
|
|
|
|
|
|
create_sql = f"""
|
|
create_sql = f"""
|
|
@@ -551,22 +536,16 @@ class DatabaseManager:
|
|
|
"""
|
|
"""
|
|
|
|
|
|
|
|
logger.debug(f"创建表的SQL语句:\n{create_sql}")
|
|
logger.debug(f"创建表的SQL语句:\n{create_sql}")
|
|
|
-
|
|
|
|
|
- # 执行创建表
|
|
|
|
|
cursor.execute(create_sql)
|
|
cursor.execute(create_sql)
|
|
|
conn.commit()
|
|
conn.commit()
|
|
|
|
|
|
|
|
logger.info(f"表 '{table_name}' 创建成功!")
|
|
logger.info(f"表 '{table_name}' 创建成功!")
|
|
|
logger.info(f"三字段唯一键: {unique_keys}")
|
|
logger.info(f"三字段唯一键: {unique_keys}")
|
|
|
-
|
|
|
|
|
return True
|
|
return True
|
|
|
|
|
|
|
|
finally:
|
|
finally:
|
|
|
if cursor:
|
|
if cursor:
|
|
|
- try:
|
|
|
|
|
- cursor.close()
|
|
|
|
|
- except:
|
|
|
|
|
- pass
|
|
|
|
|
|
|
+ cursor.close()
|
|
|
|
|
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
logger.error(f"创建表失败: {e}")
|
|
logger.error(f"创建表失败: {e}")
|
|
@@ -575,148 +554,478 @@ class DatabaseManager:
|
|
|
conn.rollback()
|
|
conn.rollback()
|
|
|
return False
|
|
return False
|
|
|
finally:
|
|
finally:
|
|
|
- # 不释放连接,让调用者控制
|
|
|
|
|
pass
|
|
pass
|
|
|
|
|
|
|
|
- # ... 其余的业务方法保持不变,包括:
|
|
|
|
|
- # create_data_scada_turbine_table
|
|
|
|
|
- # _clean_and_convert_simple
|
|
|
|
|
- # _prepare_upsert_sql
|
|
|
|
|
- # _convert_to_numeric
|
|
|
|
|
- # _convert_row_to_tuple
|
|
|
|
|
- # _escape_sql_value
|
|
|
|
|
- # _get_sql_with_values
|
|
|
|
|
- # _log_failed_row_details
|
|
|
|
|
- # batch_upsert_data_direct
|
|
|
|
|
- # upsert_parquet_data
|
|
|
|
|
- # check_table_exists
|
|
|
|
|
- # get_table_row_count
|
|
|
|
|
- # get_table_stats
|
|
|
|
|
- # check_duplicate_keys
|
|
|
|
|
- # with_connection 装饰器
|
|
|
|
|
-
|
|
|
|
|
-# 添加一个独立的连接测试函数,方便外部调用
|
|
|
|
|
-def test_database_connection(config: DatabaseConfig, test_query: str = "SELECT 1") -> Dict[str, Any]:
|
|
|
|
|
- """
|
|
|
|
|
- 测试数据库连接
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- config: 数据库配置
|
|
|
|
|
- test_query: 测试查询语句
|
|
|
|
|
|
|
def get_table_row_count(self, table_name: str) -> int:
    """Return the number of rows in ``table_name``.

    Args:
        table_name: Table to count; interpolated into the statement inside
            backticks.

    Returns:
        The row count, or 0 when the query returns no row or any error
        occurs (errors are logged, not raised).
    """
    db_cursor = None

    try:
        db_cursor = self.get_connection().cursor()

        try:
            db_cursor.execute(f"SELECT COUNT(*) as count FROM `{table_name}`")
            row = db_cursor.fetchone()
            if row:
                row_total = row['count']
            else:
                row_total = 0
            logger.info(f"📊 表 '{table_name}' 行数: {row_total:,}")
            return row_total
        finally:
            if db_cursor:
                db_cursor.close()

    except Exception as err:
        logger.error(f"❌ 获取表行数失败: {err}")
        return 0
    finally:
        # Always hand the current thread's connection back.
        self.release_connection()
|
|
|
|
|
|
|
|
- try:
|
|
|
|
|
- # 记录开始时间
|
|
|
|
|
- start_time = time.time()
|
|
|
|
|
|
|
def get_table_stats(self, table_name: str) -> Dict[str, Any]:
    """Collect summary statistics for ``table_name``.

    Args:
        table_name: Table to summarise; interpolated into the statement
            inside backticks.

    Returns:
        A dict with the keys ``total_rows``, ``farm_count``,
        ``turbine_count``, ``first_data_time``, ``last_data_time``,
        ``first_create`` and ``last_update``. An empty dict when the query
        returns no row or any error occurs (errors are logged, not raised).
    """
    stat_columns = (
        'total_rows',
        'farm_count',
        'turbine_count',
        'first_data_time',
        'last_data_time',
        'first_create',
        'last_update',
    )
    cursor = None

    try:
        conn = self.get_connection()
        cursor = conn.cursor()

        try:
            cursor.execute(f"""
                SELECT
                    COUNT(*) as total_rows,
                    COUNT(DISTINCT id_farm) as farm_count,
                    COUNT(DISTINCT id_turbine) as turbine_count,
                    MIN(data_time) as first_data_time,
                    MAX(data_time) as last_data_time,
                    MIN(create_time) as first_create,
                    MAX(update_time) as last_update
                FROM `{table_name}`
            """)

            result = cursor.fetchone()
            if not result:
                return {}

            # The pool hands out DictCursor cursors, so a row is a dict
            # keyed by column alias. Positional access (result[0]) raises
            # KeyError there and made this method always return {}.
            stats = {name: result[name] for name in stat_columns}

            logger.info(f"📈 表 '{table_name}' 统计信息:")
            logger.info(f"   总行数: {stats['total_rows']:,}")
            logger.info(f"   风场数量: {stats['farm_count']}")
            logger.info(f"   风机数量: {stats['turbine_count']}")
            logger.info(f"   最早数据时间: {stats['first_data_time']}")
            logger.info(f"   最新数据时间: {stats['last_data_time']}")

            return stats
        finally:
            if cursor:
                cursor.close()

    except Exception as e:
        logger.error(f"❌ 获取表统计失败: {e}")
        return {}
    finally:
        # Always hand the current thread's connection back.
        self.release_connection()
|
|
|
|
|
+
|
|
|
|
|
def check_duplicate_keys(self, table_name: str) -> List[Dict]:
    """Find rows that share the same (id_farm, id_turbine, data_time) key.

    Args:
        table_name: Table to inspect; interpolated into the statement
            inside backticks.

    Returns:
        At most 10 duplicate groups, largest first. Each entry is a dict
        with ``id_farm``, ``id_turbine``, ``data_time``,
        ``duplicate_count``, ``min_id`` and ``max_id``. An empty list when
        there are no duplicates or any error occurs (errors are logged,
        not raised).
    """
    group_columns = (
        'id_farm',
        'id_turbine',
        'data_time',
        'duplicate_count',
        'min_id',
        'max_id',
    )
    cursor = None

    try:
        conn = self.get_connection()
        cursor = conn.cursor()

        try:
            cursor.execute(f"""
                SELECT
                    id_farm,
                    id_turbine,
                    data_time,
                    COUNT(*) as duplicate_count,
                    MIN(id) as min_id,
                    MAX(id) as max_id
                FROM `{table_name}`
                GROUP BY id_farm, id_turbine, data_time
                HAVING COUNT(*) > 1
                ORDER BY duplicate_count DESC
                LIMIT 10
            """)

            # The pool hands out DictCursor cursors, so each row is a dict
            # keyed by column name/alias. Positional access (row[0]) raises
            # KeyError there and made this method always return [].
            duplicates = [
                {name: row[name] for name in group_columns}
                for row in cursor.fetchall()
            ]

            if duplicates:
                logger.warning(f"⚠️ 发现重复的唯一键记录: {len(duplicates)} 组")
                # Only the first three groups are spelled out in the log.
                for dup in duplicates[:3]:
                    logger.warning(f"   重复: 风场={dup['id_farm']}, 风机={dup['id_turbine']}, "
                                   f"时间={dup['data_time']}, 重复次数={dup['duplicate_count']}")
            else:
                logger.info(f"✅ 无重复的唯一键记录")

            return duplicates
        finally:
            if cursor:
                cursor.close()

    except Exception as e:
        logger.error(f"❌ 检查重复键失败: {e}")
        return []
    finally:
        # Always hand the current thread's connection back.
        self.release_connection()
|
|
|
|
|
+
|
|
|
|
|
def upsert_parquet_data(self, file_info: ParquetFileInfo, table_name: str,
                        batch_size: int = 100, max_retries: int = 3) -> Tuple[int, int, int]:
    """Upsert one parquet file into ``table_name`` in batches.

    The file is read, tagged with the farm/turbine metadata from
    ``file_info``, cleaned, and written with ``executemany`` one batch at a
    time. A batch that still fails after ``max_retries`` retries is written
    row by row instead.

    Args:
        file_info: File descriptor (path, farm/turbine metadata and the
            detected time column name).
        table_name: Target table.
        batch_size: Number of rows per batch.
        max_retries: Retries allowed per batch, and per row in the
            row-by-row fallback.

    Returns:
        ``(total_rows, estimated_inserted, estimated_updated)``.

    Raises:
        Exception: Any error not handled by the per-batch retry logic is
            logged and re-raised.
    """
    try:
        logger.info(f"📂 正在读取并处理文件: {file_info.file_path}")
        logger.info(f"⏰ 识别到的时间字段: {file_info.data_time_column}")

        # Read the parquet file.
        df = pd.read_parquet(file_info.file_path, engine='pyarrow')

        # Attach the metadata columns taken from the file descriptor.
        df['id_farm'] = file_info.farm_id
        df['name_farm'] = file_info.farm_name
        df['no_model_turbine'] = file_info.model_type
        df['id_turbine'] = file_info.turbine_id

        logger.info(f"📊 文件 {file_info.turbine_id}.parquet 读取完成,形状: {df.shape}")

        cleaned_df = self._clean_and_convert_simple(df, file_info.data_time_column)

        # A missing key column is reported and filled with None.
        required_columns = ['id_farm', 'id_turbine', 'data_time']
        for col in required_columns:
            if col not in cleaned_df.columns:
                logger.error(f"必需字段 '{col}' 不存在于数据中")
                cleaned_df[col] = None

        columns = list(cleaned_df.columns)
        upsert_sql = self._prepare_upsert_sql(table_name, columns)

        total_rows = len(cleaned_df)
        total_batches = (total_rows + batch_size - 1) // batch_size
        total_affected_rows = 0
        total_failed_rows = 0

        logger.info(f"🚀 准备处理 {total_rows} 行数据,分为 {total_batches} 个批次")

        for i in range(0, total_rows, batch_size):
            batch_df = cleaned_df.iloc[i:i + batch_size]
            batch_num = i // batch_size + 1

            retry_count = 0
            batch_success = False

            while retry_count <= max_retries and not batch_success:
                conn = None
                cursor = None

                try:
                    if not self.check_connection():
                        logger.warning(f"🔌 连接已断开,正在重新连接...")
                        self.reconnect()

                    conn = self.get_connection()
                    cursor = conn.cursor()

                    # Convert the batch to a list of parameter tuples.
                    batch_values = [
                        self._convert_row_to_tuple(row, columns)
                        for _, row in batch_df.iterrows()
                    ]

                    affected = cursor.executemany(upsert_sql, batch_values)
                    conn.commit()

                    # Count only after the commit succeeded, so a failed
                    # commit followed by a retry is not counted twice.
                    total_affected_rows += affected
                    batch_success = True

                    if batch_num % 10 == 0 or batch_num == total_batches:
                        logger.info(f"✅ 批次 {batch_num}/{total_batches}: 处理 {len(batch_df)} 行, "
                                    f"受影响 {affected} 行")

                except (pymysql.Error, AttributeError) as e:
                    retry_count += 1
                    logger.error(f"❌ 批次 {batch_num} UPSERT失败,错误: {str(e)}")

                    # Discard whatever the failed attempt left pending so
                    # the next attempt starts from a clean transaction.
                    # Best effort: the connection itself may be broken.
                    if conn is not None:
                        try:
                            conn.rollback()
                        except Exception as rollback_err:
                            logger.warning(f"⚠️ 批次 {batch_num} 回滚失败: {rollback_err}")

                    if retry_count > max_retries:
                        logger.error(f"❌ 批次 {batch_num} UPSERT失败,已达到最大重试次数")

                        # Fall back to writing the batch one row at a time.
                        batch_affected, batch_failed = self._upsert_rows_individually(
                            batch_df, columns, upsert_sql, batch_num, max_retries)

                        total_affected_rows += batch_affected
                        total_failed_rows += batch_failed

                        if batch_affected > 0:
                            logger.info(f"⚠️ 批次 {batch_num} 单条处理完成,成功 {batch_affected} 行, 失败 {batch_failed} 行")

                        break
                    else:
                        logger.warning(f"⚠️ 批次 {batch_num} UPSERT失败,第 {retry_count} 次重试")
                        # Back off a little longer on every retry.
                        time.sleep(2 * retry_count)

                finally:
                    if cursor:
                        cursor.close()

            # Short pause between successful batches.
            if batch_success and batch_num < total_batches:
                time.sleep(0.1)

        # NOTE(review): the inserted/updated split below is a fixed
        # half/half placeholder, not a measurement. Deriving real numbers
        # needs the affected-row semantics of the statement built by
        # _prepare_upsert_sql — confirm before relying on these values.
        successful_rows = total_rows - total_failed_rows
        estimated_inserted = successful_rows // 2
        estimated_updated = successful_rows - estimated_inserted

        logger.info(f"🎉 文件 {os.path.basename(file_info.file_path)} UPSERT完成:")
        logger.info(f"   总处理行数: {total_rows}")
        logger.info(f"   总受影响行数: {total_affected_rows}")
        logger.info(f"   失败行数: {total_failed_rows}")

        return total_rows, estimated_inserted, estimated_updated

    except Exception as e:
        logger.error(f"❌ 处理文件 {file_info.file_path} 失败: {str(e)}")
        raise

def _upsert_rows_individually(self, batch_df: pd.DataFrame, columns: List[str],
                              upsert_sql: str, batch_num: int,
                              max_retries: int) -> Tuple[int, int]:
    """Write one batch row by row; return ``(affected_rows, failed_rows)``.

    Each row is committed on its own and retried up to ``max_retries``
    times with a growing pause. A row that never succeeds is logged and
    counted as failed; it does not abort the remaining rows.
    """
    batch_affected = 0
    batch_failed = 0

    for idx, (_, row) in enumerate(batch_df.iterrows()):
        row_retry_count = 0

        while row_retry_count <= max_retries:
            # Reset per attempt so the finally block never touches a
            # cursor left over from an earlier row or attempt.
            single_cursor = None
            try:
                if not self.check_connection():
                    self.reconnect()

                single_conn = self.get_connection()
                single_cursor = single_conn.cursor()

                row_tuple = self._convert_row_to_tuple(row, columns)
                single_cursor.execute(upsert_sql, row_tuple)
                row_affected = single_cursor.rowcount
                single_conn.commit()

                # Count only once the row is committed.
                batch_affected += row_affected
                break

            except Exception as single_e:
                row_retry_count += 1
                if row_retry_count > max_retries:
                    batch_failed += 1
                    logger.error(f"❌ 批次 {batch_num} 第 {idx + 1} 行写入失败: {single_e}")
                    break
                time.sleep(1 * row_retry_count)

            finally:
                if single_cursor:
                    single_cursor.close()

    return batch_affected, batch_failed
|
|
|
|
|
+
|
|
|
|
|
+ def _clean_and_convert_simple(self, df: pd.DataFrame, data_time_column: str = None) -> pd.DataFrame:
|
|
|
|
|
+ """简化版数据清理"""
|
|
|
|
|
+ try:
|
|
|
|
|
+ cleaned_df = df.copy()
|
|
|
|
|
+
|
|
|
|
|
+ # 确保必需字段存在
|
|
|
|
|
+ required_fields = ['id_farm', 'name_farm', 'no_model_turbine', 'id_turbine']
|
|
|
|
|
+ for field in required_fields:
|
|
|
|
|
+ if field not in cleaned_df.columns:
|
|
|
|
|
+ cleaned_df[field] = None
|
|
|
|
|
+
|
|
|
|
|
+ # 处理时间字段
|
|
|
|
|
+ if 'data_time' not in cleaned_df.columns:
|
|
|
|
|
+ if data_time_column and data_time_column in cleaned_df.columns:
|
|
|
|
|
+ cleaned_df['data_time'] = cleaned_df[data_time_column]
|
|
|
|
|
+ else:
|
|
|
|
|
+ for col in cleaned_df.columns:
|
|
|
|
|
+ col_lower = col.lower()
|
|
|
|
|
+ if any(keyword in col_lower for keyword in ['time', 'date', 'timestamp']):
|
|
|
|
|
+ cleaned_df['data_time'] = cleaned_df[col]
|
|
|
|
|
+ logger.info(f"使用字段 '{col}' 作为 data_time")
|
|
|
|
|
+ break
|
|
|
|
|
+
|
|
|
|
|
+ # 确保data_time是datetime类型
|
|
|
|
|
+ if 'data_time' in cleaned_df.columns:
|
|
|
|
|
+ try:
|
|
|
|
|
+ cleaned_df['data_time'] = pd.to_datetime(cleaned_df['data_time'], errors='coerce')
|
|
|
|
|
+ except:
|
|
|
|
|
+ logger.warning("data_time字段转换失败,保持原样")
|
|
|
|
|
+
|
|
|
|
|
+ # 处理NaN
|
|
|
|
|
+ cleaned_df = cleaned_df.replace({np.nan: None, pd.NaT: None})
|
|
|
|
|
+
|
|
|
|
|
+ # 计算数据哈希
|
|
|
|
|
+ def simple_hash(row):
|
|
|
|
|
+ try:
|
|
|
|
|
+ data_fields = [col for col in cleaned_df.columns
|
|
|
|
|
+ if col not in ['id_farm', 'name_farm', 'no_model_turbine',
|
|
|
|
|
+ 'id_turbine', 'data_time', 'data_hash']]
|
|
|
|
|
+
|
|
|
|
|
+ hash_str = ''
|
|
|
|
|
+ for field in sorted(data_fields):
|
|
|
|
|
+ val = row[field]
|
|
|
|
|
+ if val is not None:
|
|
|
|
|
+ if isinstance(val, (dt, pd.Timestamp)):
|
|
|
|
|
+ hash_str += f"{field}:{val.isoformat()}|"
|
|
|
|
|
+ else:
|
|
|
|
|
+ hash_str += f"{field}:{str(val)}|"
|
|
|
|
|
+
|
|
|
|
|
+ return hashlib.md5(hash_str.encode('utf-8')).hexdigest() if hash_str else None
|
|
|
|
|
+ except:
|
|
|
|
|
+ return None
|
|
|
|
|
+
|
|
|
|
|
+ cleaned_df['data_hash'] = cleaned_df.apply(simple_hash, axis=1)
|
|
|
|
|
+
|
|
|
|
|
+ logger.info(f"数据清理完成,原始形状: {df.shape}, 清理后形状: {cleaned_df.shape}")
|
|
|
|
|
+ return cleaned_df
|
|
|
|
|
+
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ logger.error(f"数据清理失败: {e}")
|
|
|
|
|
+ logger.error(traceback.format_exc())
|
|
|
|
|
+ return df
|
|
|
|
|
+
|
|
|
|
|
+ def _prepare_upsert_sql(self, table_name: str, columns: List[str]) -> str:
|
|
|
|
|
+ """准备UPSERT SQL语句"""
|
|
|
|
|
+ exclude_columns = ['id_farm', 'id_turbine', 'data_time', 'id',
|
|
|
|
|
+ 'create_time', 'update_time', 'data_hash']
|
|
|
|
|
+ update_columns = [col for col in columns if col not in exclude_columns]
|
|
|
|
|
|
|
|
- # 计算响应时间
|
|
|
|
|
- response_time = time.time() - start_time
|
|
|
|
|
|
|
+ column_names = ', '.join([f'`{col}`' for col in columns])
|
|
|
|
|
+ placeholders = ', '.join(['%s'] * len(columns))
|
|
|
|
|
|
|
|
- result.update({
|
|
|
|
|
- 'success': True,
|
|
|
|
|
- 'response_time': response_time,
|
|
|
|
|
- 'server_info': server_info,
|
|
|
|
|
- 'test_result': query_result
|
|
|
|
|
- })
|
|
|
|
|
|
|
+ update_clauses = []
|
|
|
|
|
+ for col in update_columns:
|
|
|
|
|
+ update_clauses.append(f"`{col}` = VALUES(`{col}`)")
|
|
|
|
|
|
|
|
- logger.info(f"数据库连接测试成功: {config.host}:{config.port}/{config.database}")
|
|
|
|
|
- logger.info(f"响应时间: {response_time:.3f}s")
|
|
|
|
|
- logger.info(f"MySQL版本: {server_info['version']}")
|
|
|
|
|
- logger.info(f"当前数据库: {server_info['database_name']}")
|
|
|
|
|
|
|
+ update_clause = ', '.join(update_clauses)
|
|
|
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- result['error'] = str(e)
|
|
|
|
|
- logger.error(f"数据库连接测试失败: {e}")
|
|
|
|
|
|
|
+ upsert_sql = f"""
|
|
|
|
|
+ INSERT INTO `{table_name}` ({column_names})
|
|
|
|
|
+ VALUES ({placeholders})
|
|
|
|
|
+ ON DUPLICATE KEY UPDATE
|
|
|
|
|
+ {update_clause}
|
|
|
|
|
+ """
|
|
|
|
|
|
|
|
- finally:
|
|
|
|
|
- if cursor:
|
|
|
|
|
- cursor.close()
|
|
|
|
|
- if conn:
|
|
|
|
|
- conn.close()
|
|
|
|
|
|
|
+ logger.debug(f"UPSERT SQL生成完成,共 {len(columns)} 列")
|
|
|
|
|
+ return upsert_sql
|
|
|
|
|
|
|
|
- return result
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-# 添加一个监控守护进程示例
|
|
|
|
|
-class DatabaseMonitorDaemon:
|
|
|
|
|
- """数据库监控守护进程"""
|
|
|
|
|
-
|
|
|
|
|
- def __init__(self, db_manager: DatabaseManager, check_interval: int = 5):
|
|
|
|
|
- self.db_manager = db_manager
|
|
|
|
|
- self.check_interval = check_interval
|
|
|
|
|
- self.monitor_thread = None
|
|
|
|
|
- self.running = False
|
|
|
|
|
|
|
+ def _convert_to_numeric(self, value):
|
|
|
|
|
+ """将值转换为数值类型"""
|
|
|
|
|
+ if pd.isna(value) or value is None:
|
|
|
|
|
+ return None
|
|
|
|
|
|
|
|
- def start(self):
|
|
|
|
|
- """启动监控"""
|
|
|
|
|
- if not self.running:
|
|
|
|
|
- self.running = True
|
|
|
|
|
- self.monitor_thread = Thread(target=self._monitor_loop, daemon=True)
|
|
|
|
|
- self.monitor_thread.start()
|
|
|
|
|
- logger.info(f"数据库监控守护进程已启动,检查间隔: {self.check_interval}秒")
|
|
|
|
|
-
|
|
|
|
|
- def stop(self):
|
|
|
|
|
- """停止监控"""
|
|
|
|
|
- self.running = False
|
|
|
|
|
- if self.monitor_thread:
|
|
|
|
|
- self.monitor_thread.join(timeout=10)
|
|
|
|
|
- logger.info("数据库监控守护进程已停止")
|
|
|
|
|
-
|
|
|
|
|
- def _monitor_loop(self):
|
|
|
|
|
- """监控循环"""
|
|
|
|
|
- while self.running:
|
|
|
|
|
|
|
+ try:
|
|
|
|
|
+ if isinstance(value, (int, float, np.integer, np.floating)):
|
|
|
|
|
+ if isinstance(value, np.integer):
|
|
|
|
|
+ return int(value)
|
|
|
|
|
+ elif isinstance(value, np.floating):
|
|
|
|
|
+ return float(value)
|
|
|
|
|
+ return value
|
|
|
|
|
+
|
|
|
|
|
+ if isinstance(value, (bool, np.bool_)):
|
|
|
|
|
+ return 1 if bool(value) else 0
|
|
|
|
|
+
|
|
|
|
|
+ if isinstance(value, str):
|
|
|
|
|
+ cleaned = value.strip()
|
|
|
|
|
+ if cleaned == '':
|
|
|
|
|
+ return None
|
|
|
|
|
+
|
|
|
|
|
+ cleaned = cleaned.replace(',', '').replace('%', '').replace(' ', '')
|
|
|
|
|
+
|
|
|
|
|
+ try:
|
|
|
|
|
+ return float(cleaned)
|
|
|
|
|
+ except ValueError:
|
|
|
|
|
+ try:
|
|
|
|
|
+ return int(cleaned)
|
|
|
|
|
+ except ValueError:
|
|
|
|
|
+ return None
|
|
|
|
|
+
|
|
|
try:
|
|
try:
|
|
|
- # 获取当前状态
|
|
|
|
|
- status = self.db_manager.get_connection_status()
|
|
|
|
|
|
|
+ str_val = str(value)
|
|
|
|
|
+ cleaned = str_val.replace(',', '').replace('%', '').replace(' ', '')
|
|
|
|
|
+ return float(cleaned)
|
|
|
|
|
+ except:
|
|
|
|
|
+ return None
|
|
|
|
|
|
|
|
- # 记录状态变化
|
|
|
|
|
- if status['status'] != "HEALTHY":
|
|
|
|
|
- logger.warning(f"数据库连接状态异常: {status['status']}")
|
|
|
|
|
-
|
|
|
|
|
- # 如果连续错误超过阈值,尝试自动修复
|
|
|
|
|
- if status['error_count'] >= 3:
|
|
|
|
|
- logger.info("检测到连续错误,尝试自动修复连接...")
|
|
|
|
|
- self.db_manager._auto_reconnect()
|
|
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ return None
|
|
|
|
|
+
|
|
|
|
|
+ def _convert_row_to_tuple(self, row: pd.Series, columns: List[str]) -> Tuple:
|
|
|
|
|
+ """将单行数据转换为元组"""
|
|
|
|
|
+ try:
|
|
|
|
|
+ row_values = []
|
|
|
|
|
+
|
|
|
|
|
+ keep_original_fields = [
|
|
|
|
|
+ 'id', 'data_time', 'id_farm', 'id_turbine',
|
|
|
|
|
+ 'name_farm', 'no_model_turbine', 'create_time',
|
|
|
|
|
+ 'update_time', 'data_hash'
|
|
|
|
|
+ ]
|
|
|
|
|
+
|
|
|
|
|
+ for col in columns:
|
|
|
|
|
+ value = row[col]
|
|
|
|
|
|
|
|
- # 等待下一次检查
|
|
|
|
|
- time.sleep(self.check_interval)
|
|
|
|
|
|
|
+ if col == 'data_time':
|
|
|
|
|
+ if pd.isna(value):
|
|
|
|
|
+ row_values.append(None)
|
|
|
|
|
+ elif isinstance(value, pd.Timestamp):
|
|
|
|
|
+ row_values.append(value.to_pydatetime())
|
|
|
|
|
+ elif isinstance(value, dt):
|
|
|
|
|
+ row_values.append(value)
|
|
|
|
|
+ else:
|
|
|
|
|
+ try:
|
|
|
|
|
+ row_values.append(pd.to_datetime(value).to_pydatetime())
|
|
|
|
|
+ except:
|
|
|
|
|
+ row_values.append(None)
|
|
|
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- logger.error(f"监控循环异常: {e}")
|
|
|
|
|
- time.sleep(self.check_interval)
|
|
|
|
|
|
|
+ elif col in ['id_farm', 'id_turbine', 'name_farm', 'no_model_turbine', 'data_hash']:
|
|
|
|
|
+ if pd.isna(value) or value is None:
|
|
|
|
|
+ row_values.append(None)
|
|
|
|
|
+ else:
|
|
|
|
|
+ row_values.append(str(value))
|
|
|
|
|
+
|
|
|
|
|
+ elif col in keep_original_fields:
|
|
|
|
|
+ row_values.append(value)
|
|
|
|
|
+
|
|
|
|
|
+ else:
|
|
|
|
|
+ numeric_value = self._convert_to_numeric(value)
|
|
|
|
|
+ row_values.append(numeric_value)
|
|
|
|
|
+
|
|
|
|
|
+ return tuple(row_values)
|
|
|
|
|
+
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ logger.warning(f"转换行数据失败: {e}")
|
|
|
|
|
+ return tuple([None] * len(columns))
|