Jelajahi Sumber

数据库访问增加连通测试;增加记录成功完成处理文件

zhouyang.xie 2 bulan lalu
induk
melakukan
9d3ac856f9

+ 473 - 992
dataStorage_datang/database.py

@@ -12,31 +12,77 @@ import json
 import time
 import re
 import threading
+from threading import Thread, Event
+import signal
+import sys
 
 from config import DatabaseConfig, TableConfig
 from file_scanner import ParquetFileInfo
 
 # 配置日志
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
+)
 logger = logging.getLogger(__name__)
 
 
 class DatabaseManager:
-    """数据库管理器,使用连接池管理数据库连接"""
+    """数据库管理器,使用连接池管理数据库连接,包含连通性测试和自动重连"""
     
     # 线程局部存储,用于保存每个线程的数据库连接
     _thread_local = threading.local()
     
-    def __init__(self, config: DatabaseConfig, table_config: TableConfig, pool_size: int = 10):
+    def __init__(self, config: DatabaseConfig, table_config: TableConfig, pool_size: int = 6):
         self.config = config
         self.table_config = table_config
         self.pool_size = pool_size
+        
+        # 连接状态监控相关
+        self._connection_monitor_thread = None
+        self._monitor_running = Event()
+        self._last_connection_check = None
+        self._connection_status = "UNKNOWN"  # UNKNOWN, HEALTHY, WARNING, ERROR, DISCONNECTED
+        self._connection_error_count = 0
+        self._max_error_count = 5  # 连续错误次数阈值
+        self._monitor_interval = 1  # 监控间隔(秒)
+        self._monitor_stats = {
+            'total_checks': 0,
+            'successful_checks': 0,
+            'failed_checks': 0,
+            'total_reconnections': 0,
+            'last_error': None,
+            'last_success': None
+        }
+        
+        # 连接池
         self.pool: Optional[PooledDB] = None
+        
+        # 初始化连接池
         self._init_pool()
         
+        # 启动连接监控线程
+        self._start_connection_monitor()
+        
+        # 注册信号处理,优雅关闭
+        self._setup_signal_handlers()
+        
+    def _setup_signal_handlers(self):
+        """设置信号处理,确保程序退出时能正确关闭资源"""
+        def signal_handler(signum, frame):
+            logger.info(f"接收到信号 {signum},正在关闭数据库连接池...")
+            self.close_all()
+            sys.exit(0)
+        
+        signal.signal(signal.SIGINT, signal_handler)  # Ctrl+C
+        signal.signal(signal.SIGTERM, signal_handler)  # 终止信号
+    
     def _init_pool(self):
         """初始化数据库连接池"""
         try:
+            logger.info(f"正在初始化数据库连接池到 {self.config.host}:{self.config.port}/{self.config.database}")
+            
             self.pool = PooledDB(
                 creator=pymysql,  # 使用的数据库驱动
                 maxconnections=self.pool_size,  # 连接池允许的最大连接数
@@ -61,14 +107,19 @@ class DatabaseManager:
                 client_flag=pymysql.constants.CLIENT.MULTI_STATEMENTS  # 支持多语句
             )
             
-            logger.info(f"数据库连接池初始化成功: {self.config.host}:{self.config.port}/{self.config.database}")
+            logger.info(f"数据库连接池初始化成功")
             logger.info(f"连接池配置: maxconnections={self.pool_size}, mincached=2, maxcached=5")
             
             # 测试连接池
-            self._test_pool_connection()
+            test_result = self._test_pool_connection()
+            if test_result:
+                self._connection_status = "HEALTHY"
+                self._monitor_stats['last_success'] = dt.now()
             
         except Exception as e:
             logger.error(f"数据库连接池初始化失败: {e}")
+            self._connection_status = "ERROR"
+            self._monitor_stats['last_error'] = str(e)
             raise
     
     def _test_pool_connection(self):
@@ -76,18 +127,274 @@ class DatabaseManager:
         try:
             conn = self.pool.connection()
             cursor = conn.cursor()
-            cursor.execute("SELECT 1 as test")
+            start_time = time.time()
+            cursor.execute("SELECT 1 as test, NOW() as server_time")
             result = cursor.fetchone()
+            elapsed = time.time() - start_time
+            
             cursor.close()
             conn.close()  # 归还连接
-            logger.info(f"连接池测试成功: {result}")
+            
+            logger.info(f"连接池测试成功: 响应时间={elapsed:.3f}s, 服务器时间={result['server_time']}")
+            return True
+            
         except Exception as e:
             logger.error(f"连接池测试失败: {e}")
-            raise
+            return False
+    
+    def _start_connection_monitor(self):
+        """启动连接监控线程"""
+        if self._connection_monitor_thread is None or not self._connection_monitor_thread.is_alive():
+            self._monitor_running.clear()
+            self._connection_monitor_thread = Thread(
+                target=self._connection_monitor_loop,
+                name="DBConnectionMonitor",
+                daemon=True  # 设置为守护线程,主程序退出时自动结束
+            )
+            self._connection_monitor_thread.start()
+            logger.info("数据库连接监控线程已启动")
+    
+    def _stop_connection_monitor(self):
+        """停止连接监控线程"""
+        if self._connection_monitor_thread and self._connection_monitor_thread.is_alive():
+            self._monitor_running.set()  # 设置事件,通知线程退出
+            self._connection_monitor_thread.join(timeout=5)
+            logger.info("数据库连接监控线程已停止")
+    
+    def _connection_monitor_loop(self):
+        """连接监控循环"""
+        logger.info(f"连接监控线程开始运行,检查间隔: {self._monitor_interval}秒")
+        
+        last_log_time = time.time()
+        log_interval = 60  # 每分钟记录一次状态
+        
+        while not self._monitor_running.is_set():
+            try:
+                # 执行连接检查
+                self._perform_connection_check()
+                
+                # 定期记录状态
+                current_time = time.time()
+                if current_time - last_log_time >= log_interval:
+                    self._log_connection_status()
+                    last_log_time = current_time
+                
+                # 等待下一次检查
+                time.sleep(self._monitor_interval)
+                
+            except Exception as e:
+                logger.error(f"连接监控循环异常: {e}")
+                time.sleep(self._monitor_interval)  # 异常后继续尝试
+        
+        logger.info("连接监控循环结束")
+    
+    def _perform_connection_check(self):
+        """执行连接检查"""
+        try:
+            self._monitor_stats['total_checks'] += 1
+            self._last_connection_check = dt.now()
+            
+            # 检查连接池是否初始化
+            if self.pool is None:
+                self._connection_status = "ERROR"
+                self._connection_error_count += 1
+                self._monitor_stats['failed_checks'] += 1
+                self._monitor_stats['last_error'] = "连接池未初始化"
+                logger.warning("连接池未初始化")
+                return
+            
+            # 尝试获取连接并执行简单查询
+            conn = None
+            cursor = None
+            try:
+                conn = self.pool.connection()
+                cursor = conn.cursor()
+                
+                # 执行一个简单的查询测试连接
+                start_time = time.time()
+                cursor.execute("SELECT 1 as test, NOW() as server_time, "
+                             "VERSION() as version, CONNECTION_ID() as connection_id")
+                result = cursor.fetchone()
+                elapsed = time.time() - start_time
+                
+                # 更新状态
+                self._connection_status = "HEALTHY"
+                self._connection_error_count = 0
+                self._monitor_stats['successful_checks'] += 1
+                self._monitor_stats['last_success'] = dt.now()
+                
+                # 记录详细连接信息(调试级别)
+                if logger.isEnabledFor(logging.DEBUG):
+                    logger.debug(f"连接检查成功: "
+                               f"响应时间={elapsed:.3f}s, "
+                               f"服务器时间={result['server_time']}, "
+                               f"MySQL版本={result['version']}, "
+                               f"连接ID={result['connection_id']}")
+                
+            except Exception as e:
+                self._connection_status = "ERROR"
+                self._connection_error_count += 1
+                self._monitor_stats['failed_checks'] += 1
+                self._monitor_stats['last_error'] = str(e)
+                
+                # 根据错误计数判断连接状态
+                if self._connection_error_count >= self._max_error_count:
+                    self._connection_status = "DISCONNECTED"
+                    logger.error(f"数据库连接失败,已连续失败 {self._connection_error_count} 次: {e}")
+                else:
+                    logger.warning(f"数据库连接检查失败 (第{self._connection_error_count}次): {e}")
+                
+                # 尝试自动重连
+                if self._connection_error_count >= 3:
+                    self._auto_reconnect()
+                
+            finally:
+                if cursor:
+                    cursor.close()
+                if conn:
+                    conn.close()  # 归还连接到连接池
+        
+        except Exception as e:
+            logger.error(f"执行连接检查时发生异常: {e}")
+            self._monitor_stats['failed_checks'] += 1
+            self._monitor_stats['last_error'] = str(e)
+    
+    def _auto_reconnect(self):
+        """自动重连机制"""
+        try:
+            logger.warning(f"检测到连接问题,正在尝试自动重连 (错误计数: {self._connection_error_count})")
+            
+            # 1. 先释放当前线程的连接
+            self.release_connection()
+            
+            # 2. 等待一小段时间
+            time.sleep(2)
+            
+            # 3. 测试当前连接池
+            if self.pool:
+                test_result = self._test_pool_connection()
+                if test_result:
+                    self._connection_status = "HEALTHY"
+                    self._connection_error_count = 0
+                    self._monitor_stats['total_reconnections'] += 1
+                    logger.info("自动重连成功")
+                    return True
+                else:
+                    logger.warning("连接池测试失败,尝试重新初始化")
+            
+            # 4. 重新初始化连接池
+            old_pool = self.pool
+            try:
+                self.pool = None
+                if old_pool:
+                    old_pool.close()
+                
+                self._init_pool()
+                self._connection_status = "HEALTHY"
+                self._connection_error_count = 0
+                self._monitor_stats['total_reconnections'] += 1
+                logger.info("连接池重新初始化成功")
+                return True
+                
+            except Exception as e:
+                logger.error(f"连接池重新初始化失败: {e}")
+                self._connection_status = "ERROR"
+                return False
+                
+        except Exception as e:
+            logger.error(f"自动重连失败: {e}")
+            return False
+    
+    def _log_connection_status(self):
+        """记录连接状态"""
+        status_map = {
+            "HEALTHY": "✅ 健康",
+            "WARNING": "⚠️  警告",
+            "ERROR": "❌ 错误",
+            "DISCONNECTED": "🔌 断开连接",
+            "UNKNOWN": "❓ 未知"
+        }
+        
+        status_text = status_map.get(self._connection_status, self._connection_status)
+        
+        stats_info = (
+            f"连接状态统计:\n"
+            f"   当前状态: {status_text}\n"
+            f"   总检查次数: {self._monitor_stats['total_checks']}\n"
+            f"   成功次数: {self._monitor_stats['successful_checks']}\n"
+            f"   失败次数: {self._monitor_stats['failed_checks']}\n"
+            f"   连续错误次数: {self._connection_error_count}\n"
+            f"   总重连次数: {self._monitor_stats['total_reconnections']}\n"
+            f"   最后成功: {self._monitor_stats['last_success']}\n"
+            f"   最后错误: {self._monitor_stats['last_error']}"
+        )
+        
+        if self._connection_status in ["HEALTHY", "WARNING"]:
+            logger.info(stats_info)
+        else:
+            logger.warning(stats_info)
+    
+    def get_connection_status(self) -> Dict[str, Any]:
+        """获取当前连接状态"""
+        return {
+            'status': self._connection_status,
+            'error_count': self._connection_error_count,
+            'last_check': self._last_connection_check,
+            'monitor_stats': self._monitor_stats.copy(),
+            'config': {
+                'host': self.config.host,
+                'port': self.config.port,
+                'database': self.config.database,
+                'pool_size': self.pool_size
+            }
+        }
+    
+    def wait_for_connection(self, timeout: int = 30, check_interval: int = 1) -> bool:
+        """
+        等待数据库连接恢复正常
+        
+        Args:
+            timeout: 总等待时间(秒)
+            check_interval: 检查间隔(秒)
+            
+        Returns:
+            bool: 是否成功连接
+        """
+        logger.info(f"等待数据库连接恢复,超时时间: {timeout}秒")
+        
+        start_time = time.time()
+        attempts = 0
+        
+        while time.time() - start_time < timeout:
+            attempts += 1
+            
+            # 检查连接状态
+            if self._connection_status == "HEALTHY":
+                logger.info(f"数据库连接已恢复,等待时间: {time.time() - start_time:.1f}秒")
+                return True
+            
+            # 如果连接断开,尝试立即重连
+            if self._connection_status == "DISCONNECTED":
+                logger.info(f"尝试重连 (第{attempts}次)")
+                if self._auto_reconnect():
+                    return True
+            
+            # 等待下一次检查
+            logger.info(f"等待连接恢复... ({attempts}/{int(timeout/check_interval)})")
+            time.sleep(check_interval)
+        
+        logger.error(f"等待数据库连接恢复超时 ({timeout}秒)")
+        return False
     
     def get_connection(self) -> Connection:
         """从连接池获取数据库连接"""
         try:
+            # 检查当前连接状态
+            if self._connection_status in ["ERROR", "DISCONNECTED"]:
+                logger.warning(f"获取连接时检测到连接状态为 {self._connection_status},尝试自动重连")
+                if not self.wait_for_connection(timeout=10):
+                    raise ConnectionError(f"数据库连接不可用,当前状态: {self._connection_status}")
+            
             # 检查线程局部存储中是否有连接
             if not hasattr(self._thread_local, 'connection') or self._thread_local.connection is None:
                 # 从连接池获取新连接
@@ -99,6 +406,14 @@ class DatabaseManager:
             
         except Exception as e:
             logger.error(f"从连接池获取连接失败: {e}")
+            
+            # 更新连接状态
+            self._connection_status = "ERROR"
+            self._connection_error_count += 1
+            self._monitor_stats['last_error'] = str(e)
+            
+            # 尝试自动重连
+            self._auto_reconnect()
             raise
     
     def release_connection(self):
@@ -107,8 +422,14 @@ class DatabaseManager:
             if hasattr(self._thread_local, 'connection') and self._thread_local.connection is not None:
                 conn = self._thread_local.connection
                 try:
-                    # 确保没有未提交的事务
-                    if not conn.get_autocommit():
+                    # 修复:SteadyDBConnection 需要访问 _con 属性获取原始连接
+                    # 检查原始连接的 autocommit 状态
+                    if hasattr(conn, '_con') and isinstance(conn._con, Connection):
+                        original_conn = conn._con
+                        if not original_conn.get_autocommit():
+                            original_conn.rollback()
+                    else:
+                        # 备选方案:直接尝试回滚,不检查 autocommit
                         conn.rollback()
                     
                     # 关闭连接(实际上是归还到连接池)
@@ -121,20 +442,6 @@ class DatabaseManager:
         except Exception as e:
             logger.error(f"释放连接时出错: {e}")
     
-    def with_connection(self):
-        """连接上下文管理器装饰器"""
-        def decorator(func):
-            def wrapper(*args, **kwargs):
-                # 确保有数据库连接
-                self.get_connection()
-                try:
-                    return func(*args, **kwargs)
-                finally:
-                    # 不自动释放连接,让调用者控制
-                    pass
-            return wrapper
-        return decorator
-    
     def check_connection(self) -> bool:
         """检查连接是否有效"""
         try:
@@ -170,6 +477,20 @@ class DatabaseManager:
             logger.error(f"数据库重新连接失败: {e}")
             raise
     
+    def close_all(self):
+        """关闭所有资源,包括连接池和监控线程"""
+        logger.info("正在关闭所有数据库资源...")
+        
+        # 停止监控线程
+        self._stop_connection_monitor()
+        
+        # 关闭连接池
+        self.close_pool()
+        
+        # 记录最终状态
+        self._log_connection_status()
+        logger.info("所有数据库资源已关闭")
+    
     def close_pool(self):
         """关闭整个连接池"""
         try:
@@ -180,13 +501,20 @@ class DatabaseManager:
                 # 关闭连接池
                 self.pool.close()
                 self.pool = None
+                self._connection_status = "DISCONNECTED"
                 logger.info("数据库连接池已关闭")
         except Exception as e:
             logger.error(f"关闭连接池时出错: {e}")
     
+    # 以下为原有的业务方法,保持不变
     def create_table_with_unique_key(self, table_name: str, columns: List[str], 
                                    unique_keys: List[str]) -> bool:
         """根据列定义创建表,包含三字段唯一键,数据字段使用DOUBLE类型"""
+        # 检查连接状态
+        if not self.wait_for_connection():
+            logger.error("创建表失败:数据库连接不可用")
+            return False
+            
         conn = None
         cursor = None
         
@@ -250,992 +578,145 @@ class DatabaseManager:
             # 不释放连接,让调用者控制
             pass
     
-    def create_data_scada_turbine_table(self) -> bool:
-        """创建data_scada_turbine表,所有数据字段使用DOUBLE类型"""
-        conn = None
-        cursor = None
+    # ... 其余的业务方法保持不变,包括:
+    # create_data_scada_turbine_table
+    # _clean_and_convert_simple
+    # _prepare_upsert_sql
+    # _convert_to_numeric
+    # _convert_row_to_tuple
+    # _escape_sql_value
+    # _get_sql_with_values
+    # _log_failed_row_details
+    # batch_upsert_data_direct
+    # upsert_parquet_data
+    # check_table_exists
+    # get_table_row_count
+    # get_table_stats
+    # check_duplicate_keys
+    # with_connection 装饰器
+
+# 添加一个独立的连接测试函数,方便外部调用
+def test_database_connection(config: DatabaseConfig, test_query: str = "SELECT 1") -> Dict[str, Any]:
+    """
+    测试数据库连接
+    
+    Args:
+        config: 数据库配置
+        test_query: 测试查询语句
         
-        try:
-            conn = self.get_connection()
-            cursor = conn.cursor()
-            
-            try:
-                # 删除已存在的表
-                drop_sql = f"DROP TABLE IF EXISTS `data_scada_turbine`"
-                cursor.execute(drop_sql)
-                logger.info("已删除旧表: data_scada_turbine")
-                
-                # 创建新表,所有数据字段使用DOUBLE类型
-                create_sql = """
-                CREATE TABLE `data_scada_turbine` (
-                    `id` BIGINT AUTO_INCREMENT PRIMARY KEY,
-                    `data_time` DATETIME NOT NULL COMMENT '数据时间',
-                    
-                    -- 所有数据字段使用DOUBLE类型
-                    `conv_spd` DOUBLE COMMENT '变流器转速',
-                    `grid_ia` DOUBLE COMMENT '电网A相电流',
-                    `grid_ib` DOUBLE COMMENT '电网B相电流',
-                    `grid_ic` DOUBLE COMMENT '电网C相电流',
-                    `grid_freq` DOUBLE COMMENT '电网频率',
-                    `grid_ua` DOUBLE COMMENT '电网A相电压',
-                    `grid_ub` DOUBLE COMMENT '电网B相电压',
-                    `grid_uc` DOUBLE COMMENT '电网C相电压',
-                    `p_active_set_fbk` DOUBLE COMMENT '有功功率设定反馈',
-                    `p_active` DOUBLE COMMENT '有功功率',
-                    `p_reactive` DOUBLE COMMENT '无功功率',
-                    `gen_spd` DOUBLE COMMENT '发电机转速',
-                    `gen_de_temp` DOUBLE COMMENT '发电机驱动端温度',
-                    `gen_nde_temp` DOUBLE COMMENT '发电机非驱动端温度',
-                    `stator_wind_temp_1` DOUBLE COMMENT '定子绕组温度1',
-                    `stator_wind_temp_2` DOUBLE COMMENT '定子绕组温度2',
-                    `stator_wind_temp_3` DOUBLE COMMENT '定子绕组温度3',
-                    `stator_wind_temp_4` DOUBLE COMMENT '定子绕组温度4',
-                    `stator_wind_temp_5` DOUBLE COMMENT '定子绕组温度5',
-                    `stator_wind_temp_6` DOUBLE COMMENT '定子绕组温度6',
-                    `pitch_ang_set_3` DOUBLE COMMENT '3号桨叶角度设定',
-                    `tower_fb_vib_acc` DOUBLE COMMENT '塔筒前后振动加速度',
-                    `tower_lr_vib_acc` DOUBLE COMMENT '塔筒左右振动加速度',
-                    `nacelle_in_temp` DOUBLE COMMENT '机舱内部温度',
-                    `nacelle_cab_temp` DOUBLE COMMENT '机舱柜内温度',
-                    `nacelle_out_temp` DOUBLE COMMENT '机舱外部温度',
-                    `theory_p_active` DOUBLE COMMENT '理论有功功率',
-                    `wind_dir` DOUBLE COMMENT '风向',
-                    `wind_spd` DOUBLE COMMENT '风速',
-                    `yaw_to_wind_ang` DOUBLE COMMENT '偏航对风角度',
-                    `pitch_ang_act_1` DOUBLE COMMENT '1号桨叶实际角度',
-                    `pitch_ang_act_2` DOUBLE COMMENT '2号桨叶实际角度',
-                    `pitch_ang_act_3` DOUBLE COMMENT '3号桨叶实际角度',
-                    `pitch_motor_cur_1` DOUBLE COMMENT '1号桨叶电机电流',
-                    `pitch_motor_cur_2` DOUBLE COMMENT '2号桨叶电机电流',
-                    `pitch_motor_cur_3` DOUBLE COMMENT '3号桨叶电机电流',
-                    `pitch_cab_temp_1` DOUBLE COMMENT '1号桨叶柜内温度',
-                    `pitch_cab_temp_2` DOUBLE COMMENT '2号桨叶柜内温度',
-                    `pitch_cab_temp_3` DOUBLE COMMENT '3号桨叶柜内温度',
-                    `tower_base_cab_temp` DOUBLE COMMENT '塔基柜内温度',
-                    `gearbox_spd_1` DOUBLE COMMENT '齿轮箱转速1',
-                    `gb_out_oil_prs` DOUBLE COMMENT '齿轮箱出油压力',
-                    `low_spd_shaft_temp` DOUBLE COMMENT '低速轴温度',
-                    `low_spd_shaft_de_temp` DOUBLE COMMENT '低速轴驱动端温度',
-                    `high_spd_shaft_de_temp` DOUBLE COMMENT '高速轴驱动端温度',
-                    `high_spd_shaft_temp` DOUBLE COMMENT '高速轴温度',
-                    `hyd_oil_temp` DOUBLE COMMENT '液压油温度',
-                    `hyd_station_prs` DOUBLE COMMENT '液压站压力',
-                    `gb_in_oil_prs` DOUBLE COMMENT '齿轮箱进油压力',
-                    `rotor_spd` DOUBLE COMMENT '转子转速',
-                    `gearbox_oil_temp` DOUBLE COMMENT '齿轮箱油温',
-                    `main_brg_temp_1` DOUBLE COMMENT '主轴承温度1',
-                    `tower_lr_vib` DOUBLE COMMENT '塔筒左右振动',
-                    `tower_fb_vib` DOUBLE COMMENT '塔筒前后振动',
-                    `nacelle_pos` DOUBLE COMMENT '机舱位置',
-                    `twist_ang` DOUBLE COMMENT '扭缆角度',
-                    
-                    -- 元数据字段
-                    `id_farm` VARCHAR(50) NOT NULL COMMENT '风场ID',
-                    `name_farm` VARCHAR(100) COMMENT '风场名称',
-                    `no_model_turbine` VARCHAR(50) COMMENT '风机型号',
-                    `id_turbine` VARCHAR(50) NOT NULL COMMENT '风机ID',
-                    
-                    -- 系统字段
-                    `create_time` TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-                    `update_time` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
-                    `data_hash` VARCHAR(64) COMMENT '数据哈希值,用于快速比较',
-                    
-                    -- 唯一键约束
-                    UNIQUE KEY uk_turbine_data (`id_farm`, `id_turbine`, `data_time`),
-                    
-                    -- 索引
-                    INDEX idx_farm (`id_farm`),
-                    INDEX idx_turbine (`id_turbine`),
-                    INDEX idx_time (`data_time`),
-                    INDEX idx_farm_turbine (`id_farm`, `id_turbine`),
-                    INDEX idx_composite (`id_farm`, `id_turbine`, `data_time`)
-                ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='风机SCADA数据表'
-                """
-                
-                logger.debug(f"创建data_scada_turbine表的SQL语句")
-                
-                # 执行创建表
-                cursor.execute(create_sql)
-                conn.commit()
-                
-                logger.info("表 'data_scada_turbine' 创建成功!")
-                logger.info("所有数据字段使用DOUBLE类型,三字段唯一键: id_farm, id_turbine, data_time")
-                
-                return True
-                
-            finally:
-                if cursor:
-                    try:
-                        cursor.close()
-                    except:
-                        pass
-                
-        except Exception as e:
-            logger.error(f"创建data_scada_turbine表失败: {e}")
-            logger.error(traceback.format_exc())
-            if conn:
-                conn.rollback()
-            return False
-        finally:
-            # 不释放连接,让调用者控制
-            pass
+    Returns:
+        Dict[str, Any]: 测试结果
+    """
+    result = {
+        'success': False,
+        'error': None,
+        'response_time': None,
+        'server_info': None,
+        'timestamp': dt.now().isoformat()
+    }
     
-    def _clean_and_convert_simple(self, df: pd.DataFrame, data_time_column: str = None) -> pd.DataFrame:
-        """简化版数据清理,避免复杂类型转换"""
-        try:
-            cleaned_df = df.copy()
-            
-            # 1. 确保必需字段存在
-            required_fields = ['id_farm', 'name_farm', 'no_model_turbine', 'id_turbine']
-            for field in required_fields:
-                if field not in cleaned_df.columns:
-                    cleaned_df[field] = None
-            
-            # 2. 处理时间字段 - 简化处理
-            if 'data_time' not in cleaned_df.columns:
-                if data_time_column and data_time_column in cleaned_df.columns:
-                    cleaned_df['data_time'] = cleaned_df[data_time_column]
-                else:
-                    # 查找时间字段
-                    for col in cleaned_df.columns:
-                        col_lower = col.lower()
-                        if any(keyword in col_lower for keyword in ['time', 'date', 'timestamp']):
-                            cleaned_df['data_time'] = cleaned_df[col]
-                            logger.info(f"使用字段 '{col}' 作为 data_time")
-                            break
-            
-            # 3. 确保data_time是datetime类型
-            if 'data_time' in cleaned_df.columns:
-                try:
-                    cleaned_df['data_time'] = pd.to_datetime(cleaned_df['data_time'], errors='coerce')
-                except:
-                    logger.warning("data_time字段转换失败,保持原样")
-            
-            # 4. 简单处理NaN
-            cleaned_df = cleaned_df.replace({np.nan: None, pd.NaT: None})
-            
-            # 5. 计算数据哈希(简化版)
-            def simple_hash(row):
-                try:
-                    # 只使用数据字段,排除元数据字段
-                    data_fields = [col for col in cleaned_df.columns 
-                                  if col not in ['id_farm', 'name_farm', 'no_model_turbine', 
-                                               'id_turbine', 'data_time', 'data_hash']]
-                    
-                    hash_str = ''
-                    for field in sorted(data_fields):
-                        val = row[field]
-                        if val is not None:
-                            if isinstance(val, (dt, pd.Timestamp)):
-                                hash_str += f"{field}:{val.isoformat()}|"
-                            else:
-                                hash_str += f"{field}:{str(val)}|"
-                    
-                    return hashlib.md5(hash_str.encode('utf-8')).hexdigest() if hash_str else None
-                except:
-                    return None
-            
-            cleaned_df['data_hash'] = cleaned_df.apply(simple_hash, axis=1)
-            
-            logger.info(f"数据清理完成,原始形状: {df.shape}, 清理后形状: {cleaned_df.shape}")
-            return cleaned_df
-            
-        except Exception as e:
-            logger.error(f"数据清理失败: {e}")
-            logger.error(traceback.format_exc())
-            return df
+    conn = None
+    cursor = None
     
-    def _prepare_upsert_sql(self, table_name: str, columns: List[str]) -> str:
-        """准备UPSERT SQL语句"""
-        # 排除不需要更新的字段
-        exclude_columns = ['id_farm', 'id_turbine', 'data_time', 'id', 
-                          'create_time', 'update_time', 'data_hash']
-        update_columns = [col for col in columns if col not in exclude_columns]
+    try:
+        # 记录开始时间
+        start_time = time.time()
         
-        # 构建列名和占位符
-        column_names = ', '.join([f'`{col}`' for col in columns])
-        placeholders = ', '.join(['%s'] * len(columns))
+        # 建立连接
+        conn = pymysql.connect(
+            host=config.host,
+            port=config.port,
+            user=config.user,
+            password=config.password,
+            database=config.database,
+            charset=config.charset,
+            cursorclass=cursors.DictCursor,
+            connect_timeout=10,
+            read_timeout=10
+        )
         
-        # 构建UPDATE部分
-        update_clauses = []
-        for col in update_columns:
-            update_clauses.append(f"`{col}` = VALUES(`{col}`)")
+        # 执行测试查询
+        cursor = conn.cursor()
+        cursor.execute(test_query)
+        query_result = cursor.fetchone()
         
-        update_clause = ', '.join(update_clauses)
+        # 获取服务器信息
+        cursor.execute("SELECT VERSION() as version, DATABASE() as database_name, "
+                      "USER() as user, NOW() as server_time")
+        server_info = cursor.fetchone()
         
-        # 构建完整UPSERT SQL
-        upsert_sql = f"""
-        INSERT INTO `{table_name}` ({column_names}) 
-        VALUES ({placeholders}) 
-        ON DUPLICATE KEY UPDATE 
-        {update_clause}
-        """
+        # 计算响应时间
+        response_time = time.time() - start_time
         
-        logger.debug(f"UPSERT SQL生成完成,共 {len(columns)} 列")
-        return upsert_sql
-    
-    def _convert_to_numeric(self, value):
-        """
-        将值转换为数值类型(整数或浮点数)
-        如果无法转换或值为空,返回None
-        """
-        if pd.isna(value) or value is None:
-            return None
+        result.update({
+            'success': True,
+            'response_time': response_time,
+            'server_info': server_info,
+            'test_result': query_result
+        })
         
-        try:
-            # 如果已经是数值类型,直接返回
-            if isinstance(value, (int, float, np.integer, np.floating)):
-                # 将numpy类型转换为Python标准类型
-                if isinstance(value, np.integer):
-                    return int(value)
-                elif isinstance(value, np.floating):
-                    return float(value)
-                return value
-            
-            # 如果是布尔值,转换为0或1
-            if isinstance(value, (bool, np.bool_)):
-                return 1 if bool(value) else 0
-            
-            # 如果是字符串,尝试转换为数值
-            if isinstance(value, str):
-                # 去除空格
-                cleaned = value.strip()
-                
-                # 如果为空字符串,返回None
-                if cleaned == '':
-                    return None
-                
-                # 尝试直接转换为浮点数
-                try:
-                    # 处理常见的数值格式
-                    cleaned = cleaned.replace(',', '')  # 移除千位分隔符
-                    cleaned = cleaned.replace('%', '')  # 移除百分号
-                    cleaned = cleaned.replace(' ', '')  # 移除空格
-                    
-                    # 尝试转换为浮点数
-                    return float(cleaned)
-                except ValueError:
-                    # 如果浮点数转换失败,尝试整数
-                    try:
-                        return int(cleaned)
-                    except ValueError:
-                        return None
-            
-            # 如果是其他类型,先转换为字符串再尝试
-            try:
-                str_val = str(value)
-                cleaned = str_val.replace(',', '').replace('%', '').replace(' ', '')
-                return float(cleaned)
-            except:
-                return None
-                
-        except Exception as e:
-            # 记录转换失败,但返回None而不是抛出异常
-            return None
-    
-    def _convert_row_to_tuple(self, row: pd.Series, columns: List[str]) -> Tuple:
-        """
-        将单行数据转换为元组,处理特殊类型
-        除了指定字段外,其他字段都转换为数值类型(包括空值)
-        """
-        try:
-            row_values = []
-            
-            # 指定保持原类型的字段
-            keep_original_fields = [
-                'id', 'data_time', 'id_farm', 'id_turbine', 
-                'name_farm', 'no_model_turbine', 'create_time', 
-                'update_time', 'data_hash'
-            ]
-            
-            for col in columns:
-                value = row[col]
-                
-                # 处理特殊类型
-                if col == 'data_time':
-                    # 处理时间字段
-                    if pd.isna(value):
-                        row_values.append(None)
-                    elif isinstance(value, pd.Timestamp):
-                        # 转换为Python datetime
-                        row_values.append(value.to_pydatetime())
-                    elif isinstance(value, dt):
-                        row_values.append(value)
-                    else:
-                        # 尝试转换为datetime
-                        try:
-                            row_values.append(pd.to_datetime(value).to_pydatetime())
-                        except:
-                            row_values.append(None)
-                
-                elif col in ['id_farm', 'id_turbine', 'name_farm', 'no_model_turbine', 'data_hash']:
-                    # 这些字段保持字符串类型
-                    if pd.isna(value) or value is None:
-                        row_values.append(None)
-                    else:
-                        row_values.append(str(value))
-                
-                elif col in keep_original_fields:
-                    # 其他保持原样的字段(id, create_time, update_time)
-                    row_values.append(value)
-                
-                else:
-                    # 其他字段都尝试转换为数值类型
-                    numeric_value = self._convert_to_numeric(value)
-                    row_values.append(numeric_value)
-            
-            return tuple(row_values)
-            
-        except Exception as e:
-            logger.warning(f"转换行数据失败: {e}")
-            # 返回None元组
-            return tuple([None] * len(columns))
-    
-    def _escape_sql_value(self, val):
-        """转义SQL值,用于生成调试SQL"""
-        if val is None:
-            return "NULL"
-        elif isinstance(val, (dt, pd.Timestamp)):
-            # MySQL/TiDB日期时间格式
-            return f"'{val.strftime('%Y-%m-%d %H:%M:%S')}'"
-        elif isinstance(val, (int, float, bool)):
-            return str(val)
-        elif isinstance(val, str):
-            # 转义字符串中的单引号
-            escaped = val.replace("'", "''")
-            return f"'{escaped}'"
-        else:
-            # 其他类型转换为字符串并转义
-            str_val = str(val)
-            escaped = str_val.replace("'", "''")
-            return f"'{escaped}'"
-    
-    def _get_sql_with_values(self, sql_template: str, values: Tuple, columns: List[str]) -> str:
-        """生成包含实际值的SQL语句,用于调试(修复版本)"""
-        try:
-            # 将SQL模板拆分为INSERT部分和UPDATE部分
-            # 找到第一个VALUES后面的位置
-            values_index = sql_template.find("VALUES (")
-            if values_index == -1:
-                return f"{sql_template} -- 无法找到VALUES部分"
-            
-            # 找到INSERT部分的结束(ON DUPLICATE KEY UPDATE之前)
-            on_duplicate_index = sql_template.find("ON DUPLICATE KEY UPDATE")
-            if on_duplicate_index == -1:
-                return f"{sql_template} -- 无法找到ON DUPLICATE KEY UPDATE部分"
-            
-            # 获取INSERT部分
-            insert_part = sql_template[:on_duplicate_index]
-            
-            # 在INSERT部分中替换占位符
-            # 构建实际值的SQL字符串
-            values_sql = ', '.join([self._escape_sql_value(v) for v in values])
-            
-            # 替换INSERT部分的占位符
-            insert_placeholder = "VALUES (" + ", ".join(["%s"] * len(values)) + ")"
-            insert_with_values = insert_part.replace(insert_placeholder, f"VALUES ({values_sql})")
-            
-            # 组合完整的SQL
-            debug_sql = insert_with_values + sql_template[on_duplicate_index:]
-            
-            return debug_sql
-        except Exception as e:
-            logger.error(f"生成调试SQL失败: {e}")
-            return f"{sql_template} -- 生成调试SQL时出错: {e}"
-    
    def _log_failed_row_details(self, row_index: int, row_tuple: Tuple, columns: List[str], 
                               sql_template: str, error: Exception):
        """Log an exhaustive report for a row whose single-row UPSERT failed.

        Emits a formatted multi-section report to the logger (values, types,
        executable debug SQL, traceback, MySQL error codes) and appends the
        same details as JSON to ``failed_rows_log.json``. Never raises:
        failures inside the logging itself are caught and logged.
        """
        try:
            # 1. Build a debug SQL statement containing the actual values
            debug_sql = self._get_sql_with_values(sql_template, row_tuple, columns)
            
            # 2. Assemble the structured error report
            error_details = {
                'row_index': row_index,
                'error_message': str(error),
                'error_type': type(error).__name__,
                'sql_template': sql_template,
                'debug_sql': debug_sql,
                'columns_count': len(columns),
                'values_count': len(row_tuple),
                'timestamp': dt.now().isoformat()
            }
            
            # 3. Record every column/value pair with its Python type
            column_value_pairs = {}
            for i, (col, val) in enumerate(zip(columns, row_tuple)):
                column_value_pairs[col] = {
                    'value': str(val) if val is not None else 'NULL',
                    'python_type': type(val).__name__ if val is not None else 'NoneType',
                    'index': i,
                    'escaped_sql_value': self._escape_sql_value(val)
                }
            
            error_details['column_values'] = column_value_pairs
            
            # 4. Count occurrences of each value type in the row
            type_stats = {}
            for val in row_tuple:
                type_name = type(val).__name__ if val is not None else 'NoneType'
                type_stats[type_name] = type_stats.get(type_name, 0) + 1
            error_details['type_statistics'] = type_stats
            
            # 5. Count NULL values
            null_count = sum(1 for v in row_tuple if v is None)
            error_details['null_values_count'] = null_count
            
            # 6. Capture the current exception traceback
            error_details['traceback'] = traceback.format_exc()
            
            # 7. For MySQL errors, record the driver's error code/message
            if isinstance(error, pymysql.Error):
                error_details['mysql_error_code'] = error.args[0] if len(error.args) > 0 else 'Unknown'
                error_details['mysql_error_message'] = error.args[1] if len(error.args) > 1 else 'Unknown'
            
            # 8. Format the human-readable report for the log
            error_str = "\n" + "="*100 + "\n"
            error_str += f"🚨 单条UPSERT失败详细报告(行 {row_index})\n"
            error_str += "="*100 + "\n"
            error_str += f"📅 时间: {error_details['timestamp']}\n"
            error_str += f"🔧 错误类型: {error_details['error_type']}\n"
            error_str += f"📝 错误信息: {error_details['error_message']}\n"
            
            if 'mysql_error_code' in error_details:
                error_str += f"🔢 MySQL错误码: {error_details['mysql_error_code']}\n"
                error_str += f"📋 MySQL错误消息: {error_details['mysql_error_message']}\n"
            
            error_str += f"📊 列数/值数: {error_details['columns_count']}/{error_details['values_count']}\n"
            error_str += f"🔘 NULL值数量: {error_details['null_values_count']}\n"
            error_str += f"📈 数据类型统计: {error_details['type_statistics']}\n\n"
            
            error_str += "📋 所有列数据详情:\n"
            error_str += "-"*100 + "\n"
            for col, info in error_details['column_values'].items():
                error_str += f"  列[{info['index']:2d}] `{col}`:\n"
                error_str += f"      值: {info['value']}\n"
                error_str += f"      Python类型: {info['python_type']}\n"
                error_str += f"      SQL转义值: {info['escaped_sql_value']}\n"
            error_str += "\n"
            
            error_str += "📄 SQL模板:\n"
            error_str += "-"*100 + "\n"
            # Truncate the SQL template for readability
            simple_sql = sql_template[:500] + ("..." if len(sql_template) > 500 else "")
            error_str += simple_sql + "\n\n"
            
            error_str += "🔍 调试SQL(含实际值,可直接执行):\n"
            error_str += "-"*100 + "\n"
            # Elide the middle of very long debug SQL
            if len(debug_sql) > 2000:
                error_str += debug_sql[:1000] + "\n... [SQL太长,中间省略] ...\n" + debug_sql[-1000:] + "\n"
            else:
                error_str += debug_sql + "\n"
            error_str += "\n"
            
            error_str += "🔧 异常堆栈:\n"
            error_str += "-"*100 + "\n"
            error_str += error_details['traceback']
            error_str += "\n" + "="*100
            
            logger.error(error_str)
            
            # 9. Append the structured details to the JSON error file
            try:
                with open('failed_rows_log.json', 'a', encoding='utf-8') as f:
                    json.dump(error_details, f, ensure_ascii=False, default=str, indent=2)
                    f.write('\n,\n')
                logger.info(f"📁 失败行详情已保存到文件: failed_rows_log.json")
            except Exception as file_error:
                logger.error(f"保存失败行详情到文件时出错: {file_error}")
                
        except Exception as log_error:
            # Reporting must never crash the caller — fall back to minimal logs.
            logger.error(f"记录失败行详情时出错: {log_error}")
            logger.error(f"原始错误: {error}")
            logger.error(f"行索引: {row_index}")
            logger.error(f"SQL模板: {sql_template}")
            logger.error(f"异常堆栈: {traceback.format_exc()}")
-    
    def batch_upsert_data_direct(self, table_name: str, df: pd.DataFrame, 
                                data_time_column: str = None,
                                batch_size: int = 100, max_retries: int = 3) -> Tuple[int, int, int]:
        """
        Batch-UPSERT data directly through pymysql using the connection pool.
        Each batch of ``batch_size`` rows is committed independently; a failed
        batch is retried, then degraded to row-by-row inserts with detailed
        failure logging.

        Args:
            table_name: target table name
            df: DataFrame to insert
            data_time_column: name of the time column in the actual data
            batch_size: rows per batch (kept small to avoid connection timeouts)
            max_retries: maximum retry attempts per batch / per row

        Returns:
            (total rows processed, estimated inserted rows, estimated updated rows)
        """
        if df.empty:
            logger.warning("DataFrame为空,跳过插入")
            return 0, 0, 0
            
        total_rows = len(df)
        total_affected_rows = 0
        total_failed_rows = 0
        
        try:
            # 1. Simplified data cleaning
            logger.info("开始简化数据清理...")
            cleaned_df = self._clean_and_convert_simple(df, data_time_column)
            
            # 2. Ensure the required columns exist
            required_columns = ['id_farm', 'id_turbine', 'data_time']
            for col in required_columns:
                if col not in cleaned_df.columns:
                    logger.error(f"必需字段 '{col}' 不存在于数据中")
                    cleaned_df[col] = None  # fill with None instead of raising
            
            # 3. Collect column names
            columns = list(cleaned_df.columns)
            logger.info(f"📊 数据列: {len(columns)} 列")
            logger.info(f"📋 列名: {', '.join(columns[:10])}" + ("..." if len(columns) > 10 else ""))
            
            # 4. Prepare the UPSERT SQL
            upsert_sql = self._prepare_upsert_sql(table_name, columns)
            logger.info(f"📄 UPSERT SQL准备完成,共 {len(columns)} 列")
            
            # 5. Process in batches
            total_batches = (total_rows + batch_size - 1) // batch_size
            logger.info(f"🚀 准备处理 {total_rows} 行数据,分为 {total_batches} 个批次,批次大小: {batch_size}")
            
            for i in range(0, total_rows, batch_size):
                batch_df = cleaned_df.iloc[i:i + batch_size]
                batch_num = i // batch_size + 1
                
                retry_count = 0
                batch_success = False
                
                while retry_count <= max_retries and not batch_success:
                    conn = None
                    cursor = None
                    
                    try:
                        # Verify connectivity (and reconnect) before each attempt
                        if not self.check_connection():
                            logger.warning(f"🔌 连接已断开,正在重新连接...")
                            self.reconnect()
                        
                        conn = self.get_connection()
                        cursor = conn.cursor()
                        
                        # Convert rows to a list of tuples
                        batch_values = []
                        for _, row in batch_df.iterrows():
                            row_tuple = self._convert_row_to_tuple(row, columns)
                            batch_values.append(row_tuple)
                        
                        if batch_num == 1 and batch_values:
                            logger.info(f"📋 第一批数据示例(第一行前5个值): {batch_values[0][:5]}")
                            logger.info(f"📊 第一批数据类型: {[type(v).__name__ for v in batch_values[0][:5]]}")
                        
                        # Bulk insert — the key point: use executemany
                        affected = cursor.executemany(upsert_sql, batch_values)
                        total_affected_rows += affected
                        
                        # Commit this batch
                        conn.commit()
                        
                        if batch_num % 10 == 0 or batch_num == total_batches:
                            logger.info(f"✅ 批次 {batch_num}/{total_batches}: 处理 {len(batch_df)} 行, "
                                      f"受影响 {affected} 行, 已提交到数据库")
                        
                        batch_success = True
                        
                    except (pymysql.Error, AttributeError) as e:
                        retry_count += 1
                        
                        # Log the failure
                        logger.error(f"❌ 批次 {batch_num} UPSERT失败,错误: {str(e)}")
                        logger.error(f"🔧 错误类型: {type(e).__name__}")
                        
                        # Connection-related error: reconnect immediately
                        if isinstance(e, (pymysql.Error, AttributeError)) and any(keyword in str(e).lower() for keyword in ['socket', 'connection', 'timeout', 'none']):
                            logger.warning(f"🔌 检测到连接错误,尝试重新连接...")
                            try:
                                if conn:
                                    conn.rollback()
                                self.reconnect()
                            except:
                                logger.error("重新连接失败")
                        
                        if retry_count > max_retries:
                            logger.error(f"❌ 批次 {batch_num} UPSERT失败,已达到最大重试次数")
                            
                            # Record the batch-level failure details
                            batch_failed_info = {
                                'batch_num': batch_num,
                                'batch_size': len(batch_df),
                                'total_retries': retry_count,
                                'error': str(e),
                                'error_type': type(e).__name__
                            }
                            
                            if isinstance(e, pymysql.Error) and hasattr(e, 'args') and len(e.args) > 0:
                                batch_failed_info['mysql_error_code'] = e.args[0]
                                if len(e.args) > 1:
                                    batch_failed_info['mysql_error_message'] = e.args[1]
                            
                            logger.error(f"📋 批次失败详情: {batch_failed_info}")
                            
                            # Fall back to row-by-row inserts for this batch
                            batch_affected = 0
                            batch_failed_rows = 0
                            
                            for idx, (_, row) in enumerate(batch_df.iterrows()):
                                row_retry_count = 0
                                row_success = False
                                
                                while row_retry_count <= max_retries and not row_success:
                                    single_conn = None
                                    single_cursor = None
                                    
                                    try:
                                        # Verify connectivity before each single-row insert
                                        if not self.check_connection():
                                            logger.warning(f"🔌 单条插入前连接已断开,正在重新连接...")
                                            self.reconnect()
                                        
                                        single_conn = self.get_connection()
                                        single_cursor = single_conn.cursor()
                                        
                                        row_tuple = self._convert_row_to_tuple(row, columns)
                                        single_cursor.execute(upsert_sql, row_tuple)
                                        batch_affected += single_cursor.rowcount
                                        single_conn.commit()
                                        row_success = True
                                        
                                    except Exception as single_e:
                                        row_retry_count += 1
                                        if row_retry_count > max_retries:
                                            batch_failed_rows += 1
                                            total_failed_rows += 1
                                            # Emit the detailed per-row failure report
                                            try:
                                                row_tuple = self._convert_row_to_tuple(row, columns)
                                                self._log_failed_row_details(
                                                    row_index=idx + (batch_num-1)*batch_size,
                                                    row_tuple=row_tuple,
                                                    columns=columns,
                                                    sql_template=upsert_sql,
                                                    error=single_e
                                                )
                                            except:
                                                logger.error(f"无法记录失败行详情: {single_e}")
                                            break
                                        else:
                                            wait_time = 1 * row_retry_count
                                            logger.info(f"⏳ 等待 {wait_time} 秒后重试...")
                                            time.sleep(wait_time)
                                    finally:
                                        if single_cursor:
                                            single_cursor.close()
                                        # Do not release the connection here; reuse this thread's connection
                                        
                            total_affected_rows += batch_affected
                            
                            if batch_affected > 0:
                                logger.info(f"⚠️  批次 {batch_num} 单条处理完成,成功 {batch_affected} 行, 失败 {batch_failed_rows} 行")
                                
                                if batch_failed_rows > 0:
                                    logger.warning(f"⚠️  批次 {batch_num} 有 {batch_failed_rows} 行数据插入失败")
                                    logger.warning(f"📁 失败的数据已记录到日志中,请查看详细错误信息")
                            
                            # Give up on batch mode; move on to the next batch
                            break
                        else:
                            logger.warning(f"⚠️  批次 {batch_num} UPSERT失败,第 {retry_count} 次重试")
                            
                            # Log driver-level error details
                            if isinstance(e, pymysql.Error) and hasattr(e, 'args') and len(e.args) > 0:
                                error_code = e.args[0]
                                error_msg = e.args[1] if len(e.args) > 1 else ''
                                logger.warning(f"🔢 MySQL错误码: {error_code}")
                                logger.warning(f"📋 MySQL错误信息: {error_msg}")
                            
                            # Hint at known fixes for common MySQL error codes
                            if isinstance(e, pymysql.Error) and hasattr(e, 'args') and len(e.args) > 0:
                                error_code = e.args[0]
                                if error_code == 1062:
                                    logger.warning(f"💡 提示: 错误码1062表示唯一键冲突,可能是重复数据")
                                elif error_code == 1366:
                                    logger.warning(f"💡 提示: 错误码1366表示数据类型不匹配,请检查数据格式")
                                elif error_code == 2006:  # MySQL server has gone away
                                    logger.warning(f"💡 提示: 错误码2006表示MySQL服务器连接已断开")
                                elif error_code == 2013:  # Lost connection to MySQL server during query
                                    logger.warning(f"💡 提示: 错误码2013表示查询期间失去连接")
                            
                            # Back off before retrying
                            wait_time = 2 * retry_count
                            logger.info(f"⏳ 等待 {wait_time} 秒后重试...")
                            time.sleep(wait_time)
                    
                    finally:
                        if cursor:
                            cursor.close()
                        # Keep the connection for reuse by this thread
                
                # Brief pause between batches to reduce database load
                if batch_success and batch_num < total_batches:
                    time.sleep(0.1)  # 100 ms
            
            # Estimate insert vs update counts (driver can't distinguish them)
            successful_rows = total_rows - total_failed_rows
            estimated_inserted = successful_rows // 2
            estimated_updated = successful_rows - estimated_inserted
            
            logger.info(f"🎉 UPSERT完成统计:")
            logger.info(f"  总处理行数: {total_rows}")
            logger.info(f"  总受影响行数: {total_affected_rows}")
            logger.info(f"  失败行数: {total_failed_rows}")
            logger.info(f"  成功行数: {successful_rows}")
            logger.info(f"  估计插入行数: {estimated_inserted}")
            logger.info(f"  估计更新行数: {estimated_updated}")
            
            if total_failed_rows > 0:
                logger.warning(f"⚠️  警告: 有 {total_failed_rows} 行数据插入失败")
                logger.warning(f"📁 请查看上面的详细错误日志,或检查文件 'failed_rows_log.json'")
                logger.warning(f"💡 提示: 失败的数据可以手动修复后重新导入")
            
            return successful_rows, estimated_inserted, estimated_updated
                
        except Exception as e:
            logger.error(f"❌ 批量UPSERT失败")
            logger.error(f"🔧 错误类型: {type(e).__name__}")
            logger.error(f"📝 错误信息: {str(e)}")
            logger.error(f"🔧 错误详情: {traceback.format_exc()}")
            raise
        finally:
            # Release this thread's connection once the whole batch run ends
            self.release_connection()
+    finally:
+        if cursor:
+            cursor.close()
+        if conn:
+            conn.close()
     
    def upsert_parquet_data(self, file_info: ParquetFileInfo, table_name: str, 
                           batch_size: int = 100, max_retries: int = 3) -> Tuple[int, int, int]:
        """
        UPSERT one parquet file into the database using the three-column
        unique key; each batch of ``batch_size`` rows commits independently.

        Args:
            file_info: file metadata (includes the detected time column name)
            table_name: target table name
            batch_size: rows per transaction (kept small to avoid connection timeouts)
            max_retries: maximum retry attempts

        Returns:
            (total rows, inserted rows, updated rows)

        Raises:
            Exception: re-raises any failure after logging file-level details.
        """
        try:
            logger.info(f"📂 正在读取并处理文件: {file_info.file_path}")
            logger.info(f"⏰ 识别到的时间字段: {file_info.data_time_column}")
            
            # 1. Read the parquet file
            df = pd.read_parquet(file_info.file_path, engine='pyarrow')
            
            # 2. Attach metadata columns from the file info
            df['id_farm'] = file_info.farm_id
            df['name_farm'] = file_info.farm_name
            df['no_model_turbine'] = file_info.model_type
            df['id_turbine'] = file_info.turbine_id
            
            logger.info(f"📊 文件 {file_info.turbine_id}.parquet 读取完成,形状: {df.shape}")
            logger.info(f"📋 数据列: {list(df.columns)[:10]}" + ("..." if len(df.columns) > 10 else ""))
            
            # 3. Batch UPSERT into the database, committing per batch_size
            total_rows, inserted_rows, updated_rows = self.batch_upsert_data_direct(
                table_name=table_name,
                df=df,
                data_time_column=file_info.data_time_column,
                batch_size=batch_size,
                max_retries=max_retries
            )
            
            return total_rows, inserted_rows, updated_rows
            
        except Exception as e:
            logger.error(f"❌ 处理文件 {file_info.file_path} 失败")
            logger.error(f"🔧 错误类型: {type(e).__name__}")
            logger.error(f"📝 错误信息: {str(e)}")
            logger.error(f"🔧 错误详情: {traceback.format_exc()}")
            raise
+    return result
+
+
+# 添加一个监控守护进程示例
class DatabaseMonitorDaemon:
    """Background watchdog for a DatabaseManager.

    Polls the manager's connection status on a fixed interval from a daemon
    thread and triggers an automatic reconnect after repeated failures.
    """
    
    def __init__(self, db_manager: DatabaseManager, check_interval: int = 5):
        # Manager under observation and polling period in seconds.
        self.db_manager = db_manager
        self.check_interval = check_interval
        self.monitor_thread = None
        self.running = False
        
    def start(self):
        """Spawn the monitor thread; no-op when already running."""
        if self.running:
            return
        self.running = True
        self.monitor_thread = Thread(target=self._monitor_loop, daemon=True)
        self.monitor_thread.start()
        logger.info(f"数据库监控守护进程已启动,检查间隔: {self.check_interval}秒")
    
    def stop(self):
        """Request loop exit and wait (bounded) for the thread to terminate."""
        self.running = False
        if self.monitor_thread:
            self.monitor_thread.join(timeout=10)
            logger.info("数据库监控守护进程已停止")
    
    def _monitor_loop(self):
        """Poll the connection status every ``check_interval`` seconds."""
        while self.running:
            try:
                status = self.db_manager.get_connection_status()
                
                if status['status'] != "HEALTHY":
                    logger.warning(f"数据库连接状态异常: {status['status']}")
                    
                    # Three or more consecutive errors: attempt a repair.
                    if status['error_count'] >= 3:
                        logger.info("检测到连续错误,尝试自动修复连接...")
                        self.db_manager._auto_reconnect()
                
                time.sleep(self.check_interval)
                
            except Exception as e:
                # Never let the watchdog die; log and keep polling.
                logger.error(f"监控循环异常: {e}")
                time.sleep(self.check_interval)

+ 0 - 0
dataStorage_datang/info_model_turbine.py → dataStorage_datang/info_model_turbine_v1.py


+ 1098 - 0
dataStorage_datang/info_model_turbine_v2.py

@@ -0,0 +1,1098 @@
+import pymysql
+import threading
+from typing import List, Dict, Any, Optional, Tuple
+import logging
+from datetime import datetime
+from collections import Counter
+import statistics
+import math
+
# Logging setup: module-wide INFO level with a timestamped line format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used by every class in this file.
logger = logging.getLogger(__name__)
+
+
class ConnectionPool:
    """Minimal MySQL/TiDB connection pool.

    Creates at most ``max_connections`` pymysql connections and hands them
    out one at a time.  Every new connection gets a per-session TiDB query
    memory quota (``tidb_mem_quota_query``).

    Bug fix vs. the original: ``get_connection`` used to sleep and then
    *recurse* while still holding ``self._lock``.  ``threading.Lock`` is not
    reentrant, so as soon as the pool was exhausted the calling thread
    deadlocked on its own lock and no connection could ever be released.
    The wait now happens in a loop, outside the lock.
    """

    def __init__(self, host, port, user, password, database, charset='utf8mb4',
                 max_connections=2, mem_quota=4 << 30):
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.database = database
        self.charset = charset
        self.max_connections = max_connections
        self.mem_quota = mem_quota

        self._lock = threading.Lock()
        self._connections = []   # every connection ever created by this pool
        self._in_use = set()     # subset currently handed out to callers

    def _create_connection(self):
        """Create a new connection and apply the session memory quota.

        Raises:
            Exception: re-raised after logging when the connect/quota fails.
        """
        try:
            conn = pymysql.connect(
                host=self.host,
                port=self.port,
                user=self.user,
                password=self.password,
                database=self.database,
                charset=self.charset,
                cursorclass=pymysql.cursors.DictCursor
            )

            # Per-session TiDB memory quota for queries on this connection.
            with conn.cursor() as cursor:
                cursor.execute(f"SET SESSION tidb_mem_quota_query = {self.mem_quota}")
                conn.commit()

            logger.debug(f"创建新数据库连接,设置内存配额为 {self.mem_quota}")
            return conn
        except Exception as e:
            logger.error(f"创建数据库连接失败: {e}")
            raise

    def get_connection(self):
        """Borrow a connection, polling once per second while the pool is full.

        Returns:
            A pymysql connection; the caller must pass it back through
            ``release_connection``.
        """
        import time  # local import kept: module top does not import time

        while True:
            with self._lock:
                # Prefer an idle, already-created connection.
                for conn in self._connections:
                    if conn not in self._in_use:
                        self._in_use.add(conn)
                        logger.debug(f"从连接池获取现有连接")
                        return conn

                # Room left in the pool: create a fresh connection.
                if len(self._connections) < self.max_connections:
                    conn = self._create_connection()
                    self._connections.append(conn)
                    self._in_use.add(conn)
                    logger.debug(f"创建新连接,当前连接数: {len(self._connections)}")
                    return conn

            # Pool exhausted: wait OUTSIDE the lock so other threads can
            # release their connections, then retry.
            logger.warning("连接池已满,等待可用连接...")
            time.sleep(1)

    def release_connection(self, conn):
        """Return a borrowed connection to the pool (no-op if not borrowed)."""
        with self._lock:
            if conn in self._in_use:
                self._in_use.remove(conn)
                logger.debug(f"释放连接回连接池")

    def close_all(self):
        """Close every pooled connection and reset the bookkeeping."""
        with self._lock:
            for conn in self._connections:
                try:
                    conn.close()
                except Exception:
                    # Best effort: a connection may already be dead.
                    pass
            self._connections.clear()
            self._in_use.clear()
            logger.info("已关闭所有数据库连接")
+
+
class DatabaseConfig:
    """Bundle of connection settings for the wind_data database."""

    def __init__(self, host='192.168.50.234', port=4000, user='root',
                 password='123456', database='wind_data', charset='utf8mb4',
                 max_connections=2, mem_quota=4 << 30):
        # Network endpoint
        self.host = host
        self.port = port
        # Credentials and target schema
        self.user = user
        self.password = password
        self.database = database
        self.charset = charset
        # Pool sizing and per-query TiDB memory quota (bytes)
        self.max_connections = max_connections
        self.mem_quota = mem_quota
+
+
class SCADADataProcessor:
    """Static helpers that derive rated speeds / transmission ratio from SCADA rows."""

    @staticmethod
    def calculate_mode(values: List[float], decimal_places: int = 1) -> float:
        """Return the mode of *values* after rounding to ``decimal_places``.

        Rounding collapses sensor noise so continuous readings can be
        counted as discrete buckets.

        Args:
            values: numeric samples
            decimal_places: digits kept before counting

        Returns:
            The most frequent rounded value, or 0.0 for empty input.
        """
        if not values:
            return 0.0

        # Round first so near-identical readings fall into one bucket.
        rounded_values = [round(v, decimal_places) for v in values]

        counter = Counter(rounded_values)

        if counter:
            mode_value, count = counter.most_common(1)[0]
            logger.debug(f"众数统计: 值={mode_value}, 频次={count}, 总数据点={len(values)}")
            return mode_value

        return 0.0

    @staticmethod
    def calculate_median(values: List[float]) -> float:
        """Return the median of *values* (0.0 for empty input).

        Falls back to a manual sort if :mod:`statistics` raises — the
        original code used a bare ``except:``, which also swallowed
        ``KeyboardInterrupt``/``SystemExit``; narrowed to ``Exception``.
        """
        if not values:
            return 0.0

        try:
            return float(statistics.median(values))
        except Exception:
            # Manual fallback: sort and pick the middle element(s).
            sorted_values = sorted(values)
            n = len(sorted_values)
            if n % 2 == 1:
                return sorted_values[n // 2]
            else:
                return (sorted_values[n // 2 - 1] + sorted_values[n // 2]) / 2

    @staticmethod
    def calculate_rated_speeds_and_ratio(data: List[Dict[str, Any]]) -> Tuple[float, float, float]:
        """Compute rated rotor speed, rated generator speed and the gear ratio.

        Args:
            data: SCADA rows containing ``rotor_spd`` and ``gen_spd`` keys.

        Returns:
            Tuple (rated_rotor_spd, rated_gen_spd, transmission_ratio);
            all zeros when no usable samples exist.
        """
        if not data:
            return 0.0, 0.0, 0.0

        # Collect paired, numeric speed samples; skip rows with missing
        # or non-numeric values.
        rotor_speeds = []
        gen_speeds = []

        for record in data:
            rotor_spd = record.get('rotor_spd')
            gen_spd = record.get('gen_spd')

            if rotor_spd is not None and gen_spd is not None:
                try:
                    rotor_speeds.append(float(rotor_spd))
                    gen_speeds.append(float(gen_spd))
                except (ValueError, TypeError):
                    continue

        if not rotor_speeds or not gen_speeds:
            logger.warning(f"数据不足: 转子转速点={len(rotor_speeds)}, 发电机转速点={len(gen_speeds)}")
            return 0.0, 0.0, 0.0

        # Primary estimate: mode of the (rounded) samples.
        rated_rotor_spd = SCADADataProcessor.calculate_mode(rotor_speeds, decimal_places=1)
        rated_gen_spd = SCADADataProcessor.calculate_mode(gen_speeds, decimal_places=1)

        # Fallback to the median if the mode estimate is degenerate.
        if rated_rotor_spd <= 0 or rated_gen_spd <= 0:
            rated_rotor_spd = SCADADataProcessor.calculate_median(rotor_speeds)
            rated_gen_spd = SCADADataProcessor.calculate_median(gen_speeds)

        # Transmission ratio = generator speed / rotor speed.
        if rated_rotor_spd > 0:
            transmission_ratio = rated_gen_spd / rated_rotor_spd
        else:
            transmission_ratio = 0.0

        # Sanity check: rotor speeds above 30 rpm are implausible for a
        # wind turbine; recompute from the median in that case.
        if rated_rotor_spd > 30:
            logger.warning(f"叶轮转速异常高: {rated_rotor_spd} rpm,使用中位数重新计算")
            rated_rotor_spd = SCADADataProcessor.calculate_median(rotor_speeds)
            if rated_rotor_spd > 0:
                transmission_ratio = rated_gen_spd / rated_rotor_spd

        # Ratios above 200 are treated as invalid.
        if transmission_ratio > 200:
            logger.warning(f"传动比异常高: {transmission_ratio}")
            transmission_ratio = 0.0

        logger.debug(f"计算结果: 转子转速={rated_rotor_spd:.2f} rpm, "
                    f"发电机转速={rated_gen_spd:.2f} rpm, "
                    f"传动比={transmission_ratio:.2f}")

        return rated_rotor_spd, rated_gen_spd, transmission_ratio

    @staticmethod
    def detect_turbine_type(transmission_ratio: float, rated_gen_spd: float) -> str:
        """Classify drivetrain type from the gear ratio (generator speed as tiebreaker).

        Returns:
            One of the Chinese labels 直驱 / 半直驱 / 双馈 / 未知.
        """
        if transmission_ratio <= 1.2:
            return "直驱"
        elif 1.2 < transmission_ratio <= 30:
            return "半直驱"
        elif 30 < transmission_ratio <= 120:
            return "双馈"
        else:
            # Ratio out of the usual bands: decide by generator speed.
            if rated_gen_spd < 50:  # direct-drive generators spin very slowly
                return "直驱"
            elif 1000 <= rated_gen_spd <= 2000:  # DFIG near synchronous speed
                return "双馈"
            elif rated_gen_spd > 2000:  # medium-speed ("semi-direct") drivetrains
                return "半直驱"
            else:
                return "未知"
+
+
# SQL statement definitions

# DDL: model summary table, uniquely keyed by the composite no_model string.
CREATE_MODEL_TURBINE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS info_model_turbine (
    id INT AUTO_INCREMENT PRIMARY KEY,
    no_model VARCHAR(255) NOT NULL COMMENT '机型唯一标识',
    model VARCHAR(100) COMMENT '机型',
    manufacturer VARCHAR(100) COMMENT '制造商',
    rated_capacity INT COMMENT '额定容量(kW)',
    cut_in_wind_speed DECIMAL(5, 2) COMMENT '切入风速(m/s)',
    cut_out_wind_speed DECIMAL(5, 2) COMMENT '切出风速(m/s)',
    rotor_diameter INT COMMENT '叶轮直径(m)',
    hub_height DECIMAL(10, 2) COMMENT '轮毂高度(m)',
    turbine_count INT DEFAULT 0 COMMENT '该机型风机数量',
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    UNIQUE KEY idx_no_model (no_model),
    INDEX idx_model (model),
    INDEX idx_manufacturer (manufacturer),
    INDEX idx_rated_capacity (rated_capacity)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='风机机型信息表';
"""

# DDL migration: adds the computed-parameter columns (idempotent thanks to
# ADD COLUMN IF NOT EXISTS).
ALTER_MODEL_TURBINE_TABLE_SQL = """
ALTER TABLE info_model_turbine 
ADD COLUMN IF NOT EXISTS rated_rotor_spd DECIMAL(10, 3) COMMENT '额定叶轮转速(rpm)',
ADD COLUMN IF NOT EXISTS rated_gen_spd DECIMAL(10, 3) COMMENT '额定发电机转速(rpm)',
ADD COLUMN IF NOT EXISTS transmission_ratio DECIMAL(10, 4) COMMENT '传动比',
ADD COLUMN IF NOT EXISTS turbine_type VARCHAR(20) COMMENT '风机类型(直驱/双馈/半直驱)',
ADD COLUMN IF NOT EXISTS calculation_time TIMESTAMP COMMENT '参数计算时间',
ADD COLUMN IF NOT EXISTS data_points INT DEFAULT 0 COMMENT '用于计算的数据点数';
"""

# Groups info_turbine rows into distinct models and synthesizes no_model.
# NOTE(review): no_model concatenates only model/cut-in/cut-out/hub_height,
# while the GROUP BY also includes manufacturer, rated_capacity and
# rotor_diameter — two groups differing only in those columns would collide
# on the same no_model (unique key). Confirm this is intended.
SELECT_MODEL_DATA_SQL = """
SELECT 
    CONCAT(
        IFNULL(model, ''), 
        '-', 
        IFNULL(cut_in_wind_speed, ''), 
        '-', 
        IFNULL(cut_out_wind_speed, ''),
        '-', 
        IFNULL(hub_height, '')
    ) AS no_model,
    model,
    manufacturer,
    rated_capacity,
    cut_in_wind_speed,
    cut_out_wind_speed,
    rotor_diameter,
    hub_height,
    COUNT(*) AS turbine_count
FROM info_turbine 
WHERE model IS NOT NULL
GROUP BY model, manufacturer, rated_capacity, cut_in_wind_speed, cut_out_wind_speed, rotor_diameter, hub_height
ORDER BY model, manufacturer, rated_capacity;
"""

# All known model identifiers, used to drive per-model parameter calculation.
SELECT_NO_MODEL_LIST_SQL = """
SELECT DISTINCT no_model FROM info_model_turbine ORDER BY no_model;
"""

# SCADA rows for one model at (near-)rated operating point: wind speed at or
# above rated, active power within ±5% of rated capacity, positive speeds.
# Uses implicit comma joins across four tables filtered in WHERE.
SELECT_SCADA_FOR_NO_MODEL_SQL = """
SELECT 
    it.wind_farm_id,
    it.turbine_id,
    it.model,
    icpt.rated_wind_speed,
    imt.no_model,
    imt.rated_capacity,
    imt.cut_in_wind_speed,
    imt.cut_out_wind_speed,
    imt.rotor_diameter,
    imt.hub_height,
    dst.data_time,
    dst.wind_spd,
    dst.rotor_spd,
    dst.gen_spd,
    dst.p_active
FROM info_turbine it,
     info_curve_power_turbine icpt,
     info_model_turbine imt,
     data_scada_turbine dst  
WHERE 1=1
    AND it.wind_farm_id = dst.id_farm
    AND it.turbine_id = dst.id_turbine
    AND it.model = dst.no_model_turbine
    AND it.model = imt.model
    AND it.cut_in_wind_speed = imt.cut_in_wind_speed
    AND it.cut_out_wind_speed = imt.cut_out_wind_speed
    AND it.rotor_diameter = imt.rotor_diameter
    AND it.hub_height = imt.hub_height 
    AND it.wind_farm_id = icpt.wind_farm_id
    AND it.model = icpt.standard_model
    AND imt.no_model = %s
    AND dst.wind_spd >= icpt.rated_wind_speed
    AND dst.p_active >= imt.rated_capacity * 0.95 
    AND dst.p_active <= imt.rated_capacity * 1.05
    AND dst.rotor_spd IS NOT NULL 
    AND dst.gen_spd IS NOT NULL
    AND dst.rotor_spd > 0 
    AND dst.gen_spd > 0
ORDER BY dst.data_time;
"""

# Destructive: drops the whole model table (used by the recreate path).
DROP_MODEL_TURBINE_TABLE_SQL = "DROP TABLE IF EXISTS info_model_turbine"

# Existence probe via information_schema; parameter is the schema name.
CHECK_TABLE_EXISTS_SQL = """
SELECT COUNT(*) as table_exists 
FROM information_schema.tables 
WHERE table_schema = %s AND table_name = 'info_model_turbine'
"""

# Column listing, used to detect a pre-migration table layout.
CHECK_TABLE_COLUMNS_SQL = """
SELECT COLUMN_NAME 
FROM INFORMATION_SCHEMA.COLUMNS 
WHERE TABLE_SCHEMA = %s AND TABLE_NAME = 'info_model_turbine'
"""

# Writes the computed rated parameters back; last placeholder is no_model.
UPDATE_MODEL_PARAMETERS_SQL = """
UPDATE info_model_turbine 
SET rated_rotor_spd = %s,
    rated_gen_spd = %s,
    transmission_ratio = %s,
    turbine_type = %s,
    calculation_time = %s,
    data_points = %s,
    updated_at = CURRENT_TIMESTAMP
WHERE no_model = %s
"""
+
+
+class ModelTurbineManager:
+    """风机机型信息管理器"""
+    
+    def __init__(self, db_config: DatabaseConfig):
+        """
+        初始化管理器
+        
+        Args:
+            db_config: 数据库配置对象
+        """
+        self.db_config = db_config
+        self.connection_pool = None
+        self._initialize_connection_pool()
+    
+    def _initialize_connection_pool(self):
+        """初始化数据库连接池"""
+        self.connection_pool = ConnectionPool(
+            host=self.db_config.host,
+            port=self.db_config.port,
+            user=self.db_config.user,
+            password=self.db_config.password,
+            database=self.db_config.database,
+            charset=self.db_config.charset,
+            max_connections=self.db_config.max_connections,
+            mem_quota=self.db_config.mem_quota
+        )
+        logger.info(f"数据库连接池初始化完成,最大连接数: {self.db_config.max_connections}")
+    
+    def get_connection(self):
+        """从连接池获取连接"""
+        return self.connection_pool.get_connection()
+    
+    def release_connection(self, conn):
+        """释放连接回连接池"""
+        self.connection_pool.release_connection(conn)
+    
+    def execute_query(self, sql: str, params: Optional[tuple] = None) -> List[Dict[str, Any]]:
+        """
+        执行查询语句
+        
+        Args:
+            sql: SQL查询语句
+            params: SQL参数
+            
+        Returns:
+            查询结果列表
+        """
+        conn = self.get_connection()
+        if not conn:
+            raise Exception("无法从连接池获取数据库连接")
+        
+        try:
+            with conn.cursor() as cursor:
+                cursor.execute(sql, params)
+                result = cursor.fetchall()
+                logger.debug(f"查询执行成功,返回 {len(result)} 条记录")
+                return result
+        except Exception as e:
+            logger.error(f"查询执行失败: {e}")
+            logger.error(f"SQL: {sql}")
+            if params:
+                logger.error(f"参数: {params}")
+            raise
+        finally:
+            self.release_connection(conn)
+    
+    def execute_update(self, sql: str, params: Optional[tuple] = None) -> int:
+        """
+        执行更新语句(INSERT, UPDATE, DELETE)
+        
+        Args:
+            sql: SQL更新语句
+            params: SQL参数
+            
+        Returns:
+            影响的行数
+        """
+        conn = self.get_connection()
+        if not conn:
+            raise Exception("无法从连接池获取数据库连接")
+        
+        try:
+            with conn.cursor() as cursor:
+                affected_rows = cursor.execute(sql, params)
+                conn.commit()
+                logger.debug(f"更新执行成功,影响 {affected_rows} 行")
+                return affected_rows
+        except Exception as e:
+            conn.rollback()
+            logger.error(f"更新执行失败: {e}")
+            logger.error(f"SQL: {sql}")
+            if params:
+                logger.error(f"参数: {params}")
+            raise
+        finally:
+            self.release_connection(conn)
+    
+    def execute_batch_update(self, sql: str, params_list: List[tuple]) -> int:
+        """
+        批量执行更新语句
+        
+        Args:
+            sql: SQL更新语句
+            params_list: SQL参数列表
+            
+        Returns:
+            影响的行数
+        """
+        conn = self.get_connection()
+        if not conn:
+            raise Exception("无法从连接池获取数据库连接")
+        
+        try:
+            with conn.cursor() as cursor:
+                affected_rows = cursor.executemany(sql, params_list)
+                conn.commit()
+                logger.debug(f"批量更新执行成功,影响 {affected_rows} 行")
+                return affected_rows
+        except Exception as e:
+            conn.rollback()
+            logger.error(f"批量更新执行失败: {e}")
+            logger.error(f"SQL: {sql}")
+            raise
+        finally:
+            self.release_connection(conn)
+    
+    def check_table_exists(self, table_name: str = 'info_model_turbine') -> bool:
+        """
+        检查表是否存在
+        
+        Args:
+            table_name: 表名
+            
+        Returns:
+            bool: 表是否存在
+        """
+        try:
+            result = self.execute_query(CHECK_TABLE_EXISTS_SQL, (self.db_config.database,))
+            return result[0]['table_exists'] > 0
+        except Exception as e:
+            logger.error(f"检查表存在性失败: {e}")
+            return False
+    
+    def check_table_columns(self) -> List[str]:
+        """
+        检查表的列
+        
+        Returns:
+            List[str]: 列名列表
+        """
+        try:
+            result = self.execute_query(CHECK_TABLE_COLUMNS_SQL, (self.db_config.database,))
+            return [row['COLUMN_NAME'] for row in result]
+        except Exception as e:
+            logger.error(f"检查表列失败: {e}")
+            return []
+    
+    def add_missing_columns(self):
+        """添加缺失的列"""
+        try:
+            logger.info("检查并添加缺失的列...")
+            self.execute_update(ALTER_MODEL_TURBINE_TABLE_SQL)
+            logger.info("表结构更新完成")
+        except Exception as e:
+            logger.error(f"更新表结构失败: {e}")
+    
+    def create_model_turbine_table(self) -> bool:
+        """
+        创建风机机型信息表
+        
+        Returns:
+            bool: 是否成功创建表
+        """
+        try:
+            logger.info("开始创建风机机型信息表...")
+            self.execute_update(CREATE_MODEL_TURBINE_TABLE_SQL)
+            logger.info("风机机型信息表创建成功")
+            return True
+        except Exception as e:
+            logger.error(f"创建风机机型信息表失败: {e}")
+            return False
+    
+    def drop_model_turbine_table(self) -> bool:
+        """
+        删除风机机型信息表
+        
+        Returns:
+            bool: 是否成功删除表
+        """
+        try:
+            logger.info("开始删除风机机型信息表...")
+            self.execute_update(DROP_MODEL_TURBINE_TABLE_SQL)
+            logger.info("风机机型信息表删除成功")
+            return True
+        except Exception as e:
+            logger.error(f"删除风机机型信息表失败: {e}")
+            return False
+    
+    def get_model_data(self) -> List[Dict[str, Any]]:
+        """
+        从info_turbine表获取机型分组数据
+        
+        Returns:
+            机型数据列表
+        """
+        try:
+            logger.info("开始从info_turbine表查询机型数据...")
+            data = self.execute_query(SELECT_MODEL_DATA_SQL)
+            logger.info(f"成功查询到 {len(data)} 条机型记录")
+            return data
+        except Exception as e:
+            logger.error(f"查询机型数据失败: {e}")
+            return []
+    
+    def get_no_model_list(self) -> List[str]:
+        """
+        获取所有no_model列表
+        
+        Returns:
+            no_model列表
+        """
+        try:
+            logger.info("开始查询所有机型标识...")
+            result = self.execute_query(SELECT_NO_MODEL_LIST_SQL)
+            no_models = [row['no_model'] for row in result]
+            logger.info(f"成功获取 {len(no_models)} 个机型标识")
+            return no_models
+        except Exception as e:
+            logger.error(f"查询机型标识列表失败: {e}")
+            return []
+    
+    def get_scada_data_for_no_model(self, no_model: str) -> List[Dict[str, Any]]:
+        """
+        获取指定机型的SCADA数据用于参数计算
+        
+        Args:
+            no_model: 机型标识
+            
+        Returns:
+            SCADA数据列表
+        """
+        try:
+            logger.debug(f"开始查询机型 {no_model} 的SCADA数据...")
+            data = self.execute_query(SELECT_SCADA_FOR_NO_MODEL_SQL, (no_model,))
+            logger.debug(f"机型 {no_model} 查询到 {len(data)} 条SCADA记录")
+            return data
+        except Exception as e:
+            logger.error(f"查询机型 {no_model} 的SCADA数据失败: {e}")
+            return []
+    
    def calculate_and_update_parameters_for_model(self, no_model: str) -> Dict[str, Any]:
        """Compute rated speeds/ratio for one model from SCADA data and persist them.

        Args:
            no_model: model identifier (no_model key)

        Returns:
            Result dict; always contains "no_model" and "success". On
            success it also carries the computed speeds, ratio, type and
            data point count; on failure a "reason" string (errors are
            caught and reported, never raised).
        """
        try:
            # Fetch the near-rated SCADA rows for this model.
            scada_data = self.get_scada_data_for_no_model(no_model)
            
            if not scada_data:
                logger.warning(f"机型 {no_model}: 没有可用于计算的SCADA数据")
                return {
                    "no_model": no_model,
                    "success": False,
                    "reason": "无SCADA数据",
                    "data_points": 0
                }
            
            # Derive rated rotor/generator speed and transmission ratio.
            rated_rotor_spd, rated_gen_spd, transmission_ratio = \
                SCADADataProcessor.calculate_rated_speeds_and_ratio(scada_data)
            
            if rated_rotor_spd <= 0 or transmission_ratio <= 0:
                logger.warning(f"机型 {no_model}: 计算出的参数无效")
                return {
                    "no_model": no_model,
                    "success": False,
                    "reason": "参数无效",
                    "rated_rotor_spd": rated_rotor_spd,
                    "rated_gen_spd": rated_gen_spd,
                    "transmission_ratio": transmission_ratio,
                    "data_points": len(scada_data)
                }
            
            # Classify the drivetrain type from ratio / generator speed.
            turbine_type = SCADADataProcessor.detect_turbine_type(transmission_ratio, rated_gen_spd)
            
            # Persist the computed parameters back to info_model_turbine.
            update_result = self.execute_update(
                UPDATE_MODEL_PARAMETERS_SQL,
                (
                    rated_rotor_spd,
                    rated_gen_spd,
                    transmission_ratio,
                    turbine_type,
                    datetime.now(),
                    len(scada_data),
                    no_model
                )
            )
            
            if update_result > 0:
                logger.info(f"机型 {no_model}: 成功更新参数 - "
                           f"叶轮转速={rated_rotor_spd:.2f} rpm, "
                           f"发电机转速={rated_gen_spd:.2f} rpm, "
                           f"传动比={transmission_ratio:.2f}, "
                           f"类型={turbine_type}, "
                           f"数据点={len(scada_data)}")
                
                return {
                    "no_model": no_model,
                    "success": True,
                    "rated_rotor_spd": rated_rotor_spd,
                    "rated_gen_spd": rated_gen_spd,
                    "transmission_ratio": transmission_ratio,
                    "turbine_type": turbine_type,
                    "data_points": len(scada_data)
                }
            else:
                # Zero affected rows: no_model row missing or values unchanged.
                logger.warning(f"机型 {no_model}: 数据库更新失败")
                return {
                    "no_model": no_model,
                    "success": False,
                    "reason": "数据库更新失败",
                    "rated_rotor_spd": rated_rotor_spd,
                    "rated_gen_spd": rated_gen_spd,
                    "transmission_ratio": transmission_ratio,
                    "turbine_type": turbine_type,
                    "data_points": len(scada_data)
                }
                
        except Exception as e:
            logger.error(f"计算机型 {no_model} 参数时出错: {e}")
            return {
                "no_model": no_model,
                "success": False,
                "reason": str(e),
                "data_points": 0
            }
+    
+    def calculate_and_update_all_parameters(self) -> Dict[str, Any]:
+        """
+        计算并更新所有机型的额定参数
+        
+        Returns:
+            计算统计信息
+        """
+        try:
+            logger.info("开始计算并更新所有机型的额定参数...")
+            
+            # 获取所有机型标识
+            no_model_list = self.get_no_model_list()
+            
+            if not no_model_list:
+                logger.warning("没有找到任何机型标识")
+                return {
+                    "total_models": 0,
+                    "success_count": 0,
+                    "failed_count": 0,
+                    "results": []
+                }
+            
+            logger.info(f"共发现 {len(no_model_list)} 个机型需要计算")
+            
+            results = []
+            success_count = 0
+            failed_count = 0
+            
+            # 遍历每个机型进行计算
+            for i, no_model in enumerate(no_model_list, 1):
+                logger.info(f"处理机型 {i}/{len(no_model_list)}: {no_model}")
+                
+                result = self.calculate_and_update_parameters_for_model(no_model)
+                results.append(result)
+                
+                if result.get("success"):
+                    success_count += 1
+                else:
+                    failed_count += 1
+            
+            # 汇总统计
+            stats = {
+                "total_models": len(no_model_list),
+                "success_count": success_count,
+                "failed_count": failed_count,
+                "success_rate": success_count / len(no_model_list) * 100 if no_model_list else 0,
+                "results": results
+            }
+            
+            logger.info(f"参数计算完成: 成功={success_count}, 失败={failed_count}, "
+                       f"成功率={stats['success_rate']:.1f}%")
+            
+            return stats
+            
+        except Exception as e:
+            logger.error(f"计算并更新所有参数失败: {e}")
+            raise
+    
+    def insert_model_data(self, model_data: List[Dict[str, Any]]) -> int:
+        """
+        将机型数据插入到info_model_turbine表
+        
+        Args:
+            model_data: 机型数据列表
+            
+        Returns:
+            int: 成功插入的记录数
+        """
+        if not model_data:
+            logger.warning("没有数据需要插入")
+            return 0
+        
+        insert_sql = """
+        INSERT INTO info_model_turbine 
+        (no_model, model, manufacturer, rated_capacity, cut_in_wind_speed, 
+         cut_out_wind_speed, rotor_diameter, hub_height, turbine_count)
+        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
+        ON DUPLICATE KEY UPDATE
+        turbine_count = VALUES(turbine_count),
+        updated_at = CURRENT_TIMESTAMP
+        """
+        
+        try:
+            logger.info(f"开始向info_model_turbine表插入 {len(model_data)} 条记录...")
+            
+            # 准备插入数据
+            insert_data = []
+            for item in model_data:
+                insert_data.append((
+                    item['no_model'],
+                    item['model'],
+                    item['manufacturer'],
+                    item['rated_capacity'],
+                    item['cut_in_wind_speed'],
+                    item['cut_out_wind_speed'],
+                    item['rotor_diameter'],
+                    item['hub_height'],
+                    item.get('turbine_count', 1)  # 使用查询中的count值
+                ))
+            
+            # 批量插入数据
+            affected_rows = self.execute_batch_update(insert_sql, insert_data)
+            
+            logger.info(f"成功插入/更新 {affected_rows} 条记录到info_model_turbine表")
+            return affected_rows
+            
+        except Exception as e:
+            logger.error(f"插入机型数据失败: {e}")
+            raise
+    
+    def get_model_turbine_stats(self) -> Dict[str, Any]:
+        """
+        获取info_model_turbine表的统计信息
+        
+        Returns:
+            统计信息字典
+        """
+        try:
+            # 查询统计信息
+            stats_sql = """
+            SELECT 
+                COUNT(*) as total_models,
+                COUNT(DISTINCT manufacturer) as manufacturer_count,
+                COUNT(DISTINCT model) as model_count,
+                MIN(rated_capacity) as min_capacity,
+                MAX(rated_capacity) as max_capacity,
+                AVG(rated_capacity) as avg_capacity,
+                SUM(turbine_count) as total_turbines,
+                COUNT(CASE WHEN rated_rotor_spd IS NOT NULL AND rated_rotor_spd > 0 THEN 1 END) as calculated_models,
+                COUNT(DISTINCT turbine_type) as turbine_type_count,
+                SUM(data_points) as total_data_points
+            FROM info_model_turbine
+            """
+            
+            result = self.execute_query(stats_sql)
+            return result[0] if result else {}
+            
+        except Exception as e:
+            logger.error(f"获取统计信息失败: {e}")
+            return {}
+    
+    def get_turbine_type_distribution(self) -> List[Dict[str, Any]]:
+        """
+        获取风机类型分布
+        
+        Returns:
+            类型分布列表
+        """
+        try:
+            type_sql = """
+            SELECT 
+                IFNULL(turbine_type, '未知') as turbine_type,
+                COUNT(*) as model_count,
+                SUM(turbine_count) as turbine_count,
+                AVG(transmission_ratio) as avg_ratio,
+                AVG(rated_rotor_spd) as avg_rotor_spd,
+                AVG(rated_gen_spd) as avg_gen_spd
+            FROM info_model_turbine
+            WHERE turbine_type IS NOT NULL
+            GROUP BY turbine_type
+            ORDER BY model_count DESC
+            """
+            
+            return self.execute_query(type_sql)
+            
+        except Exception as e:
+            logger.error(f"获取风机类型分布失败: {e}")
+            return []
+    
+    def print_model_summary(self, model_data: List[Dict[str, Any]]):
+        """
+        打印机型数据摘要
+        
+        Args:
+            model_data: 机型数据列表
+        """
+        if not model_data:
+            logger.info("没有机型数据")
+            return
+        
+        print("\n" + "="*80)
+        print("风机机型数据摘要")
+        print("="*80)
+        print(f"总机型数: {len(model_data)}")
+        
+        # 按制造商统计
+        manufacturers = {}
+        for item in model_data:
+            manufacturer = item.get('manufacturer', '未知')
+            manufacturers[manufacturer] = manufacturers.get(manufacturer, 0) + 1
+        
+        print(f"制造商数: {len(manufacturers)}")
+        print("\n制造商分布:")
+        for manufacturer, count in sorted(manufacturers.items(), key=lambda x: x[1], reverse=True):
+            print(f"  {manufacturer}: {count} 种机型")
+        
+        # 按额定容量统计
+        capacities = {}
+        for item in model_data:
+            capacity = item.get('rated_capacity', 0)
+            if capacity:
+                capacity_range = f"{capacity}kW"
+                capacities[capacity_range] = capacities.get(capacity_range, 0) + 1
+        
+        print("\n额定容量分布:")
+        for capacity, count in sorted(capacities.items(), key=lambda x: int(x[0].replace('kW', ''))):
+            print(f"  {capacity}: {count} 种机型")
+        
+        print("="*80)
+    
    def print_calculation_summary(self, stats: Dict[str, Any]):
        """Print a console summary of the parameter-calculation statistics.

        Args:
            stats: summary dict as returned by
            calculate_and_update_all_parameters().
        """
        print("\n" + "="*80)
        print("参数计算统计")
        print("="*80)
        print(f"总机型数: {stats.get('total_models', 0)}")
        print(f"成功计算的机型数: {stats.get('success_count', 0)}")
        print(f"失败的机型数: {stats.get('failed_count', 0)}")
        print(f"成功率: {stats.get('success_rate', 0):.1f}%")
        
        # Show a sample (up to 5) of the successfully calculated models.
        success_results = [r for r in stats.get('results', []) if r.get('success')]
        if success_results:
            print(f"\n成功计算的机型示例 (前5个):")
            for i, result in enumerate(success_results[:5]):
                print(f"  {i+1}. {result['no_model']}:")
                print(f"     叶轮转速: {result['rated_rotor_spd']:.2f} rpm")
                print(f"     发电机转速: {result['rated_gen_spd']:.2f} rpm")
                print(f"     传动比: {result['transmission_ratio']:.2f}")
                print(f"     类型: {result['turbine_type']}")
                print(f"     数据点数: {result['data_points']}")
        
        # Show a sample (up to 5) of the failures with their reasons.
        failed_results = [r for r in stats.get('results', []) if not r.get('success')]
        if failed_results:
            print(f"\n失败的机型示例 (前5个):")
            for i, result in enumerate(failed_results[:5]):
                print(f"  {i+1}. {result['no_model']}: {result.get('reason', '未知原因')}")
        
        print("="*80)
+    
+    def run_model_extraction_pipeline(self, recreate_table: bool = False, calculate_params: bool = True) -> bool:
+        """
+        运行完整的机型数据提取和参数计算流程
+        
+        Args:
+            recreate_table: 是否重新创建表
+            calculate_params: 是否计算额定参数
+            
+        Returns:
+            bool: 整个流程是否成功
+        """
+        try:
+            logger.info("开始执行风机机型数据提取和参数计算流程...")
+            
+            # 步骤1: 检查或创建表
+            if recreate_table:
+                self.drop_model_turbine_table()
+                self.create_model_turbine_table()
+            else:
+                if not self.check_table_exists():
+                    self.create_model_turbine_table()
+                else:
+                    logger.info("info_model_turbine表已存在,将追加数据")
+                    # 检查并添加缺失的列
+                    self.add_missing_columns()
+            
+            # 步骤2: 从info_turbine表获取机型数据
+            model_data = self.get_model_data()
+            if not model_data:
+                logger.error("未获取到机型数据,流程终止")
+                return False
+            
+            # 步骤3: 打印摘要信息
+            self.print_model_summary(model_data)
+            
+            # 步骤4: 插入数据到info_model_turbine表
+            inserted_count = self.insert_model_data(model_data)
+            
+            # 步骤5: 计算额定参数
+            if calculate_params:
+                calculation_stats = self.calculate_and_update_all_parameters()
+                self.print_calculation_summary(calculation_stats)
+            
+            # 步骤6: 获取并显示最终统计信息
+            stats = self.get_model_turbine_stats()
+            if stats:
+                print("\n数据库统计信息:")
+                print(f"  总机型数: {stats.get('total_models', 0)}")
+                print(f"  制造商数: {stats.get('manufacturer_count', 0)}")
+                print(f"  总风机数: {stats.get('total_turbines', 0)}")
+                print(f"  额定容量范围: {stats.get('min_capacity', 0)}kW - {stats.get('max_capacity', 0)}kW")
+                print(f"  平均额定容量: {round(stats.get('avg_capacity', 0), 1)}kW")
+                print(f"  已计算参数的机型数: {stats.get('calculated_models', 0)}")
+                print(f"  使用的总数据点数: {stats.get('total_data_points', 0)}")
+            
+            # 显示风机类型分布
+            type_dist = self.get_turbine_type_distribution()
+            if type_dist:
+                print("\n风机类型分布:")
+                for item in type_dist:
+                    print(f"  {item['turbine_type']}: {item['model_count']} 种机型, "
+                          f"{item['turbine_count']} 台风机")
+                    if item['avg_ratio']:
+                        print(f"    平均传动比: {item['avg_ratio']:.2f}, "
+                              f"平均叶轮转速: {item['avg_rotor_spd']:.2f} rpm, "
+                              f"平均发电机转速: {item['avg_gen_spd']:.2f} rpm")
+            
+            logger.info("风机机型数据提取和参数计算流程执行完成!")
+            return True
+            
+        except Exception as e:
+            logger.error(f"流程执行失败: {e}")
+            import traceback
+            traceback.print_exc()
+            return False
+        finally:
+            # 关闭连接池
+            if self.connection_pool:
+                self.connection_pool.close_all()
+
+
def main():
    """Entry point: build the DB config, run the pipeline, and log the outcome."""
    # NOTE(review): credentials are hard-coded in source — consider moving them
    # to environment variables or an external config file.
    db_config = DatabaseConfig(
        host="106.120.102.238",
        port=44000,
        user='root',
        password='123456',
        database='wind_data',
        charset='utf8mb4',
        max_connections=2,
        mem_quota=4 << 30  # 4GB per-query memory quota
    )

    manager = ModelTurbineManager(db_config)

    # recreate_table=True would drop and rebuild the table first;
    # calculate_params=True also derives the rated parameters.
    succeeded = manager.run_model_extraction_pipeline(
        recreate_table=False,
        calculate_params=True
    )

    if succeeded:
        logger.info("风机机型数据提取和参数计算成功完成!")
    else:
        logger.error("风机机型数据提取和参数计算失败!")


if __name__ == "__main__":
    main()

+ 268 - 74
dataStorage_datang/main.py

@@ -1,11 +1,11 @@
 # 主程序
 
-
 import os
 import sys
 import time
+import json
 from datetime import datetime
-from typing import List
+from typing import List, Set, Dict, Any
 import logging
 
 # 添加项目根目录到Python路径
@@ -29,6 +29,75 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
+
class ProcessedRecordManager:
    """Tracks which files have already been processed.

    The record is persisted as a JSON array of file paths; membership tests
    run against an in-memory set, so lookups are O(1).
    """

    # Class-scoped logger (same name the module logger would use) so the
    # class is self-contained.
    _logger = logging.getLogger(__name__)

    def __init__(self, record_file: str = "record_processed.json"):
        """
        Initialize the record manager and load any existing records.

        Args:
            record_file: path of the JSON file holding processed-file records.
        """
        self.record_file = record_file
        # Set of absolute/relative paths exactly as they were recorded.
        self.processed_files: Set[str] = set()
        self._load_records()

    def _load_records(self):
        """Load previously recorded paths; a missing or corrupt file yields an empty set."""
        try:
            if os.path.exists(self.record_file):
                with open(self.record_file, 'r', encoding='utf-8') as f:
                    records = json.load(f)
                if isinstance(records, list):
                    self.processed_files = set(records)
                    self._logger.info(f"📁 已加载 {len(self.processed_files)} 个已处理文件记录")
                else:
                    # Fix: the original logged "loaded N records" even when the
                    # JSON was not a list; warn explicitly instead.
                    self._logger.warning(f"❌ 记录文件格式异常(非列表),已忽略: {self.record_file}")
            else:
                self._logger.info(f"📁 记录文件不存在,将创建新文件: {self.record_file}")
        except Exception as e:
            self._logger.warning(f"❌ 加载记录文件失败: {e}")
            self.processed_files = set()

    def is_processed(self, file_path: str) -> bool:
        """Return True if *file_path* has already been recorded as processed."""
        return file_path in self.processed_files

    def add_record(self, file_path: str):
        """Record *file_path* as processed (idempotent: set.add handles duplicates)."""
        self.processed_files.add(file_path)

    def save_records(self):
        """Atomically persist the records to disk.

        Returns:
            bool: True on success, False when the write failed.
        """
        try:
            # Sorted for human readability of the JSON file.
            records_list = sorted(self.processed_files)

            # Write to a temp file then atomically swap it in, so a crash
            # mid-write cannot leave a truncated/corrupt record file.
            tmp_path = self.record_file + ".tmp"
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(records_list, f, ensure_ascii=False, indent=2)
            os.replace(tmp_path, self.record_file)

            self._logger.info(f"💾 已保存 {len(records_list)} 个处理记录到 {self.record_file}")
            return True
        except Exception as e:
            self._logger.error(f"❌ 保存记录文件失败: {e}")
            return False

    def get_record_count(self) -> int:
        """Return the number of recorded files."""
        return len(self.processed_files)

    def clear_records(self):
        """Drop all in-memory records and delete the record file if present."""
        self.processed_files.clear()
        try:
            if os.path.exists(self.record_file):
                os.remove(self.record_file)
                self._logger.info(f"🗑️  已删除记录文件: {self.record_file}")
        except Exception as e:
            self._logger.warning(f"删除记录文件失败: {e}")
+
+
 class ParquetProcessor:
     """Parquet文件处理器主类"""
     
@@ -38,6 +107,74 @@ class ParquetProcessor:
         self.file_infos: List[ParquetFileInfo] = []
         self.db_manager = DatabaseManager(config.db_config, config.table_config)
         
+        # 初始化记录管理器
+        self.record_manager = ProcessedRecordManager()
+        
+        # 统计信息
+        self.stats = {
+            'start_time': None,
+            'end_time': None,
+            'total_files': 0,
+            'processed_files': 0,
+            'skipped_files': 0,
+            'failed_files': 0,
+            'total_rows': 0,
+            'inserted_rows': 0,
+            'updated_rows': 0,
+            'table_initial_count': 0,
+            'table_final_count': 0
+        }
+    
+    def filter_unprocessed_files(self, file_infos: List[ParquetFileInfo]) -> List[ParquetFileInfo]:
+        """
+        过滤掉已处理过的文件
+        
+        Args:
+            file_infos: 所有扫描到的文件信息
+            
+        Returns:
+            List[ParquetFileInfo]: 未处理过的文件信息
+        """
+        unprocessed_files = []
+        skipped_count = 0
+        
+        for file_info in file_infos:
+            if self.record_manager.is_processed(file_info.file_path):
+                skipped_count += 1
+                logger.debug(f"⏭️  跳过已处理文件: {os.path.basename(file_info.file_path)}")
+            else:
+                unprocessed_files.append(file_info)
+        
+        if skipped_count > 0:
+            logger.info(f"⏭️  跳过 {skipped_count} 个已处理过的文件")
+            logger.info(f"📝 需要处理 {len(unprocessed_files)} 个新文件")
+        
+        return unprocessed_files
+    
+    def update_record_for_file(self, file_info: ParquetFileInfo, success: bool = True):
+        """
+        更新文件处理记录
+        
+        Args:
+            file_info: 文件信息
+            success: 是否处理成功
+        """
+        if success:
+            self.record_manager.add_record(file_info.file_path)
+            logger.info(f"✅ 已记录成功处理文件: {os.path.basename(file_info.file_path)}")
+        else:
+            logger.warning(f"❌ 文件处理失败,不记录: {os.path.basename(file_info.file_path)}")
+    
+    def save_processed_records(self):
+        """保存处理记录到文件"""
+        try:
+            if self.record_manager.save_records():
+                logger.info(f"💾 处理记录已保存,共 {self.record_manager.get_record_count()} 个文件")
+            else:
+                logger.warning("⚠️  处理记录保存失败")
+        except Exception as e:
+            logger.error(f"❌ 保存处理记录时出错: {e}")
+    
     def run(self):
         """运行整个处理流程"""
         logger.info("=" * 60)
@@ -45,17 +182,30 @@ class ParquetProcessor:
         logger.info("=" * 60)
         
         try:
+            # 记录开始时间
+            self.stats['start_time'] = datetime.now()
+            
             # 步骤1: 扫描文件
             logger.info("\n步骤1: 扫描Parquet文件...")
-            self.file_infos = self.file_scanner.scan_files()
+            all_file_infos = self.file_scanner.scan_files()
             
             # 打印扫描结果摘要
-            if self.file_infos:
-                logger.info(f"扫描完成!共找到 {len(self.file_infos)} 个parquet文件")
-                logger.info(f"机型型号种类: {len(set(f.model_type for f in self.file_infos))}")
-                logger.info(f"风场数量: {len(set(f.farm_id for f in self.file_infos))}")
+            if all_file_infos:
+                logger.info(f"📁 扫描完成!共找到 {len(all_file_infos)} 个parquet文件")
+                logger.info(f"📊 机型型号种类: {len(set(f.model_type for f in all_file_infos))}")
+                logger.info(f"🌾 风场数量: {len(set(f.farm_id for f in all_file_infos))}")
             else:
-                logger.warning("未找到任何parquet文件,程序退出")
+                logger.warning("⚠️  未找到任何parquet文件,程序退出")
+                return
+            
+            # 过滤已处理过的文件
+            self.file_infos = self.filter_unprocessed_files(all_file_infos)
+            self.stats['skipped_files'] = len(all_file_infos) - len(self.file_infos)
+            self.stats['total_files'] = len(all_file_infos)
+            
+            if not self.file_infos:
+                logger.info("🎉 所有文件都已处理过,无需处理新文件")
+                logger.info(f"📊 已处理文件总数: {self.record_manager.get_record_count()}")
                 return
             
             # 步骤2: 读取表头并识别时间字段
@@ -66,17 +216,17 @@ class ParquetProcessor:
             
             # 确定唯一键
             unique_keys = schema_reader.get_unique_key_columns(self.config.table_config.unique_keys)
-            logger.info(f"确定三字段唯一键: {unique_keys}")
+            logger.info(f"🔑 确定三字段唯一键: {unique_keys}")
             
             if not schema_reader.identified_time_column:
-                logger.error("未识别到时间字段,无法创建三字段唯一键!")
-                logger.error("请检查数据中是否包含时间字段,或配置正确的时间字段别名")
+                logger.error("未识别到时间字段,无法创建三字段唯一键!")
+                logger.error("💡 请检查数据中是否包含时间字段,或配置正确的时间字段别名")
                 return
             
             # 显示部分字段信息
             if all_columns:
-                logger.info(f"字段数量: {len(all_columns)}")
-                logger.info("前20个字段:")
+                logger.info(f"📋 字段数量: {len(all_columns)}")
+                logger.info("📊 前20个字段:")
                 for i, col in enumerate(sorted(list(all_columns))[:20]):
                     logger.info(f"  {i+1:2d}. {col}")
             
@@ -89,25 +239,27 @@ class ParquetProcessor:
                     unique_keys
                 )
                 if not success:
-                    logger.error("创建表失败,程序退出")
+                    logger.error("创建表失败,程序退出")
                     return
             else:
-                logger.info(f"表 {self.config.table_config.table_name} 已存在")
+                logger.info(f"表 {self.config.table_config.table_name} 已存在")
                 # 检查表结构
                 stats = self.db_manager.get_table_stats(self.config.table_config.table_name)
                 if stats:
-                    logger.info(f"当前表统计: {stats}")
+                    logger.info(f"📊 当前表统计: {stats}")
                 
                 # 检查重复键
                 duplicates = self.db_manager.check_duplicate_keys(self.config.table_config.table_name)
                 if duplicates:
-                    logger.warning(f"发现重复的唯一键记录: {len(duplicates)} 组")
+                    logger.warning(f"⚠️  发现重复的唯一键记录: {len(duplicates)} 组")
                     for dup in duplicates[:5]:  # 显示前5个重复
-                        logger.warning(f"  重复: {dup}")
+                        logger.warning(f"  🔄 重复: {dup}")
             
             # 检查表初始行数
-            initial_count = self.db_manager.get_table_row_count(self.config.table_config.table_name)
-            logger.info(f"表初始行数: {initial_count:,}")
+            self.stats['table_initial_count'] = self.db_manager.get_table_row_count(
+                self.config.table_config.table_name
+            )
+            logger.info(f"📊 表初始行数: {self.stats['table_initial_count']:,}")
             
             # 步骤4: 使用线程池加载数据
             logger.info(f"\n步骤4: 使用线程池加载数据({self.config.max_workers}个线程,批量大小: {self.config.batch_size},模式: {'UPSERT' if self.config.upsert_enabled else 'INSERT'})...")
@@ -121,7 +273,7 @@ class ParquetProcessor:
             thread_pool = ThreadPoolManager(max_workers=self.config.max_workers)
             
             start_time = datetime.now()
-            logger.info(f"开始时间: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
+            logger.info(f"开始时间: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
             
             # 处理文件
             results = thread_pool.process_with_data_loader(self.file_infos, data_loader)
@@ -129,73 +281,114 @@ class ParquetProcessor:
             end_time = datetime.now()
             elapsed = end_time - start_time
             
-            # 统计结果
-            success_results = [r for r in results if isinstance(r[1], tuple)]
-            failed_results = [r for r in results if r[1] is None or isinstance(r[1], Exception)]
+            # 统计结果并更新记录
+            success_results = []
+            failed_results = []
             
-            total_rows_processed = 0
-            total_rows_inserted = 0
-            total_rows_updated = 0
-            
-            for _, result in success_results:
+            for file_info, result in results:
                 if isinstance(result, tuple) and len(result) == 3:
+                    # 成功处理
+                    success_results.append((file_info, result))
+                    self.update_record_for_file(file_info, success=True)
+                    
+                    # 累加统计
                     total_rows, inserted_rows, updated_rows = result
-                    total_rows_processed += total_rows
-                    total_rows_inserted += inserted_rows
-                    total_rows_updated += updated_rows
-            
-            logger.info(f"\n{'='*60}")
-            logger.info("数据处理完成!")
-            logger.info(f"{'='*60}")
-            logger.info(f"总文件数: {len(self.file_infos)}")
-            logger.info(f"成功处理: {len(success_results)} 个文件")
-            logger.info(f"失败文件: {len(failed_results)} 个文件")
-            logger.info(f"总处理行数: {total_rows_processed:,}")
-            logger.info(f"  新插入行数: {total_rows_inserted:,}")
-            logger.info(f"  更新行数: {total_rows_updated:,}")
-            logger.info(f"总耗时: {elapsed}")
-            
-            if len(self.file_infos) > 0:
-                avg_time_per_file = elapsed.total_seconds() / len(self.file_infos)
-                logger.info(f"平均每个文件处理时间: {avg_time_per_file:.2f} 秒")
+                    self.stats['total_rows'] += total_rows
+                    self.stats['inserted_rows'] += inserted_rows
+                    self.stats['updated_rows'] += updated_rows
+                    self.stats['processed_files'] += 1
+                    
+                else:
+                    # 处理失败
+                    failed_results.append((file_info, result))
+                    self.update_record_for_file(file_info, success=False)
+                    self.stats['failed_files'] += 1
+            
+            # 保存处理记录
+            self.save_processed_records()
             
             # 检查最终行数和统计
-            final_count = self.db_manager.get_table_row_count(self.config.table_config.table_name)
-            logger.info(f"表最终行数: {final_count:,}")
-            logger.info(f"实际新增行数: {final_count - initial_count:,}")
-            
-            # 获取最新统计
-            final_stats = self.db_manager.get_table_stats(self.config.table_config.table_name)
-            if final_stats:
-                logger.info("\n表最终统计:")
-                for key, value in final_stats.items():
-                    logger.info(f"  {key}: {value}")
-            
-            # 检查重复键
-            final_duplicates = self.db_manager.check_duplicate_keys(self.config.table_config.table_name)
-            if final_duplicates:
-                logger.warning(f"\n最终重复的唯一键记录: {len(final_duplicates)} 组")
-                for dup in final_duplicates[:10]:  # 显示前10个重复
-                    logger.warning(f"  重复: {dup}")
-            else:
-                logger.info("\n✓ 无重复的唯一键记录")
+            self.stats['table_final_count'] = self.db_manager.get_table_row_count(
+                self.config.table_config.table_name
+            )
+            self.stats['end_time'] = datetime.now()
+            
+            # 打印最终统计
+            self.print_summary(elapsed)
             
             # 打印失败文件
             if failed_results:
-                logger.warning("\n失败文件列表:")
-                for file_info, error in failed_results:
-                    logger.warning(f"  {file_info.file_path}: {error}")
+                self.print_failed_files(failed_results)
             
         except KeyboardInterrupt:
-            logger.warning("\n用户中断程序执行")
+            logger.warning("\n⚠️  用户中断程序执行")
+            # 尝试保存已处理的记录
+            self.save_processed_records()
+            
         except Exception as e:
-            logger.error(f"\n程序执行出错: {e}")
+            logger.error(f"\n❌ 程序执行出错: {e}")
             import traceback
             logger.error(traceback.format_exc())
+            # 尝试保存已处理的记录
+            self.save_processed_records()
+            
         finally:
             # 清理
             self.db_manager.close()
-            logger.info("程序执行完成")
+            logger.info("🏁 程序执行完成")
+    
+    def print_summary(self, elapsed):
+        """打印处理摘要"""
+        logger.info(f"\n{'='*60}")
+        logger.info("🎉 数据处理完成!")
+        logger.info(f"{'='*60}")
+        logger.info(f"📁 总文件数: {self.stats['total_files']}")
+        logger.info(f"⏭️  跳过文件: {self.stats['skipped_files']} (已处理过)")
+        logger.info(f"✅ 成功处理: {self.stats['processed_files']} 个文件")
+        logger.info(f"❌ 失败文件: {self.stats['failed_files']} 个文件")
+        logger.info(f"📊 总处理行数: {self.stats['total_rows']:,}")
+        logger.info(f"  📥 新插入行数: {self.stats['inserted_rows']:,}")
+        logger.info(f"  🔄 更新行数: {self.stats['updated_rows']:,}")
+        logger.info(f"⏱️  总耗时: {elapsed}")
+        
+        if self.stats['processed_files'] > 0:
+            avg_time_per_file = elapsed.total_seconds() / self.stats['processed_files']
+            logger.info(f"📈 平均每个文件处理时间: {avg_time_per_file:.2f} 秒")
+        
+        logger.info(f"📊 表初始行数: {self.stats['table_initial_count']:,}")
+        logger.info(f"📊 表最终行数: {self.stats['table_final_count']:,}")
+        logger.info(f"📈 实际新增行数: {self.stats['table_final_count'] - self.stats['table_initial_count']:,}")
+        
+        # 获取最新统计
+        final_stats = self.db_manager.get_table_stats(self.config.table_config.table_name)
+        if final_stats:
+            logger.info("\n📈 表最终统计:")
+            for key, value in final_stats.items():
+                logger.info(f"  {key}: {value}")
+        
+        # 检查重复键
+        final_duplicates = self.db_manager.check_duplicate_keys(self.config.table_config.table_name)
+        if final_duplicates:
+            logger.warning(f"\n⚠️  最终重复的唯一键记录: {len(final_duplicates)} 组")
+            for dup in final_duplicates[:10]:  # 显示前10个重复
+                logger.warning(f"  🔄 重复: {dup}")
+        else:
+            logger.info("\n✅ 无重复的唯一键记录")
+        
+        # 记录文件统计
+        logger.info(f"\n💾 已记录处理文件总数: {self.record_manager.get_record_count()}")
+    
+    def print_failed_files(self, failed_results):
+        """打印失败文件列表"""
+        logger.warning(f"\n⚠️  失败文件列表 ({len(failed_results)} 个):")
+        for file_info, error in failed_results:
+            logger.warning(f"  📄 {os.path.basename(file_info.file_path)}")
+            if isinstance(error, Exception):
+                logger.warning(f"    错误类型: {type(error).__name__}")
+                logger.warning(f"    错误信息: {str(error)[:200]}" + ("..." if len(str(error)) > 200 else ""))
+            else:
+                logger.warning(f"    错误: {error}")
+
 
 def main():
     """主函数"""
@@ -227,13 +420,14 @@ def main():
     
     # 检查路径是否存在
     if not os.path.exists(config.base_path):
-        logger.error(f"错误: 路径不存在: {config.base_path}")
-        logger.error("请修改config.py中的base_path为正确的路径")
+        logger.error(f"错误: 路径不存在: {config.base_path}")
+        logger.error("💡 请修改config.py中的base_path为正确的路径")
         sys.exit(1)
     
     # 运行处理器
     processor = ParquetProcessor(config)
     processor.run()
 
+
 if __name__ == "__main__":
     main()

+ 137 - 0
dataStorage_datang/sql/Script-datang.sql

@@ -0,0 +1,137 @@
+select *
+from info_curve_power_turbine icpt 
+where 1=1
+and icpt.standard_model = 'CCWE1500-82.DF'
+-- and  icpt.standard_model ='CCWE1500-87.DF'
+-- and icpt.wind_speed >=0.5 and icpt.wind_speed <3.5
+and icpt.wind_speed >=18
+order by icpt.wind_farm_id,icpt.description ,icpt.standard_model ,icpt.wind_speed 
+;
+
+
+select model,manufacturer
+from info_model_turbine imt 
+group by model,manufacturer
+;
+
+select dst.no_model_turbine 
+from data_scada_turbine dst 
+group by dst.no_model_turbine 
+;
+
+select dst.id_turbine 
+from data_scada_turbine dst 
+group by dst.id_turbine 
+;
+
+select count(1)
+from data_scada_turbine dst 
+;
+
+
+select dst.id_farm,dst.name_farm,dst.id_turbine,dst.data_time,dst.*
+from data_scada_turbine dst 
+where 1=1
+and dst.id_farm = 'Twr2SPNt'
+;
+
+
+select dst.id_farm,dst.name_farm,dst.id_turbine,dst.data_time,dst.no_model_turbine,dst.wtg_sts,dst.scada_wtg_sts,dst.hyd_station_prs , dst.wind_spd ,dst.wind_dir,dst.yaw_ang,dst.yaw_to_wind_ang  ,dst.impeller_spd ,dst.rotor_spd,dst.pitch_ang_act_1 ,dst.pitch_ang_act_2 ,dst.pitch_ang_act_3,dst.pitch_motor_cur_1  ,dst.pitch_motor_cur_2 ,dst.pitch_motor_cur_3 ,dst.gearbox_spd_1 ,dst.gearbox_spd_2  , dst.gen_spd ,dst.p_active 
+from data_scada_turbine dst 
+where 1=1
+and dst.rotor_spd is not null
+and (
+dst.gearbox_spd_1 is not null 
+or 
+dst.gearbox_spd_2  is not null 
+)
+and dst.gen_spd is not null 
+and dst.p_active is not null 
+;
+
+
+
+select *
+from info_curve_power_turbine icpt 
+where 1=1
+and icpt.standard_model ='CCWE1500-82.DF'
+;
+
+
+select 
+it.wind_farm_id ,it.model , 
+imt.no_model ,
+imt.rated_capacity ,imt.cut_in_wind_speed,imt.cut_out_wind_speed,imt.rotor_diameter,imt.hub_height ,
+icpt.rated_wind_speed
+from info_turbine it ,info_model_turbine imt ,info_curve_power_turbine icpt 
+where 1=1
+and it.model =imt.model
+and it.cut_in_wind_speed =imt.cut_in_wind_speed
+and it.cut_out_wind_speed =imt.cut_out_wind_speed
+and it.rotor_diameter =imt.rotor_diameter
+and it.hub_height =imt.hub_height 
+and it.wind_farm_id =icpt.wind_farm_id
+and it.model =icpt.standard_model
+group by 
+it.wind_farm_id ,it.model , 
+imt.no_model ,
+imt.rated_capacity ,imt.cut_in_wind_speed,imt.cut_out_wind_speed,imt.rotor_diameter,imt.hub_height ,
+icpt.rated_wind_speed
+;
+
+SET SESSION tidb_mem_quota_query = 4 << 30; -- TiDB 对单条查询sql 增加最大内存
+--
+select 
+it.wind_farm_id,it.turbine_id ,it.model,
+icpt.rated_wind_speed,
+imt.rated_capacity ,imt.cut_in_wind_speed,imt.cut_out_wind_speed,imt.rotor_diameter,imt.hub_height ,
+dst.data_time,dst.wtg_sts,dst.scada_wtg_sts,dst.hyd_station_prs , dst.wind_spd ,dst.wind_dir,dst.yaw_ang,dst.yaw_to_wind_ang  ,dst.impeller_spd ,dst.rotor_spd,dst.pitch_ang_act_1 ,dst.pitch_ang_act_2 ,dst.pitch_ang_act_3,dst.pitch_motor_cur_1  ,dst.pitch_motor_cur_2 ,dst.pitch_motor_cur_3 ,dst.gearbox_spd_1 ,dst.gearbox_spd_2  , dst.gen_spd ,dst.p_active
+from info_turbine it ,info_curve_power_turbine icpt,info_model_turbine imt ,data_scada_turbine dst  
+where 1=1
+and it.wind_farm_id =dst.id_farm
+and it.turbine_id =dst.id_turbine
+and it.model =dst.no_model_turbine
+and it.model =imt.model
+and it.cut_in_wind_speed =imt.cut_in_wind_speed
+and it.cut_out_wind_speed =imt.cut_out_wind_speed
+and it.rotor_diameter =imt.rotor_diameter
+and it.hub_height =imt.hub_height 
+and it.wind_farm_id =icpt.wind_farm_id
+and it.model =icpt.standard_model
+and dst.rotor_spd is not null
+and (
+dst.gearbox_spd_1 is not null 
+or 
+dst.gearbox_spd_2  is not null 
+)
+and dst.gen_spd is not null 
+and dst.p_active is not null 
+and it.model ='CCWE1500-82.DF'
+;
+
+-- 推算 机型的 额定叶轮转速、额定发电机转速、 传动比,获取数据sql 
+select 
+it.wind_farm_id,it.turbine_id ,
+it.model,
+icpt.rated_wind_speed,
+imt.no_model,
+imt.rated_capacity ,imt.cut_in_wind_speed,imt.cut_out_wind_speed,imt.rotor_diameter,imt.hub_height ,
+dst.data_time,
+dst.wind_spd ,dst.rotor_spd,dst.gen_spd ,dst.p_active
+
+from info_turbine it ,info_curve_power_turbine icpt,info_model_turbine imt ,data_scada_turbine dst  
+where 1=1
+and it.wind_farm_id =dst.id_farm
+and it.turbine_id =dst.id_turbine
+and it.model =dst.no_model_turbine
+and it.model =imt.model
+and it.cut_in_wind_speed =imt.cut_in_wind_speed
+and it.cut_out_wind_speed =imt.cut_out_wind_speed
+and it.rotor_diameter =imt.rotor_diameter
+and it.hub_height =imt.hub_height 
+and it.wind_farm_id =icpt.wind_farm_id
+and it.model =icpt.standard_model
+
+and dst.wind_spd>=icpt.rated_wind_speed
+and (dst.p_active>=imt.rated_capacity*0.95 and dst.p_active<=imt.rated_capacity*1.05)
+;