Healthtrain.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. import os
  2. import sys
  3. import pandas as pd
  4. from typing import Optional
  5. from app.services.HealthDataFetcher import DataFetcher
  6. from app.services.HealthPretrain import WindFarmPretrainModel
  7. from app.logger import logger
  8. from app.config import dataBase
  9. from app.database import get_engine
  10. # 配置
  11. WIND_CODE = "WOF093400005" # 张崾先:WOF091200030 七台河:WOF046400029 诺木洪:WOF093400005
  12. START_DATE = "2023-12-01 00:00:00 "# 张崾先:2023-10-20 00:00:00~2024-10-20 00:00:00 七台河:2023-10-02 00:00:00~2024-10-02 00:00:00
  13. END_DATE = "2024-05-30 23:59:59" #诺木洪 2023-12-01 00:00:00 ~ 2024-05-30 23:50:00
  14. MODEL_DIR = "health_models"
  15. MIN_SAMPLES = 100 # 最小训练样本数
  16. def fetch_turbine_data(fetcher: DataFetcher, wind_code: str, turbine_code: str) -> Optional[pd.DataFrame]:
  17. """获取单个风机的完整训练数据"""
  18. try:
  19. # 获取所有可用列
  20. columns = fetcher.get_turbine_columns(wind_code)
  21. if not columns:
  22. logger.warning(f"{turbine_code} 无可用数据列")
  23. return None
  24. table = f"{wind_code}_minute"
  25. # 构建查询 - 使用参数化查询防止SQL注入
  26. query = f"""
  27. SELECT *
  28. FROM {table}
  29. WHERE `wind_turbine_number` = %s
  30. AND `time_stamp` BETWEEN %s AND %s
  31. """
  32. # 执行查询
  33. logger.info(f"正在获取风机 {turbine_code} 数据...")
  34. df = pd.read_sql(
  35. query,
  36. get_engine(dataBase.DATA_DB),
  37. params=(turbine_code, START_DATE, END_DATE)
  38. )
  39. if df.empty:
  40. logger.warning(f"{turbine_code} 无数据")
  41. return None
  42. print("数据项",df)
  43. logger.info(f"获取到 {turbine_code} 数据 {len(df)} 条")
  44. return df
  45. except Exception as e:
  46. logger.error(f"获取 {turbine_code} 数据失败: {str(e)}")
  47. return None
  48. def train_windfarm_model(
  49. fetcher: DataFetcher,
  50. wind_code: str,
  51. turbines: pd.DataFrame
  52. ) -> bool:
  53. """训练风场模型"""
  54. try:
  55. # 获取所有风机数据
  56. data_dict = {}
  57. valid_turbines = []
  58. for idx, turbine_info in turbines.iterrows():
  59. turbine_code = turbine_info['engine_code']
  60. data = fetch_turbine_data(fetcher, wind_code, turbine_code)
  61. if data is not None and len(data) >= MIN_SAMPLES:
  62. data_dict[turbine_code] = data
  63. valid_turbines.append(turbine_info)
  64. if not data_dict:
  65. logger.error("无有效风机数据,无法训练风场模型")
  66. return False
  67. # 确定主要机型(取出现次数最多的机型)
  68. mill_type_counts = {}
  69. for turbine_info in valid_turbines:
  70. mill_type_num = fetcher.get_mill_type(turbine_info['mill_type_code'])
  71. mill_type = {1: 'dfig', 2: 'direct', 3: 'semi_direct'}.get(mill_type_num, 'unknown')
  72. if mill_type != 'unknown':
  73. mill_type_counts[mill_type] = mill_type_counts.get(mill_type, 0) + 1
  74. if not mill_type_counts:
  75. logger.error("无法确定风场主要机型")
  76. return False
  77. main_mill_type = max(mill_type_counts.items(), key=lambda x: x[1])[0]
  78. # 训练模型
  79. logger.info(f"开始训练风场 {wind_code} ({main_mill_type})模型...")
  80. model = WindFarmPretrainModel(wind_code)
  81. model.train(data_dict, main_mill_type)
  82. # 保存模型
  83. model_dir = os.path.join(MODEL_DIR, wind_code)
  84. model.save(model_dir)
  85. logger.info(f"风场 {wind_code} 模型训练完成并保存")
  86. return True
  87. except Exception as e:
  88. logger.error(f"训练风场模型失败: {str(e)}", exc_info=True)
  89. return False
  90. def main():
  91. """主训练流程"""
  92. logger.info("=== 开始健康评估模型训练 ===")
  93. logger.info(f"风场: {WIND_CODE}")
  94. logger.info(f"时间范围: {START_DATE} 至 {END_DATE}")
  95. # 初始化数据获取器
  96. fetcher = DataFetcher()
  97. # 获取风场下所有风机
  98. logger.info("获取风机列表...")
  99. turbines = fetcher.get_turbines(WIND_CODE)
  100. if turbines.empty:
  101. logger.error("无风机数据,终止训练")
  102. return
  103. logger.info(f"共发现 {len(turbines)} 台风机")
  104. # 创建模型目录
  105. model_dir = os.path.join(MODEL_DIR, WIND_CODE)
  106. os.makedirs(model_dir, exist_ok=True)
  107. # 训练风场模型
  108. if train_windfarm_model(fetcher, WIND_CODE, turbines):
  109. logger.info(f"风场 {WIND_CODE} 模型训练成功")
  110. else:
  111. logger.error(f"风场 {WIND_CODE} 模型训练失败")
  112. if __name__ == "__main__":
  113. try:
  114. main()
  115. except Exception as e:
  116. logger.error(f"训练流程异常终止: {str(e)}", exc_info=True)
  117. sys.exit(1)