train.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. """
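Training entry point: iterate over the turbine model types, train every
anomaly-detection model, and save the results.

Training-data selection rules (symmetric with the detection logic):
- '运行' rows: always included in training.
- '传感器异常' rows: included for a detector only when none of the sensor
  points that detector depends on is flagged as anomalous.
  Example: actual_torque is anomalous but wind_spd / p_active are normal
  -> the row can still be used to train the wind-speed/power models.
- '停机' / '限功率' rows: never used for training.

Usage:
    python train.py                   # train all model types
    python train.py --model <name>    # train only the given model type
    python train.py --list            # list the available model types and exit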
  11. """
  12. import argparse
  13. from pathlib import Path
  14. import joblib
  15. import pandas as pd
  16. from config import MODEL_SAVE_DIR
  17. from data_loader import list_model_types, load_model_type
  18. from labeler import get_model_statistics, label_dataframe, DETECTOR_SENSOR_COLS
  19. from models.wind_power import PowerCurveDetector, ScatterDetector
  20. from models.yaw import StaticYawDetector, CableTwistDetector
  21. from models.pitch import PitchRegulationDetector, PitchCoordDetector, MinPitchDetector
  22. from models.control_params import PowerQualityDetector, OperationStateDetector
  23. def get_model_dir(model_name: str) -> Path:
  24. d = MODEL_SAVE_DIR / model_name
  25. d.mkdir(parents=True, exist_ok=True)
  26. return d
  27. def _filter_for_detector(labeled: pd.DataFrame, detector_name: str) -> pd.DataFrame:
  28. """
  29. 返回可用于该检测器训练的数据:
  30. - status == '运行'
  31. - status 以 '传感器异常' 开头,且该检测器关心的传感器异常列全为 False
  32. """
  33. sensor_cols = DETECTOR_SENSOR_COLS.get(detector_name, [])
  34. existing_sc = [c for c in sensor_cols if c in labeled.columns]
  35. # 运行数据
  36. df_run = labeled[labeled["status"] == "运行"]
  37. # 传感器异常数据中,该检测器关心的测点无异常的行
  38. df_sensor = labeled[labeled["status"].str.startswith("传感器异常")]
  39. if not df_sensor.empty and existing_sc:
  40. no_anom = ~df_sensor[existing_sc].any(axis=1)
  41. df_sensor = df_sensor[no_anom]
  42. elif not df_sensor.empty and not existing_sc:
  43. # 该检测器无关联传感器列(如偏航),传感器异常数据也全部纳入
  44. pass
  45. else:
  46. df_sensor = pd.DataFrame()
  47. result = pd.concat([df_run, df_sensor], ignore_index=True)
  48. print(f" [训练数据] {detector_name}: 运行{len(df_run)} + 传感器异常可用{len(df_sensor)}"
  49. f" = {len(result)} 行")
  50. return result
  51. def train_wind_power(labeled: pd.DataFrame, model_dir: Path):
  52. for cls, fname, det_name, label in [
  53. (PowerCurveDetector, "wind_power_curve.pkl", "wind_power_curve", "功率曲线"),
  54. (ScatterDetector, "wind_power_scatter.pkl", "wind_power_scatter", "散点"),
  55. ]:
  56. df = _filter_for_detector(labeled, det_name)
  57. if df.empty:
  58. print(f" [风速功率] {label}跳过(无可用数据)")
  59. continue
  60. try:
  61. cls().fit(df).save(model_dir / fname)
  62. print(f" [风速功率] {label}模型已保存")
  63. except Exception as e:
  64. print(f" [风速功率] {label}训练失败: {e}")
  65. def train_yaw(labeled: pd.DataFrame, model_dir: Path):
  66. configs = [
  67. ("yaw_ang", "yaw_static.pkl", "yaw_static", "静态偏航", StaticYawDetector),
  68. ("twist_ang", "yaw_twist.pkl", "yaw_twist", "扭缆", CableTwistDetector),
  69. ]
  70. for req_col, fname, det_name, label, cls in configs:
  71. if req_col not in labeled.columns:
  72. print(f" [偏航] {label}跳过(缺少 {req_col} 列)")
  73. continue
  74. df = _filter_for_detector(labeled, det_name)
  75. if df.empty:
  76. print(f" [偏航] {label}跳过(无可用数据)")
  77. continue
  78. try:
  79. cls().fit(df).save(model_dir / fname)
  80. print(f" [偏航] {label}模型已保存")
  81. except Exception as e:
  82. print(f" [偏航] {label}训练失败: {e}")
  83. def train_pitch(labeled: pd.DataFrame, model_dir: Path):
  84. # A. 调节异常 & C. 最小桨距角
  85. for cls, fname, det_name, label in [
  86. (PitchRegulationDetector, "pitch_regulation.pkl", "pitch_regulation", "调节"),
  87. (MinPitchDetector, "pitch_min.pkl", "pitch_min", "最小桨距角"),
  88. ]:
  89. if "pitch_ang_act_1" not in labeled.columns:
  90. print(f" [变桨] {label}跳过(缺少 pitch_ang_act_1)")
  91. continue
  92. df = _filter_for_detector(labeled, det_name)
  93. if df.empty:
  94. print(f" [变桨] {label}跳过(无可用数据)")
  95. continue
  96. try:
  97. cls().fit(df).save(model_dir / fname)
  98. print(f" [变桨] {label}模型已保存")
  99. except Exception as e:
  100. print(f" [变桨] {label}训练失败: {e}")
  101. # B. 协调异常
  102. required = ["pitch_ang_act_1", "rotor_spd", "p_active"]
  103. if not all(c in labeled.columns for c in required):
  104. print(f" [变桨] 协调跳过(缺少必要列)")
  105. return
  106. df2 = _filter_for_detector(labeled, "pitch_coord")
  107. if df2.empty:
  108. print(f" [变桨] 协调跳过(无可用数据)")
  109. return
  110. try:
  111. PitchCoordDetector().fit(df2).save(model_dir / "pitch_coord.pkl")
  112. print(f" [变桨] 协调模型已保存")
  113. except Exception as e:
  114. print(f" [变桨] 协调训练失败: {e}")
  115. def train_control_params(labeled: pd.DataFrame, model_dir: Path):
  116. # A. 功率质量检测器
  117. df = _filter_for_detector(labeled, "ctrl_power_quality")
  118. if not df.empty:
  119. try:
  120. PowerQualityDetector().fit(df).save(model_dir / "ctrl_power_quality.pkl")
  121. print(f" [运行状态] 功率质量模型已保存")
  122. except Exception as e:
  123. print(f" [运行状态] 功率质量训练失败: {e}")
  124. else:
  125. print(f" [运行状态] 功率质量跳过(无可用数据)")
  126. # B. 运行状态综合检测器
  127. df2 = _filter_for_detector(labeled, "ctrl_op_state")
  128. if not df2.empty:
  129. try:
  130. OperationStateDetector().fit(df2).save(model_dir / "ctrl_op_state.pkl")
  131. print(f" [运行状态] 综合运行状态模型已保存")
  132. except Exception as e:
  133. print(f" [运行状态] 综合运行状态训练失败: {e}")
  134. else:
  135. print(f" [运行状态] 综合运行状态跳过(无可用数据)")
  136. def train_one(model_name: str):
  137. print(f"\n{'='*50}")
  138. print(f"开始训练机型: {model_name}")
  139. model_dir = get_model_dir(model_name)
  140. # ── 统一加载一次全量数据(所有列超集) ──
  141. print(f" [数据] 加载全量数据...")
  142. _optional = [
  143. "wind_spd", "gen_spd", "actual_torque",
  144. "pitch_ang_set_1", "pitch_ang_set_2", "pitch_ang_set_3",
  145. "pitch_ang_act_1", "pitch_ang_act_2", "pitch_ang_act_3",
  146. "pitch_spd_1", "pitch_spd_2", "pitch_spd_3",
  147. "rotor_spd", "yaw_ang", "twist_ang",
  148. "theory_p_active", "p_reactive", "grid_freq",
  149. "grid_ia", "grid_ib", "grid_ic",
  150. "grid_ua", "grid_ub", "grid_uc",
  151. "ambient_temp",
  152. ]
  153. df_raw = load_model_type(model_name, required_cols=["p_active"], optional_cols=_optional)
  154. if df_raw.empty or "p_active" not in df_raw.columns:
  155. print(f" [数据] 跳过(无数据或缺少 p_active)")
  156. return
  157. stats = get_model_statistics(df_raw)
  158. labeled = label_dataframe(df_raw, stats, model_name)
  159. if labeled.empty:
  160. print(f" [数据] 打标后为空,跳过")
  161. return
  162. # 保存 stats 供推理时使用,避免推理时重新加载全量数据
  163. joblib.dump(stats, model_dir / "model_stats.pkl")
  164. print(f" [数据] model_stats.pkl 已保存")
  165. train_wind_power(labeled, model_dir)
  166. train_yaw(labeled, model_dir)
  167. train_pitch(labeled, model_dir)
  168. train_control_params(labeled, model_dir)
  169. print(f"机型 {model_name} 训练完成,模型保存至: {model_dir}")
  170. def main():
  171. parser = argparse.ArgumentParser(description="风机异常检测模型训练")
  172. parser.add_argument("--model", type=str, default=None, help="指定机型名称,不填则训练所有机型")
  173. parser.add_argument("--list", action="store_true", help="列出所有可用机型后退出")
  174. args = parser.parse_args()
  175. if args.list:
  176. model_types = list_model_types()
  177. print(f"发现 {len(model_types)} 个机型:")
  178. for i, mt in enumerate(model_types, 1):
  179. print(f" {i}. {mt}")
  180. return
  181. if args.model:
  182. train_one(args.model)
  183. else:
  184. model_types = list_model_types()
  185. print(f"发现 {len(model_types)} 个机型: {model_types}")
  186. for mt in model_types:
  187. train_one(mt)
  188. print("\n全部训练完成。")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()