labeler.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. """
  2. 状态打标模块
  3. 功能:
  4. 1. get_model_statistics() - 按机型统计额定功率、基准桨距角等自适应阈值
  5. 2. label_dataframe() - 对 DataFrame 打标:运行 / 限功率 / 停机 / 传感器异常-xxx异常
  6. 3. DETECTOR_SENSOR_COLS - 每个检测器所依赖的测点,用于 detect.py 判断传感器异常
  7. 状态优先级(高→低): 传感器异常 > 停机 > 限功率 > 运行
  8. 传感器异常列命名规则:
  9. d_val_power 功率值越界
  10. d_val_wind 风速值越界
  11. d_val_pitch 变桨值越界
  12. d_val_spd 转速值越界
  13. d_val_torque 扭矩值越界
  14. d_logic_wind_pwr 风速-功率逻辑悖论
  15. d_logic_torque_pwr 转速-扭矩逻辑悖论
  16. """
  17. import pandas as pd
  18. import numpy as np
  19. from typing import Dict, Optional
  20. from config import (
  21. STATUS_POWER_UPPER_RATIO, STATUS_POWER_LOWER_RATIO,
  22. STATUS_SHUTDOWN_RATIO, STATUS_CURTAIL_LOW_RATIO,
  23. STATUS_CURTAIL_HIGH_RATIO, STATUS_CURTAIL_PITCH_OFFSET,
  24. STATUS_WIND_MAX, STATUS_WIND_MIN,
  25. STATUS_PITCH_MAX, STATUS_PITCH_MIN,
  26. STATUS_SPD_UPPER_RATIO, STATUS_TORQUE_UPPER_RATIO, STATUS_TORQUE_LOWER_ABS,
  27. STATUS_LOGIC_WIND_MIN, STATUS_LOGIC_POWER_MIN,
  28. STATUS_MODEL_SPECIAL_RULES,
  29. )
  30. # ── 每个检测器依赖的传感器异常列 ──────────────────────────────────────────────
  31. # detect.py 用此映射判断:传感器异常数据中,该检测器所需测点是否有异常
  32. # 若有 → 直接输出传感器异常标签;若无 → 正常做异常检测
  33. DETECTOR_SENSOR_COLS: Dict[str, list] = {
  34. "wind_power_curve": ["d_val_wind", "d_val_power", "d_logic_wind_pwr"],
  35. "wind_power_scatter":["d_val_wind", "d_val_power", "d_logic_wind_pwr"],
  36. "yaw_static": [], # yaw_ang 无对应传感器异常列,不直接跳过
  37. "yaw_twist": [],
  38. "pitch_regulation": ["d_val_pitch"],
  39. "pitch_coord": ["d_val_pitch", "d_val_spd", "d_val_power"],
  40. "pitch_min": ["d_val_pitch"],
  41. "ctrl_power_quality": ["d_val_power"],
  42. "ctrl_op_state": ["d_val_power", "d_val_spd", "d_val_pitch"],
  43. }
  44. # 传感器异常列 → 可读标签(用于输出结果)
  45. SENSOR_COL_LABEL: Dict[str, str] = {
  46. "d_val_power": "功率值异常",
  47. "d_val_wind": "风速值异常",
  48. "d_val_pitch": "变桨值异常",
  49. "d_val_spd": "转速值异常",
  50. "d_val_torque": "扭矩值异常",
  51. "d_logic_wind_pwr": "风速功率逻辑异常",
  52. "d_logic_torque_pwr": "转速扭矩逻辑异常",
  53. }
  54. def get_model_statistics(df: pd.DataFrame) -> dict:
  55. """计算机型自适应统计阈值(需传入机型全量数据)。"""
  56. stats: dict = {}
  57. stats["p_max_observed"] = df["p_active"].quantile(0.995)
  58. stats["torque_limit"] = (
  59. df["actual_torque"].quantile(0.999) if "actual_torque" in df.columns else None
  60. )
  61. stats["spd_limit"] = (
  62. df["gen_spd"].quantile(0.999) if "gen_spd" in df.columns else None
  63. )
  64. partial_mask = (
  65. (df["p_active"] > stats["p_max_observed"] * 0.2) &
  66. (df["p_active"] < stats["p_max_observed"] * 0.6)
  67. )
  68. if partial_mask.any() and "pitch_ang_act_1" in df.columns:
  69. stats["baseline_pitch"] = df.loc[partial_mask, "pitch_ang_act_1"].median()
  70. else:
  71. stats["baseline_pitch"] = 0.0
  72. print(
  73. f" [自适应统计] 额定: {stats['p_max_observed']:.1f}kW"
  74. f" | 基准桨距: {stats['baseline_pitch']:.2f}°"
  75. )
  76. return stats
  77. def label_dataframe(df_input: pd.DataFrame, stats: dict, model_name: str) -> pd.DataFrame:
  78. """
  79. 对 DataFrame 打标,返回含以下新列的 DataFrame:
  80. - d_val_* / d_logic_* : 各传感器异常布尔列
  81. - sensor_anomaly_tags : 逗号拼接的传感器异常标签字符串(无异常为空串)
  82. - status : 运行 / 限功率 / 停机 / 传感器异常-xxx异常
  83. """
  84. df = df_input.copy()
  85. P_MAX = stats["p_max_observed"]
  86. PITCH_BASE = stats["baseline_pitch"]
  87. # ── 1. 传感器异常列 ────────────────────────────────────────────────────────
  88. df["d_val_power"] = (
  89. (df["p_active"] > P_MAX * STATUS_POWER_UPPER_RATIO) |
  90. (df["p_active"] < P_MAX * STATUS_POWER_LOWER_RATIO)
  91. )
  92. if "wind_spd" in df.columns:
  93. df["d_val_wind"] = (df["wind_spd"] > STATUS_WIND_MAX) | (df["wind_spd"] < STATUS_WIND_MIN)
  94. else:
  95. df["d_val_wind"] = False
  96. pitch_cols = [c for c in df.columns if "pitch_ang_act" in c]
  97. df["d_val_pitch"] = False
  98. for col in pitch_cols:
  99. df["d_val_pitch"] |= (df[col] > STATUS_PITCH_MAX) | (df[col] < STATUS_PITCH_MIN)
  100. df["d_val_spd"] = False
  101. if stats["spd_limit"] and "gen_spd" in df.columns:
  102. df["d_val_spd"] = (
  103. (df["gen_spd"] > stats["spd_limit"] * STATUS_SPD_UPPER_RATIO) |
  104. (df["gen_spd"] < -200)
  105. )
  106. df["d_val_torque"] = False
  107. if stats["torque_limit"] and "actual_torque" in df.columns:
  108. df["d_val_torque"] = (
  109. (df["actual_torque"] > stats["torque_limit"] * STATUS_TORQUE_UPPER_RATIO) |
  110. (df["actual_torque"] < STATUS_TORQUE_LOWER_ABS)
  111. )
  112. # ── 2. 逻辑悖论列 ──────────────────────────────────────────────────────────
  113. if "wind_spd" in df.columns:
  114. df["d_logic_wind_pwr"] = (
  115. (df["wind_spd"] < STATUS_LOGIC_WIND_MIN) &
  116. (df["p_active"] > STATUS_LOGIC_POWER_MIN)
  117. )
  118. for wind_thresh, pwr_thresh in STATUS_MODEL_SPECIAL_RULES.get(model_name, []):
  119. df["d_logic_wind_pwr"] |= (
  120. (df["wind_spd"] < wind_thresh) & (df["p_active"] > pwr_thresh)
  121. )
  122. else:
  123. df["d_logic_wind_pwr"] = False
  124. df["d_logic_torque_pwr"] = False
  125. if stats["torque_limit"] and "actual_torque" in df.columns:
  126. df["d_logic_torque_pwr"] = (
  127. (df["p_active"] > P_MAX * 0.1) &
  128. (df["actual_torque"].abs() < stats["torque_limit"] * 0.01)
  129. )
  130. # ── 3. 汇总传感器异常标签 ──────────────────────────────────────────────────
  131. all_sensor_cols = list(SENSOR_COL_LABEL.keys())
  132. existing = [c for c in all_sensor_cols if c in df.columns]
  133. # 向量化拼接:逐列生成标签,再按行合并(避免 apply axis=1)
  134. if existing:
  135. tag_matrix = np.where(
  136. df[existing].values,
  137. np.array([SENSOR_COL_LABEL[c] for c in existing]),
  138. "",
  139. )
  140. df["sensor_anomaly_tags"] = pd.Series(
  141. [",".join(t for t in row if t) for row in tag_matrix],
  142. index=df.index,
  143. )
  144. else:
  145. df["sensor_anomaly_tags"] = ""
  146. # ── 4. 业务状态打标 ────────────────────────────────────────────────────────
  147. any_sensor_anomaly = df[[c for c in existing if c in df.columns]].any(axis=1)
  148. df["status"] = "运行"
  149. shutdown_thresh = max(10.0, P_MAX * STATUS_SHUTDOWN_RATIO)
  150. mask_shutdown = (~df["d_val_power"]) & (df["p_active"] <= shutdown_thresh)
  151. df.loc[mask_shutdown, "status"] = "停机"
  152. if "pitch_ang_act_1" in df.columns:
  153. mask_curtail = (
  154. (df["status"] != "停机") &
  155. (~df["d_val_power"]) &
  156. (~df["d_val_pitch"]) &
  157. (df["p_active"] > P_MAX * STATUS_CURTAIL_LOW_RATIO) &
  158. (df["p_active"] < P_MAX * STATUS_CURTAIL_HIGH_RATIO) &
  159. (df["pitch_ang_act_1"] > (PITCH_BASE + STATUS_CURTAIL_PITCH_OFFSET))
  160. )
  161. df.loc[mask_curtail, "status"] = "限功率"
  162. # 传感器异常覆盖(最高优先级)
  163. mask_sensor = any_sensor_anomaly
  164. df.loc[mask_sensor, "status"] = (
  165. "传感器异常-" + df.loc[mask_sensor, "sensor_anomaly_tags"]
  166. )
  167. return df