cpTrendAnalyst.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. import os
  2. import pandas as pd
  3. import numpy as np
  4. from plotly.subplots import make_subplots
  5. import plotly.graph_objects as go
  6. import matplotlib.pyplot as plt
  7. from matplotlib.ticker import MultipleLocator
  8. from behavior.analystWithGoodPoint import AnalystWithGoodPoint
  9. from utils.directoryUtil import DirectoryUtil as dir
  10. from algorithmContract.confBusiness import *
  11. from algorithmContract.contract import Contract
  12. class CpTrendAnalyst(AnalystWithGoodPoint):
  13. """
  14. 风电机组风能利用系数时序分析
  15. """
  16. def typeAnalyst(self):
  17. return "cp_trend"
  18. def turbinesAnalysis(self, outputAnalysisDir, conf: Contract,turbineCodes):
  19. dictionary = self.processTurbineData(turbineCodes, conf, [
  20. Field_DeviceCode, Field_Time, Field_WindSpeed, Field_ActiverPower])
  21. dataFrameOfTurbines = self.userDataFrame(
  22. dictionary, conf.dataContract.configAnalysis, self)
  23. # 检查所需列是否存在
  24. required_columns = {Field_Cp, Field_YearMonthDay}
  25. if not required_columns.issubset(dataFrameOfTurbines.columns):
  26. raise ValueError(f"DataFrame缺少必要的列。需要的列有: {required_columns}")
  27. return self.drawCpTrend(dataFrameOfTurbines, outputAnalysisDir, conf)
  28. def drawCpTrend(self, dataFrameOfTurbines: pd.DataFrame, outputAnalysisDir, conf: Contract):
  29. # 按设备名分组数据
  30. grouped = dataFrameOfTurbines.groupby(Field_CodeOfTurbine)
  31. result_rows = []
  32. for turbineCode, group in grouped:
  33. currTurbineInfo=self.common.getTurbineInfo(conf.dataContract.dataFilter.powerFarmID,turbineCode,self.turbineInfo)
  34. # # 计算四分位数和IQR
  35. Q1 = group[Field_Cp].quantile(0.25)
  36. Q3 = group[Field_Cp].quantile(0.75)
  37. IQR = Q3 - Q1
  38. # 定义离群值的范围
  39. lower_bound = Q1 - 1.5 * IQR
  40. upper_bound = Q3 + 1.5 * IQR
  41. # 筛选掉离群值
  42. filtered_group = group[(group[Field_Cp] >= lower_bound) & (
  43. group[Field_Cp] <= upper_bound)]
  44. # 创建箱线图
  45. fig = go.Figure()
  46. fig.add_trace(go.Box(
  47. x=filtered_group[Field_YearMonthDay], # 设置x轴数据为日期
  48. y=filtered_group[Field_Cp], # 设置y轴数据为风能利用系数
  49. # boxpoints='outliers', # 显示异常值(偏离值),不显示数据的所有点(只显示异常值)
  50. boxpoints=False, # 不显示偏离值
  51. marker=dict(color='lightgoldenrodyellow',
  52. size=1), # 设置偏离值的颜色和大小
  53. line=dict(color='lightgray', width=2), # 设置箱线和须线的颜色为灰色,粗细为2
  54. fillcolor='rgba(200, 200, 200, 0.5)', # 设置箱体的填充颜色和透明度
  55. name='风能利用系数' # 图例名称
  56. ))
  57. # 对于每个箱线图的中位数,绘制一个蓝色点
  58. medians = filtered_group.groupby(filtered_group[Field_YearMonthDay])[
  59. Field_Cp].median()
  60. fig.add_trace(go.Scatter(
  61. x=medians.index,
  62. y=medians.values,
  63. mode='markers',
  64. marker=dict(color='orange', size=3),
  65. name='风能利用系数-中位数' # 中位数标记的图例名称
  66. ))
  67. # 设置图表的标题和轴标签
  68. fig.update_layout(
  69. title={
  70. 'text': f'机组: {currTurbineInfo[Field_NameOfTurbine]}',
  71. # 'x': 0.5,
  72. },
  73. xaxis_title='时间',
  74. yaxis_title='风能利用系数',
  75. xaxis=dict(
  76. tickmode='auto', # 自动设置x轴刻度,以适应日期数据
  77. tickformat='%Y-%m-%d', # 设置x轴时间格式
  78. showgrid=True, # 显示网格线
  79. gridcolor='lightgray', # setting y-axis gridline color to black
  80. tickangle=-45,
  81. linecolor='black', # 设置y轴坐标系线颜色为黑色
  82. ticklen=5, # 设置刻度线的长度
  83. ),
  84. yaxis=dict(
  85. dtick=self.axisStepCp,
  86. range=[self.axisLowerLimitCp,
  87. self.axisUpperLimitCp], # 设置y轴的范围从0到1
  88. showgrid=True, # 显示网格线
  89. gridcolor='lightgray', # setting y-axis gridline color to black
  90. linecolor='black', # 设置y轴坐标系线颜色为黑色
  91. ticklen=5, # 设置刻度线的长度
  92. ),
  93. paper_bgcolor='white', # 设置纸张背景颜色为白色
  94. plot_bgcolor='white', # 设置图表背景颜色为白色
  95. margin=dict(t=50, b=10) # t为顶部(top)间距,b为底部(bottom)间距
  96. )
  97. # 确保从 Series 中提取的是具体的值
  98. engineTypeCode = currTurbineInfo.get(Field_MillTypeCode, "")
  99. if isinstance(engineTypeCode, pd.Series):
  100. engineTypeCode = engineTypeCode.iloc[0]
  101. engineTypeName = currTurbineInfo.get(Field_MachineTypeCode, "")
  102. if isinstance(engineTypeName, pd.Series):
  103. engineTypeName = engineTypeName.iloc[0]
  104. # 构建最终的JSON对象
  105. json_output = {
  106. "analysisTypeCode": "风能利用系数时序分析",
  107. "engineCode": engineTypeCode,
  108. "engineTypeName": engineTypeName,
  109. "xaixs": "时间",
  110. "yaixs": "风能利用系数",
  111. "data": [{
  112. "engineName":currTurbineInfo[Field_NameOfTurbine],
  113. "engineCode":turbineCode,
  114. "title":f'机组-{currTurbineInfo[Field_NameOfTurbine]}',
  115. "xData": filtered_group[Field_YearMonthDay].tolist(),
  116. "yData": filtered_group[Field_Cp].tolist(),
  117. "color":'lightgray',
  118. "width":2,
  119. "type":"box_plot",
  120. "medians": {
  121. "x": medians.index.tolist(), # 中位数的 x 轴数据
  122. "y": medians.values.tolist(), # 中位数的 y 轴数据
  123. "mode":'markers',
  124. "color":'orange',
  125. "size":3
  126. }
  127. }]
  128. }
  129. # 保存图像
  130. filePathOfImage = os.path.join(outputAnalysisDir, f"{currTurbineInfo[Field_NameOfTurbine]}.png")
  131. fig.write_image(filePathOfImage, scale=3)
  132. # filePathOfHtml = os.path.join(outputAnalysisDir, f"{currTurbineInfo[Field_NameOfTurbine]}.html")
  133. # fig.write_html(filePathOfHtml)
  134. # 将JSON对象保存到文件
  135. output_json_path = os.path.join(outputAnalysisDir, f"{currTurbineInfo[Field_NameOfTurbine]}.json")
  136. with open(output_json_path, 'w', encoding='utf-8') as f:
  137. import json
  138. json.dump(json_output, f, ensure_ascii=False, indent=4)
  139. # 如果需要返回DataFrame,可以包含文件路径
  140. result_rows.append({
  141. Field_Return_TypeAnalyst: self.typeAnalyst(),
  142. Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
  143. Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
  144. Field_CodeOfTurbine: turbineCode,
  145. Field_Return_FilePath: output_json_path,
  146. Field_Return_IsSaveDatabase: True
  147. })
  148. result_rows.append({
  149. Field_Return_TypeAnalyst: self.typeAnalyst(),
  150. Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
  151. Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
  152. Field_CodeOfTurbine: turbineCode,
  153. Field_Return_FilePath: filePathOfImage,
  154. Field_Return_IsSaveDatabase: False
  155. })
  156. # result_rows.append({
  157. # Field_Return_TypeAnalyst: self.typeAnalyst(),
  158. # Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
  159. # Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
  160. # Field_CodeOfTurbine: turbineCode,
  161. # Field_Return_FilePath: filePathOfHtml,
  162. # Field_Return_IsSaveDatabase: True
  163. # })
  164. result_df = pd.DataFrame(result_rows)
  165. return result_df