dataIntegrityOfSecondAnalyst.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. import os
  2. import pandas as pd
  3. import numpy as np
  4. import matplotlib.pyplot as plt
  5. import seaborn as sns
  6. import plotly.graph_objects as go
  7. from plotly.subplots import make_subplots
  8. from geopy.distance import geodesic
  9. from behavior.analystNotFilter import AnalystNotFilter
  10. from utils.directoryUtil import DirectoryUtil as dir
  11. from algorithmContract.confBusiness import *
  12. import calendar
  13. import random
  14. from datetime import datetime
  15. from algorithmContract.contract import Contract
  16. class DataIntegrityOfSecondAnalyst(AnalystNotFilter):
  17. """
  18. 风电机组秒级数据完整度分析
  19. """
  20. def typeAnalyst(self):
  21. return "data_integrity_second"
  22. def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
  23. select = [Field_DeviceCode, Field_Time, Field_ActiverPower, Field_WindSpeed, Field_NacPos, Field_WindDirection, Field_RotorSpeed, Field_GeneratorSpeed, Field_GeneratorTorque, Field_AngleIncluded, Field_EnvTemp, Field_NacTemp, Field_PitchAngel1, Field_PitchAngel2, Field_PitchAngel3]
  24. dictionary = self.processTurbineData(turbineCodes, conf, select)
  25. dataFrameOfTurbines = self.userDataFrame(
  26. dictionary, conf.dataContract.configAnalysis, self)
  27. turbrineInfos = self.common.getTurbineInfos(
  28. conf.dataContract.dataFilter.powerFarmID, turbineCodes, self.turbineInfo)
  29. currDataFrameOfTurbines = dataFrameOfTurbines[dataFrameOfTurbines[Field_CodeOfTurbine].isin(
  30. turbineCodes)]
  31. # 将 currTurbineInfos 转换为字典
  32. currTurbineInfosDict = turbrineInfos.set_index(
  33. Field_CodeOfTurbine)[Field_NameOfTurbine].to_dict()
  34. # 使用 map 函数来填充 Field_NameOfTurbine 列
  35. currDataFrameOfTurbines[Field_NameOfTurbine] = currDataFrameOfTurbines[Field_CodeOfTurbine].map(
  36. currTurbineInfosDict).fillna("")
  37. groupedDataFrame = self.dataIntegrityByMonth(
  38. dataFrameOfTurbines, conf, Field_NameOfTurbine)
  39. print("groupedDataFrame : \n {}".format(groupedDataFrame.head()))
  40. return self.plotByAllMonth(groupedDataFrame, outputAnalysisDir, self.powerFarmInfo[Field_PowerFarmName].iloc[0], Field_NameOfTurbine, conf)
  41. def fullMonthIndex(self, start_time, end_time, turbine_name, new_frame):
  42. months = (end_time.year - start_time.year) * \
  43. 12 + end_time.month - start_time.month
  44. month_range = ['%04d-%02d' % (int(start_time.year + mon//12), int(mon % 12+1))
  45. for mon in range(start_time.month-1, start_time.month+months)]
  46. month_index = pd.DataFrame(month_range, columns=[Field_YearMonth])
  47. plot_res = pd.DataFrame()
  48. grouped = new_frame.groupby(turbine_name)
  49. for name, group in grouped:
  50. group = pd.merge(group, month_index,
  51. on=Field_YearMonth, how='outer')
  52. group['数据完整度%'] = group['数据完整度%'].fillna(0)
  53. group[turbine_name] = name
  54. group['year'] = group[Field_YearMonth].apply(
  55. lambda x: str(x).split('-')[0])
  56. group['month'] = group[Field_YearMonth].apply(
  57. lambda x: str(x).split('-')[1])
  58. plot_res = pd.concat([plot_res, group], axis=0, sort=False)
  59. return plot_res
  60. def dataIntegrityByMonth(self, dataFrameMerge: pd.DataFrame, conf: Contract, Field_NameOfTurbine):
  61. grouped = dataFrameMerge.groupby([dataFrameMerge.loc[:, Field_Time].dt.year.rename('year'),
  62. dataFrameMerge.loc[:, Field_Time].dt.month.rename(
  63. 'month'),
  64. dataFrameMerge.loc[:, Field_NameOfTurbine]]).agg({'count'})[Field_Time].rename({'count': '长度'}, axis=1)
  65. new_frame = grouped.reset_index('month')
  66. timeGranularity = self.dataTransfer[self.dataTransfer[Field_TransferType] == Const_TimeGranularity_Second][Field_TimeGranularity].iloc[0] if self.typeAnalyst(
  67. ) == "data_integrity_second" else self.dataTransfer[self.dataTransfer[Field_TransferType] == Const_TimeGranularity_Minute][Field_TimeGranularity].iloc[0]
  68. new_frame = new_frame.reset_index()
  69. new_frame['数据完整度'] = (100 * new_frame['长度'] / (new_frame.apply(lambda row: calendar.monthrange(
  70. row['year'], row['month'])[1] * 24 * 3600 / timeGranularity, axis=1))).round(decimals=0)
  71. new_frame = new_frame.rename(columns={'数据完整度': '数据完整度%'})
  72. new_frame['month'] = new_frame['month'].astype(
  73. str).apply(lambda x: x.zfill(2))
  74. new_frame[Field_YearMonth] = new_frame['year'].astype(
  75. str) + '-' + new_frame['month'].astype(str)
  76. beginTime = None
  77. if not self.common.isNone(conf.dataContract.dataFilter.beginTime):
  78. beginTime = conf.dataContract.dataFilter.beginTime
  79. else:
  80. beginTime = dataFrameMerge[Field_Time].min().strftime(
  81. '%Y-%m-%d %H:%M:%S')
  82. endTime = None
  83. if not self.common.isNone(conf.dataContract.dataFilter.endTime):
  84. endTime = conf.dataContract.dataFilter.endTime
  85. else:
  86. endTime = dataFrameMerge[Field_Time] .max().strftime(
  87. '%Y-%m-%d %H:%M:%S')
  88. beginTime = datetime.strptime(beginTime, '%Y-%m-%d %H:%M:%S')
  89. endTime = datetime.strptime(endTime, '%Y-%m-%d %H:%M:%S')
  90. new_frame = self.fullMonthIndex(
  91. beginTime, endTime, Field_NameOfTurbine, new_frame)
  92. return new_frame
  93. def plotByAllMonth(self, groupedDataFrame, outputAnalysisDir, farmName, fieldTurbineName, conf: Contract):
  94. title = '数据完整度检测(%)'
  95. # 根据场景决定索引和列的方向
  96. if len(set(groupedDataFrame[Field_YearMonth])) > len(set(groupedDataFrame[fieldTurbineName])):
  97. result = groupedDataFrame.pivot(
  98. values="数据完整度%", index=fieldTurbineName, columns=Field_YearMonth)
  99. x_labels = result.columns.tolist() # 月份
  100. y_labels = result.index.tolist() # 风机名
  101. x_axis_title = "日期"
  102. y_axis_title = "机组"
  103. else:
  104. result = groupedDataFrame.pivot(
  105. values="数据完整度%", index=Field_YearMonth, columns=fieldTurbineName)
  106. x_labels = result.columns.tolist() # 风机名
  107. y_labels = result.index.tolist() # 月份
  108. x_axis_title = "机组"
  109. y_axis_title = "日期"
  110. # # 创建热图
  111. # fig = go.Figure(data=go.Heatmap(
  112. # z=result.values,
  113. # x=x_labels,
  114. # y=y_labels,
  115. # colorscale='Viridis',
  116. # # colorbar=dict(title='数据完整度%'),
  117. # showscale=False, # 显示颜色条
  118. # text=result.values,
  119. # texttemplate="%{text}", # Format the text display inside cells
  120. # # hoverinfo='text'
  121. # ))
  122. # 创建热图
  123. fig = go.Figure(data=go.Heatmap(
  124. z=result.values,
  125. x=x_labels,
  126. y=y_labels,
  127. colorscale=[
  128. [0.0, 'rgb(255, 102, 102)'], # 柔和的红色
  129. [0.5, 'rgb(255, 102, 102)'],
  130. [0.5, 'rgb(255, 255, 153)'], # 柔和的黄色
  131. [0.85, 'rgb(255, 255, 153)'],
  132. [0.85, 'rgb(153, 255, 153)'], # 柔和的绿色
  133. [1.0, 'rgb(153, 255, 153)']
  134. ],
  135. zmin=0, # 设置颜色范围的最小值
  136. zmax=100, # 设置颜色范围的最大值
  137. showscale=True, # 显示颜色条
  138. text=result.values,
  139. texttemplate="%{text}", # Format the text display inside cells
  140. ))
  141. # 更新图形布局
  142. fig.update_layout(
  143. title={'text': title, 'x': 0.5},
  144. # xaxis_nticks=len(x_labels),
  145. xaxis=dict(tickmode='array', tickvals=x_labels,
  146. ticktext=x_labels, tickangle=-45, title=x_axis_title),
  147. yaxis=dict(tickmode='array', tickvals=y_labels,
  148. ticktext=y_labels, title=y_axis_title),
  149. # xaxis=dict(tickmode='array', tickvals=list(range(len(x_labels))), ticktext=x_labels, tickangle=-45, title=x_axis_title),
  150. # yaxis=dict(tickmode='array', tickvals=list(range(len(y_labels))), ticktext=y_labels, title=y_axis_title),
  151. autosize=True,
  152. # width=len(x_labels) * 80, # Adjust width and height as needed
  153. # height=len(y_labels) * 80,
  154. margin=dict(l=50, r=50, b=100, t=100), # 调整边距以确保标签完整显示
  155. # Transparent background to show cell borders
  156. plot_bgcolor='rgba(0,0,0,0)'
  157. )
  158. fig.update_traces(
  159. xgap=1,
  160. ygap=1
  161. )
  162. result_rows = []
  163. # 保存图像
  164. pngFileName = f'{farmName}数据完整度分析.png'
  165. pngFilePath = os.path.join(outputAnalysisDir, pngFileName)
  166. fig.write_image(pngFilePath, scale=3)
  167. # 保存HTML
  168. htmlFileName = f'{farmName}数据完整度分析.html'
  169. htmlFilePath = os.path.join(outputAnalysisDir, htmlFileName)
  170. fig.write_html(htmlFilePath)
  171. result_rows.append({
  172. Field_Return_TypeAnalyst: self.typeAnalyst(),
  173. Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
  174. Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
  175. Field_CodeOfTurbine: Const_Output_Total,
  176. Field_Return_FilePath: pngFilePath,
  177. Field_Return_IsSaveDatabase: False
  178. })
  179. result_rows.append({
  180. Field_Return_TypeAnalyst: self.typeAnalyst(),
  181. Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
  182. Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
  183. Field_CodeOfTurbine: Const_Output_Total,
  184. Field_Return_FilePath: htmlFilePath,
  185. Field_Return_IsSaveDatabase: True
  186. })
  187. result_df = pd.DataFrame(result_rows)
  188. return result_df