# dataIntegrityOfSecondAnalyst.py (11 KB)
  1. import os
  2. import pandas as pd
  3. import numpy as np
  4. import matplotlib.pyplot as plt
  5. import seaborn as sns
  6. import plotly.graph_objects as go
  7. from plotly.subplots import make_subplots
  8. from geopy.distance import geodesic
  9. from behavior.analystNotFilter import AnalystNotFilter
  10. from utils.directoryUtil import DirectoryUtil as dir
  11. from algorithmContract.confBusiness import *
  12. import calendar
  13. import random
  14. from datetime import datetime
  15. from algorithmContract.contract import Contract
  16. class DataIntegrityOfSecondAnalyst(AnalystNotFilter):
  17. """
  18. 风电机组秒级数据完整度分析
  19. """
  20. def typeAnalyst(self):
  21. return "data_integrity_second"
  22. def turbinesAnalysis(self, outputAnalysisDir, conf: Contract, turbineCodes):
  23. select = [Field_DeviceCode, Field_Time, Field_ActiverPower, Field_WindSpeed, Field_NacPos, Field_WindDirection, Field_RotorSpeed, Field_GeneratorSpeed, Field_GeneratorTorque, Field_AngleIncluded, Field_EnvTemp, Field_NacTemp, Field_PitchAngel1, Field_PitchAngel2, Field_PitchAngel3]
  24. dictionary = self.processTurbineData(turbineCodes, conf, select)
  25. dataFrameOfTurbines = self.userDataFrame(
  26. dictionary, conf.dataContract.configAnalysis, self)
  27. turbrineInfos = self.common.getTurbineInfos(
  28. conf.dataContract.dataFilter.powerFarmID, turbineCodes, self.turbineInfo)
  29. currDataFrameOfTurbines = dataFrameOfTurbines[dataFrameOfTurbines[Field_CodeOfTurbine].isin(
  30. turbineCodes)]
  31. # 将 currTurbineInfos 转换为字典
  32. currTurbineInfosDict = turbrineInfos.set_index(
  33. Field_CodeOfTurbine)[Field_NameOfTurbine].to_dict()
  34. # 使用 map 函数来填充 Field_NameOfTurbine 列
  35. currDataFrameOfTurbines[Field_NameOfTurbine] = currDataFrameOfTurbines[Field_CodeOfTurbine].map(
  36. currTurbineInfosDict).fillna("")
  37. groupedDataFrame = self.dataIntegrityByMonth(
  38. dataFrameOfTurbines, conf, Field_NameOfTurbine)
  39. print("groupedDataFrame : \n {}".format(groupedDataFrame.head()))
  40. return self.plotByAllMonth(groupedDataFrame, outputAnalysisDir, self.powerFarmInfo[Field_PowerFarmName].iloc[0], Field_NameOfTurbine, conf)
  41. def fullMonthIndex(self, start_time, end_time, turbine_name, new_frame):
  42. months = (end_time.year - start_time.year) * \
  43. 12 + end_time.month - start_time.month
  44. month_range = ['%04d-%02d' % (int(start_time.year + mon//12), int(mon % 12+1))
  45. for mon in range(start_time.month-1, start_time.month+months)]
  46. month_index = pd.DataFrame(month_range, columns=[Field_YearMonth])
  47. plot_res = pd.DataFrame()
  48. grouped = new_frame.groupby(turbine_name)
  49. for name, group in grouped:
  50. group = pd.merge(group, month_index,
  51. on=Field_YearMonth, how='outer')
  52. group['数据完整度%'] = group['数据完整度%'].fillna(0)
  53. group[turbine_name] = name
  54. group['year'] = group[Field_YearMonth].apply(
  55. lambda x: str(x).split('-')[0])
  56. group['month'] = group[Field_YearMonth].apply(
  57. lambda x: str(x).split('-')[1])
  58. plot_res = pd.concat([plot_res, group], axis=0, sort=False)
  59. return plot_res
  60. def dataIntegrityByMonth(self, dataFrameMerge: pd.DataFrame, conf: Contract, Field_NameOfTurbine):
  61. grouped = dataFrameMerge.groupby([dataFrameMerge.loc[:, Field_Time].dt.year.rename('year'),
  62. dataFrameMerge.loc[:, Field_Time].dt.month.rename(
  63. 'month'),
  64. dataFrameMerge.loc[:, Field_NameOfTurbine]]).agg({'count'})[Field_Time].rename({'count': '长度'}, axis=1)
  65. new_frame = grouped.reset_index('month')
  66. timeGranularity = self.dataTransfer[self.dataTransfer[Field_TransferType] == Const_TimeGranularity_Second][Field_TimeGranularity].iloc[0] if self.typeAnalyst(
  67. ) == "data_integrity_second" else self.dataTransfer[self.dataTransfer[Field_TransferType] == Const_TimeGranularity_Minute][Field_TimeGranularity].iloc[0]
  68. new_frame = new_frame.reset_index()
  69. new_frame['数据完整度'] = (100 * new_frame['长度'] / (new_frame.apply(lambda row: calendar.monthrange(
  70. row['year'], row['month'])[1] * 24 * 3600 / timeGranularity, axis=1))).round(decimals=0)
  71. new_frame = new_frame.rename(columns={'数据完整度': '数据完整度%'})
  72. new_frame['month'] = new_frame['month'].astype(
  73. str).apply(lambda x: x.zfill(2))
  74. new_frame[Field_YearMonth] = new_frame['year'].astype(
  75. str) + '-' + new_frame['month'].astype(str)
  76. beginTime = None
  77. if not self.common.isNone(conf.dataContract.dataFilter.beginTime):
  78. beginTime = conf.dataContract.dataFilter.beginTime
  79. else:
  80. beginTime = dataFrameMerge[Field_Time].min().strftime(
  81. '%Y-%m-%d %H:%M:%S')
  82. endTime = None
  83. if not self.common.isNone(conf.dataContract.dataFilter.endTime):
  84. endTime = conf.dataContract.dataFilter.endTime
  85. else:
  86. endTime = dataFrameMerge[Field_Time] .max().strftime(
  87. '%Y-%m-%d %H:%M:%S')
  88. beginTime = datetime.strptime(beginTime, '%Y-%m-%d %H:%M:%S')
  89. endTime = datetime.strptime(endTime, '%Y-%m-%d %H:%M:%S')
  90. new_frame = self.fullMonthIndex(
  91. beginTime, endTime, Field_NameOfTurbine, new_frame)
  92. return new_frame
  93. def plotByAllMonth(self, groupedDataFrame, outputAnalysisDir, farmName, fieldTurbineName, conf: Contract):
  94. title = '数据完整度检测(%)'
  95. # 根据场景决定索引和列的方向
  96. if len(set(groupedDataFrame[Field_YearMonth])) > len(set(groupedDataFrame[fieldTurbineName])):
  97. result = groupedDataFrame.pivot(
  98. values="数据完整度%", index=fieldTurbineName, columns=Field_YearMonth)
  99. x_labels = result.columns.tolist() # 月份
  100. y_labels = result.index.tolist() # 风机名
  101. x_axis_title = "日期"
  102. y_axis_title = "机组"
  103. # 构建最终的JSON对象
  104. json_output = {
  105. "analysisTypeCode": "数据完整度检测(%)",
  106. "engineCode": "",
  107. "engineTypeName": "",
  108. "xaixs": "日期",
  109. "yaixs": "机组",
  110. "data": [{
  111. "engineName": "",
  112. "engineCode": "",
  113. "title": f' 数据完整度%',
  114. "xData": x_labels,
  115. "yData": y_labels,
  116. "ZData": result.values.tolist(),
  117. }]
  118. }
  119. else:
  120. result = groupedDataFrame.pivot(
  121. values="数据完整度%", index=Field_YearMonth, columns=fieldTurbineName)
  122. x_labels = result.columns.tolist() # 风机名
  123. y_labels = result.index.tolist() # 月份
  124. x_axis_title = "机组"
  125. y_axis_title = "日期"
  126. # 构建最终的JSON对象
  127. json_output = {
  128. "analysisTypeCode": "数据完整度检测(%)",
  129. "engineCode": "",
  130. "engineTypeName": "",
  131. "xaixs": "机组",
  132. "yaixs": "日期",
  133. "data": [{
  134. "engineName": "",
  135. "engineCode": "",
  136. "title": f' 数据完整度%',
  137. "xData": x_labels,
  138. "yData": y_labels,
  139. "ZData": result.values.tolist(),
  140. }]
  141. }
  142. # # 创建热图
  143. # fig = go.Figure(data=go.Heatmap(
  144. # z=result.values,
  145. # x=x_labels,
  146. # y=y_labels,
  147. # colorscale='Viridis',
  148. # # colorbar=dict(title='数据完整度%'),
  149. # showscale=False, # 显示颜色条
  150. # text=result.values,
  151. # texttemplate="%{text}", # Format the text display inside cells
  152. # # hoverinfo='text'
  153. # ))
  154. # 创建热图
  155. fig = go.Figure(data=go.Heatmap(
  156. z=result.values,
  157. x=x_labels,
  158. y=y_labels,
  159. colorscale=[
  160. [0.0, 'rgb(255, 102, 102)'], # 柔和的红色
  161. [0.5, 'rgb(255, 102, 102)'],
  162. [0.5, 'rgb(255, 255, 153)'], # 柔和的黄色
  163. [0.85, 'rgb(255, 255, 153)'],
  164. [0.85, 'rgb(153, 255, 153)'], # 柔和的绿色
  165. [1.0, 'rgb(153, 255, 153)']
  166. ],
  167. zmin=0, # 设置颜色范围的最小值
  168. zmax=100, # 设置颜色范围的最大值
  169. showscale=True, # 显示颜色条
  170. text=result.values,
  171. texttemplate="%{text}", # Format the text display inside cells
  172. ))
  173. # 更新图形布局
  174. fig.update_layout(
  175. title={'text': title, 'x': 0.5},
  176. # xaxis_nticks=len(x_labels),
  177. xaxis=dict(tickmode='array', tickvals=x_labels,
  178. ticktext=x_labels, tickangle=-45, title=x_axis_title),
  179. yaxis=dict(tickmode='array', tickvals=y_labels,
  180. ticktext=y_labels, title=y_axis_title),
  181. # xaxis=dict(tickmode='array', tickvals=list(range(len(x_labels))), ticktext=x_labels, tickangle=-45, title=x_axis_title),
  182. # yaxis=dict(tickmode='array', tickvals=list(range(len(y_labels))), ticktext=y_labels, title=y_axis_title),
  183. autosize=True,
  184. # width=len(x_labels) * 80, # Adjust width and height as needed
  185. # height=len(y_labels) * 80,
  186. margin=dict(l=50, r=50, b=100, t=100), # 调整边距以确保标签完整显示
  187. # Transparent background to show cell borders
  188. plot_bgcolor='rgba(0,0,0,0)'
  189. )
  190. fig.update_traces(
  191. xgap=1,
  192. ygap=1
  193. )
  194. result_rows = []
  195. # 将JSON对象保存到文件
  196. output_json_path = os.path.join(outputAnalysisDir, f"Data_Integrity_Of_Second_Analyst.json")
  197. with open(output_json_path, 'w', encoding='utf-8') as f:
  198. import json
  199. json.dump(json_output, f, ensure_ascii=False, indent=4)
  200. # 保存图像
  201. pngFileName = f'{farmName}数据完整度分析.png'
  202. pngFilePath = os.path.join(outputAnalysisDir, pngFileName)
  203. fig.write_image(pngFilePath, scale=3)
  204. # 保存HTML
  205. # htmlFileName = f'{farmName}数据完整度分析.html'
  206. # htmlFilePath = os.path.join(outputAnalysisDir, htmlFileName)
  207. # fig.write_html(htmlFilePath)
  208. result_rows.append({
  209. Field_Return_TypeAnalyst: self.typeAnalyst(),
  210. Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
  211. Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
  212. Field_CodeOfTurbine: Const_Output_Total,
  213. Field_Return_FilePath: pngFilePath,
  214. Field_Return_IsSaveDatabase: False
  215. })
  216. result_rows.append({
  217. Field_Return_TypeAnalyst: self.typeAnalyst(),
  218. Field_PowerFarmCode: conf.dataContract.dataFilter.powerFarmID,
  219. Field_Return_BatchCode: conf.dataContract.dataFilter.dataBatchNum,
  220. Field_CodeOfTurbine: Const_Output_Total,
  221. Field_MillTypeCode: 'total',
  222. Field_Return_FilePath: output_json_path,
  223. Field_Return_IsSaveDatabase: True
  224. })
  225. result_df = pd.DataFrame(result_rows)
  226. return result_df