temperatureLargeComponentsAnalyst.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. import os
  2. import pandas as pd
  3. import numpy as np
  4. import pandas as pd
  5. import matplotlib.pyplot as plt
  6. import seaborn as sns
  7. from .analyst import Analyst
  8. from .utils.directoryUtil import DirectoryUtil as dir
  9. from confBusiness import ConfBusiness
  class TemperatureLargeComponentsAnalyst(Analyst):
      """
      Temperature-rise analysis for the large components of wind turbines.

      Per turbine, the configured temperature columns are averaged inside
      10-unit-wide active-power bins and written to a CSV file. Across
      turbines, those CSV files are read back and drawn as line plots
      (one overview plus one highlighted plot per turbine) for every
      configured temperature column.
      """

      def typeAnalyst(self):
          """Return the identifier string of this analysis type."""
          return "temperature_large_components"

      def turbineAnalysis(self,
                          dataFrame,
                          outputAnalysisDir,
                          outputFilePath,
                          confData: ConfBusiness,
                          turbineName):
          """
          Run the single-turbine analysis and write its CSV result.

          Parameters:
          - dataFrame: data of one turbine.
          - outputAnalysisDir: not used by this implementation.
          - outputFilePath: path of the CSV file to write.
          - confData: configuration providing the time, power and
            temperature field names.
          - turbineName: not used by this implementation.
          """
          self.temp_power(dataFrame, outputFilePath,
                          confData.field_turbine_time,
                          confData.field_power,
                          confData.field_temperature_large_components)

      def getNoneEmptyFields(self, dataFrame, temperatureFields):
          """
          Return the names in temperatureFields whose column in dataFrame
          holds at least one non-null value, in their original order.
          """
          candidates = list(temperatureFields)
          if not candidates:
              return []
          has_value = dataFrame[candidates].notna().any(axis=0)
          return has_value[has_value].index.tolist()

      @staticmethod
      def _split_fields(field_list):
          """Split a comma-separated field string into stripped, unique, non-empty names."""
          if not field_list:
              return []
          names = (part.strip() for part in field_list.split(','))
          # dict.fromkeys removes duplicates while keeping the configured order.
          return list(dict.fromkeys(name for name in names if name))

      def temp_power(self, dataFrame, output_path, field_time, field_power_active, field_temperature_large_componts):
          """
          Average the temperature columns per power bin and write them to CSV.

          Parameters:
          - dataFrame: data of one turbine; it is not modified.
          - output_path: path of the CSV file to write.
          - field_time: name of the time column.
          - field_power_active: name of the active-power column.
          - field_temperature_large_componts: comma-separated names of the
            temperature columns.

          Nothing is written when no configured temperature column carries
          data or when the power column is entirely null.
          """
          print("field_temperature_large_componts is {}".format(field_temperature_large_componts))

          configured_cols = self._split_fields(field_temperature_large_componts)
          # Tolerate configured columns that this turbine's data does not have,
          # the same way the plotting step skips files lacking a column.
          present_cols = [col for col in configured_cols if col in dataFrame.columns]
          non_empty_cols = self.getNoneEmptyFields(dataFrame, present_cols)
          if not non_empty_cols:
              print("no temperature column with data, skip {}".format(output_path))
              return

          # Clean the data: drop all-null columns, then rows with any null.
          cleaned = dataFrame[[field_time, field_power_active] + non_empty_cols]
          cleaned = cleaned.dropna(axis=1, how='all').dropna(axis=0)
          if field_power_active not in cleaned.columns:
              print("power column {} has no data, skip {}".format(field_power_active, output_path))
              return

          # NOTE(review): astype(int) truncates toward zero, so negative power
          # values land in bin 0 rather than -10 - confirm this is intended.
          power_floor = (cleaned[field_power_active] / 10).astype(int) * 10

          # assign() builds a new frame instead of writing into a slice of the
          # caller's data.
          grouped = (cleaned.assign(power_floor=power_floor)
                     .groupby('power_floor')[non_empty_cols]
                     .mean()
                     .reset_index())
          grouped.sort_values('power_floor', inplace=True)
          grouped.to_csv(output_path, index=False)

      def turbinesAnalysis(self, dataFrameMerge, outputAnalysisDir, confData: ConfBusiness):
          """
          Run the multi-turbine analysis: plot the per-turbine CSV results
          found under outputAnalysisDir. dataFrameMerge is not used.
          """
          self.plot_temperature_distribution(outputAnalysisDir, confData,
                                             confData.field_temperature_large_components)

      def plot_temperature_distribution(self, csvFileDirOfCp, confData: ConfBusiness, field_temperature_large_componts, encoding='utf-8'):
          """
          Generate temperature-versus-power plots for the turbines of a wind farm.

          For every configured temperature column a sub-directory named after
          the column is created under csvFileDirOfCp. It receives one overview
          plot with all turbines and one plot per turbine in which that
          turbine is highlighted against the others drawn in light grey.

          Parameters:
          - csvFileDirOfCp: str, directory holding the input CSV files; the
            plots are written below it as well.
          - confData: configuration object; only its
            add_W_if_starts_with_digit method is used here.
          - field_temperature_large_componts: str, comma-separated names of
            the temperature columns to plot.
          - encoding: str, encoding of the input CSV files. Defaults to 'utf-8'.
          """
          field_Name_Turbine = "turbine_name"
          x_name = 'power_floor'
          y_name = 'temperature'
          split_way = '_temperature_large_components.csv'

          columns = self._split_fields(field_temperature_large_componts)
          if not columns:
              return

          sns.set_palette('deep')

          # Read every CSV once instead of once per temperature column.
          turbine_frames = self._load_turbine_frames(
              csvFileDirOfCp, confData, field_Name_Turbine, x_name, split_way, encoding)

          for column in columns:
              output_path = os.path.join(csvFileDirOfCp, column)
              os.makedirs(output_path, exist_ok=True)
              print("current column {}".format(column))

              parts = [frame.loc[:, [field_Name_Turbine, x_name, column]]
                       for frame in turbine_frames
                       if column in frame.columns]
              if not parts:
                  # No turbine provides this column, so there is nothing to plot.
                  print("no data for column {}".format(column))
                  continue

              # Concatenate once; doing it inside the file loop is quadratic.
              ress = pd.concat(parts, axis=0, ignore_index=True)
              self._plot_column(ress, column, output_path,
                                field_Name_Turbine, x_name, y_name)

      def _load_turbine_frames(self, csv_dir, confData, turbine_field, x_name, split_way, encoding):
          """
          Read all CSV files below csv_dir and tag each with its turbine name.

          Files without the x_name column are skipped because they cannot be
          plotted. The turbine name is the file name up to split_way, passed
          through confData.add_W_if_starts_with_digit.
          """
          frames = []
          # list_directory is consumed as (root, dir_names, file_names) tuples.
          for root, _dir_names, file_names in dir.list_directory(csv_dir):
              for file_name in file_names:
                  if not file_name.endswith(".csv"):
                      continue
                  file_path = os.path.join(root, file_name)
                  print(file_path)
                  frame = pd.read_csv(file_path, encoding=encoding)
                  if x_name not in frame.columns:
                      continue
                  # File name without the split_way suffix.
                  turbineName = file_name.split(split_way)[0]
                  # Add the turbine name as a new column.
                  frame[turbine_field] = confData.add_W_if_starts_with_digit(turbineName)
                  frames.append(frame)
          return frames

      def _plot_column(self, ress, column, output_path, turbine_field, x_name, y_name):
          """Draw the overview plot and the per-turbine plots of one column."""
          # Overview: one line per turbine.
          fig, ax = plt.subplots()
          sns.lineplot(x=x_name, y=column, data=ress, hue=turbine_field, ax=ax)
          # ax.set_xlim(-150, 2100)
          ax.set_xlabel(x_name)
          ax.set_ylabel(y_name)
          ax.set_title('Temperature-Distribute')
          ax.legend(bbox_to_anchor=(1.02, 0.5), loc='center left', ncol=2, borderaxespad=0.)
          fig.savefig(os.path.join(output_path, "{}.png".format(column)), bbox_inches='tight', dpi=120)
          plt.close(fig)

          # One grey entry per turbine. The list is passed directly as the
          # palette: sns.set_palette() returns None and would only change the
          # global default as a side effect.
          grey_palette = ["lightgrey"] * len(ress[turbine_field].unique())

          # Per turbine: all turbines in grey, the current one in dark blue.
          for name, group in ress.groupby(turbine_field):
              fig, ax = plt.subplots()
              sns.lineplot(x=x_name, y=column, data=ress, hue=turbine_field,
                           palette=grey_palette, legend=False, ax=ax)
              sns.lineplot(x=x_name, y=column, data=group, color='darkblue',
                           legend=False, ax=ax)
              ax.set_xlabel(x_name)
              ax.set_ylabel(y_name)
              ax.set_title('turbine_name={}'.format(name))
              # ax.set_xlim(-150, 2100)
              fig.savefig(os.path.join(output_path, "{}.png".format(name)), bbox_inches='tight', dpi=120)
              plt.close(fig)