123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143 |
- import os
- import pandas as pd
- import numpy as np
- import pandas as pd
- import matplotlib.pyplot as plt
- import seaborn as sns
- from .analyst import Analyst
- from .utils.directoryUtil import DirectoryUtil as dir
- from confBusiness import ConfBusiness
class TemperatureLargeComponentsAnalyst(Analyst):
    """
    Temperature-rise analysis for the large components of wind turbines.

    Per turbine, the mean of every configured temperature column is computed
    per active-power bin (bin width 10) and written to a CSV file. Across
    turbines, the CSV files found under the analysis directory are read back
    and plotted, one set of figures per temperature column.
    """

    def typeAnalyst(self):
        """Return the identifier string of this analysis type."""
        return "temperature_large_components"

    def turbineAnalysis(self,
                        dataFrame,
                        outputAnalysisDir,
                        outputFilePath,
                        confData: ConfBusiness,
                        turbineName):
        """
        Run the per-turbine step: bin temperatures by power and write a CSV.

        Parameters:
        - dataFrame: data of a single turbine.
        - outputAnalysisDir: not used by this method.
        - outputFilePath: path of the CSV file to write.
        - confData: configuration providing ``field_turbine_time``,
          ``field_power`` and ``field_temperature_large_components``.
        - turbineName: not used by this method.
        """
        self.temp_power(dataFrame, outputFilePath,
                        confData.field_turbine_time,
                        confData.field_power,
                        confData.field_temperature_large_components)

    def getNoneEmptyFields(self, dataFrame, temperatureFields):
        """
        Return the names in ``temperatureFields`` whose column in
        ``dataFrame`` holds at least one non-null value.

        The order of the returned list follows ``temperatureFields``.
        """
        has_value = dataFrame[temperatureFields].notnull().any(axis=0)
        return has_value[has_value].index.tolist()

    def temp_power(self, dataFrame, output_path, field_time, field_power_active, field_temperature_large_componts):
        """
        Write the mean temperature per power bin to ``output_path``.

        Parameters:
        - dataFrame: input data containing the time, power and temperature
          columns.
        - output_path: path of the CSV file to write.
        - field_time: name of the time column.
        - field_power_active: name of the active-power column.
        - field_temperature_large_componts: comma-separated names of the
          temperature columns.

        Nothing is written when every temperature column is entirely null.
        """
        print("field_temperature_large_componts is {}".format(field_temperature_large_componts))
        temperature_cols = field_temperature_large_componts.split(',')

        # Only columns with at least one value can be aggregated.
        non_empty_cols = self.getNoneEmptyFields(dataFrame, temperature_cols)
        if not non_empty_cols:
            # Aggregating with an empty mapping fails, so skip this turbine.
            print("no non-empty temperature columns, skip {}".format(output_path))
            return

        use_cols = [field_time, field_power_active] + temperature_cols

        # Drop all-null columns first, then every row that still has a null.
        # The explicit copy makes the frame independent of the caller's data,
        # so adding 'power_floor' below cannot raise SettingWithCopyWarning.
        cleaned = dataFrame[use_cols].dropna(axis=1, how='all').dropna(axis=0).copy()

        # Bin the power into steps of 10. astype(int) truncates toward zero,
        # so every value in the open interval (-10, 10) lands in bin 0.
        cleaned['power_floor'] = (cleaned[field_power_active] / 10).astype(int) * 10

        agg_dict = {col: 'mean' for col in non_empty_cols}
        grouped = cleaned.groupby('power_floor').agg(agg_dict).reset_index()
        grouped = grouped.sort_values('power_floor')
        grouped.to_csv(output_path, index=False)

    def turbinesAnalysis(self, dataFrameMerge, outputAnalysisDir, confData: ConfBusiness):
        """
        Run the cross-turbine step: plot the per-turbine CSV results.

        ``dataFrameMerge`` is not used; the data is read back from the CSV
        files under ``outputAnalysisDir``.
        """
        self.plot_temperature_distribution(outputAnalysisDir, confData,
                                           confData.field_temperature_large_components)

    def plot_temperature_distribution(self, csvFileDirOfCp, confData: ConfBusiness, field_temperature_large_componts, encoding='utf-8'):
        """
        Plot temperature against power bin for every temperature column.

        For each column a sub-directory named after the column is created
        under ``csvFileDirOfCp``. It receives one figure with all turbines
        and one figure per turbine in which that turbine is highlighted
        against the others drawn in light grey.

        Parameters:
        - csvFileDirOfCp: str, directory that holds the input CSV files and
          receives the output figures.
        - confData: configuration object; its ``add_W_if_starts_with_digit``
          is applied to the turbine name taken from each file name.
        - field_temperature_large_componts: str, comma-separated names of
          the temperature columns to plot.
        - encoding: str, encoding of the input CSV files. Defaults to 'utf-8'.
        """
        field_Name_Turbine = "turbine_name"
        x_name = 'power_floor'
        y_name = 'temperature'
        split_way = '_temperature_large_components.csv'

        for column in field_temperature_large_componts.split(','):
            output_path = os.path.join(csvFileDirOfCp, '{}'.format(column))
            os.makedirs(output_path, exist_ok=True)
            print("current column {}".format(column))
            sns.set_palette('deep')

            # Collect the per-file frames and concatenate once at the end
            # instead of growing a DataFrame inside the loop.
            frames = []
            for root, dir_names, file_names in dir.list_directory(csvFileDirOfCp):
                for file_name in file_names:
                    if not file_name.endswith(".csv"):
                        continue
                    file_path = os.path.join(root, file_name)
                    print(file_path)
                    frame = pd.read_csv(file_path, encoding=encoding)
                    # Skip files that cannot contribute to this plot.
                    if column not in frame.columns or x_name not in frame.columns:
                        continue

                    # The turbine name is the file name up to split_way.
                    turbineName = file_name.split(split_way)[0]
                    frame[field_Name_Turbine] = confData.add_W_if_starts_with_digit(turbineName)
                    frames.append(frame.loc[:, [field_Name_Turbine, x_name, column]])

            if not frames:
                # Without data seaborn cannot resolve the x/y columns.
                print("no data found for column {}, skip plotting".format(column))
                continue

            ress = pd.concat(frames, axis=0, ignore_index=True)

            # Overview figure: one line per turbine.
            fig, ax = plt.subplots()
            sns.lineplot(x=x_name, y=column, data=ress, hue=field_Name_Turbine, ax=ax)
            ax.set_xlabel(x_name)
            ax.set_ylabel(y_name)
            ax.set_title('Temperature-Distribute')
            ax.legend(bbox_to_anchor=(1.02, 0.5), loc='center left', ncol=2, borderaxespad=0.)
            fig.savefig(os.path.join(output_path, "{}.png".format(column)), bbox_inches='tight', dpi=120)
            plt.close(fig)

            # One grey entry per turbine. Passing the list as `palette`
            # keeps the global seaborn palette untouched; set_palette()
            # returns None and would only work through its side effect.
            background_palette = ["lightgrey"] * len(ress[field_Name_Turbine].unique())

            # Per-turbine figures: all turbines in grey, the current one on top.
            for name, group in ress.groupby(field_Name_Turbine):
                fig, ax = plt.subplots()
                sns.lineplot(x=x_name, y=column, data=ress, hue=field_Name_Turbine,
                             palette=background_palette, legend=False, ax=ax)
                sns.lineplot(x=x_name, y=column, data=group, color='darkblue', legend=False, ax=ax)
                ax.set_xlabel(x_name)
                ax.set_ylabel(y_name)
                ax.set_title('turbine_name={}'.format(name))
                fig.savefig(os.path.join(output_path, "{}.png".format(name)), bbox_inches='tight', dpi=120)
                plt.close(fig)
|