| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235 |
- import pandas as pd
- import configparser
- import numpy as np
- from py_mybatis.sql.mybatis_sql_session import MybatisMapperScanner, MybatisSqlSession, PooledDB
- from py_mybatis.sql.pdbc_sql_template import *
- import pymysql
- import os
- import unittest
- import time
def _yaw_bin_edges(values, step):
    """Return evenly spaced bin edges covering every value in *values*."""
    low, high = values.min(), values.max()
    edges = np.arange(low, high + step, step)
    # A constant column produces a single edge, and float rounding can leave
    # the last edge just below the maximum; add one more edge in both cases
    # so that pd.cut always has a bin for every row.
    if len(edges) < 2 or edges[-1] < high:
        edges = np.append(edges, edges[-1] + step)
    return edges


def calculate_yaw_miss(filtered_data, wind_speed_range, wind_speed_step,
                       wind_direct_step, cabin_temp_step):
    """Bin records by wind speed, wind direction and cabin temperature.

    The frame is modified in place: '风向' (wind direction) and '机舱角度'
    (nacelle angle) are reduced modulo 360 and a '偏航误差' (yaw error)
    column holding their plain difference is added.

    Parameters:
        filtered_data: DataFrame with the columns '风向', '机舱角度', '风速'
            (wind speed), '舱内温度' (cabin temperature) and '有功功率'
            (active power).
        wind_speed_range: Unused; the bins are derived from the observed
            minimum and maximum of the data. Kept for interface
            compatibility.
        wind_speed_step: Bin width for '风速'.
        wind_direct_step: Bin width for '风向'.
        cabin_temp_step: Bin width for '舱内温度'.

    Returns:
        tuple: ``(grouped, grouped_power_max, yaw_error_avg)`` where
        ``grouped`` is the GroupBy over the three binned columns,
        ``grouped_power_max`` is a DataFrame with the maximum '有功功率' per
        bin combination (NaN for empty combinations) and ``yaw_error_avg``
        is the mean '偏航误差' of every non-empty bin combination.
    """
    # Reduce both angles modulo 360.
    filtered_data['风向'] = np.abs(filtered_data['风向'] % 360)
    filtered_data['机舱角度'] = np.abs(filtered_data['机舱角度'] % 360)

    # NOTE(review): this is the raw difference and is not wrapped around the
    # 0/360 boundary (359 - 1 gives 358, not -2).
    filtered_data['偏航误差'] = filtered_data['风向'] - filtered_data['机舱角度']

    def _binned(column, step):
        values = filtered_data[column]
        # include_lowest=True keeps the rows that sit exactly on the first
        # edge (the column minimum); with the default right-closed bins they
        # would otherwise fall outside every interval and be dropped.
        return pd.cut(values, _yaw_bin_edges(values, step),
                      include_lowest=True)

    # observed=False keeps one result row per bin combination, including
    # empty ones, regardless of the installed pandas default.
    grouped = filtered_data.groupby(
        [
            _binned('风速', wind_speed_step),
            _binned('风向', wind_direct_step),
            _binned('舱内温度', cabin_temp_step),
        ],
        observed=False,
    )

    # Maximum active power per bin combination; selecting the column before
    # aggregating avoids computing max() over every other column.
    grouped_power_max = grouped['有功功率'].max().reset_index()

    # Mean yaw error per bin combination, without the empty combinations.
    yaw_error_avg = grouped['偏航误差'].mean().dropna()
    return grouped, grouped_power_max, yaw_error_avg
def calculate_yaw_miss2(data, wind_speed_step, wind_direct_step, cabin_temp_step):
    """Floor-bin the data and pick the highest-power row of every bin.

    Three grouping columns are added to *data* in place, each holding the
    lower edge of the bin the row falls in (``floor(value / step) * step``):
    '风向分组' from '风向' (wind direction), '风速分组' from '风速' (wind
    speed) and '舱内温度分组' from '舱内温度' (cabin temperature).

    Parameters:
        data: DataFrame with the columns '风向', '风速', '舱内温度' and
            '有功功率' (active power).
        wind_speed_step: Bin width for '风速'.
        wind_direct_step: Bin width for '风向'.
        cabin_temp_step: Bin width for '舱内温度'.

    Returns:
        tuple: ``(grouped, max_power_rows, data)`` - the GroupBy over the
        three grouping columns, the rows of *data* with the largest
        '有功功率' in each group, and *data* itself.
    """
    # (grouping column, source column, bin width), in the order the grouping
    # columns are appended to the frame.
    bin_specs = (
        ('风向分组', '风向', wind_direct_step),
        ('风速分组', '风速', wind_speed_step),
        ('舱内温度分组', '舱内温度', cabin_temp_step),
    )
    for group_column, source_column, step in bin_specs:
        data[group_column] = np.floor(data[source_column] / step) * step

    grouped = data.groupby(['风向分组', '风速分组', '舱内温度分组'])

    # Index label of the highest active-power row in every group.
    best_row_labels = grouped["有功功率"].idxmax()
    return grouped, data.loc[best_row_labels], data
- def calculate_angle_deviations(array1, array2):
- """
- 计算两个相同长度角度数组中两两对应角度值的偏差。
- 结果限制在-90°到+90°之间,并保留两位小数。
- 参数:
- array1 (list): 第一个角度数组
- array2 (list): 第二个角度数组
- 返回:
- list: 两两对应角度的偏差列表
- """
- deviations = []
- for angle1, angle2 in zip(array1, array2):
- # 计算原始偏差
- deviation = angle1 - angle2
- # 调整偏差,使其位于-180°到+180°范围内
- if deviation == 0.0:
- deviation = 0.0
- else:
- deviation = (deviation + 180) % 360 - 180
- # 将偏差限制在-90°到+90°范围内
- if deviation > 90:
- deviation -= 180
- elif deviation < -90:
- deviation += 180
- # 保留两位小数
- deviations.append(round(deviation, 2))
- return deviations
def recalculation(data):
    """Reduce the angle columns modulo 360 and recompute the yaw error.

    The frame is modified in place: '风向' (wind direction) and '机舱角度'
    (nacelle angle) are replaced by their value modulo 360, and '偏航误差'
    (yaw error) is set to the result of ``calculate_angle_deviations`` for
    the two columns.

    Parameters:
        data: DataFrame containing the columns '风向' and '机舱角度'.

    Returns:
        The same DataFrame object that was passed in.
    """
    for angle_column in ('风向', '机舱角度'):
        reduced = data[angle_column] % 360
        data[angle_column] = np.abs(reduced)

    wind_direction = data['风向']
    nacelle_angle = data['机舱角度']
    data['偏航误差'] = calculate_angle_deviations(wind_direction, nacelle_angle)
    return data
def load_data(file_path, encoding='utf-8'):
    """Read a CSV file into a DataFrame.

    Parameters:
        file_path: Location of the CSV, passed straight to ``pd.read_csv``.
        encoding: Text encoding of the file; defaults to 'utf-8'.

    Returns:
        pandas.DataFrame: The parsed contents of the file.
    """
    return pd.read_csv(file_path, encoding=encoding)
def filter_data(data, wind_speed_range, active_power_range):
    """Select the usable records and add a Unix-seconds time column.

    A row is kept when all of the following hold:

    * '风机状态' (turbine status) equals 5;
    * '风速' (wind speed) lies within ``wind_speed_range`` (inclusive);
    * '有功功率' (active power) is greater than 20 and lies within
      ``active_power_range`` (inclusive);
    * the row contains no missing values.

    Parameters:
        data: DataFrame with at least the columns '风机状态', '风速',
            '有功功率' and '时间' (time).
        wind_speed_range: ``(low, high)`` bounds for '风速'.
        active_power_range: ``(low, high)`` bounds for '有功功率'.

    Returns:
        pandas.DataFrame: The selected rows, with '时间' parsed to datetimes
        and a new '时间Unix' column holding whole seconds since
        1970-01-01 00:00:00. Timestamps without a time zone are counted as
        if they were UTC.
    """
    keep = (
        (data['风机状态'] == 5)
        & (data["风速"] >= wind_speed_range[0])
        & (data["风速"] <= wind_speed_range[1])
        & (data['有功功率'] > 20)
        & (data['有功功率'] >= active_power_range[0])
        & (data['有功功率'] <= active_power_range[1])
    )
    filtered_data = data[keep].dropna()
    filtered_data["时间"] = pd.to_datetime(filtered_data['时间'])

    # Convert to a 10-digit Unix timestamp (seconds). Dividing the distance
    # from the epoch by one second is independent of the resolution the
    # datetimes are stored in, unlike reinterpreting the raw integers as
    # nanoseconds.
    # Excel formula to turn the 10-digit value back into a time:
    #   =TEXT((A1+8*3600)/86400+70*365+19,"yyyy-mm-dd hh:mm:ss")
    # For a 13-digit (millisecond) value use K2/1000 in place of A1.
    timestamps = filtered_data["时间"]
    epoch_tz = "UTC" if timestamps.dt.tz is not None else None
    epoch = pd.Timestamp("1970-01-01", tz=epoch_tz)
    filtered_data["时间Unix"] = (timestamps - epoch) // pd.Timedelta(seconds=1)
    return filtered_data
def read_config(config_file):
    """Load an INI file and hand back its DEFAULT section.

    Parameters:
        config_file (str): Path to the INI configuration file. A path that
            cannot be read is skipped by ``ConfigParser.read``, in which
            case the returned section is empty.

    Returns:
        configparser.SectionProxy: Mapping-like view of the ``DEFAULT``
        section.
    """
    parser = configparser.ConfigParser()
    parser.read(config_file)
    default_section = parser['DEFAULT']
    return default_section
# Example input: replace with the path to your own CSV file.
# Raw string so the Windows backslashes are taken literally instead of being
# parsed as (invalid) escape sequences.
file_path = r'E:\WorkSpace\Resource\Manage\项目\大唐\风电机组功率曲线异常检测分析服务项目\收资\data_process\data_second_scada_test.csv'
def _subtract_group_reference(data, max_power_rows, group_keys, column='偏航误差'):
    """Subtract each group's max-power-row value of *column* from its rows.

    *max_power_rows* must hold exactly one row per combination of
    *group_keys*. The subtraction is done with one keyed join instead of a
    per-row lookup, and *data* is updated in place and returned.
    """
    reference = max_power_rows.set_index(group_keys)[column].rename('_reference')
    # A left join keeps the rows of `data` in their original order.
    joined = data[group_keys].join(reference, on=group_keys)
    data[column] = data[column].to_numpy() - joined['_reference'].to_numpy()
    return data


def main():
    """Run the yaw-error analysis and write the result files.

    Settings are read from the DEFAULT section of 'config.ini' (with
    built-in fallbacks), the CSV at the module-level ``file_path`` is loaded
    and filtered, the yaw error is computed and expressed relative to the
    highest-power row of each (wind direction, wind speed, cabin
    temperature) bin, and two CSV files are written to './output'. The mean
    of the negative and of the positive relative yaw errors is printed as a
    (lower, upper) pair.
    """
    config_settings = read_config('config.ini')
    encoding = config_settings.get('Encoding', 'utf-8')
    wind_direct_step = float(config_settings.get('WindDirectStep', '1'))
    wind_speed_range = [float(x) for x in config_settings.get(
        'WindSpeedRange', '3, 20').split(',')]
    wind_speed_step = float(config_settings.get('WindSpeedStep', '0.05'))
    cabin_temp_step = float(config_settings.get('CabinTempStep', '1'))
    active_power_range = [float(x) for x in config_settings.get(
        'ActivePowerRange', '21, 1500').split(',')]

    raw_data = load_data(file_path, encoding)
    data = filter_data(raw_data, wind_speed_range, active_power_range)
    data = recalculation(data)
    _, grouped_power_max, data = calculate_yaw_miss2(
        data, wind_speed_step, wind_direct_step, cabin_temp_step)

    group_keys = ['风向分组', '风速分组', '舱内温度分组']

    # Express every row's yaw error relative to the yaw error of the
    # highest-power row in the same bin.
    data = _subtract_group_reference(data, grouped_power_max, group_keys)

    # Make sure the output directory exists before writing into it.
    os.makedirs("./output", exist_ok=True)

    print(" grouped by 风速、风向、舱内温度 ")
    # Rows ordered by bin; the stable sort keeps the original order inside
    # each bin.
    grouped_df = data.sort_values(group_keys, kind="stable")
    # NOTE: the "ansi" codec name is only available on Windows.
    grouped_df.to_csv("./output/yaw_miss_grouped.csv",
                      index=False, encoding="ansi")

    print(" max power rows ")
    grouped_power_max.to_csv("./output/yaw_miss_max_power_rows.csv",
                             index=False, encoding="ansi")

    print("number pairs of yaw miss avg : " )
    # Mean of the positive relative yaw errors.
    positive_mean = data[data['偏航误差'] > 0]['偏航误差'].mean()
    # Mean of the negative relative yaw errors.
    negative_mean = data[data['偏航误差'] < 0]['偏航误差'].mean()
    # (lower limit, upper limit)
    limits = (negative_mean, positive_mean)
    print("下限和上限的数对:", limits)
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|