# Source: zhouyang.xie / data_analysis_wind_turbine
							import pandas as pd
import configparser
import numpy as np
from py_mybatis.sql.mybatis_sql_session import MybatisMapperScanner, MybatisSqlSession, PooledDB
from py_mybatis.sql.pdbc_sql_template import *
import pymysql
import os
import unittest
import time

def calculate_yaw_miss(filtered_data, wind_speed_range, wind_speed_step, wind_direct_step, cabin_temp_step):
    """
    Bin records by wind speed, wind direction and cabin temperature, then
    report the peak active power and the mean yaw error of every bin.

    The input frame is modified in place: '风向' and '机舱角度' are reduced
    modulo 360 and a '偏航误差' column ('风向' minus '机舱角度', with no
    wrap-around correction) is added.

    Parameters:
    filtered_data (pandas.DataFrame): Frame holding the columns '风向',
        '机舱角度', '风速', '舱内温度' and '有功功率'.
    wind_speed_range: Unused; the bin edges are derived from the data itself.
        Kept so existing callers keep working.
    wind_speed_step (float): Width of each '风速' bin.
    wind_direct_step (float): Width of each '风向' bin.
    cabin_temp_step (float): Width of each '舱内温度' bin.

    Returns:
    tuple: (grouped, grouped_power_max, yaw_error_avg) where grouped is the
        groupby object, grouped_power_max is a frame with one row per bin
        combination and its maximum '有功功率', and yaw_error_avg is the
        per-bin mean of '偏航误差' with empty bins dropped.
    """
    # Normalise both angles with a modulo-360 reduction.
    filtered_data['风向'] = np.abs(filtered_data['风向'] % 360)
    filtered_data['机舱角度'] = np.abs(filtered_data['机舱角度'] % 360)
    # Yaw error is the plain difference of the two normalised angles.
    filtered_data['偏航误差'] = filtered_data['风向'] - filtered_data['机舱角度']

    def _bin(column, step):
        # Edges run from the column minimum up to (at least) its maximum.
        values = filtered_data[column]
        edges = np.arange(values.min(), values.max() + step, step)
        # pd.cut builds left-open intervals, so without include_lowest the
        # rows holding the column minimum would land in no bin at all and
        # silently disappear from every aggregate below.
        return pd.cut(values, edges, include_lowest=True)

    # observed=False keeps one group per bin combination (even empty ones),
    # independent of the pandas version's default.
    grouped = filtered_data.groupby(
        [
            _bin('风速', wind_speed_step),
            _bin('风向', wind_direct_step),
            _bin('舱内温度', cabin_temp_step),
        ],
        observed=False,
    )

    # Highest active power per bin; selecting the column first avoids
    # aggregating every other column of the frame.
    grouped_power_max = grouped['有功功率'].max().reset_index()

    # Mean yaw error per bin; bins without any record are dropped.
    yaw_error_avg = grouped['偏航误差'].mean().dropna()

    return grouped, grouped_power_max, yaw_error_avg

def calculate_yaw_miss2(data, wind_speed_step, wind_direct_step, cabin_temp_step):
    """
    Label every record with its wind-direction, wind-speed and
    cabin-temperature bin and pick the highest-power record of each bin.

    Each bin label is the lower edge of the bin: floor(value / step) * step.
    The three label columns are written into `data` itself.

    Parameters:
    data (pandas.DataFrame): Frame holding the columns '风向', '风速',
        '舱内温度' and '有功功率'.
    wind_speed_step (float): Bin width applied to '风速'.
    wind_direct_step (float): Bin width applied to '风向'.
    cabin_temp_step (float): Bin width applied to '舱内温度'.

    Returns:
    tuple: (grouped, max_power_rows, data) where grouped is the groupby over
        the three label columns, max_power_rows holds the row with the
        largest '有功功率' of every group, and data is the input frame with
        the label columns added.
    """
    # (source column, bin width, label column); the order here is the order
    # in which the label columns are appended and used as group keys.
    bin_specs = (
        ('风向', wind_direct_step, '风向分组'),
        ('风速', wind_speed_step, '风速分组'),
        ('舱内温度', cabin_temp_step, '舱内温度分组'),
    )
    for source, width, label in bin_specs:
        data[label] = np.floor(data[source] / width) * width

    grouped = data.groupby([label for _, _, label in bin_specs])

    # idxmax yields one index label per group: the row with the peak power.
    max_power_rows = data.loc[grouped["有功功率"].idxmax()]

    return grouped, max_power_rows, data


def calculate_angle_deviations(array1, array2):
    """
    Compute the pairwise deviation between two sequences of angles.

    Each deviation is folded into the range -90 to +90 degrees and rounded
    to two decimal places. Pairs are formed with zip, so the result is as
    long as the shorter input.

    Parameters:
    array1 (list): First sequence of angles.
    array2 (list): Second sequence of angles.

    Returns:
    list: Deviation of every corresponding pair of angles.
    """
    def _fold(first, second):
        delta = first - second

        # An exact zero stays the float literal 0.0; any other difference is
        # wrapped into the range -180 to +180 degrees.
        wrapped = 0.0 if delta == 0.0 else (delta + 180) % 360 - 180

        # Fold the wrapped value into the range -90 to +90 degrees.
        if wrapped > 90:
            wrapped -= 180
        elif wrapped < -90:
            wrapped += 180

        # Keep two decimal places.
        return round(wrapped, 2)

    return [_fold(first, second) for first, second in zip(array1, array2)]

def recalculation(data):
    """
    Normalise the angle columns and recompute the yaw error column.

    '风向' and '机舱角度' are reduced modulo 360 in place, then '偏航误差' is
    filled with the result of calculate_angle_deviations for the two columns.

    Parameters:
    data (pandas.DataFrame): Frame holding the columns '风向' and '机舱角度'.

    Returns:
    pandas.DataFrame: The same frame, with the three columns updated.
    """
    # Reduce both angle columns modulo 360.
    for angle_column in ('风向', '机舱角度'):
        data[angle_column] = np.abs(data[angle_column] % 360)

    # Yaw error: pairwise deviation between wind direction and nacelle angle.
    yaw_error = calculate_angle_deviations(data['风向'], data['机舱角度'])
    data['偏航误差'] = yaw_error

    return data

def load_data(file_path, encoding='utf-8'):
    """
    Read a CSV source into a pandas DataFrame.

    Parameters:
    file_path: Path (or file-like object) handed to pandas.read_csv.
    encoding (str): Text encoding of the CSV; defaults to 'utf-8'.

    Returns:
    pandas.DataFrame: The parsed CSV content.
    """
    return pd.read_csv(file_path, encoding=encoding)


def filter_data(data, wind_speed_range, active_power_range, min_active_power=20):
    """
    Keep the usable records and add a Unix-seconds timestamp column.

    A row is kept when all of the following hold:
    1. '风机状态' equals 5;
    2. '风速' lies within wind_speed_range (both ends inclusive);
    3. '有功功率' is strictly greater than min_active_power;
    4. '有功功率' lies within active_power_range (both ends inclusive);
    5. the row contains no missing value.

    Parameters:
    data (pandas.DataFrame): Frame holding the columns '风机状态', '风速',
        '有功功率' and '时间'.
    wind_speed_range (sequence): (lower, upper) bounds for '风速'.
    active_power_range (sequence): (lower, upper) bounds for '有功功率'.
    min_active_power (float): Exclusive lower floor for '有功功率';
        defaults to 20.

    Returns:
    pandas.DataFrame: A new frame with the surviving rows, '时间' converted
        to datetimes and '时间Unix' holding whole seconds since
        1970-01-01 00:00:00.
    """
    keep = (
        (data['风机状态'] == 5)
        & data['风速'].between(wind_speed_range[0], wind_speed_range[1])
        & (data['有功功率'] > min_active_power)
        & data['有功功率'].between(active_power_range[0], active_power_range[1])
    )
    filtered_data = data[keep].dropna()

    timestamps = pd.to_datetime(filtered_data['时间'])
    filtered_data["时间"] = timestamps

    # 10-digit Unix timestamp (seconds).
    # Excel formula to turn it back into a date:
    #   =TEXT((A1+8*3600)/86400+70*365+19,"yyyy-mm-dd hh:mm:ss")
    # Subtracting the epoch and floor-dividing by one second does not depend
    # on the resolution of the datetime column, unlike reinterpreting the
    # values as integers and dividing by 10**9 (correct only for nanoseconds).
    epoch = pd.Timestamp("1970-01-01", tz=None if timestamps.dt.tz is None else "UTC")
    filtered_data["时间Unix"] = (timestamps - epoch) // pd.Timedelta(seconds=1)

    return filtered_data


def read_config(config_file):
    """
    Load an INI file and hand back its DEFAULT section.

    Parameters:
    config_file (str): Path to the INI configuration file.

    Returns:
    configparser.SectionProxy: Mapping-like view of the DEFAULT section;
        values are strings and can be read with .get(option, fallback).
    """
    parser = configparser.ConfigParser()
    # ConfigParser.read() skips files it cannot open, so a missing file
    # yields an empty DEFAULT section rather than an exception.
    parser.read(config_file)
    default_section = parser['DEFAULT']
    return default_section


# Example usage: default input CSV read by main().
# Replace with your file path. The raw-string prefix keeps the Windows
# backslashes literal instead of having them parsed as (invalid) escape
# sequences such as \W, \R or \d.
file_path = r'E:\WorkSpace\Resource\Manage\项目\大唐\风电机组功率曲线异常检测分析服务项目\收资\data_process\data_second_scada_test.csv'

def main():
    """
    Run the yaw-misalignment analysis end to end.

    Steps:
    1. Read the settings from 'config.ini' (built-in defaults apply to
       missing options).
    2. Load the CSV at the module-level file_path, filter it and recompute
       the yaw error.
    3. Bin the records and, inside every bin, express each record's yaw
       error relative to the yaw error of the bin's highest-power record.
    4. Write the binned records and the highest-power rows to ./output and
       print the mean of the negative and of the positive relative yaw
       errors as a (lower, upper) pair.
    """
    config_settings = read_config('config.ini')

    encoding = config_settings.get('Encoding', 'utf-8')

    wind_direct_step = float(config_settings.get('WindDirectStep', '1'))
    wind_speed_range = [float(x) for x in config_settings.get(
        'WindSpeedRange', '3, 20').split(',')]
    wind_speed_step = float(config_settings.get('WindSpeedStep', '0.05'))
    cabin_temp_step = float(config_settings.get('CabinTempStep', '1'))
    active_power_range = [float(x) for x in config_settings.get(
        'ActivePowerRange', '21, 1500').split(',')]

    raw_data = load_data(file_path, encoding)
    data = filter_data(raw_data, wind_speed_range, active_power_range)
    data = recalculation(data)

    _, grouped_power_max, data = calculate_yaw_miss2(
        data, wind_speed_step, wind_direct_step, cabin_temp_step)

    group_columns = ['风向分组', '风速分组', '舱内温度分组']

    # Subtract, for every record, the yaw error of the highest-power record
    # of its bin. A single left merge (which preserves the row order of
    # `data`) replaces a per-row scan of grouped_power_max and always yields
    # a scalar per row. drop_duplicates guarantees one reference row per bin.
    reference = grouped_power_max.drop_duplicates(
        group_columns)[group_columns + ['偏航误差']]
    matched = data[group_columns].merge(
        reference, on=group_columns, how='left')
    data['偏航误差'] = data['偏航误差'].to_numpy() - matched['偏航误差'].to_numpy()

    # to_csv does not create missing directories.
    os.makedirs("./output", exist_ok=True)

    print(" grouped by 风速、风向、舱内温度 ")
    # Records arranged bin by bin; the stable sort keeps the original order
    # inside each bin and does not depend on how groupby.apply treats the
    # grouping columns in a given pandas version.
    grouped_df = data.sort_values(group_columns, kind="stable")
    # NOTE(review): the "ansi" codec is an alias of "mbcs", which Python
    # provides on Windows only - confirm before running elsewhere.
    grouped_df.to_csv("./output/yaw_miss_grouped.csv",
                      index=False, encoding="ansi")

    print(" max power rows ")
    grouped_power_max.to_csv("./output/yaw_miss_max_power_rows.csv",
                             index=False, encoding="ansi")

    print("number pairs of yaw miss avg :  " )
    # Mean of the positive relative yaw errors.
    positive_mean = data[data['偏航误差'] > 0]['偏航误差'].mean()

    # Mean of the negative relative yaw errors.
    negative_mean = data[data['偏航误差'] < 0]['偏航误差'].mean()
    # Pair them up as (lower, upper).
    limits = (negative_mean, positive_mean)
    print("下限和上限的数对:", limits)

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()