# zhzn / energy-data-trans
							import datetime
import multiprocessing
import os
import sys

sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])

import pandas as pd

from utils.file.trans_methods import read_file_to_df


def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
    """
    Count the sampling slots spanned by two timestamps.

    Only the ``days`` and ``seconds`` fields of the difference are used. The
    resulting number of seconds is divided by ``time_space``, truncated
    towards zero, and one is added to its absolute value.

    :param start_time: first timestamp
    :param end_time: second timestamp
    :param time_space: spacing between slots, in seconds
    :return: ``abs(int(seconds / time_space)) + 1``
    """
    elapsed = end_time - start_time
    whole_seconds = elapsed.seconds + elapsed.days * 86400
    slot_count = int(whole_seconds / time_space)
    return 1 + abs(slot_count)


def save_percent(value, save_decimal=7):
    """
    Convert a ratio to a percentage.

    The ratio is rounded to ``save_decimal`` decimal places and scaled by 100.
    The scaled value is then rounded once more, to ``save_decimal - 2`` places
    (never fewer than 0), because the multiplication alone leaves binary
    floating-point noise in the result (``0.07 * 100`` evaluates to
    ``7.000000000000001``).

    :param value: ratio to convert, e.g. ``0.25`` for 25 %
    :param save_decimal: decimal places kept on the ratio before scaling
    :return: the percentage
    """
    percent = round(value, save_decimal) * 100
    # Two of the ratio's decimals become integer digits after the x100 scaling.
    return round(percent, max(save_decimal - 2, 0))


def _count_out_of_range(df, cols, lower, upper):
    """Return, for each column in ``cols``, the number of rows below ``lower`` or above ``upper``."""
    return [int(((df[col] < lower) | (df[col] > upper)).sum()) for col in cols]


def _format_col_counts(cols, counts):
    """Join ``col:count`` pairs into one string, each pair preceded by a comma."""
    return ''.join(f',{col}:{num}' for col, num in zip(cols, counts))


def read_and_select(df, wind_name, read_cols):
    """
    Build a one-row data-quality summary for one turbine.

    :param df: records to assess. Must contain ``Time``, ``风机号``, every
        column in ``read_cols`` and the two power columns ``有功功率(kW)`` and
        ``风机出口有功功率(kW)``. Every column of ``read_cols`` other than
        ``Time`` is converted to numeric on this frame (unparseable values
        become NaN). If the frame holds several ``风机号`` values, only the
        first group's non-null counts are used.
    :param wind_name: turbine identifier written to the result's ``风机号`` column
    :param read_cols: names of the columns whose completeness is assessed
    :return: single-row DataFrame with the columns
        ``风机号`` (turbine id),
        ``重复率`` (duplicate-timestamp rate, %),
        ``重复次数`` (number of duplicate timestamps),
        ``总记录数`` (expected record count at one record per second),
        ``平均缺失率,单位%`` (mean missing rate over ``read_cols``, %),
        ``缺失数值`` (missing count per column),
        ``风速异常`` (out-of-range count per wind-speed column),
        ``功率异常`` (out-of-range count per power column),
        ``平均异常率`` (mean out-of-range rate over those columns, %).
    """
    first_time, last_time = df['Time'].min(), df['Time'].max()
    # Expected number of records if there were exactly one per second.
    count = get_time_space_count(first_time, last_time, 1)
    print(first_time, last_time, count)

    repeat_time_count = df.shape[0] - len(df['Time'].unique())
    print(wind_name, count, repeat_time_count)

    for read_col in read_cols:
        if read_col != 'Time':
            df[read_col] = pd.to_numeric(df[read_col], errors='coerce')

    # Non-null values per column, taken from the first ``风机号`` group.
    non_null_counts = df.groupby(by=['风机号']).count().reset_index()[read_cols].values[0]
    total_count = non_null_counts.sum()
    expected_values = count * len(read_cols)
    print(wind_name, total_count, expected_values)
    missing_str = '-'.join(
        str(col) + ':' + str(count - present) for col, present in zip(read_cols, non_null_counts))

    # Wind-speed columns are those whose name contains '风速'; values outside [0, 80] count as abnormal.
    fengsu_cols = [col for col in read_cols if '风速' in col]
    fengsu_counts = _count_out_of_range(df, fengsu_cols, 0, 80)
    fengsu_count = sum(fengsu_counts)

    # Power values outside [-200, 3000] count as abnormal.
    gonglv_cols = ['有功功率(kW)', '风机出口有功功率(kW)']
    gonglv_counts = _count_out_of_range(df, gonglv_cols, -200, 3000)
    gonglv_count = sum(gonglv_counts)

    # Both the wind-speed and the power anomalies contribute to the mean rate.
    checked_values = (len(fengsu_cols) + len(gonglv_cols)) * count
    abnormal_rate = save_percent((fengsu_count + gonglv_count) / checked_values)

    return pd.DataFrame({
        '风机号': [wind_name],
        '重复率': [save_percent(repeat_time_count / count)],
        '重复次数': [repeat_time_count],
        '总记录数': [count],
        '平均缺失率,单位%': [save_percent(1 - total_count / expected_values)],
        '缺失数值': [missing_str],
        # Each column is reported with its own count, not a running total.
        '风速异常': [_format_col_counts(fengsu_cols, fengsu_counts)],
        '功率异常': [_format_col_counts(gonglv_cols, gonglv_counts)],
        '平均异常率': [abnormal_rate],
    })


if __name__ == '__main__':
    # Script entry point: load the CSV, keep turbines 1-5, build one summary
    # row per turbine in a pool of 5 worker processes and write the combined
    # report to "太平里-1秒.csv".

    # Earlier hard-coded column selection, kept for reference (names left verbatim):
    # read_cols = ['Time', '设备主要状态', '功率曲线风速', '湍流强度', '实际风速', '有功功率', '桨叶角度A', '桨叶角度B',
    #              '桨叶角度C', '机舱内温度', '机舱外温度', '绝对风向', '机舱绝对位置', '叶轮转速', '发电机转速',
    #              '瞬时风速',
    #              '有功设定反馈', '当前理论可发最大功率', '空气密度', '偏航误差', '发电机扭矩', '瞬时功率', '风向1s',
    #              '偏航压力', '桨叶1速度', '桨叶2速度', '桨叶3速度', '桨叶1角度给定', '桨叶2角度给定', '桨叶3角度给定',
    #              '轴1电机电流', '轴2电机电流', '轴3电机电流', '轴1电机温度', '轴2电机温度', '轴3电机温度', '待机',
    #              '启动',
    #              '偏航', '并网', '限功率', '正常发电', '故障', '计入功率曲线', '运行发电机冷却风扇1',
    #              '运行发电机冷却风扇2',
    #              '激活偏航解缆阀', '激活偏航刹车阀', '激活风轮刹车阀', '激活顺时针偏航', '激活逆时针偏航', '电缆扭角']

    source_path = r'D:\data\tmp\sec.csv'
    data = read_file_to_df(source_path)
    print(data.columns)

    # Drop the 'Unnamed: 79' column and name the first column 'Time'.
    data = data.drop(columns=['Unnamed: 79'])
    data = data.rename(columns={'Unnamed: 0': 'Time'})
    print(data.columns)

    # Unparseable timestamps / turbine numbers become NaT / NaN.
    data['Time'] = pd.to_datetime(data['Time'], errors='coerce')
    data['风机号'] = pd.to_numeric(data['风机号'], errors='coerce')
    data = data[data['风机号'].isin([1, 2, 3, 4, 5])]

    # Every remaining column except the two key columns is assessed.
    read_cols = data.columns.tolist()
    for key_col in ('Time', '风机号'):
        read_cols.remove(key_col)

    turbine_ids = data['风机号'].unique()
    tasks = [(data[data['风机号'] == turbine_id], turbine_id, read_cols) for turbine_id in turbine_ids]
    with multiprocessing.Pool(5) as pool:
        per_turbine = pool.starmap(read_and_select, tasks)

    summary = pd.concat(per_turbine, ignore_index=True)
    print(summary.columns)
    summary = summary.sort_values(by=['风机号'])
    summary.to_csv("太平里-1秒.csv", encoding='utf8', index=False)