1 month ago · b8bc5b367e
--- a/conf/etl_config_dev.yaml
+++ b/conf/etl_config_dev.yaml
@@ -1,5 +1,5 @@
 
				 plt:
			
 
				-  database: energy
			
 
				+  database: energy_ty
			
 
				   host: 192.168.50.233
			
 
				   password: admin123456
			
 
				   port: 3306
			
--- a/etl/wind_power/min_sec/MinSecTrans.py
+++ b/etl/wind_power/min_sec/MinSecTrans.py
@@ -47,27 +47,24 @@ class MinSecTrans(BaseDataTrans):
 
				             boolean_sec_to_min = read_conf(conf_map, 'boolean_sec_to_min', 0)
			
 
				             boolean_sec_to_min = int(boolean_sec_to_min) == 1
			
 
				 
			
 
				-            # self.boolean_sec_to_min = int(data['boolean_sec_to_min']) == 1 if 'boolean_sec_to_min' in data.keys() else False
			
 
				-
			
 
				             cols_trans_all = dict()
			
 
				             trans_cols = ['wind_turbine_number', 'time_stamp', 'active_power', 'rotor_speed', 'generator_speed',
			
 
				                           'wind_velocity', 'pitch_angle_blade_1', 'pitch_angle_blade_2', 'pitch_angle_blade_3',
			
 
				                           'cabin_position', 'true_wind_direction', 'yaw_error1', 'set_value_of_active_power',
			
 
				                           'gearbox_oil_temperature', 'generatordrive_end_bearing_temperature',
			
 
				-                          'generatornon_drive_end_bearing_temperature', 'wind_turbine_status',
			
 
				-                          'wind_turbine_status2',
			
 
				+                          'generatornon_drive_end_bearing_temperature', 'wind_turbine_status', 'wind_turbine_status2',
			
 
				                           'cabin_temperature', 'twisted_cable_angle', 'front_back_vibration_of_the_cabin',
			
 
				                           'side_to_side_vibration_of_the_cabin', 'actual_torque', 'given_torque',
			
 
				-                          'clockwise_yaw_count',
			
 
				-                          'counterclockwise_yaw_count', 'unusable', 'power_curve_available',
			
 
				+                          'clockwise_yaw_count', 'counterclockwise_yaw_count', 'unusable', 'power_curve_available',
			
 
				                           'required_gearbox_speed',
			
 
				                           'inverter_speed_master_control', 'outside_cabin_temperature', 'main_bearing_temperature',
			
 
				-                          'gearbox_high_speed_shaft_bearing_temperature',
			
 
				+                          'main_bearing_temperature_2', 'gearbox_high_speed_shaft_bearing_temperature',
			
 
				                           'gearboxmedium_speed_shaftbearing_temperature',
			
 
				                           'gearbox_low_speed_shaft_bearing_temperature', 'generator_winding1_temperature',
			
 
				                           'generator_winding2_temperature', 'generator_winding3_temperature',
			
 
				-                          'turbulence_intensity', 'param1',
			
 
				-                          'param2', 'param3', 'param4', 'param5', 'param6', 'param7', 'param8', 'param9', 'param10']
			
 
				+                          'turbulence_intensity', 'grid_a_phase_current', 'grid_b_phase_current',
			
 
				+                          'grid_c_phase_current', 'reactive_power', 'param1', 'param2', 'param3', 'param4', 'param5',
			
 
				+                          'param6', 'param7', 'param8', 'param9', 'param10']
			
 
				 
			
 
				             for col in trans_cols:
			
 
				                 cols_trans_all[col] = read_conf(conf_map, col, '')
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,4 @@ PyYAML~=6.0.1
 
				 chardet~=3.0.4
			
 
				 psutil~=6.0.0
			
 
				 openpyxl ~= 3.1.4
			
 
				-xlrd
			
 
				+xlrd ~=2.0.1
			
--- a/service/trans_service.py
+++ b/service/trans_service.py
@@ -108,6 +108,7 @@ def creat_min_sec_table(table_name, trans_type, use_tidb=False):
 
				             `inverter_speed_master_control` DOUBLE DEFAULT NULL COMMENT '变频器转速(主控)',
			
 
				             `outside_cabin_temperature` DOUBLE DEFAULT NULL COMMENT '环境温度',
			
 
				             `main_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '主轴承轴承温度',
			
 
				+            `main_bearing_temperature_2` DOUBLE DEFAULT NULL COMMENT '主轴承轴承温度2',
			
 
				             `gearbox_high_speed_shaft_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱高速轴轴承温度',
			
 
				             `gearboxmedium_speed_shaftbearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱中速轴轴承温度',
			
 
				             `gearbox_low_speed_shaft_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱低速轴轴承温度',
			
@@ -117,6 +118,10 @@ def creat_min_sec_table(table_name, trans_type, use_tidb=False):
 
				             `wind_turbine_status` DOUBLE DEFAULT NULL COMMENT '风机状态1',
			
 
				             `wind_turbine_status2` DOUBLE DEFAULT NULL COMMENT '风机状态2',
			
 
				             `turbulence_intensity` DOUBLE DEFAULT NULL COMMENT '湍流强度',
			
 
				+            `grid_a_phase_current` DOUBLE DEFAULT NULL COMMENT '电网A相电流',
			
 
				+            `grid_b_phase_current` DOUBLE DEFAULT NULL COMMENT '电网B相电流',
			
 
				+            `grid_c_phase_current` DOUBLE DEFAULT NULL COMMENT '电网C相电流',
			
 
				+            `reactive_power`  DOUBLE DEFAULT NULL COMMENT '无功功率',
			
 
				             `lab` int DEFAULT NULL COMMENT '-1:停机 0:好点  1:欠发功率点；2:超发功率点；3:额定风速以上的超发功率点 4: 限电',
			
 
				             `year` INT (4) DEFAULT NULL COMMENT '年',
			
 
				             `month` INT (2) DEFAULT NULL COMMENT '月',
			
--- a/utils/tmp_util/__init__.py
+++ b/utils/tmp_util/__init__.py
--- a/utils/tmp_util/神木_完整度_10分.py
+++ b/utils/tmp_util/神木_完整度_10分.py
@@ -0,0 +1,87 @@
 
				+# coding=utf-8
			
 
				+
			
 
				+import datetime
			
 
				+import multiprocessing
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+sys.path.insert(0, os.path.abspath(__file__).split("utils")[0])
			
 
				+
			
 
				+import pandas as pd
			
 
				+
			
 
				+from utils.file.trans_methods import read_file_to_df, read_excel_files
			
 
				+
			
 
				+
			
 
				def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
				    """
				    Count the sample slots spanned by two timestamps, both ends included.

				    :param start_time: first timestamp of the range
				    :param end_time: last timestamp of the range
				    :param time_space: sampling step in seconds
				    :return: number of whole steps separating the two timestamps, plus one
				    """
				    elapsed = end_time - start_time
				    # Only the day and second components are used; microseconds are not counted.
				    elapsed_seconds = elapsed.seconds + elapsed.days * 24 * 60 * 60
				    whole_steps = int(elapsed_seconds / time_space)
				    return 1 + abs(whole_steps)
			
 
				+
			
 
				+
			
 
				def save_percent(value, save_decimal=7):
				    """
				    Convert a ratio into a percentage.

				    :param value: ratio to convert (1.0 corresponds to 100)
				    :param save_decimal: decimal places of the ratio that are kept; the
				        returned percentage therefore keeps ``save_decimal - 2`` places
				    :return: ``value * 100`` rounded to ``save_decimal - 2`` decimal places
				    """
				    # Scale first and round last. Rounding the ratio and multiplying afterwards
				    # puts binary floating-point noise back into the result
				    # (round(0.07, 7) * 100 == 7.000000000000001).
				    return round(value * 100, save_decimal - 2)
			
 
				+
			
 
				+
			
 
				def read_and_select(file_path, read_cols_bak):
				    """
				    Build a one-row quality report for a single data file sampled every 600 seconds.

				    The report holds the turbine name, duplicated-timestamp statistics, the
				    share of missing values over the loaded columns and the share of
				    out-of-range wind-speed / active-power readings.

				    :param file_path: file handed to read_file_to_df
				    :param read_cols_bak: column names to load; a copy is taken before it is
				        modified. It must contain '名称'.
				    :return: single-row DataFrame with the report
				    """
				    try:
				        wanted_cols = read_cols_bak[:]
				        frame = read_file_to_df(file_path, read_cols=wanted_cols)
				        turbine = frame['名称'].values[0]
				        frame['时间'] = pd.to_datetime(frame['时间'])
				        stamps = frame['时间']
				        # Number of 600-second slots between the first and last timestamp.
				        expected = get_time_space_count(stamps.min(), stamps.max(), 600)
				        duplicated = frame.shape[0] - len(stamps.unique())
				        print(turbine, expected, duplicated)
				        report = {
				            '风机号': [turbine],
				            '重复率': [save_percent(duplicated / expected)],
				            '重复次数': [duplicated],
				            '总记录数': [expected],
				        }

				        # The name column is the grouping key, not a measured value.
				        wanted_cols.remove('名称')
				        for col in wanted_cols:
				            if col == '时间':
				                frame[col] = pd.to_datetime(frame[col], errors='coerce')
				            else:
				                # Unparseable cells become NaN and count as missing below.
				                frame[col] = pd.to_numeric(frame[col], errors='coerce')

				        per_col = frame.groupby(by=['名称']).count().reset_index()
				        present = per_col[wanted_cols].values[0]
				        filled = present.sum()
				        print(turbine, filled, expected * len(wanted_cols))
				        report['平均缺失率,单位%'] = [save_percent(1 - filled / (expected * len(wanted_cols)))]
				        report['缺失数值'] = [
				            '-'.join(f'{col_name}_{str(expected - n)}' for col_name, n in zip(wanted_cols, present))]

				        bad_wind = len(frame.query("(风速 < 0) | (风速 > 80)"))
				        bad_power = len(frame.query("(发电机有功功率 < -200) | (发电机有功功率 > 2500)"))
				        report['平均异常率'] = [save_percent((bad_wind + bad_power) / (2 * expected))]

				        result_df = pd.DataFrame(report)
				    except Exception as e:
				        # Report which file failed before propagating the error.
				        print(file_path)
				        raise e

				    return result_df
			
 
				+
			
 
				+
			
 
				if __name__ == '__main__':
				    # Columns loaded from every 10-minute file.
				    read_cols_str = '名称,时间,发电机有功功率,发电机转速,发电机驱动端轴承温度,发电机非驱动端轴承温度,发电机定子U相线圈温度,发电机定子V相线圈温度,发电机定子W相线圈温度,实际扭矩,设定扭矩,仪表盘风速,舱内温度,控制柜内温度,舱外温度,风向,风速,机舱风向夹角,1#桨叶片角度,1#桨设定角度,2#桨叶片角度,2#桨设定角度,3#桨叶片角度,3#桨设定角度,1#桨电机温度,2#桨电机温度,3#桨电机温度,轮毂内温度,齿轮箱油泵吸油口油压,齿轮箱分配器位置油压,偏航液压刹车系统蓄能罐压力,主轴转速,齿轮箱油路入口温度,齿轮箱中间轴驱动端轴承温度,齿轮箱中间轴非驱动端轴承温度,齿轮箱油池温度,主轴承外圈温度,可利用率,机舱位置,总扭缆角度'
				    read_cols = list(filter(None, read_cols_str.split(",")))
				    read_dir = r'D:\data\tmp_data\10分'

				    files = read_excel_files(read_dir)
				    tasks = [(os.path.join(read_dir, name), read_cols) for name in files]

				    # One report row per file, computed by four worker processes.
				    with multiprocessing.Pool(4) as pool:
				        dfs = pool.starmap(read_and_select, tasks)

				    df = pd.concat(dfs, ignore_index=True)
				    df.sort_values(by=['风机号'], inplace=True)

				    df.to_csv("神木风电场-10分钟.csv", encoding='utf8', index=False)
			
--- a/utils/tmp_util/神木_完整度_1分.py
+++ b/utils/tmp_util/神木_完整度_1分.py
@@ -0,0 +1,90 @@
 
				+# coding=utf-8
			
 
				+
			
 
				+import datetime
			
 
				+import multiprocessing
			
 
				+import os
			
 
				+import sys
			
 
				+
			
 
				+sys.path.insert(0, os.path.abspath(__file__).split("utils")[0])
			
 
				+
			
 
				+import pandas as pd
			
 
				+
			
 
				+from utils.file.trans_methods import read_file_to_df, read_excel_files
			
 
				+
			
 
				+
			
 
				def get_time_space_count(start_time: datetime.datetime, end_time: datetime.datetime, time_space=1):
				    """
				    Count the sample slots spanned by two timestamps, both ends included.

				    :param start_time: first timestamp of the range
				    :param end_time: last timestamp of the range
				    :param time_space: sampling step in seconds
				    :return: number of whole steps separating the two timestamps, plus one
				    """
				    span = end_time - start_time
				    # Only the day and second components are used; microseconds are not counted.
				    span_seconds = span.seconds + span.days * 24 * 60 * 60
				    whole_steps = int(span_seconds / time_space)
				    return 1 + abs(whole_steps)
			
 
				+
			
 
				+
			
 
				def save_percent(value, save_decimal=7):
				    """
				    Convert a ratio into a percentage.

				    :param value: ratio to convert (1.0 corresponds to 100)
				    :param save_decimal: decimal places of the ratio that are kept; the
				        returned percentage therefore keeps ``save_decimal - 2`` places
				    :return: ``value * 100`` rounded to ``save_decimal - 2`` decimal places
				    """
				    # Scale first and round last. Rounding the ratio and multiplying afterwards
				    # puts binary floating-point noise back into the result
				    # (round(0.07, 7) * 100 == 7.000000000000001).
				    return round(value * 100, save_decimal - 2)
			
 
				+
			
 
				+
			
 
				def read_and_select(file_path):
				    """
				    Build a one-row quality report for a single data file sampled every 60 seconds.

				    Every column of the file is inspected. Columns that contain no numeric
				    value at all are left out of the missing-value statistics.

				    :param file_path: file handed to read_file_to_df
				    :return: single-row DataFrame holding the turbine name, duplicated-timestamp
				        statistics, missing-value share and out-of-range share
				    """
				    try:
				        frame = read_file_to_df(file_path)
				        all_cols = frame.columns.tolist()

				        turbine = frame['名称'].values[0]
				        frame['时间'] = pd.to_datetime(frame['时间'])
				        stamps = frame['时间']
				        # Number of 60-second slots between the first and last timestamp.
				        expected = get_time_space_count(stamps.min(), stamps.max(), 60)
				        duplicated = frame.shape[0] - len(stamps.unique())
				        print(turbine, expected, duplicated)
				        report = {
				            '风机号': [turbine],
				            '重复率': [save_percent(duplicated / expected)],
				            '重复次数': [duplicated],
				            '总记录数': [expected],
				        }

				        # The name column is the grouping key, not a measured value.
				        all_cols.remove('名称')
				        checked_cols = []
				        for col in all_cols:
				            if col != '时间':
				                frame[col] = pd.to_numeric(frame[col], errors='coerce')
				                # A column without a single numeric value is not evaluated.
				                if frame[col].isnull().all():
				                    continue
				            else:
				                frame[col] = pd.to_datetime(frame[col], errors='coerce')
				            checked_cols.append(col)

				        per_col = frame.groupby(by=['名称']).count().reset_index()
				        present = per_col[checked_cols].values[0]
				        filled = present.sum()
				        print(turbine, filled, expected * len(checked_cols))
				        report['平均缺失率,单位%'] = [save_percent(1 - filled / (expected * len(checked_cols)))]
				        report['缺失数值'] = [
				            '-'.join(f'{col_name}_{str(expected - n)}' for col_name, n in zip(checked_cols, present))]

				        bad_wind = len(frame.query("(风速 < 0) | (风速 > 80)"))
				        bad_power = len(frame.query("(发电机有功功率 < -200) | (发电机有功功率 > 2500)"))
				        report['平均异常率'] = [save_percent((bad_wind + bad_power) / (2 * expected))]

				        result_df = pd.DataFrame(report)
				    except Exception as e:
				        # Report which file failed before propagating the error.
				        print(file_path)
				        raise e

				    return result_df
			
 
				+
			
 
				+
			
 
				if __name__ == '__main__':
				    read_dir = r'D:\data\tmp_data\1分\远景1min'

				    # NOTE(review): the entries are handed to the workers exactly as
				    # read_excel_files returns them - presumably full paths; confirm.
				    files = read_excel_files(read_dir)

				    # One report row per file, computed by four worker processes.
				    with multiprocessing.Pool(4) as pool:
				        dfs = pool.map(read_and_select, files)

				    df = pd.concat(dfs, ignore_index=True)
				    df.sort_values(by=['风机号'], inplace=True)

				    df.to_csv("神木风电场-1分钟.csv", encoding='utf8', index=False)
			
--- a/utils/tmp_util/获取台账所有wind表信息.py
+++ b/utils/tmp_util/获取台账所有wind表信息.py
@@ -0,0 +1,18 @@
 
				+import sys
			
 
				+from os import path, environ
			
 
				+
			
 
				# Environment name: first command-line argument, 'dev' when none is given.
				env = sys.argv[1] if len(sys.argv) >= 2 else 'dev'

				# Everything before the first "energy-data-trans" in this file's absolute path.
				project_prefix = path.abspath(__file__).split("energy-data-trans")[0]
				conf_path = project_prefix + f"/energy-data-trans/conf/etl_config_{env}.yaml"
				environ['ETL_CONF'] = conf_path
				environ['env'] = env

				# Imported only after the environment variables above have been set.
				from service.common_connect import plt

				tables = 'wind_company,wind_engine_group,wind_engine_mill,wind_exception_count,wind_field,wind_field_batch,wind_field_contract,wind_field_resource,wind_relation'

				# Dump every listed table into "<table>.csv" in the current working directory.
				for table in tables.split(','):
				    df = plt.read_sql_to_df(f"select * from {table}")
				    df.to_csv(f'{table}.csv', encoding='utf8', index=False)