|
|
@@ -79,7 +79,7 @@ class StatisticsAndSaveTmpFormalFile(object):
|
|
|
|
|
|
# Drop rows where active power or wind velocity is missing (how='any' removes a row if either column is NaN)
|
|
|
df.dropna(subset=['active_power', 'wind_velocity'], how='any', inplace=True)
|
|
|
- trans_print(origin_wind_name, wind_col_name, "删除有功功率和风速均为空的情况后:", df.shape)
|
|
|
+ trans_print(origin_wind_name, wind_col_name, "删除有功功率和风速有空的情况后:", df.shape)
|
|
|
df.replace(np.nan, -999999999, inplace=True)
|
|
|
number_cols = df.select_dtypes(include=['number']).columns.tolist()
|
|
|
for col in df.columns:
|
|
|
@@ -93,15 +93,16 @@ class StatisticsAndSaveTmpFormalFile(object):
|
|
|
|
|
|
df.drop_duplicates(['wind_turbine_number', 'time_stamp'], keep='first', inplace=True)
|
|
|
|
|
|
+ df['time_stamp'] = df['time_stamp'].str.strip()
|
|
|
df['time_stamp'] = pd.to_datetime(df['time_stamp'], errors="coerce")
|
|
|
df.dropna(subset=['time_stamp'], inplace=True)
|
|
|
df.sort_values(by='time_stamp', inplace=True)
|
|
|
df = df[[i for i in self.trans_param.cols_tran.keys() if i in df.columns]]
|
|
|
|
|
|
# 删除每行有空值的行(2025-3-24)
|
|
|
- origin_count = df.shape[0]
|
|
|
- df = df.dropna()
|
|
|
- trans_print(f'原始数据量:{origin_count},去除na后数据量:{df.shape[0]}')
|
|
|
+ # origin_count = df.shape[0]
|
|
|
+ # df = df.dropna()
|
|
|
+ # trans_print(f'原始数据量:{origin_count},去除na后数据量:{df.shape[0]}')
|
|
|
|
|
|
# 如果秒级有可能合并到分钟级
|
|
|
# TODO add 秒转分钟
|
|
|
@@ -123,61 +124,68 @@ class StatisticsAndSaveTmpFormalFile(object):
|
|
|
rated_power_and_cutout_speed_tuple = read_conf(self.rated_power_and_cutout_speed_map, str(wind_col_name))
|
|
|
if rated_power_and_cutout_speed_tuple is None:
|
|
|
rated_power_and_cutout_speed_tuple = (None, None)
|
|
|
-
|
|
|
- trans_print('过滤数据前数据大小', df.shape)
|
|
|
- filter_valid_data = FilterValidData(df, rated_power_and_cutout_speed_tuple[0])
|
|
|
- df = filter_valid_data.run()
|
|
|
- trans_print('过滤数据后数据大小', df.shape)
|
|
|
-
|
|
|
- # 如果有需要处理的,先进行代码处理,在进行打标签
|
|
|
- # exec_code = get_trans_exec_code(self.paths_and_table.exec_id, self.paths_and_table.read_type)
|
|
|
- # if exec_code:
|
|
|
- # if 'import ' in exec_code:
|
|
|
- # raise Exception("执行代码不支持导入包")
|
|
|
- # exec(exec_code)
|
|
|
-
|
|
|
- if power_df.shape[0] == 0:
|
|
|
- df.loc[:, 'lab'] = -1
|
|
|
+ trans_print(origin_wind_name, '未从平台匹配到额定功率')
|
|
|
else:
|
|
|
- class_identifiler = ClassIdentifier(wind_turbine_number=origin_wind_name, origin_df=df,
|
|
|
- rated_power=rated_power_and_cutout_speed_tuple[0],
|
|
|
- cut_out_speed=rated_power_and_cutout_speed_tuple[1])
|
|
|
- df = class_identifiler.run()
|
|
|
+ trans_print(origin_wind_name, '过滤数据前数据大小', df.shape)
|
|
|
+ trans_print(origin_wind_name, '额定功率', rated_power_and_cutout_speed_tuple[0])
|
|
|
+ # trans_print(origin_wind_name, '\n', df.head(10))
|
|
|
+ filter_valid_data = FilterValidData(df, rated_power_and_cutout_speed_tuple[0])
|
|
|
+ try:
|
|
|
+ df = filter_valid_data.run()
|
|
|
+ except:
|
|
|
+ trans_print(origin_wind_name, '过滤数据异常', filename)
|
|
|
+ raise
|
|
|
+ trans_print(origin_wind_name, '过滤数据后数据大小', df.shape)
|
|
|
+
|
|
|
+ # If custom processing is required, run that code first and then apply the labels
|
|
|
+ # exec_code = get_trans_exec_code(self.paths_and_table.exec_id, self.paths_and_table.read_type)
|
|
|
+ # if exec_code:
|
|
|
+ # if 'import ' in exec_code:
|
|
|
+ # raise Exception("执行代码不支持导入包")
|
|
|
+ # exec(exec_code)
|
|
|
+
|
|
|
+ if power_df.shape[0] == 0:
|
|
|
+ df.loc[:, 'lab'] = -1
|
|
|
+ else:
|
|
|
+ class_identifiler = ClassIdentifier(wind_turbine_number=origin_wind_name, origin_df=df,
|
|
|
+ rated_power=rated_power_and_cutout_speed_tuple[0],
|
|
|
+ cut_out_speed=rated_power_and_cutout_speed_tuple[1])
|
|
|
+ df = class_identifiler.run()
|
|
|
|
|
|
- del power_df
|
|
|
+ del power_df
|
|
|
|
|
|
- df['year'] = df['time_stamp'].dt.year
|
|
|
- df['month'] = df['time_stamp'].dt.month
|
|
|
- df['day'] = df['time_stamp'].dt.day
|
|
|
- df['time_stamp'] = df['time_stamp'].apply(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'))
|
|
|
+ df['year'] = df['time_stamp'].dt.year
|
|
|
+ df['month'] = df['time_stamp'].dt.month
|
|
|
+ df['day'] = df['time_stamp'].dt.day
|
|
|
+ df['time_stamp'] = df['time_stamp'].apply(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'))
|
|
|
|
|
|
- df['wind_turbine_name'] = str(origin_wind_name)
|
|
|
- df['year_month'] = df[['year', 'month']].apply(lambda x: str(x['year']) + str(x['month']).zfill(2), axis=1)
|
|
|
- cols = df.columns
|
|
|
+ df['wind_turbine_name'] = str(origin_wind_name)
|
|
|
+ df['year_month'] = df[['year', 'month']].apply(lambda x: str(x['year']) + str(x['month']).zfill(2), axis=1)
|
|
|
+ cols = df.columns
|
|
|
|
|
|
- if self.paths_and_table.read_type == 'second':
|
|
|
- type_col = 'year_month'
|
|
|
- else:
|
|
|
- type_col = 'year'
|
|
|
-
|
|
|
- date_strs = df[type_col].unique().tolist()
|
|
|
- for date_str in date_strs:
|
|
|
- save_path = path.join(self.paths_and_table.get_tmp_formal_path(), str(date_str),
|
|
|
- str(origin_wind_name) + '.csv')
|
|
|
- create_file_path(save_path, is_file_path=True)
|
|
|
- now_df = df[df[type_col] == date_str][cols]
|
|
|
- if self.paths_and_table.save_zip:
|
|
|
- save_path = save_path + '.gz'
|
|
|
- now_df.to_csv(save_path, compression='gzip', index=False, encoding='utf-8')
|
|
|
+ if self.paths_and_table.read_type == 'second':
|
|
|
+ type_col = 'year_month'
|
|
|
else:
|
|
|
- now_df.to_csv(save_path, index=False, encoding='utf-8')
|
|
|
+ type_col = 'year'
|
|
|
+
|
|
|
+ date_strs = df[type_col].unique().tolist()
|
|
|
+ for date_str in date_strs:
|
|
|
+ save_path = path.join(self.paths_and_table.get_tmp_formal_path(), str(date_str),
|
|
|
+ str(origin_wind_name) + '.csv')
|
|
|
+ create_file_path(save_path, is_file_path=True)
|
|
|
+ now_df = df[df[type_col] == date_str][cols]
|
|
|
+ if self.paths_and_table.save_zip:
|
|
|
+ save_path = save_path + '.gz'
|
|
|
+ now_df.to_csv(save_path, compression='gzip', index=False, encoding='utf-8')
|
|
|
+ else:
|
|
|
+ now_df.to_csv(save_path, index=False, encoding='utf-8')
|
|
|
|
|
|
- del now_df
|
|
|
+ del now_df
|
|
|
|
|
|
- self.set_statistics_data(df)
|
|
|
+ self.set_statistics_data(df)
|
|
|
|
|
|
- del df
|
|
|
- trans_print("保存" + str(wind_col_name) + "成功")
|
|
|
+ del df
|
|
|
+ trans_print("保存" + str(wind_col_name) + "成功")
|
|
|
|
|
|
def mutiprocessing_to_save_file(self):
|
|
|
# 开始保存到正式文件
|