|
@@ -44,37 +44,39 @@ class ReadAndSaveTmp(object):
|
|
|
|
|
|
def save_merge_data(self, file_path):
    """Split one source file into per-turbine, per-column temporary CSV files.

    The file is read with ``read_excel_to_df``, the optional
    ``trans_param.wind_name_exec`` expression is applied to the
    ``wind_turbine_number`` column, and the frame is passed through
    ``trans_df_cols``.  For every distinct turbine and every column other
    than ``wind_turbine_number`` / ``time_stamp`` a two-column CSV
    (``time_stamp`` plus that column) named ``<column>.csv`` is written into
    ``pathsAndTable.get_merge_tmp_path(<turbine>)``.  The first write of a
    turbine/column pair creates the file with a header; later writes append
    rows without one.

    :param file_path: path of the source file to read.
    :return: None
    """
    df = self.read_excel_to_df(file_path)

    wind_name_exec = self.trans_param.wind_name_exec
    if wind_name_exec and valid_eval(wind_name_exec):
        # SECURITY: a configured expression is evaluated here.  It is only
        # gated by valid_eval(); make sure the setting never carries
        # untrusted input.
        # NOTE(review): this runs before trans_df_cols, so the raw frame must
        # already expose a 'wind_turbine_number' column -- confirm.
        exec_str = f"df['wind_turbine_number'].apply(lambda wind_name: {wind_name_exec} )"
        df['wind_turbine_number'] = eval(exec_str)

    df = self.trans_df_cols(df)

    wind_names = set(df['wind_turbine_number'].values)
    # Every column except the two key columns becomes its own CSV file.
    key_cols = ('wind_turbine_number', 'time_stamp')
    data_cols = [col for col in df.columns if col not in key_cols]
    if not data_cols:
        return

    for wind_name in wind_names:
        # Per-turbine work is done once here instead of once per column.
        merge_path = self.pathsAndTable.get_merge_tmp_path(wind_name)
        create_file_path(merge_path)
        wind_df = df[df['wind_turbine_number'] == wind_name]

        for col in data_cols:
            csv_name = str(col) + ".csv"
            exist_name = wind_name + '-' + csv_name

            # The registry records which turbine/column files were already
            # started, which decides between "create with header" and
            # "append without header".
            with self.lock:
                contains_name = exist_name in self.exist_wind_names
                if not contains_name:
                    self.exist_wind_names.append(exist_name)

            # NOTE(review): the write happens outside the lock.  If the lock
            # and registry are shared by concurrent workers, an append could
            # land before the first writer creates the file and then be
            # overwritten -- confirm whether that can occur.
            save_path = path.join(merge_path, csv_name)
            wind_df[['time_stamp', col]].to_csv(
                save_path,
                index=False,
                encoding='utf-8',
                mode='a' if contains_name else 'w',
                header=not contains_name,
            )
|
|
|
|
|
|
+ def trans_df_cols(self, df):
|
|
|
if self.trans_param.is_vertical_table:
|
|
|
pass
|
|
|
else:
|
|
@@ -106,6 +108,12 @@ class ReadAndSaveTmp(object):
|
|
|
for key in del_keys:
|
|
|
df.drop(key, axis=1, inplace=True)
|
|
|
|
|
|
+ return df
|
|
|
+
|
|
|
+ def df_save_to_tmp_file(self, df=pd.DataFrame()):
|
|
|
+
|
|
|
+ df = self.trans_df_cols(df)
|
|
|
+
|
|
|
df = del_blank(df, ['wind_turbine_number'])
|
|
|
df = df[df['time_stamp'].isna() == False]
|
|
|
if self.trans_param.wind_name_exec and not self.trans_param.merge_columns:
|
|
@@ -118,7 +126,7 @@ class ReadAndSaveTmp(object):
|
|
|
def save_to_tmp_csv(self, df):
|
|
|
names = set(df['wind_turbine_number'].values)
|
|
|
if names:
|
|
|
- trans_print("开始保存", str(names), "到临时文件")
|
|
|
+ trans_print("开始保存", str(names), "到临时文件", df.shape)
|
|
|
|
|
|
for name in names:
|
|
|
self._save_to_tmp_csv_by_name(df[df['wind_turbine_number'] == name], name)
|
|
@@ -127,14 +135,17 @@ class ReadAndSaveTmp(object):
|
|
|
|
|
|
def merge_df(self, dir_path):
    """Merge all files of one turbine directory side by side and save the result.

    Each file returned by ``read_excel_files(dir_path)`` is loaded with
    ``read_file_to_df``, tagged with the turbine number taken from the
    directory name, stripped of rows with a missing or duplicated
    ``time_stamp``, indexed by ``(time_stamp, wind_turbine_number)`` and
    concatenated column-wise onto the running result.  The merged frame has
    its index reset, is handed to ``df_save_to_tmp_file`` and is returned.

    :param dir_path: directory holding the files of a single turbine; its
        last path component is used as the turbine number.
    :return: the merged DataFrame with ``time_stamp`` and
        ``wind_turbine_number`` restored as ordinary columns.
    """
    all_files = read_excel_files(dir_path)
    # basename() of a path ending in a separator is '', which would tag every
    # row with an empty turbine number; normalise the path first.
    wind_turbine_number = path.basename(path.normpath(dir_path))

    df = pd.DataFrame()
    for file in all_files:
        now_df = read_file_to_df(file)
        now_df['wind_turbine_number'] = wind_turbine_number
        # Rows without a timestamp cannot be aligned; duplicated timestamps
        # would make the index non-unique for the column-wise concat below.
        now_df.dropna(subset=['time_stamp'], inplace=True)
        now_df.drop_duplicates(subset=['time_stamp'], inplace=True)
        now_df.set_index(keys=['time_stamp', 'wind_turbine_number'], inplace=True)
        # axis=1 aligns on the index, so each file contributes new columns.
        df = pd.concat([df, now_df], axis=1)
    df.reset_index(inplace=True)

    self.df_save_to_tmp_file(df)

    return df
|
|
@@ -162,7 +173,8 @@ class ReadAndSaveTmp(object):
|
|
|
dirs = [path.join(self.pathsAndTable.get_merge_tmp_path(), dir_name) for dir_name in
|
|
|
listdir(self.pathsAndTable.get_merge_tmp_path())]
|
|
|
dir_total_size = get_dir_size(dirs[0])
|
|
|
- split_count = max_file_size_get_max_cpu_count(dir_total_size)
|
|
|
+ # split_count = max_file_size_get_max_cpu_count(dir_total_size, memory_percent=1 / 12, cpu_percent=1 / 10)
|
|
|
+ split_count = 2
|
|
|
all_arrays = split_array(dirs, split_count)
|
|
|
for index, arr in enumerate(all_arrays):
|
|
|
try:
|