|
@@ -1,92 +1,106 @@
|
|
|
import multiprocessing
|
|
|
+import os
|
|
|
+
|
|
|
+import pandas as pd
|
|
|
|
|
|
from utils.file.trans_methods import *
|
|
|
|
|
|
|
|
|
# Field names whose presence marks a sheet as fault/status data.
_FAULT_FIELD_NAMES = frozenset([
    '快速停机', '故障名称', '故障代码', '故障停机', '人工停机', '风机紧急停机', '远方限功率运行状态',
])


def boolean_is_check_data(df_cols):
    """Tell whether a set of column labels contains a fault field.

    Each label is converted to ``str`` and reduced to the text after its
    last underscore before being compared with the known fault field names.

    :param df_cols: iterable of column labels (for example ``df.columns``);
        ``None`` is treated as "no columns".
    :return: ``True`` if at least one reduced label is a fault field name,
        otherwise ``False``.
    """
    if df_cols is None:
        return False
    # Only the suffix after the last '_' is compared with the fault names.
    return any(str(col).split('_')[-1] in _FAULT_FIELD_NAMES for col in df_cols)
|
|
|
|
|
|
|
|
|
# The report always leaves at least this many blank rows under the
# reference column (kept so existing output files do not change shape).
_MIN_BLANK_ROWS = 20


def _collect_fields(dfs, want_fault):
    """Return the distinct field names of the frames of the requested kind.

    A frame is used only when ``boolean_is_check_data(df.columns)`` equals
    ``want_fault``. The ``sheet_name`` column is ignored and every other
    label is reduced to the text after its last underscore.
    """
    fields = set()
    for df in dfs:
        if boolean_is_check_data(df.columns) == want_fault:
            fields.update(
                str(col).split('_')[-1] for col in df.columns if col != 'sheet_name'
            )
    return fields


def _pad(values, length):
    """Return ``values`` extended with empty strings up to ``length`` items."""
    return values + [''] * (length - len(values))


def compareTwoFolders(df1s, other_dfs):
    """Compare the field names of a reference turbine with other turbines.

    Two CSV reports are written to the current working directory:
    ``col_compare_fault.csv`` (frames that contain a fault field) and
    ``col_compare.csv`` (all remaining frames). Each report holds the sorted
    reference fields in column ``风机1`` and, per entry of ``other_dfs``,
    three columns: its own fields, the fields it lacks compared with the
    reference, and the fields it has in addition to the reference.
    The last two lists are also printed for every entry.

    :param df1s: iterable of DataFrames for the reference turbine.
    :param other_dfs: mapping of turbine name -> list of DataFrames.
    """
    for is_fault in (True, False):
        base_fields = _collect_fields(df1s, is_fault)
        columns = {'风机1': sorted(base_fields)}

        for wind_name, dfs in other_dfs.items():
            fields = _collect_fields(dfs, is_fault)
            missing = sorted(base_fields - fields)
            extra = sorted(fields - base_fields)
            print(missing)
            print(extra)

            prefix = '风机' + str(wind_name)
            columns[prefix + '_字段'] = sorted(fields)
            columns[prefix + '_比风机1少字段'] = missing
            columns[prefix + '_比风机1多字段'] = extra

        # Every column must have the same length. Size the table by the
        # longest column so that a turbine with many more fields than the
        # reference no longer triggers a length-mismatch error.
        row_count = max(
            len(columns['风机1']) + _MIN_BLANK_ROWS,
            max(len(values) for values in columns.values()),
        )
        result_df = pd.DataFrame(
            {name: _pad(values, row_count) for name, values in columns.items()}
        )

        file_name = 'col_compare.csv' if not is_fault else 'col_compare_fault.csv'
        result_df.to_csv(file_name, encoding='utf-8')
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: read the reference files and every per-turbine
    # folder, then write the field comparison reports.
    begin = datetime.datetime.now()

    dir1 = r'D:\data\新华水电\风机SCADA数据\标准'
    dir2 = r'D:\data\新华水电\风机SCADA数据\9月风机数据'

    files1 = read_excel_files(dir1)
    with multiprocessing.Pool(10) as pool:
        df1s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files1])

    # Group the frames of dir2 by turbine. Only the immediate sub-folders of
    # dir2 are visited: file paths are built relative to dir2, so descending
    # further (as os.walk does) would produce paths that do not exist.
    other_dfs = dict()
    for folder in sorted(os.listdir(dir2)):
        folder_path = os.path.join(dir2, folder)
        if not os.path.isdir(folder_path):
            continue
        # The turbine name is the part of the folder name before the first '#'.
        wind_name = folder.split('#')[0]
        for file_name in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file_name)
            if not os.path.isfile(file_path):
                continue
            print(folder, file_name)
            df = read_file_to_df(file_path, nrows=1)
            other_dfs.setdefault(wind_name, []).append(df)

    compareTwoFolders(df1s, other_dfs)

    print(datetime.datetime.now() - begin)
|