对比文件夹列名差值.py 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. import multiprocessing
  2. from utils.file.trans_methods import *
  3. def boolean_is_check_data(df_vas):
  4. # fault_list = ['Checked', 'Indeterminate', 'Unchecked']
  5. # for fault in fault_list:
  6. # if fault in df_vas:
  7. # return True
  8. return False
  9. def compareTwoFolders(df1s, df2s):
  10. for is_falut in [False]:
  11. list1 = list()
  12. for df in df1s:
  13. tmp_list = [str(i).split('_')[-1] for i in list(df.columns) if i != 'sheet_name']
  14. if is_falut:
  15. if boolean_is_check_data(df.values):
  16. list1.extend(tmp_list)
  17. else:
  18. if not boolean_is_check_data(df.values):
  19. list1.extend(tmp_list)
  20. list2 = list()
  21. for df in df2s:
  22. tmp_list = [str(i).split('_')[-1] for i in list(df.columns) if i != 'sheet_name']
  23. if is_falut:
  24. if boolean_is_check_data(df.values):
  25. list2.extend(tmp_list)
  26. else:
  27. if not boolean_is_check_data(df.values):
  28. list2.extend(tmp_list)
  29. set1 = set(list1)
  30. set2 = set(list2)
  31. list1 = list(set1)
  32. list2 = list(set2)
  33. list1.sort()
  34. list2.sort()
  35. print(list1)
  36. print(list2)
  37. list3 = list(set1 - set2)
  38. list3.sort()
  39. list4 = list(set2 - set1)
  40. list4.sort()
  41. print(list3)
  42. print(list4)
  43. list5 = list(set1)
  44. list5.extend(list(set2))
  45. list5 = list(set(list5))
  46. list5.sort()
  47. max_count = len(list5)
  48. list1.extend([''] * (max_count - len(list1)))
  49. list2.extend([''] * (max_count - len(list2)))
  50. list3.extend([''] * (max_count - len(list3)))
  51. list4.extend([''] * (max_count - len(list4)))
  52. file_name = 'col_compare.csv' if not is_falut else 'col_compare_falut.csv'
  53. with open(file_name, 'w', encoding='utf8') as f:
  54. f.write(",".join(["全部字段", "对方提供(3-25风机)", "自己获取(1-2风机)", "对方提供多的字段", "自己提供多的字段"]))
  55. f.write('\n')
  56. for e, a, b, c, d in zip(list5, list1, list2, list3, list4):
  57. f.write(",".join([e, a, b, c, d]))
  58. f.write('\n')
  59. f.flush()
  60. if __name__ == '__main__':
  61. begin = datetime.datetime.now()
  62. dir1 = r'D:\data\新华水电\风机SCADA数据\9月风机数据_对方复制'
  63. dir2 = r'D:\data\新华水电\风机SCADA数据\自己复制'
  64. files1 = read_excel_files(dir1)
  65. files2 = read_excel_files(dir2)
  66. with multiprocessing.Pool(10) as pool:
  67. df1s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files1])
  68. with multiprocessing.Pool(10) as pool:
  69. df2s = pool.starmap(read_file_to_df, [(file, list(), None, 1) for file in files2])
  70. compareTwoFolders(df1s, df2s)
  71. print(datetime.datetime.now() - begin)