# 张崾先风电场-非点检字段获取.py - collects the measurement columns that are outside the inspection sheet (Zhangyaoxian wind farm).

  1. import datetime
  2. import multiprocessing
  3. import os
  4. import sys
  5. sys.path.insert(0, os.path.abspath(__file__).split("tmp_file")[0])
  6. from utils.file.trans_methods import read_excel_files, copy_to_new, read_file_to_df
  7. from utils.zip.unzip import unzip, get_desc_path, unrar
  8. import pandas as pd
  9. read_cols = ['Time', '设备主要状态', '功率曲线风速', '湍流强度', '实际风速', '有功功率', '桨叶角度A', '桨叶角度B',
  10. '桨叶角度C', '机舱内温度', '机舱外温度', '绝对风向', '机舱绝对位置', '叶轮转速', '发电机转速', '瞬时风速',
  11. '有功设定反馈', '当前理论可发最大功率', '空气密度', '偏航误差', '发电机扭矩', '瞬时功率', '风向1s',
  12. '偏航压力', '桨叶1速度', '桨叶2速度', '桨叶3速度', '桨叶1角度给定', '桨叶2角度给定', '桨叶3角度给定',
  13. '轴1电机电流', '轴2电机电流', '轴3电机电流', '轴1电机温度', '轴2电机温度', '轴3电机温度', '待机', '启动',
  14. '偏航', '并网', '限功率', '正常发电', '故障', '计入功率曲线', '运行发电机冷却风扇1', '运行发电机冷却风扇2',
  15. '激活偏航解缆阀', '激活偏航刹车阀', '激活风轮刹车阀', '激活顺时针偏航', '激活逆时针偏航', '电缆扭角']
  16. read_path = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/收资数据/sec'
  17. save_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/清理数据/点检表以外测点儿-20241210'
  18. tmp_dir = r'/data/download/collection_data/1进行中/张崾先风电场-陕西-华电/tmp/second/excel_tmp'
  19. # read_path = r'D:\data\张崾先风电场\6'
  20. # save_dir = r'D:\data\张崾先风电场\点检表以外测点儿-20241209'
  21. # tmp_dir = r'D:\data\张崾先风电场\tmp'
  22. os.makedirs(tmp_dir, exist_ok=True)
  23. os.makedirs(save_dir, exist_ok=True)
  24. def get_and_remove(file):
  25. to_path = tmp_dir
  26. if str(file).endswith("zip"):
  27. if str(file).endswith("csv.zip"):
  28. copy_to_new(file, file.replace(read_path, to_path).replace("csv.zip", 'csv.gz'))
  29. else:
  30. desc_path = file.replace(read_path, to_path)
  31. is_success, e = unzip(file, get_desc_path(desc_path))
  32. if not is_success:
  33. # raise e
  34. pass
  35. elif str(file).endswith("rar"):
  36. desc_path = file.replace(read_path, to_path)
  37. unrar(file, get_desc_path(desc_path))
  38. else:
  39. copy_to_new(file, file.replace(read_path, to_path))
  40. def get_resolve(file_path, exist_wind_names, map_lock):
  41. begin = datetime.datetime.now()
  42. df = read_file_to_df(file_path, read_cols=read_cols)
  43. wind_name = str(os.path.basename(file_path)[0:2])
  44. date = os.path.basename(file_path)[14:24]
  45. df['Time'] = df['Time'].apply(lambda x: date + ' ' + x)
  46. df = df[read_cols]
  47. with map_lock[str(wind_name)]:
  48. if wind_name in exist_wind_names:
  49. df.to_csv(save_dir + '/' + wind_name + '.csv', mode='a', index=False, header=False, encoding='utf8')
  50. else:
  51. df.to_csv(save_dir + '/' + wind_name + '.csv', index=False, encoding='utf8')
  52. exist_wind_names.append(wind_name)
  53. print(os.path.basename(file_path), '执行完成,耗时:', get_haoshi(begin))
  54. def sort_data(file_path):
  55. df = pd.read_csv(file_path, encoding='utf8')
  56. df['Time'] = pd.to_datetime(df['Time'], error='coerce')
  57. df.sort_values(by=['Time'], inplace=True)
  58. df.to_csv(file_path, index=False, encoding='utf8')
  59. def get_haoshi(begin):
  60. return datetime.datetime.now() - begin
  61. if __name__ == '__main__':
  62. begin = datetime.datetime.now()
  63. # all_files = read_files(read_path)
  64. # split_count = get_available_cpu_count_with_percent(1 / 2)
  65. # all_arrays = split_array(all_files, split_count)
  66. #
  67. # for index, arr in enumerate(all_arrays):
  68. # with multiprocessing.Pool(10) as pool:
  69. # pool.starmap(get_and_remove, [(i,) for i in arr])
  70. #
  71. # print("移动完成,耗时:", get_haoshi(begin))
  72. # exist_wind_names = multiprocessing.Manager().list()
  73. #
  74. # map_lock = dict()
  75. # for i in range(26, 42):
  76. # map_lock[str(i)] = multiprocessing.Manager().Lock()
  77. #
  78. # all_files = read_excel_files(tmp_dir)
  79. # with multiprocessing.Pool(16) as pool:
  80. # pool.starmap(get_resolve, [(i, exist_wind_names, map_lock) for i in all_files])
  81. #
  82. # print("整理完成,耗时:", get_haoshi(begin))
  83. all_files = read_excel_files(save_dir)
  84. with multiprocessing.Pool(4) as pool:
  85. pool.map(sort_data, all_files)
  86. print("排序完成,耗时:", get_haoshi(begin))
  87. # shutil.rmtree(tmp_dir)
  88. # print("移除临时文件完成,耗时:", get_haoshi(begin))