anmox 1 jaar geleden
commit
c114dc4cc5

+ 4 - 0
.gitignore

@@ -0,0 +1,4 @@
+logs
+*.pyc
+*.iml
+.idea

+ 32 - 0
app.py

@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/17
+# @Author  : 魏志亮
+
+
+import datetime
+
+from base.TranseParam import TranseParam
+from base.WindFarms import WindFarms
+from utils.conf.read_conf import read_yaml_file
+
+if __name__ == '__main__':
+    batch_no = datetime.datetime.now().strftime("%Y%m%d%H%M")
+    name = '招远'
+    type = 'sec'
+
+    yaml_datas = read_yaml_file(name, type)
+
+    time_col = yaml_datas['time_col']
+    wind_col = yaml_datas['turbine_col']
+    print(wind_col)
+    cols_trans_all = yaml_datas['trans_col']
+    read_cols = [v for k, v in cols_trans_all.items() if v]
+
+    zhaoyuan = WindFarms(name, batch_no=batch_no, save_path=r"/home/wzl/trans_data")
+
+    params = TranseParam(read_type="sec", read_path=r"/home/wzl/test_data/zhaoyuan/sec",
+                         read_cols=read_cols,
+                         cols_tran=cols_trans_all, time_col=time_col, wind_col=wind_col)
+
+    zhaoyuan.set_trans_param(params)
+    files = zhaoyuan.run()

+ 14 - 0
base/TranseParam.py

@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/16
+# @Author  : 魏志亮
+
+class TranseParam(object):
+
+    def __init__(self, read_type=None, read_path=None, read_cols=list(), cols_tran={}, time_col=None, wind_col=None):
+        self.read_type = read_type
+        self.read_path = read_path
+        self.read_cols = read_cols
+        self.cols_tran = cols_tran
+        self.time_col = time_col
+        self.wind_col = wind_col
+        self.is_vertical_table = False

+ 152 - 0
base/WindFarms.py

@@ -0,0 +1,152 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/15
+# @Author  : 魏志亮
+
+import datetime
+import shutil
+
+from base.TranseParam import TranseParam
+from utils.db.trans_mysql import *
+from utils.log.trans_log import logger
+from utils.trans_methods import *
+from utils.zip.unzip import unzip
+
+
+class WindFarms(object):
+
+    def __init__(self, name, batch_no=None, save_path=None, params: TranseParam = None):
+        self.name = name
+        self.batch_no = batch_no
+        self.save_path = save_path
+        self.begin = datetime.datetime.now()
+        self.next_time = datetime.datetime.now()
+        self.is_zip = False
+        self.save_zip = False
+        self.trans_param = params
+
+    def set_trans_param(self, params: TranseParam):
+        self.trans_param = params
+
+    def __params_valid(self, not_null_list=list()):
+        for arg in not_null_list:
+            if arg is None or arg == '':
+                raise Exception("Invalid param set :" + arg)
+
+    def __get_save_path(self):
+        return os.path.join(self.save_path, self.name, self.batch_no)
+
+    def __get_zip_tmp_path(self):
+        return os.path.join(self.__get_save_path(), 'save_tmp')
+
+    def __get_read_tmp_path(self):
+        return os.path.join(self.__get_save_path(), 'read_tmp')
+
+    def get_excel_files(self):
+
+        if self.is_zip:
+            is_success, e = unzip(self.trans_param.read_path, self.__get_zip_tmp_path())
+            if is_success:
+                self.trans_param.read_path = self.__get_zip_tmp_path()
+            else:
+                raise e
+
+        return read_excel_files(self.trans_param.read_path)
+
+    def read_excel_to_df(self, file):
+
+        return read_file_to_df(file, self.trans_param.read_cols)
+
+    def save_to_csv(self, df, filename):
+        save_name = str(filename) + ('.csv' if self.save_zip else '.csv.gz')
+        save_path = os.path.join(self.save_path, self.name, self.batch_no, self.trans_param.read_type,
+                                 save_name)
+        create_file_path(save_path, is_file_path=True)
+        if self.save_zip:
+            df[df[self.trans_param.wind_col] == filename].to_csv(save_path, compression='.gzip', index=False)
+        else:
+            df[df[self.trans_param.wind_col] == filename].to_csv(save_path, index=False)
+        trans_print("保存" + str(filename) + ".csv成功")
+
+    def save_to_db(self, df, filename):
+        df.to_sql(name=str(self.batch_no), con=engine.connect(), index=False, if_exists='append',
+                  chunksize=1000000)
+        trans_print("文件:", filename, "保存数据库成功")
+
+    def run(self):
+        trans_print("开始执行", self.name, self.trans_param.read_type)
+        self.__params_valid([self.name, self.batch_no, self.save_path, self.trans_param.read_type,
+                             self.trans_param.read_path,
+                             self.trans_param.time_col, self.trans_param.wind_col])
+        # 读取文件
+        try:
+            all_files = self.get_excel_files()
+            trans_print('读取文件数量:', len(all_files))
+        except Exception as e:
+            logger.exception(e)
+            message = "读取文件列表错误:" + self.trans_param.read_path + ",系统返回错误:" + str(e)
+            update_transe_status(self.batch_no, self.trans_param.read_type, "error", message)
+            raise e
+
+        # 开始读取数据
+        df = pd.DataFrame()
+        for file in all_files:
+            try:
+                df = pd.concat([df, self.read_excel_to_df(file)])
+            except Exception as e:
+                logger.exception(e)
+                message = "读取文件错误:" + file + ",系统返回错误:" + str(e)
+                update_transe_status(self.batch_no, self.trans_param.read_type, "error", message)
+                raise e
+
+        # 转换字段
+        if self.trans_param.cols_tran:
+            cols_tran = self.trans_param.cols_tran
+            real_cols_trans = dict()
+            for k, v in cols_tran.items():
+                if v:
+                    real_cols_trans[v] = k
+
+            logger.info("包含转换字段,开始处理转换字段")
+            df.rename(columns=real_cols_trans, inplace=True)
+            if self.trans_param.wind_col in real_cols_trans.keys():
+                self.trans_param.wind_col = real_cols_trans[self.trans_param.wind_col]
+
+            for k in cols_tran.keys():
+                if k not in df.columns:
+                    df[k] = None
+
+        # 添加年月日
+        if self.trans_param.time_col:
+            logger.info("包含时间字段,开始处理时间字段,添加年月日")
+            df[self.trans_param.time_col] = pd.to_datetime(df[self.trans_param.time_col])
+            df['year'] = df[self.trans_param.time_col].dt.year
+            df['month'] = df[self.trans_param.time_col].dt.month
+            df['day'] = df[self.trans_param.time_col].dt.day
+            df.sort_values(by=self.trans_param.time_col, inplace=True)
+            logger.info("处理时间字段结束")
+
+        # 开始保存
+        try:
+            names = set(df[self.trans_param.wind_col])
+            trans_print(names, self.trans_param.wind_col)
+            for filename in names:
+                self.save_to_csv(df[df[self.trans_param.wind_col] == filename], filename)
+                # self.save_to_db(df[df[self.trans_param.wind_col] == filename], filename)
+
+        except Exception as e:
+            logger.exception(e)
+            message = "保存文件错误:" + self.save_path + ",系统返回错误:" + str(e)
+            update_transe_status(self.batch_no, self.trans_param.read_type, "error", message)
+            raise e
+
+        update_transe_status(self.batch_no, self.trans_param.read_type, "success", "")
+
+        if self.is_zip:
+            trans_print("开始删除解压进临时文件夹")
+            shutil.rmtree(self.__get_zip_tmp_path())
+            trans_print("删除解压进临时文件夹删除成功")
+
+
+if __name__ == '__main__':
+    aa = WindFarms("test", "test_path")
+    aa.run()

+ 3 - 0
base/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/15
+# @Author  : 魏志亮

+ 100 - 0
config_files/招远/min.yaml

@@ -0,0 +1,100 @@
+#  是否是ZIP文件
+is_zip: False
+#  是否是竖表
+is_vertical_table: False
+#  如果是,则配置竖表需要查询的字段
+vertical_table_conf:
+  read_cols:
+    - 数据时间
+    - 资产名称
+    - 采集点名称
+    - 数值
+  col_key: 采集点名称
+  col_value: 数值
+#时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
+time_col: date_time
+#  风机编号字段
+turbine_col: turbine_name
+date_time_col: date_time
+# 表数据和数据文件的映射关系
+trans_col:
+  # 时间戳
+  time_stamp: null
+  # 有功功率
+  active_power: null
+  # 风轮转速
+  rotor_speed: null
+  # 发电机转速
+  generator_speed: null
+  # 风速
+  wind_velocity: null
+  # 桨距角1
+  pitch_angle_blade_1: null
+  # 桨距角2
+  pitch_angle_blade_2: null
+  # 桨距角3
+  pitch_angle_blade_3: null
+  # 绝对风向
+  true_wind_direction: null
+  # 对风角度
+  yaw_error1: null
+  # 机舱位置
+  cabin_position: null
+  # 环境温度
+  outside_cabin_temperature: null
+  # 有功功率设定值
+  set_value_of_active_power: null
+  # 齿轮箱油温
+  gearbox_oil_temperature: null
+  # 主轴承轴承温度
+  main_bearing_temperature: null
+  # 齿轮箱高速轴轴承温度
+  gearbox_high-speed_shaft__bearing_temperature: null
+  # 齿轮箱中速轴轴承温度
+  gearboxmedium_speed_shaftbearing_temperature: null
+  # 齿轮箱低速轴轴承温度
+  gearbox_low-speed_shaft_bearing_temperature: null
+  # 发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: null
+  # 发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: null
+  # 发电机绕组1温度
+  generator_winding1_temperature: null
+  # 发电机绕组2温度
+  generator_winding2_temperature: null
+  # 发电机绕组3温度
+  generator_winding3_temperature: null
+  # 风机状态1
+  wind_turbine_status: null
+  # 风机状态2
+  wind_turbine_status2: null
+  # 机舱内温度
+  cabin_temperature: null
+  # 湍流强度
+  turbulence_intensity: null
+  # 扭缆角度
+  twisted_cable_angle: null
+  # 机舱前后振动
+  front_back_vibration_of__the_cabin: null
+  # 机舱左右振动
+  side_to_side_vibration__0f_the_cabin: null
+  # 风机编号
+  wind_turbine_number: null
+  # 实际力矩
+  actual_torque: null
+  # 给定力矩
+  given_torque: null
+  # 顺时针偏航次数
+  clockwise_yaw_count: null
+  # 逆时针偏航次数
+  counterclockwise_yaw_count: null
+  # 不可利用
+  unusable: null
+  # 功率曲线可用
+  power_curve_available: null
+  # 齿轮箱转速
+  required_gearbox_speed: null
+  # 变频器转速(主控)
+  inverter_speed_master_control: null
+
+

+ 99 - 0
config_files/招远/sec.yaml

@@ -0,0 +1,99 @@
+#  是否是ZIP文件
+is_zip: False
+#  是否是竖表
+is_vertical_table: False
+#  如果是,则配置竖表需要查询的字段
+vertical_table_conf:
+  read_cols:
+    - 数据时间
+    - 资产名称
+    - 采集点名称
+    - 数值
+  col_key: 采集点名称
+  col_value: 数值
+#时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
+time_col: time_stamp
+#  风机编号字段
+turbine_col: wind_turbine_number
+# 表数据和数据文件的映射关系
+trans_col:
+  # 时间戳
+  time_stamp: 时间戳
+  # 有功功率
+  active_power: 有功功率
+  # 风轮转速
+  rotor_speed: 风轮转速
+  # 发电机转速
+  generator_speed: 发电机转速
+  # 风速
+  wind_velocity: 风速
+  # 桨距角1
+  pitch_angle_blade_1: 桨距角1
+  # 桨距角2
+  pitch_angle_blade_2: 桨距角2
+  # 桨距角3
+  pitch_angle_blade_3: 桨距角3
+  # 绝对风向
+  true_wind_direction: 绝对风向
+  # 对风角度
+  yaw_error1: 对风角度
+  # 机舱位置
+  cabin_position: 机舱位置
+  # 环境温度
+  outside_cabin_temperature: null
+  # 有功功率设定值
+  set_value_of_active_power: 有功功率设定值
+  # 齿轮箱油温
+  gearbox_oil_temperature: 齿轮箱油温
+  # 主轴承轴承温度
+  main_bearing_temperature: null
+  # 齿轮箱高速轴轴承温度
+  gearbox_high-speed_shaft__bearing_temperature: null
+  # 齿轮箱中速轴轴承温度
+  gearboxmedium_speed_shaftbearing_temperature: null
+  # 齿轮箱低速轴轴承温度
+  gearbox_low-speed_shaft_bearing_temperature: null
+  # 发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: 发电机驱动端轴承温度
+  # 发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: 发电机非驱动端轴承温度
+  # 发电机绕组1温度
+  generator_winding1_temperature: 发电机绕组1温度
+  # 发电机绕组2温度
+  generator_winding2_temperature: null
+  # 发电机绕组3温度
+  generator_winding3_temperature: null
+  # 风机状态1
+  wind_turbine_status: 风机状态1
+  # 风机状态2
+  wind_turbine_status2: null
+  # 机舱内温度
+  cabin_temperature: 机舱内温度
+  # 湍流强度
+  turbulence_intensity: null
+  # 扭缆角度
+  twisted_cable_angle: 扭缆角度
+  # 机舱前后振动
+  front_back_vibration_of__the_cabin: 机舱前后振动
+  # 机舱左右振动
+  side_to_side_vibration__0f_the_cabin: 机舱左右振动
+  # 风机编号
+  wind_turbine_number: 风机编号
+  # 实际力矩
+  actual_torque: 实际力矩
+  # 给定力矩
+  given_torque: 给定力矩
+  # 顺时针偏航次数
+  clockwise_yaw_count: 顺时针偏航次数
+  # 逆时针偏航次数
+  counterclockwise_yaw_count: 逆时针偏航次数
+  # 不可利用
+  unusable: 不可利用
+  # 功率曲线可用
+  power_curve_available: 功率曲线可用
+  # 齿轮箱转速
+  required_gearbox_speed: 齿轮箱转速
+  # 变频器转速(主控)
+  inverter_speed_master_control: 变频器转速(主控)
+
+

+ 37 - 0
dianchang/changqing/main.py

@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/16
+# @Author  : 魏志亮
+from base.TranseParam import TranseParam
+from base.WindFarms import WindFarms
+from utils.log.trans_log import logger
+
+wind = WindFarms('测试', 'ceshi', '123123', r'd://transdata_1')
+
+try:
+    param = TranseParam(read_type='min', read_path=r'D:\transdata\huachuang\min', read_cols=['风机编号', '时间', '风机主状态',
+                                                                                             '发电机转速', '发电机转速限定值',
+                                                                                             '变桨速度给定值',
+                                                                                             '桨叶角度给定值', '桨叶1电机位置',
+                                                                                             '桨叶2电机位置', '桨叶3电机位置'],
+                        cols_tran={},
+                        time_col='时间', wind_col='风机编号')
+
+    wind.set_trans_param(param)
+
+    wind.run()
+except Exception as e:
+    logger.exception(e)
+
+try:
+    param = TranseParam(read_type='sec', read_path=r'D:\transdata\zhaoyuan\sec', read_cols=['风机编号', '时间', '风机主状态',
+                                                                                            '发电机转速', '发电机转速限定值',
+                                                                                            '变桨速度给定值',
+                                                                                            '桨叶角度给定值', '桨叶1电机位置',
+                                                                                            '桨叶2电机位置', '桨叶3电机位置'],
+                        cols_tran={},
+                        time_col='时间', wind_col='风机编号')
+
+    wind.set_trans_param(param)
+    wind.run()
+except Exception as e:
+    logger.exception(e)

+ 35 - 0
dianchang/zhaoyuan/main.py

@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/15
+# @Author  : 魏志亮
+import datetime
+import os
+import sys
+
+# path = os.path.abspath(__file__)
+# for i in range(3):
+#     path = os.path.dirname(path)
+sys.path.append("/home/wzl/project/energy-data-trans")
+print('\n'.join(sys.path))
+
+from base.WindFarms import WindFarms
+from base.TranseParam import TranseParam
+from utils.conf.read_conf import read_yaml_file
+
+if __name__ == '__main__':
+    batch_no = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+    name = '招远'
+    type = 'sec'
+
+    yaml_datas = read_yaml_file(name, type)
+
+    cols_trans_all = yaml_datas['trans_col']
+    read_cols = [k for k, v in cols_trans_all.items() if k]
+
+    zhaoyuan = WindFarms(name, batch_no=batch_no, save_path=r"/home/wzl/trans_data")
+
+    params = TranseParam(read_type="sec", read_path=r"/home/wzl/test_data/zhaoyuan/sec",
+                         read_cols=read_cols,
+                         cols_tran=cols_trans_all)
+
+    zhaoyuan.set_trans_param(params)
+    files = zhaoyuan.run()

+ 3 - 0
utils/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/17
+# @Author  : 魏志亮

+ 3 - 0
utils/conf/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/17
+# @Author  : 魏志亮

+ 30 - 0
utils/conf/read_conf.py

@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/17
+# @Author  : 魏志亮
+import os.path
+from pprint import pprint
+
+import yaml
+
+
+def read_yaml_file(filename, type):
+    path = os.path.abspath(__file__)
+    for i in range(3):
+        path = os.path.dirname(path)
+    with open(path + os.sep + "config_files" + os.sep + filename + os.sep + type + ".yaml", 'r', encoding='utf-8') as f:
+        return yaml.safe_load(f)
+
+
+if __name__ == '__main__':
+    data = read_yaml_file("招远", "sec")
+    ss_str = """
+    时间戳,有功功率,风轮转速,发电机转速,风速,风速1,风速2,风速3,风速4,风速5,风速6,风速7,桨距角1,桨距角2,桨距角3,桨距角4,桨距角5,桨距角6,桨距角7,机舱位置,绝对风向,对风角度,风向10s,风向30s,风向标1风向,风向标2风向,环 境温度,有功功率设定值,齿轮箱油温,主轴承轴承温度1,主轴承轴承温度2,齿轮箱高速轴轴承温度1,齿轮箱高速轴轴承温度2,发电机驱动端轴承温度,发电机非驱动端轴承温度,发电机绕组1温度,风机状态1,机舱内温度,湍流强度,扭缆角度, 机舱前后振动,机舱左右振动,实际力矩,给定力矩,顺时针偏航次数,逆时针偏航次数,不可利用,功率曲线可用,齿轮箱转速,变频器转速(主控),年,月,日,风机编号
+    """
+
+    ss_datas = [str(i).strip() for i in ss_str.split(",") if i]
+
+    exist_datas = [i for i in data['trans_col'].values() if i]
+
+    print(set(exist_datas) - set(ss_datas))
+
+    pprint(data)

+ 3 - 0
utils/db/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/15
+# @Author  : 魏志亮

+ 82 - 0
utils/db/trans_mysql.py

@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/15
+# @Author  : 魏志亮
+
+# 建立数据库连接
+import pymysql
+from sqlalchemy import create_engine
+
+from utils.log.trans_log import trans_print
+
+user = 'admin'
+password = 'admin123456'
+host = '192.168.50.233'
+port = 3306
+database = 'energy_data'
+plt_conn = pymysql.connect(
+    host=host,
+    port=3306,
+    user=user,
+    password=password,
+    database=database,
+    charset='utf8mb4'
+)
+
+engine = create_engine(f'mysql+pymysql://{user}:{password}@{host}:{port}/{database}', echo=True)
+
+susscess_sql = "update batch_status set status = 'success' where batch_no = '{batch_no}' and trans_type = '{trans_type}'";
+
+error_sql = "update batch_status set status = 'error',message='{message}' where batch_no = '{batch_no}'  and trans_type = '{trans_type}'"
+
+
+# def __query(sql):
+#     trans_print('开始执行SQL:',sql)
+#     plt_conn.ping(reconnect=True)
+#     with plt_conn.cursor() as cursor:
+#         df = pd.read_sql(sql, cursor)
+#     return df
+#
+#
+# def __ddl_sql(sql):
+#     trans_print('开始执行SQL:',sql)
+#     plt_conn.ping(reconnect=True)
+#     with plt_conn.cursor() as cursor:
+#         cursor.execute(sql)
+#     plt_conn.commit()
+
+
+def update_transe_status(batch_no, trans_type, status, message):
+    exec_sql = susscess_sql if status == 'success' else error_sql
+    exec_sql = exec_sql.format(batch_no=batch_no, status=status, message=message, trans_type=trans_type)
+    #
+    # plt_conn.ping(reconnect=True)
+    # with plt_conn.cursor() as cursor:
+    #     cursor.execute(exec_sql)
+    # plt_conn.commit()
+    trans_print(exec_sql)
+
+
+# def insert_data(batch_no, type, status, message):
+#     exec_sql = insert_sql.format(batch_no=batch_no, type=type, status=status, message=message)
+#     plt_conn.ping(reconnect=True)
+#     with plt_conn.cursor() as cursor:
+#         cursor.execute(exec_sql)
+#
+#     plt_conn.commit()
+
+
+def create_table(batch_no, date_list=list(), fengji_list=list()):
+    pass
+
+
+def get_exec_data():
+    query_running_sql = "selecgt 1 from table where status = 'running"
+    query_next_exdc_sql = "selecgt 1 from table where status = 'waiting' order by id "
+    trans_print(query_next_exdc_sql)
+    # df = __query(query_running_sql)
+    # if df.empty:
+    #     df = __query(query_next_exdc_sql)
+    #     if df.empty:
+    #         return None
+    #     else:
+    #         return df.iloc[0]

+ 3 - 0
utils/log/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/16
+# @Author  : 魏志亮

+ 25 - 0
utils/log/trans_log.py

@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/16
+# @Author  : 魏志亮
+
+import logging
+import os
+import datetime
+import sys
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+logger.addHandler(logging.StreamHandler(sys.stdout))
+
+log_path = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) + '/logs'
+if not os.path.exists(log_path):
+    os.makedirs(log_path)
+
+file_handler = logging.FileHandler(log_path + '/dianchang_' + str(datetime.date.today()) + '.log', encoding='utf-8')
+file_handler.setFormatter(logging.Formatter("%(asctime)s-%(levelname)s-%(filename)-8s:%(lineno)s: %(message)s"))
+file_handler.setLevel(logging.INFO)
+logger.addHandler(file_handler)
+
+
+def trans_print(*args):
+    logger.info((str(datetime.datetime.now())) + " ".join([str(a) for a in args]))

+ 90 - 0
utils/trans_methods.py

@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/16
+# @Author  : 魏志亮
+import os
+import re
+import warnings
+
+import chardet
+import pandas as pd
+
+from utils.log.trans_log import trans_print
+
+warnings.filterwarnings("ignore")
+
+
+# 获取文件编码
+def detect_file_encoding(filename):
+    # 读取文件的前1000个字节(足够用于大多数编码检测)
+    with open(filename, 'rb') as f:
+        rawdata = f.read(1000)
+    result = chardet.detect(rawdata)
+    return result['encoding']
+
+
+# 读取数据到df
+def read_file_to_df(file_path, read_cols=list()):
+    trans_print('开始读取文件', file_path)
+    df = pd.DataFrame()
+    encoding = detect_file_encoding(file_path)
+
+    if str(file_path).lower().endswith("csv"):
+        if read_cols:
+            df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols)
+        else:
+            df = pd.read_csv(file_path, encoding=encoding)
+    else:
+        xls = pd.ExcelFile(file_path)
+        # 获取所有的sheet名称
+        sheet_names = xls.sheet_names
+        for sheet in sheet_names:
+            if read_cols:
+                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet, usecols=read_cols)])
+            else:
+                df = pd.concat([df, pd.read_excel(xls, sheet_name=sheet)])
+
+    trans_print('文件读取成功', file_path, '文件数量', df.shape)
+
+    return df
+
+
+def __build_directory_dict(directory_dict, path):
+    # 遍历目录下的所有项
+    for item in os.listdir(path):
+        item_path = os.path.join(path, item)
+        if os.path.isdir(item_path):
+            __build_directory_dict(directory_dict, item_path)
+        elif os.path.isfile(item_path):
+            if path not in directory_dict:
+                directory_dict[path] = []
+
+            types = ['xls', 'xlsx', 'csv']
+            if str(item_path).split(".")[-1] in types:
+                if str(item_path).count("~$") == 0:
+                    directory_dict[path].append(item_path)
+
+    # 读取所有文件
+
+
+# 读取路径下所有的excel文件
+def read_excel_files(read_path):
+    directory_dict = {}
+    __build_directory_dict(directory_dict, read_path)
+
+    return [path for paths in directory_dict.values() for path in paths if path]
+
+
+# 创建路径
+def create_file_path(path, is_file_path=False):
+    if is_file_path:
+        path = os.path.dirname(path)
+
+    if not os.path.exists(path):
+        os.makedirs(path)
+
+
+# 格式化风机名称
+def generate_turbine_name(turbine_name='F0001', prefix='F'):
+    strinfo = re.compile(r"[\D*]")
+    name = strinfo.sub('', str(turbine_name))
+    return prefix + str(int(name)).zfill(3)

+ 3 - 0
utils/zip/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/17
+# @Author  : 魏志亮

+ 39 - 0
utils/zip/unzip.py

@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/17
+# @Author  : 魏志亮
+import os
+from utils.log.trans_log import trans_print, logger
+
+import zipfile
+
+
+def unzip(zip_filepath, dest_path):
+    # 解压zip文件
+    is_success = True
+    trans_print('开始读取文件:', zip_filepath)
+    try:
+        with zipfile.ZipFile(zip_filepath, 'r') as zip_ref:
+            zip_ref.extractall(dest_path)
+    except zipfile.BadZipFile as e:
+        logger.exception(e)
+        is_success = False
+        message = str(e)
+        trans_print('不是zip文件:', zip_filepath)
+        return is_success, e
+
+        # 遍历解压后的文件
+    if is_success:
+        for root, dirs, files in os.walk(dest_path):
+            for file in files:
+                file_path = os.path.join(root, file)
+                # 检查文件是否是zip文件
+                if file_path.endswith('.zip'):
+                    # 如果是,递归解压
+                    unzip(file_path, dest_path + os.sep + str(file).split(".")[0])
+                    # 删除已解压的zip文件(可选)
+                    os.remove(file_path)
+    return is_success, ''
+
+
+if __name__ == '__main__':
+    unzip(r'C:\Users\Administrator\Desktop\test.zip', r'C:\Users\Administrator\Desktop\test')