Explorar o código

整理各类配置文件

anmox hai 1 ano
pai
achega
10412d380f

+ 70 - 19
app.py

@@ -1,32 +1,83 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2024/5/17
 # @Author  : 魏志亮
-
-
 import datetime
 
+import pandas as pd
+
 from base.TranseParam import TranseParam
 from base.WindFarms import WindFarms
-from utils.conf.read_conf import read_yaml_file
+from utils.conf.read_conf import read_yaml_file, read_param_from_yaml_file
+from utils.db.trans_mysql import get_exec_data
+from utils.log.trans_log import trans_print, logger, init_log
 
-if __name__ == '__main__':
-    batch_no = datetime.datetime.now().strftime("%Y%m%d%H%M")
-    name = '招远'
-    type = 'sec'
 
-    yaml_datas = read_yaml_file(name, type)
+def init_params() -> list[WindFarms]:
+    df = get_exec_data()
+    print(df)
+    exec_subjects = list()
+
+    path = r'D:\transdata\test\虹梯官风电场-山西-大唐\高频数据2.zip'
+    changshang_name = '虹梯官风电场'
+    # read_type = 'minute'
+    read_type = 'second'
+    batch_no = 'batch_' + str(datetime.datetime.now().strftime('%Y%m%d'))
+
+    df = pd.DataFrame(columns=['batch_no', 'transfer_type', 'transfer_file_addr', 'field_code', 'field_name'],
+                      data=[
+                          [batch_no, read_type, path, 'WOF01000002', changshang_name]])
 
-    time_col = yaml_datas['time_col']
-    wind_col = yaml_datas['turbine_col']
-    print(wind_col)
-    cols_trans_all = yaml_datas['trans_col']
-    read_cols = [v for k, v in cols_trans_all.items() if v]
+    if df is None:
+        trans_print("当前任务正在执行")
 
-    zhaoyuan = WindFarms(name, batch_no=batch_no, save_path=r"/home/wzl/trans_data")
+    if df.empty:
+        trans_print("当前没有需要执行的任务")
+    else:
+        for batch_no, transfer_type, transfer_file_addr, field_code, field_name in zip(df['batch_no'],
+                                                                                       df['transfer_type'],
+                                                                                       df['transfer_file_addr'],
+                                                                                       df['field_code'],
+                                                                                       df['field_name']):
+            init_log(batch_no, field_name, transfer_type)
 
-    params = TranseParam(read_type="sec", read_path=r"/home/wzl/test_data/zhaoyuan/sec",
-                         read_cols=read_cols,
-                         cols_tran=cols_trans_all, time_col=time_col, wind_col=wind_col)
+            yaml_datas = read_yaml_file(field_name, transfer_type)
+            time_col = read_param_from_yaml_file(yaml_datas, 'time_col')
+            wind_col = read_param_from_yaml_file(yaml_datas, 'wind_col')
+            wind_name_exec = read_param_from_yaml_file(yaml_datas, 'wind_name_exec', None)
+            cols_trans_all = read_param_from_yaml_file(yaml_datas, 'trans_col')
+            wind_full_name = read_param_from_yaml_file(yaml_datas, 'wind_full_name')
+            is_vertical_table = read_param_from_yaml_file(yaml_datas, 'is_vertical_table', False)
+            vertical_cols = read_param_from_yaml_file(yaml_datas['vertical_table_conf'], 'read_cols', list())
+            vertical_key = read_param_from_yaml_file(yaml_datas['vertical_table_conf'], 'col_key')
+            vertical_value = read_param_from_yaml_file(yaml_datas['vertical_table_conf'], 'col_value')
+            index_cols = read_param_from_yaml_file(yaml_datas['vertical_table_conf'], 'index_cols')
+            merge_columns = read_param_from_yaml_file(yaml_datas, 'merge_columns', False)
+            trans_col_exec = read_param_from_yaml_file(yaml_datas, 'trans_col_exec')
+            need_valid_cols = read_param_from_yaml_file(yaml_datas, 'need_valid_cols', True)
+
+            trans_subject = WindFarms(field_name, batch_no=batch_no, field_code=field_code,
+                                      wind_full_name=wind_full_name)
+
+            params = TranseParam(read_type=transfer_type, read_path=transfer_file_addr,
+                                 cols_tran=cols_trans_all, time_col=time_col, wind_col=wind_col,
+                                 wind_name_exec=wind_name_exec, is_vertical_table=is_vertical_table,
+                                 vertical_cols=vertical_cols, vertical_key=vertical_key,
+                                 vertical_value=vertical_value, index_cols=index_cols, merge_columns=merge_columns,
+                                 trans_col_exec=trans_col_exec, need_valid_cols=need_valid_cols)
+
+            trans_subject.set_trans_param(params)
+            exec_subjects.append(trans_subject)
+    return exec_subjects
+
+
+if __name__ == '__main__':
+    exec_subjects = init_params()
+    for exec_subject in exec_subjects:
+        try:
+            exec_subject.run()
 
-    zhaoyuan.set_trans_param(params)
-    files = zhaoyuan.run()
+            # exec_subject.delete_batch_db()
+            # exec_subject.mutiprocessing_to_save_db()
+        except Exception as e:
+            print(e)
+            logger.exception(e)

+ 14 - 3
base/TranseParam.py

@@ -2,13 +2,24 @@
 # @Time    : 2024/5/16
 # @Author  : 魏志亮
 
+
 class TranseParam(object):
 
-    def __init__(self, read_type=None, read_path=None, read_cols=list(), cols_tran={}, time_col=None, wind_col=None):
+    def __init__(self, read_type=None, read_path=None, cols_tran={}, time_col=None, wind_col=None,
+                 wind_name_exec=str(), is_vertical_table=False, vertical_cols=list(), vertical_key=None,
+                 vertical_value=None, index_cols=list(), merge_columns=False, trans_col_exec=None,
+                 need_valid_cols=True):
         self.read_type = read_type
         self.read_path = read_path
-        self.read_cols = read_cols
         self.cols_tran = cols_tran
         self.time_col = time_col
         self.wind_col = wind_col
-        self.is_vertical_table = False
+        self.is_vertical_table = is_vertical_table
+        self.wind_name_exec = wind_name_exec
+        self.vertical_cols = vertical_cols
+        self.vertical_key = vertical_key
+        self.vertical_value = vertical_value
+        self.index_cols = index_cols
+        self.merge_columns = merge_columns
+        self.trans_col_exec = trans_col_exec
+        self.need_valid_cols = need_valid_cols

+ 330 - 84
base/WindFarms.py

@@ -1,31 +1,45 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2024/5/15
 # @Author  : 魏志亮
-
+import copy
 import datetime
-import shutil
+import multiprocessing
+import sys
+import tempfile
 
 from base.TranseParam import TranseParam
-from utils.db.trans_mysql import *
+from utils.db.trans_mysql import creat_table_and_add_partition, update_trans_status, get_all_wind, \
+    rename_table, read_excel_and_save_to_db
 from utils.log.trans_log import logger
 from utils.trans_methods import *
-from utils.zip.unzip import unzip
+from utils.zip.unzip import unzip, unrar
 
 
 class WindFarms(object):
 
-    def __init__(self, name, batch_no=None, save_path=None, params: TranseParam = None):
+    def __init__(self, name, batch_no=None, field_code=None, params: TranseParam = None, wind_full_name=None):
         self.name = name
         self.batch_no = batch_no
-        self.save_path = save_path
+        self.field_code = field_code
+        self.wind_full_name = wind_full_name
         self.begin = datetime.datetime.now()
-        self.next_time = datetime.datetime.now()
-        self.is_zip = False
         self.save_zip = False
         self.trans_param = params
+        self.__exist_wind_names = set()
+        self.wind_col_trans = get_all_wind(self.field_code)
+        self.batch_count = 50000
+        self.save_path = None
 
     def set_trans_param(self, params: TranseParam):
         self.trans_param = params
+        read_path = str(params.read_path)
+
+        if read_path.find(self.wind_full_name) == -1:
+            message = "读取路径与配置路径不匹配:" + self.trans_param.read_path + ",配置文件为:" + self.wind_full_name
+            update_trans_status(self.batch_no, self.trans_param.read_type, "error", message)
+            raise ValueError(message)
+
+        self.save_path = os.path.join(read_path[0:read_path.find(self.wind_full_name)], self.wind_full_name, "清理数据")
 
     def __params_valid(self, not_null_list=list()):
         for arg in not_null_list:
@@ -33,120 +47,352 @@ class WindFarms(object):
                 raise Exception("Invalid param set :" + arg)
 
     def __get_save_path(self):
-        return os.path.join(self.save_path, self.name, self.batch_no)
+        return os.path.join(self.save_path, self.batch_no, self.trans_param.read_type)
 
-    def __get_zip_tmp_path(self):
-        return os.path.join(self.__get_save_path(), 'save_tmp')
+    def __get_save_tmp_path(self):
+        return os.path.join(tempfile.gettempdir(), self.wind_full_name, self.batch_no, self.trans_param.read_type)
+
+    def __get_excel_tmp_path(self):
+        return os.path.join(self.__get_save_tmp_path(), 'excel_tmp' + os.sep)
 
     def __get_read_tmp_path(self):
-        return os.path.join(self.__get_save_path(), 'read_tmp')
+        return os.path.join(self.__get_save_tmp_path(), 'read_tmp')
+
+    def __df_save_to_tmp_file(self, df=pd.DataFrame(), file=None):
+
+        if self.trans_param.is_vertical_table:
+            pass
+        else:
+            # 转换字段
+            if self.trans_param.cols_tran:
+                cols_tran = self.trans_param.cols_tran
+                real_cols_trans = dict()
+                for k, v in cols_tran.items():
+                    if v and not v.startswith("$"):
+                        real_cols_trans[v] = k
+
+                trans_print("包含转换字段,开始处理转换字段")
+                df.rename(columns=real_cols_trans, inplace=True)
+                if self.trans_param.wind_col in real_cols_trans.keys():
+                    self.trans_param.wind_col = real_cols_trans[self.trans_param.wind_col]
+
+                del_keys = set(df.columns) - set(cols_tran.keys())
+
+                for key in del_keys:
+                    df.drop(key, axis=1, inplace=True)
+
+        df = del_blank(df, ['wind_turbine_number'])
+        self.__save_to_tmp_csv(df, file)
 
-    def get_excel_files(self):
+    def __get_excel_files(self):
 
-        if self.is_zip:
-            is_success, e = unzip(self.trans_param.read_path, self.__get_zip_tmp_path())
-            if is_success:
-                self.trans_param.read_path = self.__get_zip_tmp_path()
+        if os.path.isfile(self.trans_param.read_path):
+            all_files = [self.trans_param.read_path]
+        else:
+            all_files = read_files(self.trans_param.read_path)
+
+        to_path = self.__get_excel_tmp_path()
+        for file in all_files:
+            if str(file).endswith("zip"):
+                if str(file).endswith("csv.zip"):
+                    copy_to_new(file, file.replace(self.trans_param.read_path, to_path).replace("csv.zip", 'csv.gz'))
+                else:
+                    is_success, e = unzip(file, file.replace(self.trans_param.read_path, to_path).split(".")[0])
+                    self.trans_param.has_zip = True
+                if not is_success:
+                    raise e
+            elif str(file).endswith("rar"):
+                is_success, e = unrar(file, file.replace(self.trans_param.read_path, to_path).split(".")[0])
+                self.trans_param.has_zip = True
+                if not is_success:
+                    raise e
             else:
-                raise e
+                copy_to_new(file, file.replace(self.trans_param.read_path, to_path))
+
+        return read_excel_files(to_path)
+
+    def __read_excel_to_df(self, file):
+
+        read_cols = [v for k, v in self.trans_param.cols_tran.items() if v and not v.startswith("$")]
+
+        trans_dict = {}
+        for k, v in self.trans_param.cols_tran.items():
+            if v and not str(v).startswith("$"):
+                trans_dict[v] = k
+
+        if self.trans_param.is_vertical_table:
+            vertical_cols = self.trans_param.vertical_cols
+            df = read_file_to_df(file, vertical_cols)
+            df = df[df[self.trans_param.vertical_key].isin(read_cols)]
+            df.rename(columns={self.trans_param.cols_tran['wind_turbine_number']: 'wind_turbine_number',
+                               self.trans_param.cols_tran['time_stamp']: 'time_stamp'}, inplace=True)
+
+            df[self.trans_param.vertical_key] = df[self.trans_param.vertical_key].map(trans_dict).fillna(
+                df[self.trans_param.vertical_key])
 
-        return read_excel_files(self.trans_param.read_path)
+            return df
 
-    def read_excel_to_df(self, file):
+        else:
+            trans_dict = dict()
+            for k, v in self.trans_param.cols_tran.items():
+                if v and v.startswith("$"):
+                    trans_dict[v] = k
+
+            if self.trans_param.merge_columns:
+                df = read_file_to_df(file)
+            else:
+                if self.trans_param.need_valid_cols:
+                    df = read_file_to_df(file, read_cols)
+                else:
+                    df = read_file_to_df(file)
+
+            # 处理列名前缀问题
+            if self.trans_param.trans_col_exec:
+                columns_dict = dict()
+                for column in df.columns:
+                    columns_dict[column] = eval(self.trans_param.trans_col_exec)
+                df.rename(columns=columns_dict, inplace=True)
+
+            for k, v in trans_dict.items():
+                if k.startswith("$file"):
+                    file_name = ".".join(os.path.basename(file).split(".")[0:-1])
+                    if k == "$file":
+                        df[v] = str(file_name)
+                    else:
+                        datas = str(k.replace("$file", "").replace("[", "").replace("]", "")).split(":")
+                        if len(datas) != 2:
+                            raise Exception("字段映射出现错误 :" + str(trans_dict))
+                        df[v] = str(file_name[int(datas[0]):int(datas[1])]).strip()
+                elif k.startswith("$folder"):
+                    folder = file
+                    cengshu = int(str(k.replace("$folder", "").replace("[", "").replace("]", "")))
+                    for i in range(cengshu):
+                        folder = os.path.dirname(folder)
+                    df[v] = str(str(folder).split(os.sep)[-1]).strip()
+
+            return df
+
+    def __save_to_tmp_csv(self, df, file):
+        trans_print("开始保存", str(file), "到临时文件成功")
+        names = set(df['wind_turbine_number'].values)
+        for name in names:
+            save_name = str(name) + '.csv'
+            save_path = os.path.join(self.__get_read_tmp_path(), save_name)
+            create_file_path(save_path, is_file_path=True)
+            if name in self.__exist_wind_names:
+                df[df[self.trans_param.wind_col] == name].to_csv(save_path, index=False, encoding='utf8', mode='a',
+                                                                 header=False)
+            else:
+                self.__exist_wind_names.add(name)
+                df[df[self.trans_param.wind_col] == name].to_csv(save_path, index=False, encoding='utf8')
+
+        del df
+        trans_print("保存", str(names), "到临时文件成功, 风机数量", len(names))
+
+    def save_to_csv(self, filename):
+        df = read_file_to_df(filename)
+
+        if self.trans_param.is_vertical_table:
+            df = df.pivot_table(index=['time_stamp', 'wind_turbine_number'], columns=self.trans_param.vertical_key,
+                                values=self.trans_param.vertical_value,
+                                aggfunc='max')
+            # 重置索引以得到普通的列
+            df.reset_index(inplace=True)
+
+        for k in self.trans_param.cols_tran.keys():
+            if k not in df.columns:
+                df[k] = None
+
+        df = df[self.trans_param.cols_tran.keys()]
+
+        # 添加年月日
+        if self.trans_param.time_col:
+            trans_print("包含时间字段,开始处理时间字段,添加年月日", filename)
+            df[self.trans_param.time_col] = pd.to_datetime(df[self.trans_param.time_col])
+            df['year'] = df[self.trans_param.time_col].dt.year
+            df['month'] = df[self.trans_param.time_col].dt.month
+            df['day'] = df[self.trans_param.time_col].dt.day
+            df.sort_values(by=self.trans_param.time_col, inplace=True)
+            df[self.trans_param.time_col] = df[self.trans_param.time_col].apply(
+                lambda x: x.strftime('%Y-%m-%d %H:%M:%S'))
+            trans_print("处理时间字段结束")
 
-        return read_file_to_df(file, self.trans_param.read_cols)
+        # 转化风机名称
+        trans_print("开始转化风机名称")
+        if self.trans_param.wind_name_exec:
+            exec_str = f"df[self.trans_param.wind_col].apply(lambda wind_name: {self.trans_param.wind_name_exec} )"
+            df[self.trans_param.wind_col] = eval(exec_str)
 
-    def save_to_csv(self, df, filename):
-        save_name = str(filename) + ('.csv' if self.save_zip else '.csv.gz')
-        save_path = os.path.join(self.save_path, self.name, self.batch_no, self.trans_param.read_type,
-                                 save_name)
+        df[self.trans_param.wind_col] = df[self.trans_param.wind_col].map(
+            self.wind_col_trans).fillna(
+            df[self.trans_param.wind_col])
+        trans_print("转化风机名称结束")
+
+        wind_col_name = str(df[self.trans_param.wind_col].values[0])
+
+        if self.save_zip:
+            save_path = os.path.join(self.__get_save_path(), str(wind_col_name) + '.csv.gz')
+        else:
+            save_path = os.path.join(self.__get_save_path(), str(wind_col_name) + '.csv')
         create_file_path(save_path, is_file_path=True)
         if self.save_zip:
-            df[df[self.trans_param.wind_col] == filename].to_csv(save_path, compression='.gzip', index=False)
+            df.to_csv(save_path, compression='gzip', index=False, encoding='utf-8')
         else:
-            df[df[self.trans_param.wind_col] == filename].to_csv(save_path, index=False)
-        trans_print("保存" + str(filename) + ".csv成功")
+            df.to_csv(save_path, index=False, encoding='utf-8')
 
-    def save_to_db(self, df, filename):
-        df.to_sql(name=str(self.batch_no), con=engine.connect(), index=False, if_exists='append',
-                  chunksize=1000000)
-        trans_print("文件:", filename, "保存数据库成功")
+        del df
+        trans_print("保存" + str(filename) + ".csv成功")
 
-    def run(self):
-        trans_print("开始执行", self.name, self.trans_param.read_type)
-        self.__params_valid([self.name, self.batch_no, self.save_path, self.trans_param.read_type,
-                             self.trans_param.read_path,
-                             self.trans_param.time_col, self.trans_param.wind_col])
+    def read_all_files(self):
         # 读取文件
         try:
-            all_files = self.get_excel_files()
+            all_files = self.__get_excel_files()
             trans_print('读取文件数量:', len(all_files))
         except Exception as e:
             logger.exception(e)
             message = "读取文件列表错误:" + self.trans_param.read_path + ",系统返回错误:" + str(e)
-            update_transe_status(self.batch_no, self.trans_param.read_type, "error", message)
+            update_trans_status(self.batch_no, self.trans_param.read_type, "error", message)
             raise e
+        return all_files
 
-        # 开始读取数据
-        df = pd.DataFrame()
-        for file in all_files:
+    def __read_file_and_save_tmp(self):
+
+        all_files = self.read_all_files()
+        if self.trans_param.merge_columns:
+            # with multiprocessing.Pool(6) as pool:
+            #     dfs = pool.starmap(self.__read_excel_to_df, [(file,) for file in all_files])
+            dfs = list()
+            index_keys = [self.trans_param.cols_tran['time_stamp']]
+            wind_col = self.trans_param.cols_tran['wind_turbine_number']
+            if str(wind_col).startswith("$"):
+                wind_col = 'wind_turbine_number'
+            index_keys.append(wind_col)
+            df_map = dict()
+            for file in all_files:
+                df = self.__read_excel_to_df(file)
+
+                key = '-'.join(df.columns)
+                if key in df_map.keys():
+                    df_map[key] = pd.concat([df_map[key], df])
+                else:
+                    df_map[key] = df
+
+            for k, df in df_map.items():
+                df.drop_duplicates(inplace=True)
+                df.set_index(keys=index_keys, inplace=True)
+                df = df[~df.index.duplicated(keep='first')]
+                dfs.append(df)
+
+            df = pd.concat(dfs, axis=1)
+            df.reset_index(inplace=True)
+            names = set(df[wind_col].values)
             try:
-                df = pd.concat([df, self.read_excel_to_df(file)])
+                for name in names:
+                    self.__df_save_to_tmp_file(df[df[wind_col] == name], "")
             except Exception as e:
                 logger.exception(e)
-                message = "读取文件错误:" + file + ",系统返回错误:" + str(e)
-                update_transe_status(self.batch_no, self.trans_param.read_type, "error", message)
+                message = "合并列出现错误:" + str(e)
+                update_trans_status(self.batch_no, self.trans_param.read_type, "error", message)
                 raise e
 
-        # 转换字段
-        if self.trans_param.cols_tran:
-            cols_tran = self.trans_param.cols_tran
-            real_cols_trans = dict()
-            for k, v in cols_tran.items():
-                if v:
-                    real_cols_trans[v] = k
+        else:
+            for file in all_files:
+                try:
+                    self.__df_save_to_tmp_file(self.__read_excel_to_df(file), file)
+                except Exception as e:
+                    logger.exception(e)
+                    message = "读取文件错误:" + file + ",系统返回错误:" + str(e)
+                    update_trans_status(self.batch_no, self.trans_param.read_type, "error", message)
+                    raise e
 
-            logger.info("包含转换字段,开始处理转换字段")
-            df.rename(columns=real_cols_trans, inplace=True)
-            if self.trans_param.wind_col in real_cols_trans.keys():
-                self.trans_param.wind_col = real_cols_trans[self.trans_param.wind_col]
+    def mutiprocessing_to_save_file(self):
+        # 开始保存到正式文件
+        trans_print("开始保存到excel文件")
+        all_tmp_files = read_excel_files(self.__get_read_tmp_path())
+        try:
+            with multiprocessing.Pool(6) as pool:
+                pool.starmap(self.save_to_csv, [(file,) for file in all_tmp_files])
 
-            for k in cols_tran.keys():
-                if k not in df.columns:
-                    df[k] = None
+        except Exception as e:
+            logger.exception(e)
+            message = "保存文件错误,系统返回错误:" + str(e)
+            update_trans_status(self.batch_no, self.trans_param.read_type, "error", message)
+            raise e
 
-        # 添加年月日
-        if self.trans_param.time_col:
-            logger.info("包含时间字段,开始处理时间字段,添加年月日")
-            df[self.trans_param.time_col] = pd.to_datetime(df[self.trans_param.time_col])
-            df['year'] = df[self.trans_param.time_col].dt.year
-            df['month'] = df[self.trans_param.time_col].dt.month
-            df['day'] = df[self.trans_param.time_col].dt.day
-            df.sort_values(by=self.trans_param.time_col, inplace=True)
-            logger.info("处理时间字段结束")
+        trans_print("结束保存到excel文件")
 
-        # 开始保存
+    def mutiprocessing_to_save_db(self):
+        # 开始保存到SQL文件
+        trans_print("开始保存到数据库文件")
+        all_saved_files = read_excel_files(self.__get_save_path())
+        table_name = self.batch_no + "_" + self.trans_param.read_type
+        creat_table_and_add_partition(table_name, len(all_saved_files), self.trans_param.read_type)
         try:
-            names = set(df[self.trans_param.wind_col])
-            trans_print(names, self.trans_param.wind_col)
-            for filename in names:
-                self.save_to_csv(df[df[self.trans_param.wind_col] == filename], filename)
-                # self.save_to_db(df[df[self.trans_param.wind_col] == filename], filename)
+
+            with multiprocessing.Pool(6) as pool:
+                pool.starmap(read_excel_and_save_to_db,
+                             [(table_name, file, self.batch_count) for file in all_saved_files])
 
         except Exception as e:
             logger.exception(e)
-            message = "保存文件错误:" + self.save_path + ",系统返回错误:" + str(e)
-            update_transe_status(self.batch_no, self.trans_param.read_type, "error", message)
+            message = "保存到数据库错误,系统返回错误:" + str(e)
+            update_trans_status(self.batch_no, self.trans_param.read_type, "error", message)
             raise e
+        trans_print("结束保存到数据库文件")
+
+    def __rename_file(self):
+        save_path = self.__get_save_path()
+        files = os.listdir(save_path)
+
+        files.sort(key=lambda x: int(str(x).split(os.sep)[-1].split(".")[0][1:]))
+        for index, file in enumerate(files):
+            file_path = os.path.join(save_path, 'F' + str(index + 1).zfill(3) + ".csv.gz")
+            os.rename(os.path.join(save_path, file), file_path)
+
+    def delete_batch_files(self):
+        trans_print("开始删除已存在的批次文件夹")
+        if os.path.exists(self.__get_save_path()):
+            shutil.rmtree(self.__get_save_path())
+        trans_print("删除已存在的批次文件夹")
+
+    def delete_tmp_files(self):
+        trans_print("开始删除临时文件夹")
+        if os.path.exists(self.__get_excel_tmp_path()):
+            shutil.rmtree(self.__get_excel_tmp_path())
+        if os.path.exists(self.__get_read_tmp_path()):
+            shutil.rmtree(self.__get_read_tmp_path())
+        if os.path.exists(self.__get_save_tmp_path()):
+            shutil.rmtree(self.__get_save_tmp_path())
+
+        trans_print("删除临时文件夹删除成功")
+
+    def delete_batch_db(self):
+        table_name = "_".join([self.batch_no, self.trans_param.read_type])
+        renamed_table_name = "del_" + table_name + "_" + datetime.datetime.now().strftime('%Y%m%d%H%M%S')
+        rename_table(table_name, renamed_table_name)
+
+    def run(self):
+        trans_print("开始执行", self.name, self.trans_param.read_type)
+
+        self.delete_batch_files()
+        self.delete_tmp_files()
+        self.delete_batch_db()
+
+        self.__params_valid([self.name, self.batch_no, self.field_code, self.save_path, self.trans_param.read_type,
+                             self.trans_param.read_path,
+                             self.trans_param.time_col, self.trans_param.wind_col, self.wind_full_name])
+
+        # 更新运行状态到运行中
+        update_trans_status(self.batch_no, self.trans_param.read_type, "running", "")
+
+        # 开始读取数据并分类保存临时文件
+        self.__read_file_and_save_tmp()
 
-        update_transe_status(self.batch_no, self.trans_param.read_type, "success", "")
+        self.mutiprocessing_to_save_file()
 
-        if self.is_zip:
-            trans_print("开始删除解压进临时文件夹")
-            shutil.rmtree(self.__get_zip_tmp_path())
-            trans_print("删除解压进临时文件夹删除成功")
+        self.mutiprocessing_to_save_db()
 
+        update_trans_status(self.batch_no, self.trans_param.read_type, "success", "",
+                            wind_count=len(read_excel_files(self.__get_read_tmp_path())))
 
-if __name__ == '__main__':
-    aa = WindFarms("test", "test_path")
-    aa.run()
+        self.delete_tmp_files()

+ 112 - 0
config_files/吉山风电场-需要点检/second.yaml

@@ -0,0 +1,112 @@
+# 没有验证 需要和 实施一起验证
+
+wind_full_name: 吉山风电场-江西-大唐
+#  是否是竖表
+is_vertical_table: False
+# 是否需要合并列,吉山风电场-重庆海装处理
+merge_columns: True
+#  如果是怎配置竖表需要查询的字段
+vertical_table_conf:
+  read_cols:
+    - 数据时间
+    - 资产名称
+    - 采集点名称
+    - 数值
+  index_cols:
+    - 数据时间
+    - 资产名称
+  col_key: 采集点名称
+  col_value: 数值
+#时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
+time_col: time_stamp
+#  风机编号字段
+wind_col: wind_turbine_number
+# 转化的类名是否需要处理 比如 重庆还装表头自带 风机名称
+trans_col_exec: column[column.find('_')+1:]
+# 表数据和数据文件的映射关系
+trans_col:
+  # 风机编号 文件夹中是风机编号,压缩包的话会多一层目录
+  # eg: /data/download/xx/风机001/cc.csv $folder[1]
+  # eg: /data/download/xx/风机001/unzip_files/cc.csv $folder[2]
+  # 如果不规则,则配置下方的 wind_name_exec
+  wind_turbine_number: $folder[1]
+  # 时间戳
+  time_stamp: 采样时间
+  # 有功功率
+  active_power: 发电机有功功率
+  # 风轮转速
+  rotor_speed: 风轮转速
+  # 发电机转速
+  generator_speed: 发电机转速
+  # 风速
+  wind_velocity: 瞬时风速
+  # 桨距角1
+  pitch_angle_blade_1: 1#叶片变桨角度
+  # 桨距角2
+  pitch_angle_blade_2: 2#叶片变桨角度
+  # 桨距角3
+  pitch_angle_blade_3: 3#叶片变桨角度
+  # 绝对风向
+  true_wind_direction: null
+  # 对风角度
+  yaw_error1: null
+  # 机舱位置
+  cabin_position: null
+  # 环境温度
+  outside_cabin_temperature: 环境温度
+  # 有功功率设定值
+  set_value_of_active_power: null
+  # 齿轮箱油温
+  gearbox_oil_temperature: 齿轮箱油池温度
+  # 主轴承轴承温度
+  main_bearing_temperature: 主轴前轴承温度
+  # 齿轮箱高速轴轴承温度
+  gearbox_high_speed_shaft_bearing_temperature: 齿轮箱高速轴前端温度
+  # 齿轮箱中速轴轴承温度
+  gearboxmedium_speed_shaftbearing_temperature: null
+  # 齿轮箱低速轴轴承温度
+  gearbox_low_speed_shaft_bearing_temperature: null
+  # 发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: 发电机前轴承温度
+  # 发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: 发电机后轴承温度
+  # 发电机绕组1温度
+  generator_winding1_temperature: 发电机绕组v2温度
+  # 发电机绕组2温度
+  generator_winding2_temperature: 发电机绕组w1温度
+  # 发电机绕组3温度
+  generator_winding3_temperature: 发电机绕组u2温度
+  # 风机状态1
+  wind_turbine_status: null
+  # 风机状态2
+  wind_turbine_status2: null
+  # 机舱内温度
+  cabin_temperature: 机舱温度
+  # 湍流强度
+  turbulence_intensity: null
+  # 扭缆角度
+  twisted_cable_angle: null
+  # 机舱前后振动
+  front_back_vibration_of_the_cabin: null
+  # 机舱左右振动
+  side_to_side_vibration_of_the_cabin: null
+  # 实际力矩
+  actual_torque: null
+  # 给定力矩
+  given_torque: null
+  # 顺时针偏航次数
+  clockwise_yaw_count: null
+  # 逆时针偏航次数
+  counterclockwise_yaw_count: null
+  # 不可利用
+  unusable: null
+  # 功率曲线可用
+  power_curve_available: null
+  # 齿轮箱转速
+  required_gearbox_speed: null
+  # 变频器转速(主控)
+  inverter_speed_master_control: null
+
+# 自定义风机编号映射 必须是 wind_name
+#wind_name_exec:
+

+ 104 - 0
config_files/唐龙三期风电场/second.yaml

@@ -0,0 +1,104 @@
+wind_full_name: 唐龙三期风电场-安徽-大唐
+#  是否是竖表
+is_vertical_table: False
+#  如果是怎配置竖表需要查询的字段
+vertical_table_conf:
+  read_cols:
+    - 数据时间
+    - 资产名称
+    - 采集点名称
+    - 数值
+  index_cols:
+    - 数据时间
+    - 资产名称
+  col_key: 采集点名称
+  col_value: 数值
+#时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
+time_col: time_stamp
+#  风机编号字段
+wind_col: wind_turbine_number
+# 表数据和数据文件的映射关系
+trans_col:
+  # 风机编号
+  wind_turbine_number: 设备名
+  # 时间戳
+  time_stamp: 时间
+  # 有功功率
+  active_power: 有功功率
+  # 风轮转速
+  rotor_speed: 风轮转速
+  # 发电机转速
+  generator_speed: 发电机转速
+  # 风速
+  wind_velocity: 风速
+  # 桨距角1
+  pitch_angle_blade_1: 桨距角1
+  # 桨距角2
+  pitch_angle_blade_2: 桨距角2
+  # 桨距角3
+  pitch_angle_blade_3: 桨距角3
+  # 绝对风向
+  true_wind_direction: 风向
+  # 对风角度
+  yaw_error1: 60秒平均风向角
+  # 机舱位置
+  cabin_position: 机舱位置
+  # 环境温度
+  outside_cabin_temperature: 舱外温度
+  # 有功功率设定值
+  set_value_of_active_power: null
+  # 齿轮箱油温
+  gearbox_oil_temperature: 齿轮箱油温
+  # 主轴承轴承温度
+  main_bearing_temperature: null
+  # 齿轮箱高速轴轴承温度
+  gearbox_high_speed_shaft_bearing_temperature: 齿轮箱高速轴承温度
+  # 齿轮箱中速轴轴承温度
+  gearboxmedium_speed_shaftbearing_temperature: null
+  # 齿轮箱低速轴轴承温度
+  gearbox_low_speed_shaft_bearing_temperature: 齿轮箱低速轴承温度
+  # 发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: 发电机驱动端轴承温度
+  # 发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: null
+  # 发电机绕组1温度
+  generator_winding1_temperature: 发电机定子U温度
+  # 发电机绕组2温度
+  generator_winding2_temperature: 发电机定子V温度
+  # 发电机绕组3温度
+  generator_winding3_temperature: 发电机定子W温度
+  # 风机状态1
+  wind_turbine_status: 风机标准状态
+  # 风机状态2
+  wind_turbine_status2: null
+  # 机舱内温度
+  cabin_temperature: 舱内温度
+  # 湍流强度
+  turbulence_intensity: null
+  # 扭缆角度
+  twisted_cable_angle: 扭缆位置
+  # 机舱前后振动
+  front_back_vibration_of_the_cabin: 机舱振动传感器Y
+  # 机舱左右振动
+  side_to_side_vibration_of_the_cabin: 机舱振动传感器X
+  # 实际力矩
+  actual_torque: null
+  # 给定力矩
+  given_torque: null
+  # 顺时针偏航次数
+  clockwise_yaw_count: null
+  # 逆时针偏航次数
+  counterclockwise_yaw_count: null
+  # 不可利用
+  unusable: null
+  # 功率曲线可用
+  power_curve_available: null
+  # 齿轮箱转速
+  required_gearbox_speed: null
+  # 变频器转速(主控)
+  inverter_speed_master_control: null
+
+# 自定义风机编号映射 必须是 wind_name
+wind_name_exec: "wind_name.replace('唐龙风场',').replace('号风机','')"
+
+

+ 25 - 22
config_files/招远/min.yaml → config_files/密马风电场-未给点检表/second.yaml

@@ -1,7 +1,6 @@
-#  是否是ZIP文件
-is_zip: False
+wind_full_name: 密马风电场-山西-大唐
 #  是否是竖表
-is_vertical_table: False
+is_vertical_table: True
 #  如果是怎配置竖表需要查询的字段
 vertical_table_conf:
   read_cols:
@@ -9,27 +8,31 @@ vertical_table_conf:
     - 资产名称
     - 采集点名称
     - 数值
+  index_cols:
+    - 数据时间
+    - 资产名称
   col_key: 采集点名称
   col_value: 数值
 #时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
-time_col: date_time
+time_col: time_stamp
 #  风机编号字段
-turbine_col: turbine_name
-date_time_col: date_time
+wind_col: wind_turbine_number
 # 表数据和数据文件的映射关系
 trans_col:
+  # 风机编号
+  wind_turbine_number: 资产名称
   # 时间戳
-  time_stamp: null
+  time_stamp: 数据时间
   # 有功功率
-  active_power: null
+  active_power: 发电机有功功率
   # 风轮转速
   rotor_speed: null
   # 发电机转速
   generator_speed: null
   # 风速
-  wind_velocity: null
+  wind_velocity: 风速
   # 桨距角1
-  pitch_angle_blade_1: null
+  pitch_angle_blade_1: 1号桨叶片角度(桨距角)-1分钟采样值
   # 桨距角2
   pitch_angle_blade_2: null
   # 桨距角3
@@ -41,23 +44,23 @@ trans_col:
   # 机舱位置
   cabin_position: null
   # 环境温度
-  outside_cabin_temperature: null
+  outside_cabin_temperature: 舱外温度
   # 有功功率设定值
   set_value_of_active_power: null
   # 齿轮箱油温
-  gearbox_oil_temperature: null
+  gearbox_oil_temperature: 齿轮箱油池温度
   # 主轴承轴承温度
   main_bearing_temperature: null
   # 齿轮箱高速轴轴承温度
-  gearbox_high-speed_shaft__bearing_temperature: null
+  gearbox_high_speed_shaft_bearing_temperature: null
   # 齿轮箱中速轴轴承温度
   gearboxmedium_speed_shaftbearing_temperature: null
   # 齿轮箱低速轴轴承温度
-  gearbox_low-speed_shaft_bearing_temperature: null
+  gearbox_low_speed_shaft_bearing_temperature: 齿轮箱高速轴非驱动端轴承温度
   # 发电机驱动端轴承温度
-  generatordrive_end_bearing_temperature: null
+  generatordrive_end_bearing_temperature: 发电机驱动端轴承温度
   # 发电机非驱动端轴承温度
-  generatornon_drive_end_bearing_temperature: null
+  generatornon_drive_end_bearing_temperature: 发电机非驱动端轴承温度
   # 发电机绕组1温度
   generator_winding1_temperature: null
   # 发电机绕组2温度
@@ -65,21 +68,19 @@ trans_col:
   # 发电机绕组3温度
   generator_winding3_temperature: null
   # 风机状态1
-  wind_turbine_status: null
+  wind_turbine_status: 厂家风机状态
   # 风机状态2
   wind_turbine_status2: null
   # 机舱内温度
-  cabin_temperature: null
+  cabin_temperature: 舱内温度
   # 湍流强度
   turbulence_intensity: null
   # 扭缆角度
   twisted_cable_angle: null
   # 机舱前后振动
-  front_back_vibration_of__the_cabin: null
+  front_back_vibration_of_the_cabin: null
   # 机舱左右振动
-  side_to_side_vibration__0f_the_cabin: null
-  # 风机编号
-  wind_turbine_number: null
+  side_to_side_vibration_of_the_cabin: null
   # 实际力矩
   actual_torque: null
   # 给定力矩
@@ -97,4 +98,6 @@ trans_col:
   # 变频器转速(主控)
   inverter_speed_master_control: null
 
+# 自定义风机编号映射 必须是 wind_name
+wind_name_exec: "wind_name.replace('号风机','')"
 

+ 119 - 0
config_files/招远风电场/minute.yaml

@@ -0,0 +1,119 @@
+wind_full_name: 招远风电场-山东-大唐
+#  是否是竖表
+is_vertical_table: False
+#  如果是怎配置竖表需要查询的字段
+vertical_table_conf:
+  read_cols:
+    - 数据时间
+    - 资产名称
+    - 采集点名称
+    - 数值
+  index_cols:
+    - 数据时间
+    - 资产名称
+  col_key: 采集点名称
+  col_value: 数值
+#时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
+time_col: time_stamp
+#  风机编号字段
+wind_col: wind_turbine_number
+
+# 转化的类名是否需要处理 比如 重庆还装表头自带 风机名称
+trans_col_exec: column.replace('有功功率-平均','当前理论可发最大功率-平均')
+
+# 是否只读取需要的字段,默认True ,如果多文件夹列名不一致,则需要设置为False,不然验证通不过
+need_valid_cols: False
+
+# 表数据和数据文件的映射关系
+trans_col:
+  # 风机编号  $file[0:3]获取文件名的前3位
+  wind_turbine_number: 风机
+  # 时间戳
+  time_stamp: 时间
+  # 有功功率
+  active_power: 当前理论可发最大功率-平均
+  # 风轮转速
+  rotor_speed: 叶轮转速-平均
+  # 发电机转速
+  generator_speed: 发电机转速-平均
+  # 风速
+  wind_velocity: 实际风速-平均
+  # 桨距角1
+  pitch_angle_blade_1: 桨叶角度A-平均
+  # 桨距角2
+  pitch_angle_blade_2: 桨叶角度B-平均
+  # 桨距角3
+  pitch_angle_blade_3: 桨叶角度C-平均
+  # 绝对风向
+  true_wind_direction: 绝对风向-平均
+  # 对风角度
+  yaw_error1: null
+  # 机舱位置
+  cabin_position: 机舱位置-平均
+  # 环境温度
+  outside_cabin_temperature: 机舱外温度-平均
+  # 有功功率设定值
+  set_value_of_active_power: null
+  # 齿轮箱油温
+  gearbox_oil_temperature: 齿轮箱油温度-平均
+  # 主轴承轴承温度   出现了俩  主轴轴承温度-平均、主轴轴承2(后轴承)温度-平均
+  main_bearing_temperature: 主轴轴承温度-平均
+  # 齿轮箱高速轴轴承温度   出现了俩  齿轮箱高速轴轴承(风轮侧)温度-平均、齿轮箱高速轴轴承(电机侧)温度-平均
+  gearbox_high_speed_shaft_bearing_temperature: 齿轮箱高速轴轴承(风轮侧)温度-平均
+  # 齿轮箱中速轴轴承温度
+  gearboxmedium_speed_shaftbearing_temperature: null
+  # 齿轮箱低速轴轴承温度
+  gearbox_low_speed_shaft_bearing_temperature: null
+  # 发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: 发电机前轴承温度-平均
+  # 发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: 发电机后轴承温度-平均
+  # 发电机绕组1温度
+  generator_winding1_temperature: 发电机定子绕组温度-平均
+  # 发电机绕组2温度
+  generator_winding2_temperature: null
+  # 发电机绕组3温度
+  generator_winding3_temperature: null
+  # 风机状态1
+  wind_turbine_status: null
+  # 风机状态2
+  wind_turbine_status2: null
+  # 机舱内温度
+  cabin_temperature: 机舱内温度-平均
+  # 湍流强度
+  turbulence_intensity: null
+  # 扭缆角度
+  twisted_cable_angle: null
+  # 机舱前后振动
+  front_back_vibration_of_the_cabin: 塔筒前后振动-平均
+  # 机舱左右振动
+  side_to_side_vibration_of_the_cabin: 塔筒左右振动-平均
+  # 实际力矩
+  actual_torque: null
+  # 给定力矩
+  given_torque: null
+  # 顺时针偏航次数
+  clockwise_yaw_count: null
+  # 逆时针偏航次数
+  counterclockwise_yaw_count: null
+  # 不可利用
+  unusable: null
+  # 功率曲线可用
+  power_curve_available: null
+  # 齿轮箱转速
+  required_gearbox_speed: null
+  # 变频器转速(主控)
+  inverter_speed_master_control: null
+
+# 自定义风机编号映射 eg: excel名称: 系统编号
+#wind_name_exec:
+#  风机A001: A001
+#  风机A002: A002
+#  风机A003: A003
+#  风机A004: A004
+#  风机A005: A005
+#  风机A006: A006
+#  风机A007: A007
+#  风机A008: A008
+
+

+ 28 - 27
config_files/招远/sec.yaml → config_files/招远风电场/second.yaml

@@ -1,5 +1,4 @@
-#  是否是ZIP文件
-is_zip: False
+wind_full_name: 招远风电场-山东-大唐
 #  是否是竖表
 is_vertical_table: False
 #  如果是怎配置竖表需要查询的字段
@@ -9,76 +8,79 @@ vertical_table_conf:
     - 资产名称
     - 采集点名称
     - 数值
+  index_cols:
+    - 数据时间
+    - 资产名称
   col_key: 采集点名称
   col_value: 数值
 #时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
 time_col: time_stamp
 #  风机编号字段
-turbine_col: wind_turbine_number
+wind_col: wind_turbine_number
 # 表数据和数据文件的映射关系
 trans_col:
+  # 风机编号  $file[0:3]获取文件名的前3位
+  wind_turbine_number: $file[0:3]
   # 时间戳
-  time_stamp: 时间戳
+  time_stamp: Time
   # 有功功率
   active_power: 有功功率
   # 风轮转速
-  rotor_speed: 轮转速
+  rotor_speed: 轮转速
   # 发电机转速
   generator_speed: 发电机转速
   # 风速
-  wind_velocity: 风速
+  wind_velocity: 实际风速
   # 桨距角1
-  pitch_angle_blade_1: 桨距角1
+  pitch_angle_blade_1: 桨叶角度A
   # 桨距角2
-  pitch_angle_blade_2: 桨距角2
+  pitch_angle_blade_2: 桨叶角度B
   # 桨距角3
-  pitch_angle_blade_3: 桨距角3
+  pitch_angle_blade_3: 桨叶角度C
   # 绝对风向
   true_wind_direction: 绝对风向
   # 对风角度
-  yaw_error1: 对风角度
+  yaw_error1: 风向1s
   # 机舱位置
   cabin_position: 机舱位置
   # 环境温度
-  outside_cabin_temperature: null
+  outside_cabin_temperature: 机舱外温度
   # 有功功率设定值
-  set_value_of_active_power: 有功功率设定值
+  set_value_of_active_power: 有功设定反馈
   # 齿轮箱油温
-  gearbox_oil_temperature: 齿轮箱油温
+  gearbox_oil_temperature: 齿轮箱油温
   # 主轴承轴承温度
-  main_bearing_temperature: null
+  main_bearing_temperature: 主轴轴承温度
   # 齿轮箱高速轴轴承温度
-  gearbox_high-speed_shaft__bearing_temperature: null
+  gearbox_high_speed_shaft_bearing_temperature: 齿轮箱高速轴轴承(风轮侧)温度
   # 齿轮箱中速轴轴承温度
   gearboxmedium_speed_shaftbearing_temperature: null
   # 齿轮箱低速轴轴承温度
-  gearbox_low-speed_shaft_bearing_temperature: null
+  gearbox_low_speed_shaft_bearing_temperature: null
   # 发电机驱动端轴承温度
-  generatordrive_end_bearing_temperature: 发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: 发电机轴承温度
   # 发电机非驱动端轴承温度
-  generatornon_drive_end_bearing_temperature: 发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: 发电机轴承温度
   # 发电机绕组1温度
-  generator_winding1_temperature: 发电机绕组1温度
+  generator_winding1_temperature: 发电机定子绕组温度
   # 发电机绕组2温度
   generator_winding2_temperature: null
   # 发电机绕组3温度
   generator_winding3_temperature: null
   # 风机状态1
-  wind_turbine_status: 风机状态1
+  wind_turbine_status: 设备主要状态
   # 风机状态2
   wind_turbine_status2: null
   # 机舱内温度
   cabin_temperature: 机舱内温度
   # 湍流强度
-  turbulence_intensity: null
+  turbulence_intensity: 湍流强度
   # 扭缆角度
-  twisted_cable_angle: 扭缆角度
+  twisted_cable_angle: 电缆扭角
   # 机舱前后振动
-  front_back_vibration_of__the_cabin: 机舱前后振动
+  front_back_vibration_of_the_cabin: 塔筒前后振动
   # 机舱左右振动
-  side_to_side_vibration__0f_the_cabin: 机舱左右振动
-  # 风机编号
-  wind_turbine_number: 风机编号
+  side_to_side_vibration_of_the_cabin: 塔筒左右振动
   # 实际力矩
   actual_torque: 实际力矩
   # 给定力矩
@@ -96,4 +98,3 @@ trans_col:
   # 变频器转速(主控)
   inverter_speed_master_control: 变频器转速(主控)
 
-

+ 109 - 0
config_files/昌平坳风场/minute.yaml

@@ -0,0 +1,109 @@
+wind_full_name: 昌平坳风场-贵州-大唐
+#  是否是竖表
+is_vertical_table: False
+# 是否需要合并列,吉山风电场-重庆海装处理
+merge_columns: False
+#  如果是怎配置竖表需要查询的字段
+vertical_table_conf:
+  read_cols:
+    - 数据时间
+    - 资产名称
+    - 采集点名称
+    - 数值
+  index_cols:
+    - 数据时间
+    - 资产名称
+  col_key: 采集点名称
+  col_value: 数值
+#时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
+time_col: time_stamp
+#  风机编号字段
+wind_col: wind_turbine_number
+# 转化的类名是否需要处理 比如 重庆还装表头自带 风机名称
+#trans_col_exec: column[column.find('_')+1:]
+# 表数据和数据文件的映射关系
+trans_col:
+  # 风机编号 文件夹中是风机编号,压缩包的话会多一层目录
+  # eg: /data/download/xx/风机001/cc.csv $folder[1]
+  # eg: /data/download/xx/风机001/unzip_files/cc.csv $folder[2]
+  # 如果不规则,则配置下方的 wind_name_exec
+  wind_turbine_number: 风机
+  # 时间戳
+  time_stamp: 时间
+  # 有功功率
+  active_power: 平均功率
+  # 风轮转速
+  rotor_speed: null
+  # 发电机转速
+  generator_speed: 平均发电机转速
+  # 风速
+  wind_velocity: 平均风速(m/s)
+  # 桨距角1
+  pitch_angle_blade_1: 平均桨叶1角度
+  # 桨距角2
+  pitch_angle_blade_2: 平均桨叶2角度
+  # 桨距角3
+  pitch_angle_blade_3: 平均桨叶3角度
+  # 绝对风向
+  true_wind_direction: 平均风角度(对北)
+  # 对风角度
+  yaw_error1: 平均风向(机舱)
+  # 机舱位置
+  cabin_position: 平均机舱角度
+  # 环境温度
+  outside_cabin_temperature: null
+  # 有功功率设定值
+  set_value_of_active_power: null
+  # 齿轮箱油温
+  gearbox_oil_temperature: 平均齿轮箱油温
+  # 主轴承轴承温度
+  main_bearing_temperature: 平均主轴轴承a温度
+  # 齿轮箱高速轴轴承温度
+  gearbox_high_speed_shaft_bearing_temperature: 平均齿轮箱非驱动端轴承温度
+  # 齿轮箱中速轴轴承温度
+  gearboxmedium_speed_shaftbearing_temperature: null
+  # 齿轮箱低速轴轴承温度
+  gearbox_low_speed_shaft_bearing_temperature: 平均齿轮箱驱动端轴承温度
+  # 发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: 平均发电机驱动端轴承温度
+  # 发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: 平均发电机非驱动端轴承温度
+  # 发电机绕组1温度
+  generator_winding1_temperature: 平均发电机u相温度
+  # 发电机绕组2温度
+  generator_winding2_temperature: 平均发电机v相温度
+  # 发电机绕组3温度
+  generator_winding3_temperature: 平均发电机w相温度
+  # 风机状态1
+  wind_turbine_status: null
+  # 风机状态2
+  wind_turbine_status2: null
+  # 机舱内温度
+  cabin_temperature: 平均机舱温度
+  # 湍流强度
+  turbulence_intensity: null
+  # 扭缆角度
+  twisted_cable_angle: 平均扭缆角度
+  # 机舱前后振动
+  front_back_vibration_of_the_cabin: 最大x方向振动值
+  # 机舱左右振动
+  side_to_side_vibration_of_the_cabin: 最大y方向振动值
+  # 实际力矩
+  actual_torque: null
+  # 给定力矩
+  given_torque: 平均转矩设定值
+  # 顺时针偏航次数
+  clockwise_yaw_count: null
+  # 逆时针偏航次数
+  counterclockwise_yaw_count: null
+  # 不可利用
+  unusable: null
+  # 功率曲线可用
+  power_curve_available: null
+  # 齿轮箱转速
+  required_gearbox_speed: null
+  # 变频器转速(主控)
+  inverter_speed_master_control: null
+
+# 自定义风机编号映射 必须是 wind_name
+wind_name_exec: "win_name.replace('#','')"

+ 110 - 0
config_files/昌平坳风场/second.yaml

@@ -0,0 +1,110 @@
+wind_full_name: 昌平坳风场-贵州-大唐
+#  是否是竖表
+is_vertical_table: False
+# 是否需要合并列,吉山风电场-重庆海装处理
+merge_columns: False
+#  如果是怎配置竖表需要查询的字段
+vertical_table_conf:
+  read_cols:
+    - 数据时间
+    - 资产名称
+    - 采集点名称
+    - 数值
+  index_cols:
+    - 数据时间
+    - 资产名称
+  col_key: 采集点名称
+  col_value: 数值
+#时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
+time_col: time_stamp
+#  风机编号字段
+wind_col: wind_turbine_number
+# 转化的类名是否需要处理 比如 重庆还装表头自带 风机名称
+#trans_col_exec: column[column.find('_')+1:]
+# 表数据和数据文件的映射关系
+trans_col:
+  # 风机编号 文件夹中是风机编号,压缩包的话会多一层目录
+  # eg: /data/download/xx/风机001/cc.csv $folder[1]
+  # eg: /data/download/xx/风机001/unzip_files/cc.csv $folder[2]
+  # 如果不规则,则配置下方的 wind_name_exec
+  wind_turbine_number: 风机编号
+  # 时间戳
+  time_stamp: 时间
+  # 有功功率
+  active_power: iGenPower
+  # 风轮转速
+  rotor_speed: iRotorSpeedPDM
+  # 发电机转速
+  generator_speed: iGenSpeed
+  # 风速
+  wind_velocity: iWindSpeed_real
+  # 桨距角1
+  pitch_angle_blade_1: iPitchAngle1
+  # 桨距角2
+  pitch_angle_blade_2: iPitchAngle2
+  # 桨距角3
+  pitch_angle_blade_3: iPitchAngle3
+  # 绝对风向
+  true_wind_direction: iwindDirection
+  # 对风角度
+  yaw_error1: iVaneDiiection
+  # 机舱位置
+  cabin_position: iNacellePositionLtd
+  # 环境温度
+  outside_cabin_temperature: iTempOutdoor_1sec
+  # 有功功率设定值
+  set_value_of_active_power: iActivePoweiSetPointValue
+  # 齿轮箱油温
+  gearbox_oil_temperature: iTemp1GearOil_1sec
+  # 主轴承轴承温度
+  main_bearing_temperature: iTempRotorBearA_1sec
+  # 齿轮箱高速轴轴承温度
+  gearbox_high_speed_shaft_bearing_temperature: iTempGearBearNDE_1sec
+  # 齿轮箱中速轴轴承温度
+  gearboxmedium_speed_shaftbearing_temperature: null
+  # 齿轮箱低速轴轴承温度
+  gearbox_low_speed_shaft_bearing_temperature: iTempGearBearDE_1sec
+  # 发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: iTempGenBearDE_1sec
+  # 发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: iTempGenBearNDE_1sec
+  # 发电机绕组1温度
+  generator_winding1_temperature: iTempGenStatorU_1sec
+  # 发电机绕组2温度
+  generator_winding2_temperature: iTempGenStatorV_1sec
+  # 发电机绕组3温度
+  generator_winding3_temperature: iTempGenStatorW_1sec
+  # 风机状态1
+  wind_turbine_status: iTurbineOperationMode
+  # 风机状态2
+  wind_turbine_status2: iPowerLimit_Flag
+  # 机舱内温度
+  cabin_temperature: iTempNacelle_1sec
+  # 湍流强度
+  turbulence_intensity: null
+  # 扭缆角度
+  twisted_cable_angle: null
+  # 机舱前后振动
+  front_back_vibration_of_the_cabin: iVibrationZ
+  # 机舱左右振动
+  side_to_side_vibration_of_the_cabin: iVibrationY
+  # 实际力矩
+  actual_torque: null
+  # 给定力矩
+  given_torque: iCAN_GeneratorTorqueSetPointValue
+  # 顺时针偏航次数
+  clockwise_yaw_count: null
+  # 逆时针偏航次数
+  counterclockwise_yaw_count: null
+  # 不可利用
+  unusable: null
+  # 功率曲线可用
+  power_curve_available: null
+  # 齿轮箱转速
+  required_gearbox_speed: null
+  # 变频器转速(主控)
+  inverter_speed_master_control: null
+
+# 自定义风机编号映射 必须是 wind_name
+wind_name_exec: "win_name.replace('20049','')"
+

+ 102 - 0
config_files/昌西一风电场/minute.yaml

@@ -0,0 +1,102 @@
+wind_full_name: 昌西一风电场-甘肃-大唐
+#  是否是竖表
+is_vertical_table: False
+#  如果是怎配置竖表需要查询的字段
+vertical_table_conf:
+  read_cols:
+    - 数据时间
+    - 资产名称
+    - 采集点名称
+    - 数值
+  index_cols:
+    - 数据时间
+    - 资产名称
+  col_key: 采集点名称
+  col_value: 数值
+#时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
+time_col: time_stamp
+#  风机编号字段
+wind_col: wind_turbine_number
+# 表数据和数据文件的映射关系
+trans_col:
+  #风机编号
+  wind_turbine_number: $file
+  #时间戳
+  time_stamp: 系统时间
+  #有功功率
+  active_power: 有功功率[kw]
+  #风轮转速
+  rotor_speed: null
+  #发电机转速
+  generator_speed: 发电机平均转速[rpm]
+  #风速
+  wind_velocity: 1秒平均风速[m/s]
+  #桨距角1
+  pitch_angle_blade_1: 叶片1角度[°]
+  #桨距角2
+  pitch_angle_blade_2: 叶片2角度[°]
+  #桨距角3
+  pitch_angle_blade_3: 叶片3角度[°]
+  #机舱位置
+  cabin_position: 平均机舱位置[°]
+  #绝对风向
+  true_wind_direction: 风向[°]
+  #对风角度
+  yaw_error1: null
+  #有功功率设定值
+  set_value_of_active_power: null
+  #齿轮箱油温
+  gearbox_oil_temperature: 齿轮箱油温[℃]
+  #发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: 驱动方向发电机轴承温度[℃]
+  #发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: 非驱动方向发电机轴承温度[℃]
+  #风机状态1
+  wind_turbine_status: null
+  #风机状态2
+  wind_turbine_status2: null
+  #机舱内温度
+  cabin_temperature: 机舱内温度[℃]
+  #扭缆角度
+  twisted_cable_angle: null
+  #机舱前后振动
+  front_back_vibration_of_the_cabin: null
+  #机舱左右振动
+  side_to_side_vibration_of_the_cabin: null
+  #实际力矩
+  actual_torque: null
+  #给定力矩
+  given_torque: null
+  #顺时针偏航次数
+  clockwise_yaw_count: null
+  #逆时针偏航次数
+  counterclockwise_yaw_count: null
+  #不可利用
+  unusable: null
+  #功率曲线可用
+  power_curve_available: null
+  #齿轮箱转速
+  required_gearbox_speed: null
+  #变频器转速(主控)
+  inverter_speed_master_control: null
+  #环境温度
+  outside_cabin_temperature: 环境温度[℃]
+  #主轴承轴承温度
+  main_bearing_temperature: null
+  #齿轮箱高速轴轴承温度
+  gearbox_high_speed_shaft_bearing_temperature: 齿轮箱轴承温度[℃]
+  #齿轮箱中速轴轴承温度
+  gearboxmedium_speed_shaftbearing_temperature: null
+  #齿轮箱低速轴轴承温度
+  gearbox_low_speed_shaft_bearing_temperature: null
+  #发电机绕组1温度
+  generator_winding1_temperature: null
+  #发电机绕组2温度
+  generator_winding2_temperature: null
+  #发电机绕组3温度
+  generator_winding3_temperature: null
+  #湍流强度
+  turbulence_intensity: null
+
+# 自定义风机编号映射 必须是 wind_name
+wind_name_exec: "wind_name.replace('昌马','').replace('风机','')"

+ 103 - 0
config_files/昌西一风电场/second.yaml

@@ -0,0 +1,103 @@
+wind_full_name: 昌西一风电场-甘肃-大唐
+#  是否是竖表
+is_vertical_table: False
+#  如果是怎配置竖表需要查询的字段
+vertical_table_conf:
+  read_cols:
+    - 数据时间
+    - 资产名称
+    - 采集点名称
+    - 数值
+  index_cols:
+    - 数据时间
+    - 资产名称
+  col_key: 采集点名称
+  col_value: 数值
+#时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
+time_col: time_stamp
+#  风机编号字段
+wind_col: wind_turbine_number
+# 表数据和数据文件的映射关系
+trans_col:
+  # 风机编号
+  wind_turbine_number: 风机
+  # 时间戳
+  time_stamp: 时间
+  # 有功功率
+  active_power: 有功功率
+  # 风轮转速
+  rotor_speed: 叶轮转速
+  # 发电机转速
+  generator_speed: 发电机转速
+  # 风速
+  wind_velocity: 环境风速
+  # 桨距角1
+  pitch_angle_blade_1: 叶片1变桨角度
+  # 桨距角2
+  pitch_angle_blade_2: 叶片2变桨角度
+  # 桨距角3
+  pitch_angle_blade_3: 叶片3变桨角度
+  # 绝对风向
+  true_wind_direction: 环境风向
+  # 对风角度
+  yaw_error1: 对风角度
+  # 机舱位置
+  cabin_position: 机舱位置
+  # 环境温度
+  outside_cabin_temperature: 环境温度
+  # 有功功率设定值
+  set_value_of_active_power: 有功设定反馈值
+  # 齿轮箱油温
+  gearbox_oil_temperature: 齿轮箱油温
+  # 主轴承轴承温度
+  main_bearing_temperature: null
+  # 齿轮箱高速轴轴承温度
+  gearbox_high_speed_shaft_bearing_temperature: 齿轮箱高速轴承温度
+  # 齿轮箱中速轴轴承温度
+  gearboxmedium_speed_shaftbearing_temperature: null
+  # 齿轮箱低速轴轴承温度
+  gearbox_low_speed_shaft_bearing_temperature: null
+  # 发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: 发电机驱动侧轴承温度
+  # 发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: 发电机非驱动侧轴承温度
+  # 发电机绕组1温度
+  generator_winding1_temperature: 发电机1相温度
+  # 发电机绕组2温度
+  generator_winding2_temperature: 发电机2相温度
+  # 发电机绕组3温度
+  generator_winding3_temperature: 发电机3相温度
+  # 风机状态1
+  wind_turbine_status: 风机子状态
+  # 风机状态2
+  wind_turbine_status2: 转集团标准状态
+  # 机舱内温度
+  cabin_temperature: 机舱温度
+  # 湍流强度
+  turbulence_intensity: 湍流强度
+  # 扭缆角度
+  twisted_cable_angle: null
+  # 机舱前后振动
+  front_back_vibration_of_the_cabin: Y方向振动幅度
+  # 机舱左右振动
+  side_to_side_vibration_of_the_cabin: X方向振动幅度
+  # 实际力矩
+  actual_torque: null
+  # 给定力矩
+  given_torque: null
+  # 顺时针偏航次数
+  clockwise_yaw_count: null
+  # 逆时针偏航次数
+  counterclockwise_yaw_count: null
+  # 不可利用
+  unusable: null
+  # 功率曲线可用
+  power_curve_available: null
+  # 齿轮箱转速
+  required_gearbox_speed: null
+  # 变频器转速(主控)
+  inverter_speed_master_control: null
+
+# 自定义风机编号映射 必须是 wind_name
+wind_name_exec: "wind_name.replace('昌马','').replace('风机','')"
+

+ 103 - 0
config_files/虹梯官风电场/minute.yaml

@@ -0,0 +1,103 @@
+wind_full_name: 虹梯官风电场-山西-大唐
+#  是否是竖表
+is_vertical_table: True
+#  如果是怎配置竖表需要查询的字段
+vertical_table_conf:
+  read_cols:
+    - 数据时间
+    - 资产名称
+    - 采集点名称
+    - 数值
+  index_cols:
+    - 数据时间
+    - 资产名称
+  col_key: 采集点名称
+  col_value: 数值
+#时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
+time_col: time_stamp
+#  风机编号字段
+wind_col: wind_turbine_number
+# 表数据和数据文件的映射关系
+trans_col:
+  #风机编号
+  wind_turbine_number: 资产名称
+  #时间戳
+  time_stamp: 时间
+  #有功功率
+  active_power: 电网有功功率
+  #风轮转速
+  rotor_speed: 主轴转速
+  #发电机转速
+  generator_speed: 发电机转速
+  #风速
+  wind_velocity: 风速
+  #桨距角1
+  pitch_angle_blade_1: 桨叶1电机位置
+  #桨距角2
+  pitch_angle_blade_2: 桨叶2电机位置
+  #桨距角3
+  pitch_angle_blade_3: 桨叶3电机位置
+  #机舱位置
+  cabin_position: 机舱偏北位置
+  #绝对风向
+  true_wind_direction: 30s绝对风向
+  #对风角度
+  yaw_error1: 30s机舱风向夹角
+  #有功功率设定值
+  set_value_of_active_power: 当前有功设定值
+  #齿轮箱油温
+  gearbox_oil_temperature: 齿轮油油温
+  #发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: 发电机驱动端温度
+  #发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: 发电机非驱动端温度
+  #风机状态1
+  wind_turbine_status: 风机主状态
+  #风机状态2
+  wind_turbine_status2: null
+  #机舱内温度
+  cabin_temperature: 机舱温度
+  #扭缆角度
+  twisted_cable_angle: null
+  #机舱前后振动
+  front_back_vibration_of_the_cabin: null
+  #机舱左右振动
+  side_to_side_vibration_of_the_cabin: null
+  #实际力矩
+  actual_torque: 变流器实际转矩(计算)
+  #给定力矩
+  given_torque: 主控转矩设定值
+  #顺时针偏航次数
+  clockwise_yaw_count: null
+  #逆时针偏航次数
+  counterclockwise_yaw_count: null
+  #不可利用
+  unusable: null
+  #功率曲线可用
+  power_curve_available: null
+  #齿轮箱转速
+  required_gearbox_speed: null
+  #变频器转速(主控)
+  inverter_speed_master_control: null
+  #环境温度
+  outside_cabin_temperature: 环境温度
+  #主轴承轴承温度
+  main_bearing_temperature: 主轴温度
+  #齿轮箱高速轴轴承温度
+  gearbox_high_speed_shaft_bearing_temperature: 齿轮箱高速轴温度
+  #齿轮箱中速轴轴承温度
+  gearboxmedium_speed_shaftbearing_temperature: null
+  #齿轮箱低速轴轴承温度
+  gearbox_low_speed_shaft_bearing_temperature: 齿轮箱低速轴温度
+  #发电机绕组1温度
+  generator_winding1_temperature: 发电机线圈1温度
+  #发电机绕组2温度
+  generator_winding2_temperature: 发电机线圈2温度
+  #发电机绕组3温度
+  generator_winding3_temperature: 发电机线圈3温度
+  #湍流强度
+  turbulence_intensity: null
+
+# 自定义风机编号映射 必须是 wind_name
+wind_name_exec: "'C'+wind_name.replace('号风机','').zfill(3)"
+

+ 102 - 0
config_files/虹梯官风电场/second.yaml

@@ -0,0 +1,102 @@
+wind_full_name: 虹梯官风电场-山西-大唐
+#  是否是竖表
+is_vertical_table: True
+#  如果是怎配置竖表需要查询的字段
+vertical_table_conf:
+  read_cols:
+    - 数据时间
+    - 资产名称
+    - 采集点名称
+    - 数值
+  index_cols:
+    - 数据时间
+    - 资产名称
+  col_key: 采集点名称
+  col_value: 数值
+#时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
+time_col: time_stamp
+#  风机编号字段
+wind_col: wind_turbine_number
+# 表数据和数据文件的映射关系
+trans_col:
+  # 风机编号
+  wind_turbine_number: 资产名称
+  # 时间戳
+  time_stamp: 数据时间
+  # 有功功率
+  active_power: 发电机有功功率
+  # 风轮转速
+  rotor_speed: 风轮转速
+  # 发电机转速
+  generator_speed: 发电机转速
+  # 风速
+  wind_velocity: 风速
+  # 桨距角1
+  pitch_angle_blade_1: 桨叶片角度1
+  # 桨距角2
+  pitch_angle_blade_2: 桨叶片角度2
+  # 桨距角3
+  pitch_angle_blade_3: 桨叶片角度3
+  # 绝对风向
+  true_wind_direction: 风向
+  # 对风角度
+  yaw_error1: 机舱与风向夹角
+  # 机舱位置
+  cabin_position: 机舱角度(位置)
+  # 环境温度
+  outside_cabin_temperature: 舱外温度
+  # 有功功率设定值
+  set_value_of_active_power: null
+  # 齿轮箱油温
+  gearbox_oil_temperature: 齿轮箱油池温度
+  # 主轴承轴承温度
+  main_bearing_temperature: null
+  # 齿轮箱高速轴轴承温度
+  gearbox_high_speed_shaft_bearing_temperature: 齿轮箱高速轴非驱动端轴承温度
+  # 齿轮箱中速轴轴承温度
+  gearboxmedium_speed_shaftbearing_temperature: null
+  # 齿轮箱低速轴轴承温度
+  gearbox_low_speed_shaft_bearing_temperature: null
+  # 发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: 发电机驱动端轴承温度
+  # 发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: 发电机非驱动端轴承温度
+  # 发电机绕组1温度
+  generator_winding1_temperature: null
+  # 发电机绕组2温度
+  generator_winding2_temperature: null
+  # 发电机绕组3温度
+  generator_winding3_temperature: null
+  # 风机状态1
+  wind_turbine_status: 厂家风机状态
+  # 风机状态2
+  wind_turbine_status2: null
+  # 机舱内温度
+  cabin_temperature: 舱内温度
+  # 湍流强度
+  turbulence_intensity: null
+  # 扭缆角度
+  twisted_cable_angle: null
+  # 机舱前后振动
+  front_back_vibration_of_the_cabin: null
+  # 机舱左右振动
+  side_to_side_vibration_of_the_cabin: null
+  # 实际力矩
+  actual_torque: null
+  # 给定力矩
+  given_torque: null
+  # 顺时针偏航次数
+  clockwise_yaw_count: null
+  # 逆时针偏航次数
+  counterclockwise_yaw_count: null
+  # 不可利用
+  unusable: null
+  # 功率曲线可用
+  power_curve_available: null
+  # 齿轮箱转速
+  required_gearbox_speed: null
+  # 变频器转速(主控)
+  inverter_speed_master_control: null
+
+# 自定义风机编号映射 必须是 wind_name
+wind_name_exec: "'C'+wind_name.replace('号风机','').zfill(3)"

+ 106 - 0
config_files/长清风电场/second.yaml

@@ -0,0 +1,106 @@
+wind_full_name: 长清风电场-山东-国电
+#  是否是竖表
+is_vertical_table: False
+#  如果是怎配置竖表需要查询的字段
+vertical_table_conf:
+  read_cols:
+    - 数据时间
+    - 资产名称
+    - 采集点名称
+    - 数值
+  index_cols:
+    - 数据时间
+    - 资产名称
+  col_key: 采集点名称
+  col_value: 数值
+#时间字段,转化年月日 以及格式化为 yyyy-MM-dd HH:mm:ss使用
+time_col: time_stamp
+#  风机编号字段
+wind_col: wind_turbine_number
+# 表数据和数据文件的映射关系
+trans_col:
+  # 风机编号
+  wind_turbine_number: 风机名称
+  # 时间戳
+  time_stamp: 时间
+  # 有功功率
+  active_power: 有功功率(kW)
+  # 风轮转速
+  rotor_speed: 风轮转速(rpm)
+  # 发电机转速
+  generator_speed: 发电机转速(rpm)
+  # 风速
+  wind_velocity: 30秒平均风速(m/s)
+  # 桨距角1
+  pitch_angle_blade_1: 桨距角1(°)
+  # 桨距角2
+  pitch_angle_blade_2: 桨距角2(°)
+  # 桨距角3
+  pitch_angle_blade_3: 桨距角3(°)
+  # 绝对风向
+  true_wind_direction: null
+  # 对风角度
+  yaw_error1: 风向角(°)
+  # 机舱位置
+  cabin_position: 机舱位置(°)
+  # 环境温度
+  outside_cabin_temperature: 舱外温度(℃)
+  # 有功功率设定值
+  set_value_of_active_power: 有功设定值
+  # 齿轮箱油温
+  gearbox_oil_temperature: null
+  # 主轴承轴承温度
+  main_bearing_temperature: null
+  # 齿轮箱高速轴轴承温度
+  gearbox_high_speed_shaft_bearing_temperature: 高速轴承温度(℃)
+  # 齿轮箱中速轴轴承温度
+  gearboxmedium_speed_shaftbearing_temperature: null
+  # 齿轮箱低速轴轴承温度
+  gearbox_low_speed_shaft_bearing_temperature: 低速轴承温度(℃)
+  # 发电机驱动端轴承温度
+  generatordrive_end_bearing_temperature: 驱动前轴承温度(℃)
+  # 发电机非驱动端轴承温度
+  generatornon_drive_end_bearing_temperature: 自由后轴承温度(℃)
+  # 发电机绕组1温度
+  generator_winding1_temperature: 发电机定子U温度(℃)
+  # 发电机绕组2温度
+  generator_winding2_temperature: 发电机定子V温度(℃)
+  # 发电机绕组3温度
+  generator_winding3_temperature: 发电机定子W温度(℃)
+  # 风机状态1
+  wind_turbine_status: null
+  # 风机状态2
+  wind_turbine_status2: 限功率运行
+  # 机舱内温度
+  cabin_temperature: null
+  # 湍流强度
+  turbulence_intensity: null
+  # 扭缆角度
+  twisted_cable_angle: 扭缆位置(°)
+  # 机舱前后振动
+  front_back_vibration_of_the_cabin: 机舱振动传感器Y(g)
+  # 机舱左右振动
+  side_to_side_vibration_of_the_cabin: 机舱振动传感器X(g)
+  # 实际力矩
+  actual_torque: null
+  # 给定力矩
+  given_torque: null
+  # 顺时针偏航次数
+  clockwise_yaw_count: null
+  # 逆时针偏航次数
+  counterclockwise_yaw_count: null
+  # 不可利用
+  unusable: null
+  # 功率曲线可用
+  power_curve_available: null
+  # 齿轮箱转速
+  required_gearbox_speed: null
+  # 变频器转速(主控)
+  inverter_speed_master_control: null
+  # 偏航激活状态 (param1-param10 预留字段,每个电厂自己定于对应的含义)
+  param1: 偏航激活状态
+
+# 自定义风机编号映射 必须是 wind_name
+wind_name_exec: "'A'+wind_name.replace('号风机','').zfill(3)"
+
+

+ 0 - 37
dianchang/changqing/main.py

@@ -1,37 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Time    : 2024/5/16
-# @Author  : 魏志亮
-from base.TranseParam import TranseParam
-from base.WindFarms import WindFarms
-from utils.log.trans_log import logger
-
-wind = WindFarms('测试', 'ceshi', '123123', r'd://transdata_1')
-
-try:
-    param = TranseParam(read_type='min', read_path=r'D:\transdata\huachuang\min', read_cols=['风机编号', '时间', '风机主状态',
-                                                                                             '发电机转速', '发电机转速限定值',
-                                                                                             '变桨速度给定值',
-                                                                                             '桨叶角度给定值', '桨叶1电机位置',
-                                                                                             '桨叶2电机位置', '桨叶3电机位置'],
-                        cols_tran={},
-                        time_col='时间', wind_col='风机编号')
-
-    wind.set_trans_param(param)
-
-    wind.run()
-except Exception as e:
-    logger.exception(e)
-
-try:
-    param = TranseParam(read_type='sec', read_path=r'D:\transdata\zhaoyuan\sec', read_cols=['风机编号', '时间', '风机主状态',
-                                                                                            '发电机转速', '发电机转速限定值',
-                                                                                            '变桨速度给定值',
-                                                                                            '桨叶角度给定值', '桨叶1电机位置',
-                                                                                            '桨叶2电机位置', '桨叶3电机位置'],
-                        cols_tran={},
-                        time_col='时间', wind_col='风机编号')
-
-    wind.set_trans_param(param)
-    wind.run()
-except Exception as e:
-    logger.exception(e)

+ 0 - 35
dianchang/zhaoyuan/main.py

@@ -1,35 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Time    : 2024/5/15
-# @Author  : 魏志亮
-import datetime
-import os
-import sys
-
-# path = os.path.abspath(__file__)
-# for i in range(3):
-#     path = os.path.dirname(path)
-sys.path.append("/home/wzl/project/energy-data-trans")
-print('\n'.join(sys.path))
-
-from base.WindFarms import WindFarms
-from base.TranseParam import TranseParam
-from utils.conf.read_conf import read_yaml_file
-
-if __name__ == '__main__':
-    batch_no = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
-    name = '招远'
-    type = 'sec'
-
-    yaml_datas = read_yaml_file(name, type)
-
-    cols_trans_all = yaml_datas['trans_col']
-    read_cols = [k for k, v in cols_trans_all.items() if k]
-
-    zhaoyuan = WindFarms(name, batch_no=batch_no, save_path=r"/home/wzl/trans_data")
-
-    params = TranseParam(read_type="sec", read_path=r"/home/wzl/test_data/zhaoyuan/sec",
-                         read_cols=read_cols,
-                         cols_tran=cols_trans_all)
-
-    zhaoyuan.set_trans_param(params)
-    files = zhaoyuan.run()

+ 11 - 0
utils/conf/generate_trans_cols.py

@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/31
+# @Author  : 魏志亮
+import numpy as np
+import pandas as pd
+
+df = pd.read_excel(r"C:\Users\Administrator\Desktop\点检表配置.xls", sheet_name="昌西一风电场-甘肃-大唐")
+
+df.replace(np.nan, 'null', inplace=True)
+for cn, en, sec, min in zip(df['中文名'], df['英文名'], df['秒级映射'], df['分钟级映射']):
+    print(f"\t#{cn}\n\t{en}: {min}")

+ 23 - 12
utils/conf/read_conf.py

@@ -2,7 +2,6 @@
 # @Time    : 2024/5/17
 # @Author  : 魏志亮
 import os.path
-from pprint import pprint
 
 import yaml
 
@@ -15,16 +14,28 @@ def read_yaml_file(filename, type):
         return yaml.safe_load(f)
 
 
-if __name__ == '__main__':
-    data = read_yaml_file("招远", "sec")
-    ss_str = """
-    时间戳,有功功率,风轮转速,发电机转速,风速,风速1,风速2,风速3,风速4,风速5,风速6,风速7,桨距角1,桨距角2,桨距角3,桨距角4,桨距角5,桨距角6,桨距角7,机舱位置,绝对风向,对风角度,风向10s,风向30s,风向标1风向,风向标2风向,环 境温度,有功功率设定值,齿轮箱油温,主轴承轴承温度1,主轴承轴承温度2,齿轮箱高速轴轴承温度1,齿轮箱高速轴轴承温度2,发电机驱动端轴承温度,发电机非驱动端轴承温度,发电机绕组1温度,风机状态1,机舱内温度,湍流强度,扭缆角度, 机舱前后振动,机舱左右振动,实际力矩,给定力矩,顺时针偏航次数,逆时针偏航次数,不可利用,功率曲线可用,齿轮箱转速,变频器转速(主控),年,月,日,风机编号
-    """
-
-    ss_datas = [str(i).strip() for i in ss_str.split(",") if i]
+def read_param_from_yaml_file(yaml_file, type, default_value=None):
+    if type in yaml_file:
+        return yaml_file[type]
+    else:
+        return default_value
 
-    exist_datas = [i for i in data['trans_col'].values() if i]
 
-    print(set(exist_datas) - set(ss_datas))
-
-    pprint(data)
+if __name__ == '__main__':
+    data = read_yaml_file("昌西一风电场", "second")
+    trans_col = data['trans_col']
+    result_col = ['wind_turbine_number', 'time_stamp', 'active_power', 'rotor_speed', 'generator_speed',
+                  'wind_velocity', 'pitch_angle_blade_1', 'pitch_angle_blade_2', 'pitch_angle_blade_3',
+                  'cabin_position', 'true_wind_direction', 'yaw_error1', 'set_value_of_active_power',
+                  'gearbox_oil_temperature', 'generatordrive_end_bearing_temperature',
+                  'generatornon_drive_end_bearing_temperature', 'wind_turbine_status', 'wind_turbine_status2',
+                  'cabin_temperature', 'twisted_cable_angle', 'front_back_vibration_of_the_cabin',
+                  'side_to_side_vibration_of_the_cabin', 'actual_torque', 'given_torque', 'clockwise_yaw_count',
+                  'counterclockwise_yaw_count', 'unusable', 'power_curve_available', 'required_gearbox_speed',
+                  'inverter_speed_master_control', 'outside_cabin_temperature', 'main_bearing_temperature',
+                  'gearbox_high_speed_shaft_bearing_temperature', 'gearboxmedium_speed_shaftbearing_temperature',
+                  'gearbox_low_speed_shaft_bearing_temperature', 'generator_winding1_temperature',
+                  'generator_winding2_temperature', 'generator_winding3_temperature', 'turbulence_intensity']
+
+    for col in result_col:
+        print((trans_col[col] if trans_col[col] is not None else 'null'))

+ 218 - 73
utils/db/trans_mysql.py

@@ -1,82 +1,227 @@
 # -*- coding: utf-8 -*-
 # @Time    : 2024/5/15
 # @Author  : 魏志亮
+import datetime
 
+import numpy as np
+import pandas as pd
 # 建立数据库连接
 import pymysql
-from sqlalchemy import create_engine
+from pandas import DataFrame
+from sqlalchemy import create_engine, Engine, text
 
 from utils.log.trans_log import trans_print
-
-user = 'admin'
-password = 'admin123456'
-host = '192.168.50.233'
-port = 3306
-database = 'energy_data'
-plt_conn = pymysql.connect(
-    host=host,
-    port=3306,
-    user=user,
-    password=password,
-    database=database,
-    charset='utf8mb4'
-)
-
-engine = create_engine(f'mysql+pymysql://{user}:{password}@{host}:{port}/{database}', echo=True)
-
-susscess_sql = "update batch_status set status = 'success' where batch_no = '{batch_no}' and trans_type = '{trans_type}'";
-
-error_sql = "update batch_status set status = 'error',message='{message}' where batch_no = '{batch_no}'  and trans_type = '{trans_type}'"
-
-
-# def __query(sql):
-#     trans_print('开始执行SQL:',sql)
-#     plt_conn.ping(reconnect=True)
-#     with plt_conn.cursor() as cursor:
-#         df = pd.read_sql(sql, cursor)
-#     return df
-#
-#
-# def __ddl_sql(sql):
-#     trans_print('开始执行SQL:',sql)
-#     plt_conn.ping(reconnect=True)
-#     with plt_conn.cursor() as cursor:
-#         cursor.execute(sql)
-#     plt_conn.commit()
-
-
-def update_transe_status(batch_no, trans_type, status, message):
-    exec_sql = susscess_sql if status == 'success' else error_sql
-    exec_sql = exec_sql.format(batch_no=batch_no, status=status, message=message, trans_type=trans_type)
-    #
-    # plt_conn.ping(reconnect=True)
-    # with plt_conn.cursor() as cursor:
-    #     cursor.execute(exec_sql)
-    # plt_conn.commit()
-    trans_print(exec_sql)
-
-
-# def insert_data(batch_no, type, status, message):
-#     exec_sql = insert_sql.format(batch_no=batch_no, type=type, status=status, message=message)
-#     plt_conn.ping(reconnect=True)
-#     with plt_conn.cursor() as cursor:
-#         cursor.execute(exec_sql)
-#
-#     plt_conn.commit()
-
-
-def create_table(batch_no, date_list=list(), fengji_list=list()):
-    pass
-
-
+from utils.trans_methods import read_file_to_df
+
+plt_user = 'admin'
+plt_password = 'admin123456'
+plt_host = '192.168.50.233'
+plt_port = 3306
+plt_database = 'energy'
+
+plt_connect = pymysql.connect(host=plt_host,
+                              user=plt_user,
+                              password=plt_password,
+                              db=plt_database,
+                              charset='utf8mb4',
+                              cursorclass=pymysql.cursors.DictCursor)
+
+trans_user = 'admin'
+trans_password = 'admin123456'
+trans_host = '192.168.50.233'
+trans_port = 3306
+trans_database = 'energy_data'
+
+trans_connect = pymysql.connect(host=trans_host,
+                                user=trans_user,
+                                password=trans_password,
+                                db=trans_database,
+                                charset='utf8mb4',
+                                cursorclass=pymysql.cursors.DictCursor)
+
+susscess_sql = "update data_transfer set transfer_state = 1 where batch_code = '{batch_no}' and transfer_type = '{trans_type}'";
+
+error_sql = "update data_transfer set transfer_state = 2 ,err_info='{message}' where batch_code = '{batch_no}'  and transfer_type = '{trans_type}'"
+
+running_sql = "update data_transfer set transfer_state = 0,engine_count = {wind_count}  where batch_code = '{batch_no}'  and transfer_type = '{trans_type}'"
+
+
+def __query(connect: pymysql.Connection, sql):
+    trans_print('开始执行SQL:', sql)
+    connect.ping(reconnect=True)
+    with connect.cursor() as cursor:
+        cursor.execute(sql)
+        datas = cursor.fetchall()
+        df = pd.DataFrame(datas)
+    connect.close()
+    return df
+
+
+def __ddl_sql(connect: pymysql.Connection, sql):
+    trans_print('开始执行SQL:', sql)
+    connect.ping(reconnect=True)
+    with connect.cursor() as cursor:
+        cursor.execute(sql)
+    connect.commit()
+    connect.close()
+
+
+def update_trans_status(batch_no, trans_type, status, message="", wind_count=0):
+    if status == 'success':
+        exec_sql = susscess_sql
+    elif status == 'running':
+        exec_sql = running_sql
+    else:
+        exec_sql = error_sql
+
+    exec_sql = exec_sql.format(batch_no=batch_no, status=status, message=message, trans_type=trans_type,
+                               wind_count=wind_count)
+    __ddl_sql(plt_connect, exec_sql)
+
+
+# 获取执行的数据
 def get_exec_data():
-    query_running_sql = "selecgt 1 from table where status = 'running"
-    query_next_exdc_sql = "selecgt 1 from table where status = 'waiting' order by id "
-    trans_print(query_next_exdc_sql)
-    # df = __query(query_running_sql)
-    # if df.empty:
-    #     df = __query(query_next_exdc_sql)
-    #     if df.empty:
-    #         return None
-    #     else:
-    #         return df.iloc[0]
+    query_running_sql = "select 1 from data_transfer where transfer_state = 0"
+    query_next_exdc_sql = """
+    SELECT
+        t.*,a.field_name
+    FROM
+        data_transfer t INNER JOIN wind_field a on t.field_code = a.field_code
+    WHERE
+        t.transfer_state = -1
+    AND t.transfer_file_addr != ''
+    ORDER BY
+        t.update_time
+    LIMIT 1
+    """
+    df = __query(plt_connect, query_running_sql)
+    if df.empty:
+        df = __query(plt_connect, query_next_exdc_sql)
+        return df
+    else:
+        return None
+
+
+def get_all_wind(field_code):
+    query_sql = f"select engine_code,engine_name from wind_engine_group where field_code = '{field_code}' and del_state = 1"
+    df = __query(plt_connect, query_sql)
+    dict_datas = dict()
+    if df.empty:
+        return dict_datas
+    else:
+        for engine_code, engine_name in zip(df['engine_code'], df['engine_name']):
+            dict_datas[engine_name] = engine_code
+        return dict_datas
+
+
+def creat_table_and_add_partition(table_name, count, read_type):
+    query_table = f"SELECT t.TABLE_NAME FROM information_schema.`TABLES` t where t.TABLE_NAME = '{table_name}'"
+    df = __query(trans_connect, query_table)
+    if df.empty:
+        create_sql = f"""
+        CREATE TABLE
+        IF NOT EXISTS `{table_name}` (
+            `wind_turbine_number` VARCHAR (20) DEFAULT NULL COMMENT '风机编号',
+            `time_stamp` datetime NOT NULL COMMENT '时间戳',
+            `active_power` DOUBLE DEFAULT NULL COMMENT '有功功率',
+            `rotor_speed` DOUBLE DEFAULT NULL COMMENT '风轮转速',
+            `generator_speed` DOUBLE DEFAULT NULL COMMENT '发电机转速',
+            `wind_velocity` DOUBLE DEFAULT NULL COMMENT '风速',
+            `pitch_angle_blade_1` DOUBLE DEFAULT NULL COMMENT '桨距角1',
+            `pitch_angle_blade_2` DOUBLE DEFAULT NULL COMMENT '桨距角2',
+            `pitch_angle_blade_3` DOUBLE DEFAULT NULL COMMENT '桨距角3',
+            `cabin_position` DOUBLE DEFAULT NULL COMMENT '机舱位置',
+            `true_wind_direction` DOUBLE DEFAULT NULL COMMENT '绝对风向',
+            `yaw_error1` DOUBLE DEFAULT NULL COMMENT '对风角度',
+            `set_value_of_active_power` DOUBLE DEFAULT NULL COMMENT '有功功率设定值',
+            `gearbox_oil_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱油温',
+            `generatordrive_end_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '发电机驱动端轴承温度',
+            `generatornon_drive_end_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '发电机非驱动端轴承温度',
+            `cabin_temperature` DOUBLE DEFAULT NULL COMMENT '机舱内温度',
+            `twisted_cable_angle` DOUBLE DEFAULT NULL COMMENT '扭缆角度',
+            `front_back_vibration_of_the_cabin` DOUBLE DEFAULT NULL COMMENT '机舱前后振动',
+            `side_to_side_vibration_of_the_cabin` DOUBLE DEFAULT NULL COMMENT '机舱左右振动',
+            `actual_torque` DOUBLE DEFAULT NULL COMMENT '实际力矩',
+            `given_torque` DOUBLE DEFAULT NULL COMMENT '给定力矩',
+            `clockwise_yaw_count` DOUBLE DEFAULT NULL COMMENT '顺时针偏航次数',
+            `counterclockwise_yaw_count` DOUBLE DEFAULT NULL COMMENT '逆时针偏航次数',
+            `unusable` BIGINT (20) DEFAULT NULL COMMENT '不可利用',
+            `power_curve_available` BIGINT (20) DEFAULT NULL COMMENT '功率曲线可用',
+            `required_gearbox_speed` DOUBLE DEFAULT NULL COMMENT '齿轮箱转速',
+            `inverter_speed_master_control` DOUBLE DEFAULT NULL COMMENT '变频器转速(主控)',
+            `outside_cabin_temperature` DOUBLE DEFAULT NULL COMMENT '环境温度',
+            `main_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '主轴承轴承温度',
+            `gearbox_high_speed_shaft_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱高速轴轴承温度',
+            `gearboxmedium_speed_shaftbearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱中速轴轴承温度',
+            `gearbox_low_speed_shaft_bearing_temperature` DOUBLE DEFAULT NULL COMMENT '齿轮箱低速轴轴承温度',
+            `generator_winding1_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组1温度',
+            `generator_winding2_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组2温度',
+            `generator_winding3_temperature` DOUBLE DEFAULT NULL COMMENT '发电机绕组3温度',
+            `wind_turbine_status` BIGINT (20) DEFAULT NULL COMMENT '风机状态1',
+            `wind_turbine_status2` DOUBLE DEFAULT NULL COMMENT '风机状态2',
+            `turbulence_intensity` DOUBLE DEFAULT NULL COMMENT '湍流强度',
+            `year` INT (4) DEFAULT NULL COMMENT '年',
+            `month` INT (2) DEFAULT NULL COMMENT '月',
+            `day` INT (2) DEFAULT NULL COMMENT '日',
+            `param1` DOUBLE DEFAULT NULL COMMENT '预留1',
+            `param2` DOUBLE DEFAULT NULL COMMENT '预留2',
+            `param3` DOUBLE DEFAULT NULL COMMENT '预留3',
+            `param4` DOUBLE DEFAULT NULL COMMENT '预留4',
+            `param5` DOUBLE DEFAULT NULL COMMENT '预留5',
+            `param6` DOUBLE DEFAULT NULL COMMENT '预留6',
+            `param7` DOUBLE DEFAULT NULL COMMENT '预留7',
+            `param8` DOUBLE DEFAULT NULL COMMENT '预留8',
+            `param9` DOUBLE DEFAULT NULL COMMENT '预留9',
+            `param10` DOUBLE DEFAULT NULL COMMENT '预留10',
+             KEY `time_stamp` (`time_stamp`),
+             KEY `wind_turbine_number` (`wind_turbine_number`)
+        ) ENGINE = INNODB DEFAULT CHARSET = utf8mb4
+        """
+
+        if read_type == 'second':
+            create_sql = create_sql + f" PARTITION BY KEY (`wind_turbine_number`) PARTITIONS {count}"
+
+        __ddl_sql(trans_connect, create_sql)
+
+
+def rename_table(table_name, renamed_table_name):
+    rename_sql = f"RENAME TABLE {table_name} TO {renamed_table_name}"
+    try:
+        __ddl_sql(trans_connect, rename_sql)
+    except Exception as e:
+        trans_print(e)
+
+
+def read_excel_and_save_to_db(table_name: str, file_path, batch_count=20000):
+    df = read_file_to_df(file_path)
+    trans_print("开始保存文件", file_path, "到数据库")
+    begin = datetime.datetime.now()
+    save_df_to_db(trans_connect, table_name, df, batch_count)
+    trans_print("保存文件", file_path, "到数据库成功,耗时:", str(datetime.datetime.now() - begin))
+
+
+def save_df_to_db(connection: pymysql.Connection, table_name: str, df: DataFrame, batch_count=20000):
+    col_str = ",".join(df.columns)
+    data_s_str = ",".join(["%s"] * len(df.columns))
+
+    insert_sql = f"INSERT INTO {table_name} ({col_str}) values ({data_s_str})"
+
+    # 转化nan到null
+    df.replace(np.nan, None, inplace=True)
+
+    total_count = df.shape[0]
+    for i in range(0, total_count + 1, batch_count):
+        connection.ping(reconnect=True)
+        with connection.cursor() as cursor:
+            query_df = df.iloc[i:i + batch_count]
+            values = [tuple(data) for data in query_df.values]
+            cursor.executemany(insert_sql, values)
+        connection.commit()
+        connection.close()
+        trans_print("保存条数成功,总条数", str(total_count), "已完成条数:",
+                    str(total_count if (i + batch_count) > total_count else (i + batch_count)))
+
+
+if __name__ == '__main__':
+    df = get_exec_data()
+    print(df)

+ 65 - 0
utils/db/trans_pg.py

@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+# @Time    : 2024/5/15
+# @Author  : 魏志亮
+
+import pandas as pd
+# 建立数据库连接
+from sqlalchemy import create_engine, Engine, text
+
+from utils.log.trans_log import trans_print
+
+trans_user = 'postgres'
+trans_password = 'admin123456'
+trans_host = '192.168.50.235'
+trans_port = 5432
+trans_database = 'postgres'
+
+trans_engine = create_engine(
+    f'postgresql://{trans_user}:{trans_password}@{trans_host}:{trans_port}/{trans_database}', echo=True)
+
+
+def __query(engine: Engine, sql):
+    trans_print('开始执行SQL:', sql)
+    with engine.connect() as conn:
+        df = pd.read_sql(text(sql), conn)
+    return df
+
+
+def __ddl_sql(engine: Engine, sql):
+    trans_print('开始执行SQL:', sql)
+    with engine.connect() as conn:
+        conn.execute(text(sql))
+        conn.commit()
+
+
+def creat_table_and_add_partition(table_name, partition):
+    lower_partition = partition.lower()
+    query_table = f"""
+    select  c.relname
+    from pg_class c
+             join pg_inherits pi on pi.inhrelid = c. oid
+             join pg_class c2 on c2.oid = pi.inhparent
+    where
+            c2.relname = '{table_name}' and c.relname = '{table_name}_{lower_partition}'
+    """
+    df = __query(trans_engine, query_table)
+
+    if df.empty:
+        add_partition_sql = f"""
+            create table {table_name}_{partition} PARTITION OF {table_name} FOR VALUES  IN ('{partition}');
+        """
+        __ddl_sql(trans_engine, add_partition_sql)
+
+
+if __name__ == '__main__':
+    # creat_table_and_add_partition("test_11", "123_002")
+    df = __query(trans_engine, """
+    select  c.relname
+from pg_class c
+         join pg_inherits pi on pi.inhrelid = c. oid
+         join pg_class c2 on c2.oid = pi.inhparent
+where
+        c2.relname = 'energy_data_second' and c.relname = 'energy_data_second_202405201453_a006';
+    """)
+    print(df)
+    print(df.empty)

+ 17 - 9
utils/log/trans_log.py

@@ -9,17 +9,25 @@ import sys
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
-logger.addHandler(logging.StreamHandler(sys.stdout))
+stout_handle = logging.StreamHandler(sys.stdout)
+stout_handle.setFormatter(logging.Formatter("%(asctime)s-%(levelname)s-%(filename)-8s:%(lineno)s: %(message)s"))
+stout_handle.setLevel(logging.INFO)
+logger.addHandler(stout_handle)
 
-log_path = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) + '/logs'
-if not os.path.exists(log_path):
-    os.makedirs(log_path)
 
-file_handler = logging.FileHandler(log_path + '/dianchang_' + str(datetime.date.today()) + '.log', encoding='utf-8')
-file_handler.setFormatter(logging.Formatter("%(asctime)s-%(levelname)s-%(filename)-8s:%(lineno)s: %(message)s"))
-file_handler.setLevel(logging.INFO)
-logger.addHandler(file_handler)
+def init_log(batch_no, name, type):
+    log_path = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) + '/logs'
+    file_path = os.path.join(log_path, str(name), str(batch_no), str(type))
+
+    if not os.path.exists(file_path):
+        os.makedirs(file_path, exist_ok=True)
+    file_name = file_path + os.sep + str(datetime.date.today()) + '.log'
+
+    file_handler = logging.FileHandler(file_name, encoding='utf-8')
+    file_handler.setFormatter(logging.Formatter("%(asctime)s-%(levelname)s-%(filename)-8s:%(lineno)s: %(message)s"))
+    file_handler.setLevel(logging.INFO)
+    logger.addHandler(file_handler)
 
 
 def trans_print(*args):
-    logger.info((str(datetime.datetime.now())) + " ".join([str(a) for a in args]))
+    logger.info("  ".join([str(a) for a in args]))

+ 56 - 12
utils/trans_methods.py

@@ -3,6 +3,7 @@
 # @Author  : 魏志亮
 import os
 import re
+import shutil
 import warnings
 
 import chardet
@@ -19,20 +20,44 @@ def detect_file_encoding(filename):
     with open(filename, 'rb') as f:
         rawdata = f.read(1000)
     result = chardet.detect(rawdata)
-    return result['encoding']
+    encoding = result['encoding']
+
+    trans_print("文件类型:", filename, encoding)
+
+    if encoding is None:
+        encoding = 'gb18030'
+
+    if encoding and encoding.lower() == 'gb2312' or encoding.lower().startswith("windows"):
+        encoding = 'gb18030'
+    return encoding
+
+
+def del_blank(df=pd.DataFrame(), cols=list()):
+    for col in cols:
+        if df[col].dtype == object:
+            df[col] = df[col].str.strip()
+    return df
 
 
 # 读取数据到df
 def read_file_to_df(file_path, read_cols=list()):
     trans_print('开始读取文件', file_path)
     df = pd.DataFrame()
-    encoding = detect_file_encoding(file_path)
-
-    if str(file_path).lower().endswith("csv"):
+    if str(file_path).lower().endswith("csv") or str(file_path).lower().endswith("gz"):
+        encoding = detect_file_encoding(file_path)
+        end_with_gz = str(file_path).lower().endswith("gz")
         if read_cols:
-            df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols)
+            if end_with_gz:
+                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols, compression='gzip')
+            else:
+                df = pd.read_csv(file_path, encoding=encoding, usecols=read_cols)
         else:
-            df = pd.read_csv(file_path, encoding=encoding)
+
+            if end_with_gz:
+                df = pd.read_csv(file_path, encoding=encoding, compression='gzip')
+            else:
+                df = pd.read_csv(file_path, encoding=encoding)
+
     else:
         xls = pd.ExcelFile(file_path)
         # 获取所有的sheet名称
@@ -48,18 +73,19 @@ def read_file_to_df(file_path, read_cols=list()):
     return df
 
 
-def __build_directory_dict(directory_dict, path):
+def __build_directory_dict(directory_dict, path, filter_types=None):
     # 遍历目录下的所有项
     for item in os.listdir(path):
         item_path = os.path.join(path, item)
         if os.path.isdir(item_path):
-            __build_directory_dict(directory_dict, item_path)
+            __build_directory_dict(directory_dict, item_path, filter_types=filter_types)
         elif os.path.isfile(item_path):
             if path not in directory_dict:
                 directory_dict[path] = []
 
-            types = ['xls', 'xlsx', 'csv']
-            if str(item_path).split(".")[-1] in types:
+            if filter_types is None or len(filter_types) == 0:
+                directory_dict[path].append(item_path)
+            elif str(item_path).split(".")[-1] in filter_types:
                 if str(item_path).count("~$") == 0:
                     directory_dict[path].append(item_path)
 
@@ -69,18 +95,36 @@ def __build_directory_dict(directory_dict, path):
 # 读取路径下所有的excel文件
 def read_excel_files(read_path):
     directory_dict = {}
-    __build_directory_dict(directory_dict, read_path)
+    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz'])
+
+    return [path for paths in directory_dict.values() for path in paths if path]
+
+
+# 读取路径下所有的文件
+def read_files(read_path):
+    directory_dict = {}
+    __build_directory_dict(directory_dict, read_path, filter_types=['xls', 'xlsx', 'csv', 'gz', 'zip', 'rar'])
 
     return [path for paths in directory_dict.values() for path in paths if path]
 
 
+def copy_to_new(from_path, to_path):
+    is_file = False
+    if to_path.count('.') > 0:
+        is_file = True
+
+    create_file_path(to_path, is_file_path=is_file)
+
+    shutil.copy(from_path, to_path)
+
+
 # 创建路径
 def create_file_path(path, is_file_path=False):
     if is_file_path:
         path = os.path.dirname(path)
 
     if not os.path.exists(path):
-        os.makedirs(path)
+        os.makedirs(path, exist_ok=True)
 
 
 # 格式化风机名称

+ 76 - 6
utils/zip/unzip.py

@@ -2,17 +2,33 @@
 # @Time    : 2024/5/17
 # @Author  : 魏志亮
 import os
+import zipfile
+
+import rarfile
+
 from utils.log.trans_log import trans_print, logger
 
-import zipfile
+
+def __support_gbk(zip_file: zipfile.ZipFile):
+    name_to_info = zip_file.NameToInfo
+    # copy map first
+    for name, info in name_to_info.copy().items():
+        real_name = name.encode('cp437').decode('gbk')
+        if real_name != name:
+            info.filename = real_name
+            del name_to_info[name]
+            name_to_info[real_name] = info
+    return zip_file
 
 
 def unzip(zip_filepath, dest_path):
     # 解压zip文件
     is_success = True
     trans_print('开始读取文件:', zip_filepath)
+    trans_print("解压到:", dest_path)
+
     try:
-        with zipfile.ZipFile(zip_filepath, 'r') as zip_ref:
+        with __support_gbk(zipfile.ZipFile(zip_filepath, 'r')) as zip_ref:
             zip_ref.extractall(dest_path)
     except zipfile.BadZipFile as e:
         logger.exception(e)
@@ -21,19 +37,73 @@ def unzip(zip_filepath, dest_path):
         trans_print('不是zip文件:', zip_filepath)
         return is_success, e
 
-        # 遍历解压后的文件
+    # 遍历解压后的文件
+    dest_path = dest_path
+    print('解压再次读取', dest_path)
     if is_success:
         for root, dirs, files in os.walk(dest_path):
             for file in files:
                 file_path = os.path.join(root, file)
                 # 检查文件是否是zip文件
                 if file_path.endswith('.zip'):
+                    if file_path.endswith('.csv.zip'):
+                        os.rename(file_path, file_path.replace(".csv.zip", ".csv.gz"))
+                    else:
+                        # 如果是,递归解压
+                        unzip(file_path, dest_path + os.sep + str(file).split(".")[0])
+                        # 删除已解压的zip文件(可选)
+                        os.remove(file_path)
+                    # 检查文件是否是zip文件
+                if file_path.endswith('.rar'):
                     # 如果是,递归解压
-                    unzip(file_path, dest_path + os.sep + str(file).split(".")[0])
+                    unrar(file_path, dest_path + os.sep + str(file).split(".")[0])
                     # 删除已解压的zip文件(可选)
                     os.remove(file_path)
+
     return is_success, ''
 
 
-if __name__ == '__main__':
-    unzip(r'C:\Users\Administrator\Desktop\test.zip', r'C:\Users\Administrator\Desktop\test')
+def unrar(rar_file_path, dest_dir):
+    # 检查目标目录是否存在,如果不存在则创建
+    # 解压zip文件
+    is_success = True
+    trans_print('开始读取文件:', rar_file_path)
+    dest_path = dest_dir
+    trans_print("解压到:", dest_path)
+    if not os.path.exists(dest_path):
+        os.makedirs(dest_path)
+
+    try:
+        # 打开RAR文件
+        with rarfile.RarFile(rar_file_path) as rf:
+            # 循环遍历RAR文件中的所有成员(文件和目录)
+            for member in rf.infolist():
+                # 解压文件到目标目录
+                rf.extract(member, dest_path)
+    except Exception as e:
+        logger.exception(e)
+        is_success = False
+        message = str(e)
+        trans_print('不是rar文件:', rar_file_path)
+        return is_success, e
+
+    # 遍历解压后的文件
+    print('解压再次读取', dest_path)
+    if is_success:
+        for root, dirs, files in os.walk(dest_path):
+            for file in files:
+                file_path = os.path.join(root, file)
+                # 检查文件是否是zip文件
+                if file_path.endswith('.rar'):
+                    # 如果是,递归解压
+                    unrar(file_path, file_path.split(".")[0])
+                    # 删除已解压的zip文件(可选)
+                    os.remove(file_path)
+
+                if file_path.endswith('.zip'):
+                    # 如果是,递归解压
+                    unzip(file_path, file_path.split(".")[0])
+                    # 删除已解压的zip文件(可选)
+                    os.remove(file_path)
+
+    return is_success, ''