|
@@ -8,24 +8,26 @@ import traceback
|
|
|
|
|
|
from etl.base.TranseParam import TranseParam
|
|
|
from service.plt_service import get_all_wind, update_trans_status_error, update_trans_status_running, \
|
|
|
- update_trans_status_success
|
|
|
-from service.trans_service import creat_table_and_add_partition, rename_table, save_file_to_db, drop_table
|
|
|
+ update_trans_status_success, update_trans_transfer_progress
|
|
|
+from service.trans_service import creat_table_and_add_partition, save_file_to_db, drop_table
|
|
|
+from utils.df_utils.util import get_time_space
|
|
|
from utils.file.trans_methods import *
|
|
|
from utils.zip.unzip import unzip, unrar, get_desc_path
|
|
|
|
|
|
|
|
|
class WindFarms(object):
|
|
|
|
|
|
- def __init__(self, batch_no=None, field_code=None, params: TranseParam = None, wind_full_name=None,
|
|
|
+ def __init__(self, batch_no=None, batch_name=None, field_code=None, params: TranseParam = None, wind_full_name=None,
|
|
|
save_db=True, header=0):
|
|
|
self.batch_no = batch_no
|
|
|
+ self.batch_name = batch_name
|
|
|
self.field_code = field_code
|
|
|
self.wind_full_name = wind_full_name
|
|
|
self.save_zip = False
|
|
|
self.trans_param = params
|
|
|
self.exist_wind_names = multiprocessing.Manager().list()
|
|
|
self.wind_col_trans = get_all_wind(self.field_code)
|
|
|
- self.batch_count = 50000
|
|
|
+ self.batch_count = 200000
|
|
|
self.save_path = None
|
|
|
self.save_db = save_db
|
|
|
self.lock = multiprocessing.Manager().Lock()
|
|
@@ -49,10 +51,11 @@ class WindFarms(object):
|
|
|
raise Exception("Invalid param set :" + arg)
|
|
|
|
|
|
def get_save_path(self):
|
|
|
- return os.path.join(self.save_path, self.batch_no, self.trans_param.read_type)
|
|
|
+ return os.path.join(self.save_path, self.batch_no + "_" + self.batch_name, self.trans_param.read_type)
|
|
|
|
|
|
def get_save_tmp_path(self):
|
|
|
- return os.path.join(tempfile.gettempdir(), self.wind_full_name, self.batch_no, self.trans_param.read_type)
|
|
|
+ return os.path.join(tempfile.gettempdir(), self.wind_full_name, self.batch_no + "_" + self.batch_name,
|
|
|
+ self.trans_param.read_type)
|
|
|
|
|
|
def get_excel_tmp_path(self):
|
|
|
return os.path.join(self.get_save_tmp_path(), 'excel_tmp' + os.sep)
|
|
@@ -210,8 +213,9 @@ class WindFarms(object):
|
|
|
def set_statistics_data(self, df):
|
|
|
|
|
|
if not df.empty:
|
|
|
- min_date = pd.to_datetime(df['time_stamp']).min()
|
|
|
- max_date = pd.to_datetime(df['time_stamp']).max()
|
|
|
+ df['time_stamp'] = pd.to_datetime(df['time_stamp'])
|
|
|
+ min_date = df['time_stamp'].min()
|
|
|
+ max_date = df['time_stamp'].max()
|
|
|
with self.lock:
|
|
|
|
|
|
if 'min_date' in self.statistics_map.keys():
|
|
@@ -231,6 +235,9 @@ class WindFarms(object):
|
|
|
else:
|
|
|
self.statistics_map['total_count'] = df.shape[0]
|
|
|
|
|
|
+ if 'time_granularity' not in self.statistics_map.keys():
|
|
|
+ self.statistics_map['time_granularity'] = get_time_space(df, 'time_stamp')
|
|
|
+
|
|
|
def save_statistics_file(self):
|
|
|
save_path = os.path.join(os.path.dirname(self.get_save_path()),
|
|
|
self.trans_param.read_type + '_statistics.txt')
|
|
@@ -458,7 +465,7 @@ class WindFarms(object):
|
|
|
# if self.trans_param.wind_name_exec:
|
|
|
# wind_name = "测试"
|
|
|
# eval(self.trans_param.wind_name_exec)
|
|
|
-
|
|
|
+ update_trans_transfer_progress(self.batch_no, self.trans_param.read_type, 5, self.save_db)
|
|
|
trans_print("初始化字段结束,耗时:", str(datetime.datetime.now() - tmp_begin), ",总耗时:",
|
|
|
str(datetime.datetime.now() - begin))
|
|
|
|
|
@@ -469,6 +476,7 @@ class WindFarms(object):
|
|
|
trans_print("开始保存到临时路径")
|
|
|
# 开始读取数据并分类保存临时文件
|
|
|
self.remove_file_to_tmp_path()
|
|
|
+ update_trans_transfer_progress(self.batch_no, self.trans_param.read_type, 20, self.save_db)
|
|
|
trans_print("保存到临时路径结束,耗时:", str(datetime.datetime.now() - tmp_begin), ",总耗时:",
|
|
|
str(datetime.datetime.now() - begin))
|
|
|
|
|
@@ -479,6 +487,7 @@ class WindFarms(object):
|
|
|
|
|
|
# 开始读取数据并分类保存临时文件
|
|
|
self.read_file_and_save_tmp()
|
|
|
+ update_trans_transfer_progress(self.batch_no, self.trans_param.read_type, 50, self.save_db)
|
|
|
trans_print("保存到临时文件结束,耗时:", str(datetime.datetime.now() - tmp_begin), ",总耗时:",
|
|
|
str(datetime.datetime.now() - begin))
|
|
|
|
|
@@ -487,6 +496,7 @@ class WindFarms(object):
|
|
|
trans_print("开始保存到文件")
|
|
|
self.mutiprocessing_to_save_file()
|
|
|
self.save_statistics_file()
|
|
|
+ update_trans_transfer_progress(self.batch_no, self.trans_param.read_type, 70, self.save_db)
|
|
|
trans_print("保存到文件结束,耗时:", str(datetime.datetime.now() - tmp_begin), ",总耗时:",
|
|
|
str(datetime.datetime.now() - begin))
|
|
|
|
|
@@ -497,12 +507,12 @@ class WindFarms(object):
|
|
|
self.mutiprocessing_to_save_db()
|
|
|
trans_print("保存到数据库结束,耗时:", str(datetime.datetime.now() - tmp_begin), ",总耗时:",
|
|
|
str(datetime.datetime.now() - begin))
|
|
|
+ update_trans_transfer_progress(self.batch_no, self.trans_param.read_type, 100, self.save_db)
|
|
|
# 如果end==0 则说明只是进行了验证
|
|
|
if end != 0:
|
|
|
update_trans_status_success(self.batch_no, self.trans_param.read_type,
|
|
|
- len(read_excel_files(self.get_read_tmp_path())), self.save_db)
|
|
|
-
|
|
|
- self.delete_tmp_files()
|
|
|
+ len(read_excel_files(self.get_read_tmp_path())),
|
|
|
+ self.statistics_map['time_granularity'], self.save_db)
|
|
|
|
|
|
trans_print("结束执行", self.trans_param.read_type, ",总耗时:",
|
|
|
str(datetime.datetime.now() - begin))
|