Przeglądaj źródła

删除每行有空值的行(2025-3-24)

魏志亮 2 miesiące temu
rodzic
commit
af3d215f60

+ 1 - 2
.gitignore

@@ -3,11 +3,10 @@ logs
 *.iml
 .idea
 test
-tmp
 venv
 wheels
 build
 dist
 etl_tool.spec
-
+tmp_file
 /test

+ 16 - 6
etl/wind_power/min_sec/StatisticsAndSaveTmpFormalFile.py

@@ -77,7 +77,7 @@ class StatisticsAndSaveTmpFormalFile(object):
                            'param9', 'param10']
 
         # Drop rows where either active power or wind speed is null (how='any')
-        df.dropna(subset=['active_power', 'wind_velocity'], how='all', inplace=True)
+        df.dropna(subset=['active_power', 'wind_velocity'], how='any', inplace=True)
         trans_print(origin_wind_name, wind_col_name, "删除有功功率和风速均为空的情况后:", df.shape)
         df.replace(np.nan, -999999999, inplace=True)
         number_cols = df.select_dtypes(include=['number']).columns.tolist()
@@ -97,6 +97,11 @@ class StatisticsAndSaveTmpFormalFile(object):
         df.sort_values(by='time_stamp', inplace=True)
         df = df[[i for i in self.trans_param.cols_tran.keys() if i in df.columns]]
 
+        # 删除每行有空值的行(2025-3-24)
+        origin_count = df.shape[0]
+        df = df.dropna()
+        trans_print(f'原始数据量:{origin_count},去除na后数据量:{df.shape[0]}')
+
         # 如果秒级有可能合并到分钟级
         # TODO add 秒转分钟
         if self.trans_param.boolean_sec_to_min:
@@ -108,7 +113,6 @@ class StatisticsAndSaveTmpFormalFile(object):
         trans_print(origin_wind_name, wind_col_name, "功率大于0的数量:", power_df.shape)
         power = power_df.sample(int(power_df.shape[0] / 100))['active_power'].median()
 
-        del power_df
         trans_print(origin_wind_name, wind_col_name, '有功功率,中位数', power)
         if power > 100000:
             df['active_power'] = df['active_power'] / 1000
@@ -126,10 +130,16 @@ class StatisticsAndSaveTmpFormalFile(object):
         #         raise Exception("执行代码不支持导入包")
         #     exec(exec_code)
 
-        class_identifiler = ClassIdentifier(wind_turbine_number=origin_wind_name, origin_df=df,
-                                            rated_power=rated_power_and_cutout_speed_tuple[0],
-                                            cut_out_speed=rated_power_and_cutout_speed_tuple[1])
-        df = class_identifiler.run()
+        if power_df.shape[0] == 0:
+            df.loc[:, 'lab'] = -1
+        else:
+            class_identifiler = ClassIdentifier(wind_turbine_number=origin_wind_name, origin_df=df,
+                                                rated_power=rated_power_and_cutout_speed_tuple[0],
+                                                cut_out_speed=rated_power_and_cutout_speed_tuple[1])
+            df = class_identifiler.run()
+
+        del power_df
+
         df['year'] = df['time_stamp'].dt.year
         df['month'] = df['time_stamp'].dt.month
         df['day'] = df['time_stamp'].dt.day

+ 11 - 268
requirements.txt

@@ -1,268 +1,11 @@
-aiofiles==22.1.0
-aiohttp==3.9.5
-aiosignal==1.3.1
-alabaster==0.7.16
-aliyun-python-sdk-core==2.15.1
-aliyun-python-sdk-kms==2.16.3
-aniso8601==9.0.1
-anyio==4.6.0
-APScheduler==3.10.4
-argcomplete==1.10.3
-arrow==1.3.0
-asgiref==3.8.1
-astroid==3.2.2
-asttokens==2.4.1
-async-timeout==4.0.3
-asyncio==3.4.3
-atomicwrites==1.4.1
-attrs==23.2.0
-autopep8==2.0.4
-Babel==2.15.0
-backcall==0.2.0
-backports-datetime-fromisoformat==2.0.1
-backports.tarfile==1.2.0
-bcrypt==4.1.3
-beautifulsoup4==4.8.2
-binaryornot==0.4.4
-black==24.4.2
-bleach==6.1.0
-blinker==1.8.2
-cache==1.0.3
-cachelib==0.9.0
-certifi==2024.6.2
-cffi==1.16.0
-chardet==5.2.0
-charset-normalizer==3.3.2
-click==8.1.7
-cloudpickle==3.0.0
-colorama==0.4.6
-comm==0.2.2
-compressed_rtf==1.0.6
-contourpy==1.2.1
-cookiecutter==2.6.0
-crcmod==1.7
-cryptography==41.0.2
-cycler==0.12.1
-DBUtils==3.1.0
-debugpy==1.8.2
-decorator==5.1.1
-defusedxml==0.7.1
-diff-match-patch==20230430
-dill==0.3.8
-distro==1.9.0
-Django==4.1.13
-docopt==0.6.2
-docstring-to-markdown==0.15
-docutils==0.21.2
-docx2txt==0.8
-ebcdic==1.1.1
-et-xmlfile==1.1.0
-exceptiongroup==1.2.1
-executing==2.0.1
-extract-msg==0.28.7
-fastapi==0.115.0
-fastapi-offline==1.7.3
-fastjsonschema==2.20.0
-flake8==7.1.0
-Flask==3.0.3
-Flask-APScheduler==1.13.1
-Flask-Caching==2.3.0
-Flask-Cors==4.0.1
-Flask-Excel==0.0.7
-Flask-Executor==1.0.0
-Flask-HTTPAuth==4.8.0
-Flask-Login==0.6.3
-flask-restx==1.3.0
-Flask-Script==2.0.6
-flask-siwadoc==0.2.2
-Flask-SQLAlchemy==3.1.1
-Flask-WTF==1.2.1
-fonttools==4.53.0
-frozenlist==1.4.1
-fsspec==2024.12.0
-greenlet==3.0.3
-h11==0.14.0
-idna==3.7
-imagesize==1.4.1
-IMAPClient==2.1.0
-importlib_metadata==8.0.0
-importlib_resources==6.4.0
-inflection==0.5.1
-iniconfig==2.0.0
-intervaltree==3.1.0
-ipykernel==6.29.4
-ipython==8.12.3
-isort==5.13.2
-itsdangerous==2.2.0
-jaraco.classes==3.4.0
-jaraco.context==5.3.0
-jaraco.functools==4.0.1
-jedi==0.19.1
-jellyfish==1.0.4
-Jinja2==3.1.4
-jmespath==0.10.0
-joblib==1.4.2
-jsonschema==4.22.0
-jsonschema-specifications==2023.12.1
-jupyter_client==8.6.2
-jupyter_core==5.7.2
-jupyterlab_pygments==0.3.0
-keyring==25.2.1
-kiwisolver==1.4.5
-lml==0.1.0
-loguru==0.7.2
-lxml==5.2.2
-m3u8==5.1.0
-markdown-it-py==3.0.0
-MarkupSafe==2.1.5
-matplotlib==3.9.0
-matplotlib-inline==0.1.7
-mccabe==0.7.0
-mdurl==0.1.2
-mistune==3.0.2
-more-itertools==10.3.0
-multidict==6.0.5
-mypy-extensions==1.0.0
-mysqlclient==2.2.4
-nbclient==0.10.0
-nbconvert==7.16.4
-nbformat==5.10.4
-nest-asyncio==1.6.0
-networkx==3.2.1
-numpy==2.0.0
-numpydoc==1.7.0
-olefile==0.47
-openpyxl==3.1.4
-oss2==2.18.6
-packaging==24.1
-pandas==2.2.2
-pandocfilters==1.5.1
-paramiko==3.4.0
-parso==0.8.4
-pathspec==0.12.1
-pdfminer==20191125
-pdfminer.six==20191110
-pdfminer3k==1.3.4
-peewee==3.17.5
-pexpect==4.9.0
-pickleshare==0.7.5
-pillow==10.3.0
-pipreqs==0.5.0
-platformdirs==4.2.2
-pluggy==1.5.0
-ply==3.11
-prompt_toolkit==3.0.47
-psutil==6.0.0
-ptyprocess==0.7.0
-pure-eval==0.2.2
-pyarrow==18.1.0
-pycodestyle==2.11.1
-pycparser==2.22
-pycryptodome==3.20.0
-pydantic==1.9.0
-pydocstyle==6.3.0
-pyexcel==0.7.0
-pyexcel-io==0.6.6
-pyexcel-webio==0.1.4
-pyflakes==3.2.0
-Pygments==2.18.0
-PyJWT==2.8.0
-pylint==3.2.3
-pylint-venv==3.0.3
-pyls-spyder==0.4.0
-PyMySQL==1.1.0
-PyNaCl==1.5.0
-pyparsing==3.1.2
-PyPDF2==3.0.1
-pypdfium2==4.30.0
-pyperclip==1.9.0
-PyQt-SiliconUI==1.0.1
-PyQt5==5.15.10
-PyQt5-Qt5==5.15.2
-PyQt5-sip==12.13.0
-PyQtWebEngine==5.15.6
-PyQtWebEngine-Qt5==5.15.2
-pytest==8.3.2
-python-calamine==0.2.3
-python-dateutil==2.9.0.post0
-python-lsp-black==2.0.0
-python-lsp-jsonrpc==1.1.2
-python-lsp-server==1.11.0
-python-pptx==0.6.23
-python-slugify==8.0.4
-pytoolconfig==1.3.1
-pytz==2024.1
-pywin32==306
-pywin32-ctypes==0.2.2
-pyxxl==0.3.6
-PyYAML==6.0.1
-pyzmq==26.0.3
-QDarkStyle==3.2.3
-qstylizer==0.2.3
-QtAwesome==1.3.1
-qtconsole==5.5.2
-QtPy==2.4.1
-rarfile==4.2
-redis==5.0.7
-referencing==0.35.1
-requests==2.32.3
-rich==13.7.1
-rope==1.13.0
-rpds-py==0.18.1
-Rtree==1.2.0
-scikit-learn==1.5.1
-scipy==1.13.1
-six==1.12.0
-sniffio==1.3.1
-snowballstemmer==2.2.0
-sortedcontainers==2.4.0
-soupsieve==2.5
-SpeechRecognition==3.8.1
-Sphinx==7.3.7
-sphinxcontrib-applehelp==1.0.8
-sphinxcontrib-devhelp==1.0.6
-sphinxcontrib-htmlhelp==2.0.5
-sphinxcontrib-jsmath==1.0.1
-sphinxcontrib-qthelp==1.0.7
-sphinxcontrib-serializinghtml==1.1.10
-spyder==5.5.5
-spyder-kernels==2.5.2
-SQLAlchemy==2.0.30
-sqlparse==0.5.0
-stack-data==0.6.3
-starlette==0.38.6
-tabula-py==2.9.3
-tabulate==0.9.0
-text-unidecode==1.3
-textdistance==4.6.2
-textract==1.6.5
-texttable==1.7.0
-threadpoolctl==3.5.0
-three-merge==0.1.1
-tinycss2==1.3.0
-tomli==2.0.1
-tomlkit==0.12.5
-tornado==6.4.1
-traitlets==5.14.3
-types-python-dateutil==2.9.0.20240316
-typing_extensions==4.12.2
-tzdata==2024.1
-tzlocal==5.2
-ufile==3.2.9
-ujson==5.10.0
-urllib3==2.2.2
-uvicorn==0.32.1
-watchdog==4.0.1
-wcwidth==0.2.13
-web.py==0.40.dev1
-webencodings==0.5.1
-Werkzeug==3.0.3
-whatthepatch==1.0.5
-win32-setctime==1.1.0
-WTForms==3.1.2
-xlrd==2.0.1
-XlsxWriter==3.2.0
-yapf==0.40.2
-yarg==0.1.9
-yarl==1.9.4
-zipp==3.19.2
+pandas~=2.2.2
+numpy~=2.0.0
+PyMySQL~=1.1.0
+SQLAlchemy~=2.0.30
+rarfile~=4.2
+PyYAML~=6.0.1
+matplotlib~=3.9.0
+chardet~=3.0.4
+psutil~=6.0.0
+openpyxl ~= 3.1.4
+xlrd

+ 6 - 3
service/trans_conf_service.py

@@ -55,14 +55,16 @@ def update_trans_status_success(id, wind_count=0, time_granularity=0,
     if save_db:
         if min_date is not None:
             exec_sql = """
-            update data_transfer set transfer_status = 1,trans_sys_status = 1,transfer_progress=100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now(),
-            data_min_time= %s,data_max_time= %s,transfer_data_count=%s
+            update data_transfer set transfer_status = 1,trans_sys_status = 1,transfer_progress=100,err_info = '',
+            engine_count =%s,time_granularity=%s,transfer_finish_time=now(),
+            data_min_time= %s,data_max_time= %s,transfer_data_count=%s,is_high_priority=0
             where id = %s  
             """
             trans.execute(exec_sql, (wind_count, time_granularity, min_date, max_date, total_count, id))
         else:
             exec_sql = """
-            update data_transfer set transfer_status = 1,trans_sys_status = 1,transfer_progress = 100,err_info = '',engine_count =%s,time_granularity=%s,transfer_finish_time=now()
+            update data_transfer set transfer_status = 1,trans_sys_status = 1,transfer_progress = 100,err_info = '',
+            engine_count =%s,time_granularity=%s,transfer_finish_time=now(),is_high_priority=0
             where id = %s 
             """
             trans.execute(exec_sql, (wind_count, time_granularity, id))
@@ -97,6 +99,7 @@ def get_batch_exec_data() -> dict:
          t.trans_sys_status in (-1,1,2) and t.transfer_status = -1
     AND t.read_dir != ''
     ORDER BY
+        t.is_high_priority desc,
         t.update_time
     LIMIT 1
     """

+ 0 - 0
wind_farm/中广核/__init__.py → wind_farm/CGN/__init__.py


+ 0 - 0
wind_farm/中广核/minute_data.py → wind_farm/CGN/minute_data.py


+ 0 - 0
wind_farm/中广核/purge_history_data.py → wind_farm/CGN/purge_history_data.py


+ 0 - 0
wind_farm/中广核/second_data.py → wind_farm/CGN/second_data.py