delete_repeat_data.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. from service import plt_service
  2. wind_farms = {
  3. # "WOF35900004": "平陆风电场",
  4. # "WOF35100072": "阳曲风电场",
  5. "WOF35100073": "古交风电场",
  6. "WOF35200074": "马家梁风电场",
  7. "WOF35400075": "太谷风电场",
  8. "WOF35900076": "坡底风电场",
  9. "WOF35300077": "西潘风电场",
  10. "WOF35900078": "芮城风电场",
  11. "WOF34900079": "右玉风光互补电站",
  12. "WOF35800080": "贺家沟",
  13. "WOF35800081": "石楼风电场",
  14. "WOF35800082": "盂县风光互补电站"
  15. }
  16. for k, v in wind_farms.items():
  17. wind_infos, _ = plt_service.get_wind_info(k, False)
  18. for type in ['second', 'minute']:
  19. for name, wind_code in wind_infos.items():
  20. print('-- ', v, type, wind_code)
  21. sql = f"""
  22. SET tidb_mem_quota_query = 8589934592;
  23. DELETE t FROM {k}_{type} t
  24. JOIN(
  25. SELECT wind_turbine_number,time_stamp, ROW_NUMBER() OVER (PARTITION BY wind_turbine_number,time_stamp ORDER BY wind_turbine_number,time_stamp) AS rn
  26. FROM {k}_{type} a
  27. WHERE
  28. a.wind_turbine_number = '{wind_code}'
  29. and a.time_stamp in (
  30. SELECT time_stamp from {k}_{type} PARTITION(p202504) b where b.time_stamp BETWEEN '2025-04-01 00:00:00' and '2025-04-30 23:59:59'
  31. and b.wind_turbine_number = '{wind_code}'
  32. GROUP BY b.time_stamp HAVING count(1) > 1
  33. )) t2 on t.time_stamp = t2.time_stamp and t.wind_turbine_number = t2.wind_turbine_number where t2.rn > 1;
  34. """
  35. print(sql)
  36. # trans.execute(sql)