sysinfo.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. import os
  2. from typing import List
  3. import psutil
  4. from conf.constants import ParallelProcessing
  5. from utils.log.trans_log import info, debug
  6. def print_memory_usage(detail: str = "") -> None:
  7. """
  8. 打印内存使用情况
  9. Args:
  10. detail: 详细信息
  11. """
  12. # 获取当前进程ID
  13. pid = os.getpid()
  14. # 获取进程信息
  15. py = psutil.Process(pid)
  16. # 获取内存信息
  17. memory_info = py.memory_info()
  18. # RSS (Resident Set Size) 是进程实际占用的物理内存大小
  19. memory_usage_rss = memory_info.rss
  20. # VMS (Virtual Memory Size) 是进程使用的虚拟内存大小
  21. memory_usage_vms = memory_info.vms
  22. # 将字节转换为更易读的单位
  23. memory_usage_rss_mb = memory_usage_rss / (1024 ** 2)
  24. memory_usage_vms_mb = memory_usage_vms / (1024 ** 2)
  25. debug(f"{detail},Memory usage (RSS): {memory_usage_rss_mb:.2f} MB")
  26. debug(f"{detail},Memory usage (VMS): {memory_usage_vms_mb:.2f} MB")
  27. def get_cpu_count() -> int:
  28. """
  29. 获取CPU核心数
  30. Returns:
  31. CPU核心数
  32. """
  33. return psutil.cpu_count()
  34. def get_available_cpu_count_with_percent(percent: float = 1) -> int:
  35. """
  36. 根据百分比获取可用CPU数
  37. Args:
  38. percent: CPU使用百分比
  39. Returns:
  40. 可用CPU数
  41. """
  42. cpu_count = get_cpu_count()
  43. return int(cpu_count * percent)
  44. def get_file_size(file_path: str) -> int:
  45. """
  46. 获取文件大小
  47. Args:
  48. file_path: 文件路径
  49. Returns:
  50. 文件大小(字节)
  51. """
  52. return os.path.getsize(file_path)
  53. def get_dir_size(dir_path: str) -> int:
  54. """
  55. 获取目录大小
  56. Args:
  57. dir_path: 目录路径
  58. Returns:
  59. 目录大小(字节)
  60. """
  61. return sum(get_file_size(os.path.join(dir_path, file)) for file in os.listdir(dir_path) if
  62. os.path.isfile(os.path.join(dir_path, file)))
  63. def get_available_memory_with_percent(percent: float = 1) -> int:
  64. """
  65. 根据百分比获取可用内存
  66. Args:
  67. percent: 内存使用百分比
  68. Returns:
  69. 可用内存(字节)
  70. """
  71. memory_info = psutil.virtual_memory()
  72. return int(memory_info.available * percent)
  73. def get_max_file_size(file_paths: List[str]) -> int:
  74. """
  75. 获取文件列表中的最大文件大小
  76. Args:
  77. file_paths: 文件路径列表
  78. Returns:
  79. 最大文件大小(字节)
  80. """
  81. max_size = 0
  82. for file_path in file_paths:
  83. file_size = get_file_size(file_path)
  84. if file_size > max_size:
  85. max_size = file_size
  86. return max_size
  87. def use_files_get_max_cpu_count(file_paths: List[str], memory_percent: float = 1 / 12,
  88. cpu_percent: float = 2 / 5) -> int:
  89. """
  90. 根据文件大小和内存情况计算最大进程数
  91. Args:
  92. file_paths: 文件路径列表
  93. memory_percent: 内存使用百分比
  94. cpu_percent: CPU使用百分比
  95. Returns:
  96. 最大进程数
  97. """
  98. max_file_size = get_max_file_size(file_paths)
  99. free_memory = get_available_memory_with_percent(memory_percent)
  100. count = int(free_memory / max_file_size)
  101. max_cpu_count = get_available_cpu_count_with_percent(cpu_percent)
  102. # 限制最大进程数
  103. max_cpu_count = min(max_cpu_count, ParallelProcessing.MAX_PROCESSES)
  104. result = count if count <= max_cpu_count else max_cpu_count
  105. if result == 0:
  106. result = 1
  107. if result > len(file_paths):
  108. result = len(file_paths)
  109. info("总文件数:", len(file_paths), ",获取最大文件大小:", str(round(max_file_size / 2 ** 20, 2)) + "M",
  110. "可用内存:", str(get_available_memory_with_percent(1) / 2 ** 20) + "M",
  111. "总CPU数:", get_cpu_count(), "CPU使用比例:", round(cpu_percent, 2), "CPU可用数量:", max_cpu_count,
  112. ",最终确定使用进程数:", result)
  113. return result
  114. def max_file_size_get_max_cpu_count(max_file_size: int, memory_percent: float = 1 / 6,
  115. cpu_percent: float = 2 / 5) -> int:
  116. """
  117. 根据最大文件大小和内存情况计算最大进程数
  118. Args:
  119. max_file_size: 最大文件大小
  120. memory_percent: 内存使用百分比
  121. cpu_percent: CPU使用百分比
  122. Returns:
  123. 最大进程数
  124. """
  125. free_memory = get_available_memory_with_percent(memory_percent)
  126. count = int(free_memory / max_file_size)
  127. max_cpu_count = get_available_cpu_count_with_percent(cpu_percent)
  128. # 限制最大进程数
  129. max_cpu_count = min(max_cpu_count, ParallelProcessing.MAX_PROCESSES)
  130. result = count if count <= max_cpu_count else max_cpu_count
  131. if result == 0:
  132. result = 1
  133. info(",获取最大文件大小:", str(round(max_file_size / 2 ** 20, 2)) + "M",
  134. "可用内存:", str(get_available_memory_with_percent(1) / 2 ** 20) + "M",
  135. "总CPU数:", get_cpu_count(), "CPU使用比例:", round(cpu_percent, 2), "CPU可用数量:", max_cpu_count,
  136. ",最终确定使用进程数:", result)
  137. return result
  138. if __name__ == '__main__':
  139. from utils.file.trans_methods import read_files
  140. import datetime
  141. read_path = r"Z:\collection_data\1进行中\密马风电场-山西-大唐\收资数据\scada\秒级数据"
  142. begin = datetime.datetime.now()
  143. all_files = read_files(read_path)
  144. print(datetime.datetime.now() - begin)
  145. print(use_files_get_max_cpu_count(all_files))
  146. print(get_available_memory_with_percent(1) / 2 ** 20)
  147. print(get_available_memory_with_percent(2 / 3) / 2 ** 20)
  148. begin = datetime.datetime.now()
  149. print(len(all_files))
  150. print(get_max_file_size(all_files) / 2 ** 20)
  151. print(datetime.datetime.now() - begin)