memory_benchmark.py

import argparse
import time

from typing import Callable

import py3nvml.py3nvml as nvml

from memory_profiler import memory_usage
from utils import MyThread, get_logger, inference

logger = get_logger("faster-whisper")
parser = argparse.ArgumentParser(description="Memory benchmark")
parser.add_argument(
    "--gpu_memory", action="store_true", help="Measure GPU memory usage"
)
parser.add_argument("--device-index", type=int, default=0, help="GPU device index")
parser.add_argument(
    "--interval",
    type=float,
    default=0.5,
    help="Interval at which measurements are collected",
)
args = parser.parse_args()
device_idx = args.device_index
interval = args.interval


def measure_memory(func: Callable[[], None]):
    if args.gpu_memory:
        logger.info(
            "Measuring maximum GPU memory usage on GPU device."
            " Make sure to not have additional processes running on the same GPU."
        )
        # init nvml
        nvml.nvmlInit()
        handle = nvml.nvmlDeviceGetHandleByIndex(device_idx)
        gpu_name = nvml.nvmlDeviceGetName(handle)
        gpu_memory_limit = nvml.nvmlDeviceGetMemoryInfo(handle).total >> 20
        gpu_power_limit = nvml.nvmlDeviceGetPowerManagementLimit(handle) / 1000.0
        info = {"gpu_memory_usage": [], "gpu_power_usage": []}

        def _get_gpu_info():
            while True:
                info["gpu_memory_usage"].append(
                    nvml.nvmlDeviceGetMemoryInfo(handle).used >> 20
                )
                info["gpu_power_usage"].append(
                    nvml.nvmlDeviceGetPowerUsage(handle) / 1000
                )
                time.sleep(interval)

                if stop:
                    break

            return info

        stop = False
        thread = MyThread(_get_gpu_info, params=())
        thread.start()
        func()
        stop = True
        thread.join()
        result = thread.get_result()

        # shutdown nvml
        nvml.nvmlShutdown()

        max_memory_usage = max(result["gpu_memory_usage"])
        max_power_usage = max(result["gpu_power_usage"])
        print("GPU name: %s" % gpu_name)
        print("GPU device index: %s" % device_idx)
        print(
            "Maximum GPU memory usage: %dMiB / %dMiB (%.2f%%)"
            % (
                max_memory_usage,
                gpu_memory_limit,
                (max_memory_usage / gpu_memory_limit) * 100,
            )
        )
        print(
            "Maximum GPU power usage: %dW / %dW (%.2f%%)"
            % (
                max_power_usage,
                gpu_power_limit,
                (max_power_usage / gpu_power_limit) * 100,
            )
        )
    else:
        logger.info("Measuring maximum increase of memory usage.")

        max_usage = memory_usage(func, max_usage=True, interval=interval)
        print("Maximum increase of RAM memory usage: %d MiB" % max_usage)


if __name__ == "__main__":
    measure_memory(inference)
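
# ---------------------------------------------------------------------------
# Usage note: run the script as-is to measure the peak RAM increase of
# `inference` via memory_profiler, or pass --gpu_memory (optionally with
# --device-index and --interval) to poll NVML for peak GPU memory use and
# power draw instead.
#
# `MyThread`, `get_logger` and `inference` are imported from the repository's
# `utils` module, which is not shown here. The script only relies on
# `MyThread` running a function in a background thread and exposing its
# return value through `get_result()`. A minimal sketch of such a helper
# follows; this is an assumption about its behaviour, not the actual
# `utils.py` implementation.

import threading


class MyThread(threading.Thread):
    """Sketch of a thread wrapper that stores the target's return value."""

    def __init__(self, func, params=()):
        super().__init__()
        self.func = func
        self.params = params
        self.result = None

    def run(self):
        # Capture the return value so callers can read it after join().
        self.result = self.func(*self.params)

    def get_result(self):
        return self.result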