resources.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. import re
  2. from typing import Any, TYPE_CHECKING
  3. if TYPE_CHECKING:
  4. from render import Render
  5. try:
  6. from .error import RenderError
  7. except ImportError:
  8. from error import RenderError
  # Fallback CPU limit, used when values["resources"]["limits"]["cpus"] is not set.
  9. DEFAULT_CPUS = 2.0
  # Fallback memory limit, used when values["resources"]["limits"]["memory"] is
  # not set; the number is stored with an "M" suffix appended.
  10. DEFAULT_MEMORY = 4096
  class Resources:
      """Collect CPU, memory and NVIDIA GPU settings from the render values.

      On construction the ``resources`` section of ``render_instance.values``
      is read: the CPU and memory limits (falling back to ``DEFAULT_CPUS`` and
      ``DEFAULT_MEMORY``) and any NVIDIA GPUs flagged with ``use_gpu``.
      ``render()`` returns the resulting ``limits`` / ``reservations`` mapping.
      """

      def __init__(self, render_instance: "Render"):
          """Populate limits and GPU reservations from ``render_instance.values``.

          Raises:
              RenderError: if the cpus or memory value is malformed, or a
                  selected GPU has no ``uuid``.
          """
          self._render_instance = render_instance
          self._limits: dict = {}
          self._reservations: dict = {}
          self._nvidia_ids: set[str] = set()
          self._auto_add_cpu_from_values()
          self._auto_add_memory_from_values()
          self._auto_add_gpus_from_values()

      def _set_cpu(self, cpus: Any):
          """Validate ``cpus`` and store its string form as the ``cpus`` limit.

          Raises:
              RenderError: if ``str(cpus)`` is not an integer or decimal number
                  whose integer part starts with a digit 1-9.
          """
          c = str(cpus)
          # fullmatch instead of match + "$": "$" also matches right before a
          # trailing newline, which would let "2\n" through validation.
          if not re.fullmatch(r"[1-9][0-9]*(\.[0-9]+)?", c):
              raise RenderError(f"Expected cpus to be a number or a float (minimum 1.0), got [{cpus}]")
          self._limits["cpus"] = c

      def _set_memory(self, memory: Any):
          """Validate ``memory`` and store it as the ``memory`` limit with an ``M`` suffix.

          Raises:
              RenderError: if ``str(memory)`` is not a positive integer without
                  leading zeros.
          """
          m = str(memory)
          # fullmatch for the same reason as in _set_cpu: a trailing newline
          # must not end up inside the stored "<n>M" value.
          if not re.fullmatch(r"[1-9][0-9]*", m):
              raise RenderError(f"Expected memory to be a number, got [{memory}]")
          self._limits["memory"] = f"{m}M"

      def _auto_add_cpu_from_values(self):
          """Set the cpus limit from ``resources.limits.cpus`` or ``DEFAULT_CPUS``."""
          resources = self._render_instance.values.get("resources", {})
          self._set_cpu(resources.get("limits", {}).get("cpus", DEFAULT_CPUS))

      def _auto_add_memory_from_values(self):
          """Set the memory limit from ``resources.limits.memory`` or ``DEFAULT_MEMORY``."""
          resources = self._render_instance.values.get("resources", {})
          self._set_memory(resources.get("limits", {}).get("memory", DEFAULT_MEMORY))

      def _auto_add_gpus_from_values(self):
          """Reserve every GPU in ``resources.gpus.nvidia_gpu_selection`` marked ``use_gpu``.

          Appends a single nvidia device entry, listing the sorted UUIDs of the
          selected GPUs, to the ``devices`` reservation. Does nothing when no
          GPU is selected.

          Raises:
              RenderError: if a selected GPU has no ``uuid``.
          """
          resources = self._render_instance.values.get("resources", {})
          gpus = resources.get("gpus", {}).get("nvidia_gpu_selection", {})
          if not gpus:
              return

          for pci, gpu in gpus.items():
              if not gpu.get("use_gpu", False):
                  continue
              if not gpu.get("uuid"):
                  raise RenderError(f"Expected [uuid] to be set for GPU in slot [{pci}] in [nvidia_gpu_selection]")
              self._nvidia_ids.add(gpu["uuid"])

          if not self._nvidia_ids:
              return

          # setdefault so the list is created whenever "devices" is missing,
          # not only when the reservations dict happens to be completely empty.
          self._reservations.setdefault("devices", []).append(
              {
                  "capabilities": ["gpu"],
                  "driver": "nvidia",
                  "device_ids": sorted(self._nvidia_ids),
              }
          )

      # This is only used on ix-app that we allow
      # disabling cpus and memory. GPUs are only added
      # if the user has requested them.
      def remove_cpus_and_memory(self):
          """Drop the ``cpus`` and ``memory`` limits if they are set."""
          self._limits.pop("cpus", None)
          self._limits.pop("memory", None)

      # Mainly will be used from dependencies
      # There is no reason to pass devices to
      # redis or postgres for example
      def remove_devices(self):
          """Drop all device reservations, including any GPU entry."""
          self._reservations.pop("devices", None)

      def set_profile(self, profile: str):
          """Overwrite the cpus and memory limits with the values of a named profile.

          Raises:
              RenderError: if ``profile`` is not known to ``profile_mapping``.
          """
          cpu, memory = profile_mapping(profile)
          self._set_cpu(cpu)
          self._set_memory(memory)

      def has_resources(self):
          """Return True when at least one limit or reservation is set."""
          return bool(self._limits) or bool(self._reservations)

      def has_gpus(self):
          """Return True when any reserved device lists the ``gpu`` capability."""
          # .get: an entry without "capabilities" is simply not a GPU device.
          return any("gpu" in device.get("capabilities", []) for device in self._reservations.get("devices", []))

      def render(self):
          """Return a dict with the non-empty ``limits`` and ``reservations`` sections.

          The returned sections are the internal dicts themselves, not copies.
          """
          result = {}
          if self._limits:
              result["limits"] = self._limits
          if self._reservations:
              result["reservations"] = self._reservations
          return result
  def profile_mapping(profile: str):
      """Return the ``(cpus, memory)`` pair for a named resource profile.

      Raises:
          RenderError: if ``profile`` is not one of the known profile names.
      """
      known = {
          "low": (1, 512),
          "medium": (2, 1024),
      }
      selected = known.get(profile)
      if selected is None:
          valid = ", ".join(known)
          raise RenderError(f"Resource profile [{profile}] is not valid. Valid options are: [{valid}]")
      return selected