Skip to content

Commit 1d4237c

Browse files
committed
use parse_quantity to convert string to raw resource
Signed-off-by: Kevin <[email protected]>
1 parent 4f5a930 commit 1d4237c

File tree

5 files changed: 54 additions and 41 deletions (+54 -41 lines changed)

Diff for: src/codeflare_sdk/cluster/cluster.py

14 additions, 20 deletions (+14 -20)
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
cluster setup queue, a list of all existing clusters, and the user's working namespace.
1919
"""
2020

21+
import re
2122
from time import sleep
2223
from typing import List, Optional, Tuple, Dict
2324

@@ -41,6 +42,7 @@
4142
RayClusterStatus,
4243
)
4344
from kubernetes import client, config
45+
from kubernetes.utils import parse_quantity
4446
import yaml
4547
import os
4648
import requests
@@ -488,26 +490,18 @@ def from_k8_cluster_object(
488490
namespace=rc["metadata"]["namespace"],
489491
machine_types=machine_types,
490492
num_workers=rc["spec"]["workerGroupSpecs"][0]["minReplicas"],
491-
min_cpus=int(
492-
rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
493-
"resources"
494-
]["requests"]["cpu"]
495-
),
496-
max_cpus=int(
497-
rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
498-
"resources"
499-
]["limits"]["cpu"]
500-
),
501-
min_memory=int(
502-
rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
503-
"resources"
504-
]["requests"]["memory"][:-1]
505-
),
506-
max_memory=int(
507-
rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
508-
"resources"
509-
]["limits"]["memory"][:-1]
510-
),
493+
min_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
494+
"containers"
495+
][0]["resources"]["requests"]["cpu"],
496+
max_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
497+
"containers"
498+
][0]["resources"]["limits"]["cpu"],
499+
min_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
500+
"containers"
501+
][0]["resources"]["requests"]["memory"],
502+
max_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
503+
"containers"
504+
][0]["resources"]["limits"]["memory"],
511505
num_gpus=int(
512506
rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
513507
"resources"

Diff for: src/codeflare_sdk/cluster/config.py

25 additions, 6 deletions (+25 -6)
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020

2121
from dataclasses import dataclass, field
2222
import pathlib
23+
import typing
2324

2425
dir = pathlib.Path(__file__).parent.parent.resolve()
2526

@@ -34,15 +35,15 @@ class ClusterConfiguration:
3435
name: str
3536
namespace: str = None
3637
head_info: list = field(default_factory=list)
37-
head_cpus: int = 2
38-
head_memory: int = 8
38+
head_cpus: typing.Union[int, str] = 2
39+
head_memory: typing.Union[int, str] = 8
3940
head_gpus: int = 0
4041
machine_types: list = field(default_factory=list) # ["m4.xlarge", "g4dn.xlarge"]
41-
min_cpus: int = 1
42-
max_cpus: int = 1
42+
min_cpus: typing.Union[int, str] = 1
43+
max_cpus: typing.Union[int, str] = 1
4344
num_workers: int = 1
44-
min_memory: int = 2
45-
max_memory: int = 2
45+
min_memory: typing.Union[int, str] = 2
46+
max_memory: typing.Union[int, str] = 2
4647
num_gpus: int = 0
4748
template: str = f"{dir}/templates/base-template.yaml"
4849
instascale: bool = False
@@ -59,5 +60,23 @@ def __post_init__(self):
5960
print(
6061
"Warning: TLS verification has been disabled - Endpoint checks will be bypassed"
6162
)
63+
self._memory_to_string()
64+
self._str_mem_no_unit_add_GB()
65+
66+
def _str_mem_no_unit_add_GB(self):
67+
if isinstance(self.head_memory, str) and self.head_memory.isdecimal():
68+
self.head_memory = f"{self.head_memory}G"
69+
if isinstance(self.min_memory, str) and self.min_memory.isdecimal():
70+
self.min_memory = f"{self.min_memory}G"
71+
if isinstance(self.max_memory, str) and self.max_memory.isdecimal():
72+
self.max_memory = f"{self.max_memory}G"
73+
74+
def _memory_to_string(self):
75+
if isinstance(self.head_memory, int):
76+
self.head_memory = f"{self.head_memory}G"
77+
if isinstance(self.min_memory, int):
78+
self.min_memory = f"{self.min_memory}G"
79+
if isinstance(self.max_memory, int):
80+
self.max_memory = f"{self.max_memory}G"
6281

6382
local_queue: str = None

Diff for: src/codeflare_sdk/utils/generate_yaml.py

6 additions, 6 deletions (+6 -6)
Original file line number | Diff line number | Diff line change
@@ -140,8 +140,8 @@ def update_custompodresources(
140140
# Leave head node resources as template default
141141
resource["requests"]["cpu"] = head_cpus
142142
resource["limits"]["cpu"] = head_cpus
143-
resource["requests"]["memory"] = str(head_memory) + "G"
144-
resource["limits"]["memory"] = str(head_memory) + "G"
143+
resource["requests"]["memory"] = head_memory
144+
resource["limits"]["memory"] = head_memory
145145
resource["requests"]["nvidia.com/gpu"] = head_gpus
146146
resource["limits"]["nvidia.com/gpu"] = head_gpus
147147

@@ -158,9 +158,9 @@ def update_custompodresources(
158158
resource[k][spec] = min_cpu
159159
if spec == "memory":
160160
if k == "limits":
161-
resource[k][spec] = str(max_memory) + "G"
161+
resource[k][spec] = max_memory
162162
else:
163-
resource[k][spec] = str(min_memory) + "G"
163+
resource[k][spec] = min_memory
164164
if spec == "nvidia.com/gpu":
165165
if i == 0:
166166
resource[k][spec] = 0
@@ -213,12 +213,12 @@ def update_resources(spec, min_cpu, max_cpu, min_memory, max_memory, gpu):
213213
requests = resource.get("resources").get("requests")
214214
if requests is not None:
215215
requests["cpu"] = min_cpu
216-
requests["memory"] = str(min_memory) + "G"
216+
requests["memory"] = min_memory
217217
requests["nvidia.com/gpu"] = gpu
218218
limits = resource.get("resources").get("limits")
219219
if limits is not None:
220220
limits["cpu"] = max_cpu
221-
limits["memory"] = str(max_memory) + "G"
221+
limits["memory"] = max_memory
222222
limits["nvidia.com/gpu"] = gpu
223223

224224

Diff for: src/codeflare_sdk/utils/pretty_print.py

1 addition, 1 deletion (+1 -1)
Original file line number | Diff line number | Diff line change
@@ -136,7 +136,7 @@ def print_clusters(clusters: List[RayCluster]):
136136
name = cluster.name
137137
dashboard = cluster.dashboard
138138
workers = str(cluster.workers)
139-
memory = str(cluster.worker_mem_min) + "~" + str(cluster.worker_mem_max)
139+
memory = f"{cluster.worker_mem_min}~{cluster.worker_mem_max}"
140140
cpu = str(cluster.worker_cpu)
141141
gpu = str(cluster.worker_gpu)
142142

Diff for: tests/unit_test.py

8 additions, 8 deletions (+8 -8)
Original file line number | Diff line number | Diff line change
@@ -248,7 +248,7 @@ def test_config_creation():
248248
assert config.name == "unit-test-cluster" and config.namespace == "ns"
249249
assert config.num_workers == 2
250250
assert config.min_cpus == 3 and config.max_cpus == 4
251-
assert config.min_memory == 5 and config.max_memory == 6
251+
assert config.min_memory == "5G" and config.max_memory == "6G"
252252
assert config.num_gpus == 7
253253
assert config.image == "quay.io/project-codeflare/ray:latest-py39-cu118"
254254
assert config.template == f"{parent}/src/codeflare_sdk/templates/base-template.yaml"
@@ -849,8 +849,8 @@ def test_ray_details(mocker, capsys):
849849
name="raytest1",
850850
status=RayClusterStatus.READY,
851851
workers=1,
852-
worker_mem_min=2,
853-
worker_mem_max=2,
852+
worker_mem_min="2G",
853+
worker_mem_max="2G",
854854
worker_cpu=1,
855855
worker_gpu=0,
856856
namespace="ns",
@@ -909,7 +909,7 @@ def test_ray_details(mocker, capsys):
909909
" │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n"
910910
" │ │ # Workers │ │ Memory CPU GPU │ │ \n"
911911
" │ │ │ │ │ │ \n"
912-
" │ │ 1 │ │ 2~2 1 0 │ │ \n"
912+
" │ │ 1 │ │ 2G~2G 1 0 │ │ \n"
913913
" │ │ │ │ │ │ \n"
914914
" │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n"
915915
" ╰───────────────────────────────────────────────────────────────╯ \n"
@@ -927,7 +927,7 @@ def test_ray_details(mocker, capsys):
927927
" │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n"
928928
" │ │ # Workers │ │ Memory CPU GPU │ │ \n"
929929
" │ │ │ │ │ │ \n"
930-
" │ │ 1 │ │ 2~2 1 0 │ │ \n"
930+
" │ │ 1 │ │ 2G~2G 1 0 │ │ \n"
931931
" │ │ │ │ │ │ \n"
932932
" │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n"
933933
" ╰───────────────────────────────────────────────────────────────╯ \n"
@@ -943,7 +943,7 @@ def test_ray_details(mocker, capsys):
943943
"│ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │\n"
944944
"│ │ # Workers │ │ Memory CPU GPU │ │\n"
945945
"│ │ │ │ │ │\n"
946-
"│ │ 1 │ │ 2~2 1 0 │ │\n"
946+
"│ │ 1 │ │ 2G~2G 1 0 │ │\n"
947947
"│ │ │ │ │ │\n"
948948
"│ ╰─────────────╯ ╰──────────────────────────────────────╯ │\n"
949949
"╰───────────────────────────────────────────────────────────────╯\n"
@@ -2436,7 +2436,7 @@ def custom_side_effect(group, version, namespace, plural, **kwargs):
24362436
and "g4dn.xlarge" in cluster_config.machine_types
24372437
)
24382438
assert cluster_config.min_cpus == 1 and cluster_config.max_cpus == 1
2439-
assert cluster_config.min_memory == 2 and cluster_config.max_memory == 2
2439+
assert cluster_config.min_memory == "2G" and cluster_config.max_memory == "2G"
24402440
assert cluster_config.num_gpus == 0
24412441
assert (
24422442
cluster_config.image
@@ -2468,7 +2468,7 @@ def test_get_cluster(mocker):
24682468
and "g4dn.xlarge" in cluster_config.machine_types
24692469
)
24702470
assert cluster_config.min_cpus == 1 and cluster_config.max_cpus == 1
2471-
assert cluster_config.min_memory == 2 and cluster_config.max_memory == 2
2471+
assert cluster_config.min_memory == "2G" and cluster_config.max_memory == "2G"
24722472
assert cluster_config.num_gpus == 0
24732473
assert cluster_config.instascale
24742474
assert (

0 commit comments

Comments
 (0)