Skip to content

Commit 6ca9440

Browse files
Adjusted support method to use custom_api client to create kueue resources
1 parent 134e072 commit 6ca9440

File tree

3 files changed, with 113 additions and 65 deletions.

Diff for: tests/e2e/kueue_resources_setup.sh

-53
This file was deleted.

Diff for: tests/e2e/support.py

+105 -4
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import subprocess
55
from kubernetes import client, config
66
import kubernetes.client
7+
from codeflare_sdk.utils.kube_api_helpers import _kube_api_error_handling
78

89

910
def get_ray_image():
@@ -58,7 +59,107 @@ def run_oc_command(args):
5859
return None
5960

6061

61-
def create_kueue_resources(self):
62-
# Set executable permissions
63-
os.chmod("tests/e2e/kueue_resources_setup.sh", 0o755)
64-
subprocess.call(["bash", "tests/e2e/kueue_resources_setup.sh", self.namespace])
62+
def create_kueue_resources(
    self,
    cluster_queue="cluster-queue-mnist",
    flavor="default-flavor-mnist",
    local_queue="local-queue-mnist",
):
    """Ensure the Kueue ResourceFlavor, ClusterQueue and LocalQueue exist.

    Each object is looked up through ``self.custom_api`` first and is only
    created when the lookup reports HTTP 404 (not found), so calling this
    repeatedly against the same cluster is safe.

    Args:
        self: Test instance providing ``custom_api`` (a custom-objects API
            client) and ``namespace`` (namespace that receives the
            LocalQueue).
        cluster_queue: Name of the cluster-scoped ClusterQueue.
        flavor: Name of the cluster-scoped ResourceFlavor.
        local_queue: Name of the LocalQueue created in ``self.namespace``.

    Raises:
        Exception: Any lookup error whose ``status`` is not 404, and any
            error raised while creating an object, is propagated unchanged.
    """
    print("creating Kueue resources ...")
    group = "kueue.x-k8s.io"
    version = "v1beta1"

    resource_flavor_json = {
        "apiVersion": "kueue.x-k8s.io/v1beta1",
        "kind": "ResourceFlavor",
        "metadata": {"name": flavor},
    }
    cluster_queue_json = {
        "apiVersion": "kueue.x-k8s.io/v1beta1",
        "kind": "ClusterQueue",
        "metadata": {"name": cluster_queue},
        "spec": {
            "namespaceSelector": {},
            "resourceGroups": [
                {
                    "coveredResources": ["cpu", "memory", "nvidia.com/gpu"],
                    "flavors": [
                        {
                            "name": flavor,
                            "resources": [
                                {"name": "cpu", "nominalQuota": 9},
                                {"name": "memory", "nominalQuota": "36Gi"},
                                {"name": "nvidia.com/gpu", "nominalQuota": 0},
                            ],
                        }
                    ],
                }
            ],
        },
    }
    local_queue_json = {
        "apiVersion": "kueue.x-k8s.io/v1beta1",
        "kind": "LocalQueue",
        "metadata": {
            "namespace": self.namespace,
            "name": local_queue,
            "annotations": {"kueue.x-k8s.io/default-queue": "true"},
        },
        "spec": {"clusterQueue": cluster_queue},
    }

    # Resource flavor (cluster-scoped).
    if _kueue_object_missing(
        self.custom_api.get_cluster_custom_object,
        group=group,
        plural="resourceflavors",
        version=version,
        name=flavor,
    ):
        self.custom_api.create_cluster_custom_object(
            group=group,
            plural="resourceflavors",
            version=version,
            body=resource_flavor_json,
        )
        print(f"'{flavor}' created!")
    else:
        print(f"'{flavor}' already exists")

    # Cluster queue (cluster-scoped); references the flavor created above.
    if _kueue_object_missing(
        self.custom_api.get_cluster_custom_object,
        group=group,
        plural="clusterqueues",
        version=version,
        name=cluster_queue,
    ):
        self.custom_api.create_cluster_custom_object(
            group=group,
            plural="clusterqueues",
            version=version,
            body=cluster_queue_json,
        )
        print(f"'{cluster_queue}' created")
    else:
        print(f"'{cluster_queue}' already exists")

    # Local queue (namespaced); points at the cluster queue created above.
    if _kueue_object_missing(
        self.custom_api.get_namespaced_custom_object,
        group=group,
        namespace=self.namespace,
        plural="localqueues",
        version=version,
        name=local_queue,
    ):
        self.custom_api.create_namespaced_custom_object(
            group=group,
            namespace=self.namespace,
            plural="localqueues",
            version=version,
            body=local_queue_json,
        )
        print(f"'{local_queue}' created in namespace '{self.namespace}'")
    else:
        print(f"'{local_queue}' already exists in namespace '{self.namespace}'")


def _kueue_object_missing(getter, **lookup):
    """Return True if ``getter(**lookup)`` fails with status 404, else False.

    Errors that do not carry ``status == 404`` (permission problems,
    connection failures, programming errors) are re-raised so they are not
    mistaken for "object does not exist yet".
    """
    try:
        getter(**lookup)
    except Exception as error:
        # Duck-typed check: API errors expose the HTTP code as ``status``.
        if getattr(error, "status", None) != 404:
            raise
        return True
    return False

Diff for: tests/upgrade/raycluster_sdk_upgrade_test.py

+8 -8
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,14 @@
11
import requests
22
from time import sleep
33

4-
from torchx.specs.api import AppState, is_terminal
5-
64
from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication
75
from codeflare_sdk.job import RayJobClient
86

9-
import pytest
10-
117
from tests.e2e.support import *
128
from codeflare_sdk.cluster.cluster import get_cluster
139

10+
from codeflare_sdk.utils.kube_api_helpers import _kube_api_error_handling
11+
1412
namespace = "test-ns-rayupgrade"
1513

1614

@@ -19,8 +17,11 @@ class TestMNISTRayClusterUp:
1917
def setup_method(self):
2018
initialize_kubernetes_client(self)
2119
create_namespace_with_name(self, namespace)
22-
cluster_queue = "cluster-queue" # add cluster-queue name here
23-
create_local_queue(self, cluster_queue)
20+
try:
21+
create_kueue_resources(self)
22+
except Exception as e:
23+
delete_namespace(self)
24+
return _kube_api_error_handling(e)
2425

2526
def test_mnist_ray_cluster_sdk_auth(self):
2627
self.run_mnist_raycluster_sdk_oauth()
@@ -57,8 +58,7 @@ def run_mnist_raycluster_sdk_oauth(self):
5758
cluster.up()
5859
cluster.status()
5960
# wait for raycluster to be Ready
60-
# cluster.wait_ready() #temporarily broken
61-
sleep(60)
61+
cluster.wait_ready()
6262
cluster.status()
6363
# Check cluster details
6464
cluster.details()

0 commit comments

Comments
 (0)