Skip to content

Commit 596b5e1

Browse files
abhijeet-dhumal and openshift-merge-bot[bot]
authored and committed on Apr 22, 2024
Convert SDK's Go upgrade test to Python test in accordance with kueue (#494)
1 parent 4e28c9b commit 596b5e1

9 files changed

+173
-1522
lines changed
 

‎go.mod

-98
This file was deleted.

‎go.sum

-870
This file was deleted.

‎tests/__init__.py

Whitespace-only changes.

‎tests/e2e/support.go

-157
This file was deleted.

‎tests/e2e/support.py

+8
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,14 @@ def create_namespace(self):
2525
self.api_instance.create_namespace(namespace_body)
2626

2727

28+
def create_namespace_with_name(self, namespace_name):
    """Create a namespace called ``namespace_name`` and remember it on ``self``.

    The name is stored as ``self.namespace`` before the API call is made,
    and the namespace is created through ``self.api_instance``.
    """
    self.namespace = namespace_name
    metadata = client.V1ObjectMeta(name=self.namespace)
    body = client.V1Namespace(metadata=metadata)
    self.api_instance.create_namespace(body)
34+
35+
2836
def delete_namespace(self):
2937
if hasattr(self, "namespace"):
3038
self.api_instance.delete_namespace(self.namespace)

‎tests/unit_test.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
export_env,
6565
)
6666

67-
from unit_test_support import (
67+
from tests.unit_test_support import (
6868
createClusterWithConfig,
6969
createClusterConfig,
7070
)

‎tests/upgrade/__init__.py

Whitespace-only changes.

‎tests/upgrade/raycluster_sdk_upgrade_test.go

-396
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
import requests
2+
from time import sleep
3+
4+
from torchx.specs.api import AppState, is_terminal
5+
6+
from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication
7+
from codeflare_sdk.job import RayJobClient
8+
9+
import pytest
10+
11+
from tests.e2e.support import *
12+
from codeflare_sdk.cluster.cluster import get_cluster
13+
14+
# Namespace shared by both test classes in this module: the first class creates
# it, and the second looks the "mnist" cluster up in it.
namespace = "test-ns-rayupgrade"
15+
16+
17+
# Creates a Ray cluster
18+
class TestMNISTRayClusterUp:
    """Bring up the ``mnist`` Ray cluster in the shared test namespace.

    There is no teardown here on purpose: on success the namespace and the
    cluster are left running so that ``TestMnistJobSubmit`` can fetch the
    cluster by name afterwards. The namespace is only deleted when bringing
    the cluster up fails.
    """

    def setup_method(self):
        """Create the kube client, the test namespace and its local queue."""
        initialize_kubernetes_client(self)
        create_namespace_with_name(self, namespace)
        cluster_queue = "cluster-queue"  # add cluster name here
        create_local_queue(self, cluster_queue)

    def test_mnist_ray_cluster_sdk_auth(self):
        """Pytest entry point; delegates to the cluster bring-up routine."""
        self.run_mnist_raycluster_sdk_oauth()

    def run_mnist_raycluster_sdk_oauth(self):
        """Log in with the current ``oc`` token, start the cluster, assert ready.

        Raises:
            Exception: whatever went wrong while starting or checking the
                cluster (including ``AssertionError`` when it is not ready).
                The namespace is deleted before the error is re-raised.
        """
        ray_image = get_ray_image()

        auth = TokenAuthentication(
            token=run_oc_command(["whoami", "--show-token=true"]),
            server=run_oc_command(["whoami", "--show-server=true"]),
            skip_tls=True,
        )
        auth.login()

        cluster = Cluster(
            ClusterConfiguration(
                name="mnist",
                namespace=self.namespace,
                num_workers=1,
                head_cpus="1",
                head_memory=2,
                min_cpus="1",
                max_cpus=1,
                min_memory=1,
                max_memory=2,
                num_gpus=0,
                instascale=False,
                image=ray_image,
                write_to_file=True,
                mcad=False,
            )
        )

        try:
            cluster.up()
            cluster.status()
            # wait for raycluster to be Ready
            cluster.wait_ready()
            cluster.status()
            # Check cluster details
            cluster.details()
            # Assert the cluster status is READY
            _, ready = cluster.status()
            assert ready, "Cluster is not ready!"

        except Exception as e:
            print("An unexpected error occurred. Error: ", e)
            delete_namespace(self)
            # Re-raise so the test is reported as failed; swallowing the error
            # here would let a broken cluster bring-up pass silently.
            raise
72+
73+
74+
class TestMnistJobSubmit:
    """Submit the MNIST job to the already-running ``mnist`` Ray cluster.

    Relies on ``TestMNISTRayClusterUp`` having created the cluster in the
    shared namespace beforehand; the namespace is deleted on teardown.
    """

    def setup_method(self):
        """Create the kube client and fetch the existing ``mnist`` cluster.

        Raises:
            RuntimeError: if the cluster cannot be found in the namespace.
        """
        initialize_kubernetes_client(self)
        self.namespace = namespace
        self.cluster = get_cluster("mnist", self.namespace)
        if not self.cluster:
            raise RuntimeError(
                "TestMNISTRayClusterUp needs to be run before this test"
            )

    def teardown_method(self):
        """Delete the shared test namespace."""
        delete_namespace(self)

    def test_mnist_job_submission(self):
        """Log in, then check job submission without and with a bearer token."""
        auth = TokenAuthentication(
            token=run_oc_command(["whoami", "--show-token=true"]),
            server=run_oc_command(["whoami", "--show-server=true"]),
            skip_tls=True,
        )
        auth.login()

        self.assert_jobsubmit_withoutLogin(self.cluster)
        self.assert_jobsubmit_withlogin(self.cluster)
        self.cluster.down()

    # Assertions
    def assert_jobsubmit_withoutLogin(self, cluster):
        """Assert that an unauthenticated job submission is answered with 403.

        Posts the job payload straight to the dashboard's ``/api/jobs/``
        endpoint without an ``Authorization`` header.
        """
        dashboard_url = cluster.cluster_dashboard_uri()
        jobdata = {
            "entrypoint": "python mnist.py",
            "runtime_env": {
                "working_dir": "./tests/e2e/",
                "pip": "./tests/e2e/mnist_pip_requirements.txt",
            },
        }
        try:
            response = requests.post(
                dashboard_url + "/api/jobs/", verify=False, json=jobdata
            )
        except requests.RequestException as e:
            print(f"An unexpected error occurred. Error: {e}")
            raise AssertionError(
                "Unauthenticated job submission request could not be sent"
            ) from e

        # Anything other than 403 (including a 2xx) means the dashboard
        # accepted or mishandled a request that carried no credentials.
        assert response.status_code == 403, (
            "Expected HTTP 403 for unauthenticated job submission, "
            f"got {response.status_code}"
        )

    def assert_jobsubmit_withlogin(self, cluster):
        """Submit the MNIST job with a bearer token and assert it succeeds.

        Polls the job status every 5 seconds until it is terminal, prints the
        job logs, asserts completion, then deletes the job and takes the
        cluster down.

        Raises:
            TimeoutError: if the job is still not terminal after 900 seconds.
        """
        auth_token = run_oc_command(["whoami", "--show-token=true"])
        ray_dashboard = cluster.cluster_dashboard_uri()
        header = {"Authorization": f"Bearer {auth_token}"}
        ray_client = RayJobClient(address=ray_dashboard, headers=header, verify=False)

        # Submit the job
        submission_id = ray_client.submit_job(
            entrypoint="python mnist.py",
            runtime_env={
                "working_dir": "./tests/e2e/",
                "pip": "./tests/e2e/mnist_pip_requirements.txt",
            },
        )
        print(f"Submitted job with ID: {submission_id}")

        poll_interval = 5
        timeout = 900
        elapsed = 0
        status = ray_client.get_job_status(submission_id)
        while not status.is_terminal():
            print(status)
            if elapsed >= timeout:
                raise TimeoutError(f"job has timed out after waiting {timeout}s")
            sleep(poll_interval)
            elapsed += poll_interval
            status = ray_client.get_job_status(submission_id)

        logs = ray_client.get_job_logs(submission_id)
        print(logs)

        self.assert_job_completion(status)

        ray_client.delete_job(submission_id)
        # NOTE(review): test_mnist_job_submission calls cluster.down() again
        # after this helper returns; confirm the second call is intended.
        cluster.down()

    def assert_job_completion(self, status):
        """Print the final job status and assert that it equals ``"SUCCEEDED"``."""
        print(f"Job has completed: '{status}'")
        assert status == "SUCCEEDED", f"Job finished with status '{status}'"

0 commit comments

Comments
 (0)
Please sign in to comment.