Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 35b4533

Browse files
committed Apr 3, 2024 ·
Convert Go raycluster_sdk_upgrade_test to Python test
1 parent 1497434 commit 35b4533

File tree

2 files changed

+180
-0
lines changed

2 files changed

+180
-0
lines changed
 
+172
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
import requests
2+
from time import sleep
3+
4+
from torchx.specs.api import AppState, is_terminal
5+
6+
from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication
7+
from codeflare_sdk.job.jobs import DDPJobDefinition
8+
9+
import pytest
10+
11+
from support import *
12+
from codeflare_sdk.cluster.cluster import get_cluster
13+
14+
# Namespace shared by both test classes in this module: TestMNISTRayClusterUp
# creates it, and TestMnistJobSubmit reuses it to look up the "mnist" cluster.
namespace = "test-ns-rayupgrade"
15+
16+
17+
# Creates a Ray cluster
18+
@pytest.mark.openshift
class TestMNISTRayClusterUp:
    """Bring up the "mnist" Ray cluster through the CodeFlare SDK.

    The cluster is created in the module-level ``namespace``. On success
    neither the cluster nor the namespace is removed, so a later test class
    can fetch the cluster by name and submit a job to it.
    """

    def setup_method(self):
        # Helper from ``support``; presumably attaches the Kubernetes API
        # clients (such as ``self.custom_api`` used below) -- TODO confirm.
        initialize_kubernetes_client(self)

    def test_mnist_ray_cluster_sdk_auth(self):
        # create_namespace_with_name also stores the name on self.namespace,
        # which the rest of this class reads.
        create_namespace_with_name(self, namespace)
        self.run_mnist_raycluster_sdk_oauth()

    def run_mnist_raycluster_sdk_oauth(self):
        """Create the cluster, wait for it to be ready and assert readiness.

        Raises:
            Exception: whatever went wrong while bringing the cluster up
                (including ``AssertionError`` from the readiness checks).
                The namespace is deleted before the error is re-raised.
        """
        ray_image = get_ray_image()

        auth = TokenAuthentication(
            token=run_oc_command(["whoami", "--show-token=true"]),
            server=run_oc_command(["whoami", "--show-server=true"]),
            skip_tls=True,
        )
        auth.login()

        cluster = Cluster(
            ClusterConfiguration(
                name="mnist",
                namespace=self.namespace,
                num_workers=1,
                head_cpus="1",
                head_memory=2,
                min_cpus="1",
                max_cpus=1,
                min_memory=1,
                max_memory=2,
                num_gpus=0,
                instascale=False,
                image=ray_image,
                openshift_oauth=True,
                write_to_file=True,
            )
        )

        try:
            cluster.up()
            # check if Appwrapper has been created in namespace
            self.assert_appwrapper_exists()
            cluster.status()
            # wait for raycluster to be Ready
            cluster.wait_ready()
            cluster.status()
            # Check cluster details
            cluster.details()
            # Assert the cluster status is READY
            _, ready = cluster.status()
            assert ready, "Cluster is not ready!"
        except Exception as e:
            print(f"An unexpected error occurred. Error: {e}")
            delete_namespace(self)
            # Re-raise: AssertionError is an Exception too, so swallowing it
            # here would let the test pass even though the cluster is broken.
            raise

    def assert_appwrapper_exists(self):
        """Fail unless the 'mnist' AppWrapper can be fetched from the namespace.

        Raises:
            AssertionError: if the lookup through ``self.custom_api`` raises.
        """
        try:
            self.custom_api.get_namespaced_custom_object(
                "workload.codeflare.dev",
                "v1beta1",
                self.namespace,
                "appwrappers",
                "mnist",
            )
        except Exception as e:
            print(f"AppWrapper 'mnist' has not been created. Error: {e}")
            raise AssertionError("AppWrapper 'mnist' has not been created.") from e
        print(
            f"AppWrapper 'mnist' has been created in the namespace: '{self.namespace}'"
        )
90+
91+
92+
class TestMnistJobSubmit:
    """Submit the MNIST job to the already-running "mnist" Ray cluster.

    Relies on the cluster created by ``TestMNISTRayClusterUp`` in the
    module-level ``namespace``; the namespace is deleted in teardown.
    """

    def setup_method(self):
        initialize_kubernetes_client(self)
        self.namespace = namespace
        self.cluster = get_cluster("mnist", self.namespace)
        if not self.cluster:
            raise RuntimeError(
                "TestMNISTRayClusterUp needs to be run before this test"
            )

    def teardown_method(self):
        delete_namespace(self)

    def test_mnist_job_submission(self):
        auth = TokenAuthentication(
            token=run_oc_command(["whoami", "--show-token=true"]),
            server=run_oc_command(["whoami", "--show-server=true"]),
            skip_tls=True,
        )
        auth.login()

        try:
            self.assert_jobsubmit_withoutLogin(self.cluster)
            self.assert_jobsubmit_withlogin(self.cluster)
        finally:
            # Tear the cluster down even when one of the checks above fails.
            self.cluster.down()

    # Assertions
    def assert_jobsubmit_withoutLogin(self, cluster):
        """Assert that an unauthenticated job submission is rejected with 403.

        Posts a job directly to the dashboard's ``/api/jobs/`` endpoint
        without credentials.

        Raises:
            AssertionError: if the request fails outright or returns any
                status other than 403.
        """
        dashboard_url = cluster.cluster_dashboard_uri()
        jobdata = {
            "entrypoint": "python mnist.py",
            "runtime_env": {
                "working_dir": "./tests/e2e/",
                "pip": "mnist_pip_requirements.txt",
            },
        }
        try:
            response = requests.post(
                dashboard_url + "/api/jobs/",
                verify=False,
                json=jobdata,
                # Without a timeout an unreachable dashboard would block the
                # whole test run indefinitely.
                timeout=60,
            )
        except requests.RequestException as e:
            print(f"An unexpected error occurred. Error: {e}")
            raise AssertionError(
                "Unauthenticated job submission request failed"
            ) from e

        if response.status_code != 403:
            raise AssertionError(
                "Unauthenticated job submission was not rejected: "
                f"expected HTTP 403, got {response.status_code}"
            )

    def assert_jobsubmit_withlogin(self, cluster, timeout=900, poll_interval=5):
        """Submit the MNIST DDP job and assert that it succeeds.

        Args:
            cluster: cluster object the job is submitted to.
            timeout: maximum number of seconds to wait for a terminal state;
                a falsy value disables the limit.
            poll_interval: seconds to sleep between status polls.

        Raises:
            TimeoutError: if the job is not terminal once ``timeout`` seconds
                of polling have elapsed.
            AssertionError: if the job ends in a state other than SUCCEEDED.
        """
        jobdef = DDPJobDefinition(
            name="mnist",
            script="./tests/e2e/mnist.py",
            scheduler_args={"requirements": "./tests/e2e/mnist_pip_requirements.txt"},
        )
        job = jobdef.submit(cluster)

        elapsed = 0
        status = job.status()
        while not is_terminal(status.state):
            print(status)
            if timeout and elapsed >= timeout:
                raise TimeoutError(f"job has timed out after waiting {timeout}s")
            sleep(poll_interval)
            elapsed += poll_interval
            status = job.status()

        print(job.status())
        self.assert_job_completion(status)

        print(job.logs())

    def assert_job_completion(self, status):
        """Assert that ``status.state`` is ``AppState.SUCCEEDED``.

        Raises:
            AssertionError: if the job ended in any other state.
        """
        print(f"Job has completed: '{status.state}'")
        if status.state != AppState.SUCCEEDED:
            raise AssertionError(
                f"Job finished in state '{status.state}', expected SUCCEEDED"
            )

‎tests/e2e/support.py

+8
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ def create_namespace(self):
2424
self.api_instance.create_namespace(namespace_body)
2525

2626

27+
def create_namespace_with_name(self, namespace_name):
    """Create a namespace called ``namespace_name`` and record it on ``self``.

    ``self.namespace`` is assigned before the API call, so it is set even if
    the create request raises.
    """
    self.namespace = namespace_name
    metadata = client.V1ObjectMeta(name=self.namespace)
    body = client.V1Namespace(metadata=metadata)
    self.api_instance.create_namespace(body)
33+
34+
2735
def delete_namespace(self):
2836
if hasattr(self, "namespace"):
2937
self.api_instance.delete_namespace(self.namespace)

0 commit comments

Comments
 (0)
Please sign in to comment.