Skip to content

Commit c9c5e51

Browse files
committed
remove: DDPJobDefinition and update tests
1 parent 18da98a commit c9c5e51

8 files changed

+67
-644
lines changed

src/codeflare_sdk/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,6 @@
1414
get_cluster,
1515
)
1616

17-
from .job import JobDefinition, Job, DDPJobDefinition, DDPJob, RayJobClient
17+
from .job import RayJobClient
1818

1919
from .utils import generate_cert

src/codeflare_sdk/job/__init__.py

-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
from .jobs import JobDefinition, Job, DDPJobDefinition, DDPJob
2-
31
from .ray_jobs import RayJobClient

src/codeflare_sdk/job/jobs.py

-207
This file was deleted.

tests/e2e/mnist_raycluster_sdk_oauth_test.py

+24-18
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@
22

33
from time import sleep
44

5-
from torchx.specs.api import AppState, is_terminal
6-
75
from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication
8-
from codeflare_sdk.job.jobs import DDPJobDefinition
6+
from codeflare_sdk.job import RayJobClient
97

108
import pytest
119

@@ -78,7 +76,7 @@ def assert_jobsubmit_withoutLogin(self, cluster):
7876
"entrypoint": "python mnist.py",
7977
"runtime_env": {
8078
"working_dir": "./tests/e2e/",
81-
"pip": "mnist_pip_requirements.txt",
79+
"pip": "./tests/e2e/mnist_pip_requirements.txt",
8280
},
8381
}
8482
try:
@@ -97,19 +95,26 @@ def assert_jobsubmit_withoutLogin(self, cluster):
9795

9896
def assert_jobsubmit_withlogin(self, cluster):
9997
self.assert_appwrapper_exists()
100-
jobdef = DDPJobDefinition(
101-
name="mnist",
102-
script="./tests/e2e/mnist.py",
103-
scheduler_args={"requirements": "./tests/e2e/mnist_pip_requirements.txt"},
98+
auth_token = run_oc_command(["whoami", "--show-token=true"])
99+
ray_dashboard = cluster.cluster_dashboard_uri()
100+
header = {"Authorization": f"Bearer {auth_token}"}
101+
client = RayJobClient(address=ray_dashboard, headers=header, verify=True)
102+
103+
# Submit the job
104+
submission_id = client.submit_job(
105+
entrypoint="python mnist.py",
106+
runtime_env={
107+
"working_dir": "./tests/e2e/",
108+
"pip": "mnist_pip_requirements.txt",
109+
},
104110
)
105-
job = jobdef.submit(cluster)
106-
111+
print(f"Submitted job with ID: {submission_id}")
107112
done = False
108113
time = 0
109114
timeout = 900
110115
while not done:
111-
status = job.status()
112-
if is_terminal(status.state):
116+
status = client.get_job_status(submission_id)
117+
if status.is_terminal():
113118
break
114119
if not done:
115120
print(status)
@@ -118,11 +123,12 @@ def assert_jobsubmit_withlogin(self, cluster):
118123
sleep(5)
119124
time += 5
120125

121-
print(job.status())
122-
self.assert_job_completion(status)
126+
logs = client.get_job_logs(submission_id)
127+
print(logs)
123128

124-
print(job.logs())
129+
self.assert_job_completion(status)
125130

131+
client.delete_job(submission_id)
126132
cluster.down()
127133

128134
def assert_appwrapper_exists(self):
@@ -143,9 +149,9 @@ def assert_appwrapper_exists(self):
143149
assert False
144150

145151
def assert_job_completion(self, status):
146-
if status.state == AppState.SUCCEEDED:
147-
print(f"Job has completed: '{status.state}'")
152+
if status == "SUCCEEDED":
153+
print(f"Job has completed: '{status}'")
148154
assert True
149155
else:
150-
print(f"Job has completed: '{status.state}'")
156+
print(f"Job has completed: '{status}'")
151157
assert False

tests/e2e/mnist_raycluster_sdk_test.py

+23-17
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,8 @@
77

88
import ray
99

10-
from torchx.specs.api import AppState, is_terminal
11-
1210
from codeflare_sdk.cluster.cluster import Cluster, ClusterConfiguration
13-
from codeflare_sdk.job.jobs import DDPJobDefinition
11+
from codeflare_sdk.job import RayJobClient
1412

1513
import pytest
1614

@@ -67,19 +65,26 @@ def run_mnist_raycluster_sdk(self):
6765

6866
cluster.details()
6967

70-
jobdef = DDPJobDefinition(
71-
name="mnist",
72-
script="./tests/e2e/mnist.py",
73-
scheduler_args={"requirements": "./tests/e2e/mnist_pip_requirements.txt"},
68+
auth_token = run_oc_command(["whoami", "--show-token=true"])
69+
ray_dashboard = cluster.cluster_dashboard_uri()
70+
header = {"Authorization": f"Bearer {auth_token}"}
71+
client = RayJobClient(address=ray_dashboard, headers=header, verify=True)
72+
73+
# Submit the job
74+
submission_id = client.submit_job(
75+
entrypoint="python mnist.py",
76+
runtime_env={
77+
"working_dir": "./tests/e2e/",
78+
"pip": "./tests/e2e/mnist_pip_requirements.txt",
79+
},
7480
)
75-
job = jobdef.submit(cluster)
76-
81+
print(f"Submitted job with ID: {submission_id}")
7782
done = False
7883
time = 0
7984
timeout = 900
8085
while not done:
81-
status = job.status()
82-
if is_terminal(status.state):
86+
status = client.get_job_status(submission_id)
87+
if status.is_terminal():
8388
break
8489
if not done:
8590
print(status)
@@ -88,11 +93,12 @@ def run_mnist_raycluster_sdk(self):
8893
sleep(5)
8994
time += 5
9095

91-
print(job.status())
92-
self.assert_job_completion(status)
96+
logs = client.get_job_logs(submission_id)
97+
print(logs)
9398

94-
print(job.logs())
99+
self.assert_job_completion(status)
95100

101+
client.delete_job(submission_id)
96102
cluster.down()
97103

98104
# Assertions
@@ -127,9 +133,9 @@ def assert_raycluster_exists(self):
127133
assert False
128134

129135
def assert_job_completion(self, status):
130-
if status.state == AppState.SUCCEEDED:
131-
print(f"Job has completed: '{status.state}'")
136+
if status == "SUCCEEDED":
137+
print(f"Job has completed: '{status}'")
132138
assert True
133139
else:
134-
print(f"Job has completed: '{status.state}'")
140+
print(f"Job has completed: '{status}'")
135141
assert False

0 commit comments

Comments
 (0)