Skip to content

Commit 0d6b04f

Browse files
committed
Added Cluster Ray Job Client demo
1 parent d04b6ac commit 0d6b04f

File tree

5 files changed

+457
-501
lines changed

5 files changed

+457
-501
lines changed

Diff for: demo-notebooks/guided-demos/2_job_client.ipynb renamed to demo-notebooks/additional-demos/ray_job_client.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"cell_type": "markdown",
55
"metadata": {},
66
"source": [
7-
"In this third demo we will go over the basics of the Ray Job Submission Client in the SDK"
7+
"In this demo we will go over the basics of the RayJobClient in the SDK"
88
]
99
},
1010
{

Diff for: demo-notebooks/guided-demos/preview_nbs/2_job_client.ipynb renamed to demo-notebooks/guided-demos/2_cluster_job_client.ipynb

+10-70
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"outputs": [],
1515
"source": [
1616
"# Import pieces from codeflare-sdk\n",
17-
"from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication, RayJobClient"
17+
"from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication"
1818
]
1919
},
2020
{
@@ -27,9 +27,8 @@
2727
"# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n",
2828
"# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n",
2929
"\n",
30-
"auth_token = \"XXXXX\" # The auth_token is used later for the RayJobClient\n",
3130
"auth = TokenAuthentication(\n",
32-
" token = auth_token,\n",
31+
" token = \"XXXXX\",\n",
3332
" server = \"XXXXX\",\n",
3433
" skip_tls=False\n",
3534
")\n",
@@ -80,14 +79,14 @@
8079
"cell_type": "markdown",
8180
"metadata": {},
8281
"source": [
83-
"### Ray Job Submission - Authorized Ray Cluster"
82+
"### Ray Job Submission"
8483
]
8584
},
8685
{
8786
"cell_type": "markdown",
8887
"metadata": {},
8988
"source": [
90-
"* Submit a job using an authorized Ray dashboard and the Job Submission Client\n",
89+
"* Initialise the Cluster Job Client \n",
9190
"* Provide an entrypoint command directed to your job script\n",
9291
"* Set up your runtime environment"
9392
]
@@ -98,16 +97,11 @@
9897
"metadata": {},
9998
"outputs": [],
10099
"source": [
101-
"# Gather the dashboard URL\n",
102-
"ray_dashboard = cluster.cluster_dashboard_uri()\n",
103-
"\n",
104-
"# Create the header for passing your bearer token\n",
105-
"header = {\n",
106-
" 'Authorization': f'Bearer {auth_token}'\n",
107-
"}\n",
108-
"\n",
109-
"# Initialize the RayJobClient\n",
110-
"client = RayJobClient(address=ray_dashboard, headers=header, verify=True)"
100+
"# Initialize the Job Submission Client\n",
101+
"\"\"\"\n",
102+
"The SDK will automatically gather the dashboard address and authenticate using the Ray Job Submission Client\n",
103+
"\"\"\"\n",
104+
"client = cluster.job_client"
111105
]
112106
},
113107
{
@@ -116,7 +110,7 @@
116110
"metadata": {},
117111
"outputs": [],
118112
"source": [
119-
"# Submit an example mnist job using the RayJobClient\n",
113+
"# Submit an example mnist job using the Job Submission Client\n",
120114
"submission_id = client.submit_job(\n",
121115
" entrypoint=\"python mnist.py\",\n",
122116
" runtime_env={\"working_dir\": \"./\",\"pip\": \"requirements.txt\"},\n",
@@ -186,60 +180,6 @@
186180
"client.delete_job(submission_id)"
187181
]
188182
},
189-
{
190-
"cell_type": "markdown",
191-
"metadata": {},
192-
"source": [
193-
"### Unauthorized Ray Cluster with the Ray Job Client"
194-
]
195-
},
196-
{
197-
"cell_type": "code",
198-
"execution_count": null,
199-
"metadata": {},
200-
"outputs": [],
201-
"source": [
202-
"\"\"\"\n",
203-
"Initialise the RayJobClient with the Ray Dashboard\n",
204-
"\"\"\"\n",
205-
"ray_dashboard = cluster.cluster_dashboard_uri()\n",
206-
"client = RayJobClient(address=ray_dashboard, verify=False)"
207-
]
208-
},
209-
{
210-
"cell_type": "code",
211-
"execution_count": null,
212-
"metadata": {},
213-
"outputs": [],
214-
"source": [
215-
"# Submit an example mnist job using the RayJobClient\n",
216-
"submission_id = client.submit_job(\n",
217-
" entrypoint=\"python mnist.py\",\n",
218-
" runtime_env={\"working_dir\": \"./\",\"pip\": \"requirements.txt\"},\n",
219-
")\n",
220-
"print(submission_id)"
221-
]
222-
},
223-
{
224-
"cell_type": "code",
225-
"execution_count": null,
226-
"metadata": {},
227-
"outputs": [],
228-
"source": [
229-
"# Stop the job \n",
230-
"client.stop_job(submission_id)"
231-
]
232-
},
233-
{
234-
"cell_type": "code",
235-
"execution_count": null,
236-
"metadata": {},
237-
"outputs": [],
238-
"source": [
239-
"# Delete the job\n",
240-
"client.delete_job(submission_id)"
241-
]
242-
},
243183
{
244184
"cell_type": "code",
245185
"execution_count": null,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"In this third demo we will go over the basics of the Ray Job Submission Client in the SDK"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": null,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"# Import pieces from codeflare-sdk\n",
17+
"from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication"
18+
]
19+
},
20+
{
21+
"cell_type": "code",
22+
"execution_count": null,
23+
"metadata": {},
24+
"outputs": [],
25+
"source": [
26+
"# Create authentication object for user permissions\n",
27+
"# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n",
28+
"# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n",
29+
"\n",
30+
"auth = TokenAuthentication(\n",
31+
" token = \"XXXXX\",\n",
32+
" server = \"XXXXX\",\n",
33+
" skip_tls=False\n",
34+
")\n",
35+
"auth.login()"
36+
]
37+
},
38+
{
39+
"cell_type": "code",
40+
"execution_count": null,
41+
"metadata": {},
42+
"outputs": [],
43+
"source": [
44+
"# Create and configure our cluster object\n",
45+
"cluster = Cluster(ClusterConfiguration(\n",
46+
" name='jobtest',\n",
47+
" namespace='default',\n",
48+
" num_workers=2,\n",
49+
" min_cpus=1,\n",
50+
" max_cpus=1,\n",
51+
" min_memory=4,\n",
52+
" max_memory=4,\n",
53+
" num_gpus=0,\n",
54+
" image=\"quay.io/project-codeflare/ray:latest-py39-cu118\"\n",
55+
"))"
56+
]
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": null,
61+
"metadata": {},
62+
"outputs": [],
63+
"source": [
64+
"# Bring up the cluster\n",
65+
"cluster.up()\n",
66+
"cluster.wait_ready()"
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": null,
72+
"metadata": {},
73+
"outputs": [],
74+
"source": [
75+
"cluster.details()"
76+
]
77+
},
78+
{
79+
"cell_type": "markdown",
80+
"metadata": {},
81+
"source": [
82+
"### Ray Job Submission"
83+
]
84+
},
85+
{
86+
"cell_type": "markdown",
87+
"metadata": {},
88+
"source": [
89+
"* Initialise the Cluster Job Client \n",
90+
"* Provide an entrypoint command directed to your job script\n",
91+
"* Set up your runtime environment"
92+
]
93+
},
94+
{
95+
"cell_type": "code",
96+
"execution_count": null,
97+
"metadata": {},
98+
"outputs": [],
99+
"source": [
100+
"# Initialize the Job Submission Client\n",
101+
"\"\"\"\n",
102+
"The SDK will automatically gather the dashboard address and authenticate using the Ray Job Submission Client\n",
103+
"\"\"\"\n",
104+
"client = cluster.job_client"
105+
]
106+
},
107+
{
108+
"cell_type": "code",
109+
"execution_count": null,
110+
"metadata": {},
111+
"outputs": [],
112+
"source": [
113+
"# Submit an example mnist job using the Job Submission Client\n",
114+
"submission_id = client.submit_job(\n",
115+
" entrypoint=\"python mnist.py\",\n",
116+
" runtime_env={\"working_dir\": \"./\",\"pip\": \"requirements.txt\"},\n",
117+
")\n",
118+
"print(submission_id)"
119+
]
120+
},
121+
{
122+
"cell_type": "code",
123+
"execution_count": null,
124+
"metadata": {},
125+
"outputs": [],
126+
"source": [
127+
"# Get the job's logs\n",
128+
"client.get_job_logs(submission_id)"
129+
]
130+
},
131+
{
132+
"cell_type": "code",
133+
"execution_count": null,
134+
"metadata": {},
135+
"outputs": [],
136+
"source": [
137+
"# Get the job's status\n",
138+
"client.get_job_status(submission_id)"
139+
]
140+
},
141+
{
142+
"cell_type": "code",
143+
"execution_count": null,
144+
"metadata": {},
145+
"outputs": [],
146+
"source": [
147+
"# Get job related info\n",
148+
"client.get_job_info(submission_id)"
149+
]
150+
},
151+
{
152+
"cell_type": "code",
153+
"execution_count": null,
154+
"metadata": {},
155+
"outputs": [],
156+
"source": [
157+
"# List all existing jobs\n",
158+
"client.list_jobs()"
159+
]
160+
},
161+
{
162+
"cell_type": "code",
163+
"execution_count": null,
164+
"metadata": {},
165+
"outputs": [],
166+
"source": [
167+
"# Iterate through the logs of a job \n",
168+
"async for lines in client.tail_job_logs(submission_id):\n",
169+
" print(lines, end=\"\") "
170+
]
171+
},
172+
{
173+
"cell_type": "code",
174+
"execution_count": null,
175+
"metadata": {},
176+
"outputs": [],
177+
"source": [
178+
"# Delete a job\n",
179+
"# Can run client.stop_job(submission_id) first if job is still running\n",
180+
"client.delete_job(submission_id)"
181+
]
182+
},
183+
{
184+
"cell_type": "code",
185+
"execution_count": null,
186+
"metadata": {},
187+
"outputs": [],
188+
"source": [
189+
"cluster.down()"
190+
]
191+
},
192+
{
193+
"cell_type": "code",
194+
"execution_count": null,
195+
"metadata": {},
196+
"outputs": [],
197+
"source": [
198+
"auth.logout()"
199+
]
200+
}
201+
],
202+
"metadata": {
203+
"kernelspec": {
204+
"display_name": "Python 3",
205+
"language": "python",
206+
"name": "python3"
207+
},
208+
"language_info": {
209+
"codemirror_mode": {
210+
"name": "ipython",
211+
"version": 3
212+
},
213+
"file_extension": ".py",
214+
"mimetype": "text/x-python",
215+
"name": "python",
216+
"nbconvert_exporter": "python",
217+
"pygments_lexer": "ipython3",
218+
"version": "3.9.18"
219+
}
220+
},
221+
"nbformat": 4,
222+
"nbformat_minor": 2
223+
}

0 commit comments

Comments
 (0)