From 785432e8737c6f833c3513c30c57821c6c6e5a0a Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Thu, 21 Mar 2024 14:51:53 +0000 Subject: [PATCH 01/12] Added demo notebook for Kueue by default Updated existing notebooks in favour of Kueue specific nb Updated wording --- .../additional-demos/hf_interactive.ipynb | 14 +- .../additional-demos/local_interactive.ipynb | 158 +++--------------- demo-notebooks/guided-demos/0_basic_ray.ipynb | 13 +- .../guided-demos/1_basic_instascale.ipynb | 9 +- .../guided-demos/2_job_client.ipynb | 77 +++++++++ .../guided-demos/3_basic_interactive.ipynb | 13 +- .../notebook-ex-outputs/0_basic_ray.ipynb | 13 +- .../1_basic_instascale.ipynb | 9 +- .../3_basic_interactive.ipynb | 13 +- .../preview_nbs/0_basic_ray.ipynb | 13 +- .../preview_nbs/1_basic_instascale.ipynb | 9 +- .../preview_nbs/3_basic_interactive.ipynb | 13 +- 12 files changed, 174 insertions(+), 180 deletions(-) diff --git a/demo-notebooks/additional-demos/hf_interactive.ipynb b/demo-notebooks/additional-demos/hf_interactive.ipynb index 37216b5d5..28441037e 100644 --- a/demo-notebooks/additional-demos/hf_interactive.ipynb +++ b/demo-notebooks/additional-demos/hf_interactive.ipynb @@ -68,10 +68,12 @@ "id": "bc27f84c", "metadata": {}, "source": [ - "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding AppWrapper).\n", + "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding Ray Cluster).\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image." 
+ "The example here is a community image.\n", + "\n", + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { @@ -89,7 +91,8 @@ } ], "source": [ - "# Create our cluster and submit appwrapper\n", + "# Create our cluster and submit\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", "cluster = Cluster(ClusterConfiguration(name='hfgputest', \n", " namespace=\"default\",\n", " num_workers=1,\n", @@ -99,7 +102,8 @@ " max_memory=16, \n", " num_gpus=4,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " instascale=True, machine_types=[\"m5.xlarge\", \"p3.8xlarge\"]))" + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", + " ))" ] }, { @@ -107,7 +111,7 @@ "id": "12eef53c", "metadata": {}, "source": [ - "Next, we want to bring our cluster up, so we call the `up()` function below to submit our cluster AppWrapper yaml onto the MCAD queue, and begin the process of obtaining our resource cluster." + "Next, we want to bring our cluster up, so we call the `up()` function below to submit our Ray Cluster onto the queue, and begin the process of obtaining our resource cluster." ] }, { diff --git a/demo-notebooks/additional-demos/local_interactive.ipynb b/demo-notebooks/additional-demos/local_interactive.ipynb index 674a655ea..bd2597563 100644 --- a/demo-notebooks/additional-demos/local_interactive.ipynb +++ b/demo-notebooks/additional-demos/local_interactive.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "9a44568b-61ef-41c7-8ad1-9a3b128f03a7", "metadata": { "tags": [] @@ -36,7 +36,9 @@ "source": [ "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image." 
+ "The example here is a community image.\n", + "\n", + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { @@ -48,7 +50,8 @@ }, "outputs": [], "source": [ - "# Create our cluster and submit appwrapper\n", + "# Create our cluster and submit\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", "namespace = \"default\"\n", "cluster_name = \"hfgputest-1\"\n", "local_interactive = True\n", @@ -63,13 +66,13 @@ " max_memory=4,\n", " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " instascale=False,\n", - " machine_types=[\"m5.xlarge\", \"p3.8xlarge\"]))" + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", + " ))" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "69968140-15e6-482f-9529-82b0cd19524b", "metadata": { "tags": [] @@ -81,21 +84,12 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "e20f9982-f671-460b-8c22-3d62e101fed9", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Waiting for requested resources to be set up...\n", - "Requested cluster up and running!\n" - ] - } - ], + "outputs": [], "source": [ "cluster.wait_ready()" ] @@ -124,82 +118,13 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "9483bb98-33b3-4beb-9b15-163d7e76c1d7", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-06-27 19:14:16,088\tINFO client_builder.py:251 -- Passing the following kwargs to ray.init() on the server: logging_level\n", - "2023-06-27 19:14:16,100\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.IDLE\n", - "2023-06-27 19:14:16,308\tDEBUG worker.py:378 -- client gRPC 
channel state change: ChannelConnectivity.CONNECTING\n", - "2023-06-27 19:14:16,434\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.READY\n", - "2023-06-27 19:14:16,436\tDEBUG worker.py:807 -- Pinging server.\n", - "2023-06-27 19:14:18,634\tDEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000001000000\n", - "2023-06-27 19:14:18,635\tDEBUG worker.py:564 -- Scheduling task get_dashboard_url 0 b'\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x00\\x00\\x00\\x01\\x00\\x00\\x00'\n", - "2023-06-27 19:14:18,645\tDEBUG worker.py:640 -- Retaining c8ef45ccd0112571ffffffffffffffffffffffff0100000001000000\n", - "2023-06-27 19:14:19,454\tDEBUG worker.py:636 -- Releasing c8ef45ccd0112571ffffffffffffffffffffffff0100000001000000\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "
\n", - "

Ray

\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - "\n", - "
Python version:3.8.13
Ray version: 2.1.0
Dashboard:http://10.254.20.41:8265
\n", - "
\n", - "
\n" - ], - "text/plain": [ - "ClientContext(dashboard_url='10.254.20.41:8265', python_version='3.8.13', ray_version='2.1.0', ray_commit='23f34d948dae8de9b168667ab27e6cf940b3ae85', protocol_version='2022-10-05', _num_clients=1, _context_to_restore=)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import ray\n", "\n", @@ -209,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "3436eb4a-217c-4109-a3c3-309fda7e2442", "metadata": {}, "outputs": [], @@ -233,72 +158,33 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "5cca1874-2be3-4631-ae48-9adfa45e3af3", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-06-27 19:14:28,222\tDEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000002000000\n", - "2023-06-27 19:14:28,222\tDEBUG worker.py:564 -- Scheduling task heavy_calculation 0 b'\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x00\\x00\\x00\\x02\\x00\\x00\\x00'\n" - ] - } - ], + "outputs": [], "source": [ "ref = heavy_calculation.remote(3000)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "01172c29-e8bf-41ef-8db5-eccb07906111", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-06-27 19:14:29,202\tDEBUG worker.py:640 -- Retaining 16310a0f0a45af5cffffffffffffffffffffffff0100000001000000\n", - "2023-06-27 19:14:31,224\tDEBUG worker.py:439 -- Internal retry for get [ClientObjectRef(16310a0f0a45af5cffffffffffffffffffffffff0100000001000000)]\n" - ] - }, - { - "data": { - "text/plain": [ - "1789.4644387076714" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "ray.get(ref)" ] }, { "cell_type": 
"code", - "execution_count": 10, + "execution_count": null, "id": "9e79b547-a457-4232-b77d-19147067b972", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-06-27 19:14:33,161\tDEBUG dataclient.py:287 -- Got unawaited response connection_cleanup {\n", - "}\n", - "\n", - "2023-06-27 19:14:34,460\tDEBUG dataclient.py:278 -- Shutting down data channel.\n" - ] - } - ], + "outputs": [], "source": [ "ray.cancel(ref)\n", "ray.shutdown()" @@ -306,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "2c198f1f-68bf-43ff-a148-02b5cb000ff2", "metadata": {}, "outputs": [], @@ -339,7 +225,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.17" + "version": "3.9.18" }, "vscode": { "interpreter": { diff --git a/demo-notebooks/guided-demos/0_basic_ray.ipynb b/demo-notebooks/guided-demos/0_basic_ray.ipynb index 205f02175..9b6aace71 100644 --- a/demo-notebooks/guided-demos/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/0_basic_ray.ipynb @@ -45,10 +45,12 @@ "id": "bc27f84c", "metadata": {}, "source": [ - "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding AppWrapper).\n", + "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image." 
+ "The example here is a community image.\n", + "\n", + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { @@ -58,7 +60,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Create and configure our cluster object (and appwrapper)\n", + "# Create and configure our cluster object\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", "cluster = Cluster(ClusterConfiguration(\n", " name='raytest',\n", " namespace='default',\n", @@ -69,7 +72,7 @@ " max_memory=4,\n", " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " instascale=False\n", + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] }, @@ -78,7 +81,7 @@ "id": "12eef53c", "metadata": {}, "source": [ - "Next, we want to bring our cluster up, so we call the `up()` function below to submit our cluster AppWrapper yaml onto the MCAD queue, and begin the process of obtaining our resource cluster." + "Next, we want to bring our cluster up, so we call the `up()` function below to submit our Ray Cluster onto the queue, and begin the process of obtaining our resource cluster." ] }, { diff --git a/demo-notebooks/guided-demos/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/1_basic_instascale.ipynb index 418737eb6..07983c6e6 100644 --- a/demo-notebooks/guided-demos/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/1_basic_instascale.ipynb @@ -5,7 +5,9 @@ "id": "9865ee8c", "metadata": {}, "source": [ - "In this second notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments)." 
+ "In this second notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments).\n", + "\n", + "NOTE: The InstaScale and MCAD components are in Tech Preview" ] }, { @@ -45,7 +47,9 @@ "This time, we are working in a cloud environment, and our OpenShift cluster does not have the resources needed for our desired workloads. We will use InstaScale to dynamically scale-up guaranteed resources based on our request (that will also automatically scale-down when we are finished working):\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image." + "The example here is a community image.\n", + "\n", + "NOTE: This specific demo requires MCAD and InstaScale to be enabled on the Cluster" ] }, { @@ -66,6 +70,7 @@ " max_memory=8,\n", " num_gpus=1,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " mcad=True, # Enable MCAD\n", " instascale=True, # InstaScale now enabled, will scale OCP cluster to guarantee resource request\n", " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"] # Head, worker AWS machine types desired\n", "))" diff --git a/demo-notebooks/guided-demos/2_job_client.ipynb b/demo-notebooks/guided-demos/2_job_client.ipynb index 7b3d619b4..73bd5dd9d 100644 --- a/demo-notebooks/guided-demos/2_job_client.ipynb +++ b/demo-notebooks/guided-demos/2_job_client.ipynb @@ -52,7 +52,13 @@ " min_memory=4,\n", " max_memory=4,\n", " num_gpus=0,\n", +<<<<<<< HEAD:demo-notebooks/guided-demos/2_job_client.ipynb " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\"\n", +======= + " mcad=True,\n", + " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " instascale=False\n", +>>>>>>> 583747f (Added demo notebook for Kueue by default):demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb "))" ] }, @@ 
-252,6 +258,77 @@ { "cell_type": "code", "execution_count": null, +<<<<<<< HEAD:demo-notebooks/guided-demos/2_job_client.ipynb +======= + "id": "496139cc", + "metadata": {}, + "outputs": [], + "source": [ + "jobdef = DDPJobDefinition(\n", + " name=\"mnistjob\",\n", + " script=\"mnist.py\",\n", + " scheduler_args={\"namespace\": \"default\"},\n", + " j=\"1x1\",\n", + " gpu=0,\n", + " cpu=1,\n", + " memMB=8000,\n", + " mcad=True,\n", + " image=\"quay.io/project-codeflare/mnist-job-test:v0.0.1\"\n", + ")\n", + "job = jobdef.submit()" + ] + }, + { + "cell_type": "markdown", + "id": "0837e43b", + "metadata": {}, + "source": [ + "Once again, we can look at job status and logs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d18d42c", + "metadata": {}, + "outputs": [], + "source": [ + "job.status()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36d7ea97", + "metadata": {}, + "outputs": [], + "source": [ + "job.logs()" + ] + }, + { + "cell_type": "markdown", + "id": "aebf376a", + "metadata": {}, + "source": [ + "This time, once the pods complete, we can clean them up alongside any other associated resources. 
The following command can also be used to delete jobs early for both Ray and MCAD submission:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebbb0674", + "metadata": {}, + "outputs": [], + "source": [ + "job.cancel()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d41b90e", +>>>>>>> 583747f (Added demo notebook for Kueue by default):demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb "metadata": {}, "outputs": [], "source": [ diff --git a/demo-notebooks/guided-demos/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/3_basic_interactive.ipynb index 090a4a305..346d91890 100644 --- a/demo-notebooks/guided-demos/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/3_basic_interactive.ipynb @@ -5,7 +5,7 @@ "id": "bbc21043", "metadata": {}, "source": [ - "In this fourth and final notebook, we will go over how to leverage the SDK to directly work interactively with a Ray cluster during development." + "In this fourth notebook, we will go over how to leverage the SDK to directly work interactively with a Ray Cluster during development." ] }, { @@ -45,7 +45,9 @@ "Once again, let's start by running through the same cluster setup as before:\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image." 
+ "The example here is a community image.\n", + "\n", + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { @@ -55,7 +57,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Create and configure our cluster object (and appwrapper)\n", + "# Create and configure our cluster object\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", "cluster = Cluster(ClusterConfiguration(\n", " name='interactivetest',\n", " namespace='default',\n", @@ -66,9 +69,7 @@ " max_memory=8,\n", " num_gpus=1,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " instascale=True, #<---instascale enabled\n", - " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"]\n", - " \n", + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] }, diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb index d8f6c34c4..ccc59cb04 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb @@ -45,10 +45,12 @@ "id": "bc27f84c", "metadata": {}, "source": [ - "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding AppWrapper).\n", + "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image." 
+ "The example here is a community image.\n", + "\n", + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { @@ -66,7 +68,8 @@ } ], "source": [ - "# Create and configure our cluster object (and appwrapper)\n", + "# Create and configure our cluster object\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", "cluster = Cluster(ClusterConfiguration(\n", " name='raytest',\n", " namespace='default',\n", @@ -77,7 +80,7 @@ " max_memory=4,\n", " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " instascale=False\n", + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] }, @@ -354,7 +357,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.17" + "version": "3.9.undefined" }, "vscode": { "interpreter": { diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb index 4b28b2058..ba8426dc0 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb @@ -5,7 +5,9 @@ "id": "9865ee8c", "metadata": {}, "source": [ - "In this second notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments)." 
+ "In this second notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments).\n", + "\n", + "NOTE: The InstaScale and MCAD components are in Tech Preview" ] }, { @@ -45,7 +47,9 @@ "This time, we are working in a cloud environment, and our OpenShift cluster does not have the resources needed for our desired workloads. We will use InstaScale to dynamically scale-up guaranteed resources based on our request (that will also automatically scale-down when we are finished working):\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image." + "The example here is a community image.\n", + "\n", + "NOTE: This specific demo requires MCAD and InstaScale to be enabled on the Cluster" ] }, { @@ -74,6 +78,7 @@ " max_memory=8,\n", " num_gpus=1,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " mcad=True, # Enable MCAD\n", " instascale=True, # InstaScale now enabled, will scale OCP cluster to guarantee resource request\n", " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"] # Head, worker AWS machine types desired\n", "))" diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb index 7ac004706..de673ed48 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb @@ -5,7 +5,7 @@ "id": "bbc21043", "metadata": {}, "source": [ - "In this fourth and final notebook, we will go over how to leverage the SDK to directly work interactively with a Ray cluster during development." + "In this notebook, we will go over how to leverage the SDK to directly work interactively with a Ray cluster during development." 
] }, { @@ -45,7 +45,9 @@ "Once again, let's start by running through the same cluster setup as before:\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image." + "The example here is a community image.\n", + "\n", + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { @@ -63,7 +65,8 @@ } ], "source": [ - "# Create and configure our cluster object (and appwrapper)\n", + "# Create and configure our cluster object\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", "cluster = Cluster(ClusterConfiguration(\n", " name='interactivetest',\n", " namespace='default',\n", @@ -74,9 +77,7 @@ " max_memory=8,\n", " num_gpus=1,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " instascale=True, #<---instascale enabled\n", - " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"]\n", - " \n", + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] }, diff --git a/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb index b0f12d4ba..7ea8e9e48 100644 --- a/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb @@ -45,10 +45,12 @@ "id": "bc27f84c", "metadata": {}, "source": [ - "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding AppWrapper).\n", + "Here, we want to define our cluster by specifying the resources we require for our batch workload. 
Below, we define our cluster object (which generates a corresponding RayCluster).\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image." + "The example here is a community image.\n", + "\n", + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { @@ -58,7 +60,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Create and configure our cluster object (and appwrapper)\n", + "# Create and configure our cluster object\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", "cluster = Cluster(ClusterConfiguration(\n", " name='raytest',\n", " namespace='default',\n", @@ -69,7 +72,7 @@ " max_memory=4,\n", " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " instascale=False\n", + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] }, @@ -78,7 +81,7 @@ "id": "12eef53c", "metadata": {}, "source": [ - "Next, we want to bring our cluster up, so we call the `up()` function below to submit our cluster AppWrapper yaml onto the MCAD queue, and begin the process of obtaining our resource cluster." + "Next, we want to bring our cluster up, so we call the `up()` function below to submit our Ray Cluster onto the queue, and begin the process of obtaining our resource cluster." 
] }, { diff --git a/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb index 418737eb6..07983c6e6 100644 --- a/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb @@ -5,7 +5,9 @@ "id": "9865ee8c", "metadata": {}, "source": [ - "In this second notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments)." + "In this second notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments).\n", + "\n", + "NOTE: The InstaScale and MCAD components are in Tech Preview" ] }, { @@ -45,7 +47,9 @@ "This time, we are working in a cloud environment, and our OpenShift cluster does not have the resources needed for our desired workloads. We will use InstaScale to dynamically scale-up guaranteed resources based on our request (that will also automatically scale-down when we are finished working):\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image." 
+ "The example here is a community image.\n", + "\n", + "NOTE: This specific demo requires MCAD and InstaScale to be enabled on the Cluster" ] }, { @@ -66,6 +70,7 @@ " max_memory=8,\n", " num_gpus=1,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " mcad=True, # Enable MCAD\n", " instascale=True, # InstaScale now enabled, will scale OCP cluster to guarantee resource request\n", " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"] # Head, worker AWS machine types desired\n", "))" diff --git a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb index 090a4a305..30665bbab 100644 --- a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb @@ -5,7 +5,7 @@ "id": "bbc21043", "metadata": {}, "source": [ - "In this fourth and final notebook, we will go over how to leverage the SDK to directly work interactively with a Ray cluster during development." + "In this notebook, we will go over how to leverage the SDK to directly work interactively with a Ray Cluster during development." ] }, { @@ -45,7 +45,9 @@ "Once again, let's start by running through the same cluster setup as before:\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image." 
+ "The example here is a community image.\n", + "\n", + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { @@ -55,7 +57,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Create and configure our cluster object (and appwrapper)\n", + "# Create and configure our cluster object\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", "cluster = Cluster(ClusterConfiguration(\n", " name='interactivetest',\n", " namespace='default',\n", @@ -66,9 +69,7 @@ " max_memory=8,\n", " num_gpus=1,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " instascale=True, #<---instascale enabled\n", - " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"]\n", - " \n", + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] }, From 8132578350d7ee2da93a785ad281613a5f497a2b Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Fri, 5 Apr 2024 17:58:50 +0100 Subject: [PATCH 02/12] Fixed nb --- .../guided-demos/2_job_client.ipynb | 77 ------------------- 1 file changed, 77 deletions(-) diff --git a/demo-notebooks/guided-demos/2_job_client.ipynb b/demo-notebooks/guided-demos/2_job_client.ipynb index 73bd5dd9d..7b3d619b4 100644 --- a/demo-notebooks/guided-demos/2_job_client.ipynb +++ b/demo-notebooks/guided-demos/2_job_client.ipynb @@ -52,13 +52,7 @@ " min_memory=4,\n", " max_memory=4,\n", " num_gpus=0,\n", -<<<<<<< HEAD:demo-notebooks/guided-demos/2_job_client.ipynb " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\"\n", -======= - " mcad=True,\n", - " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " instascale=False\n", ->>>>>>> 583747f (Added demo notebook for Kueue by default):demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb "))" ] }, @@ -258,77 +252,6 @@ { "cell_type": "code", "execution_count": null, -<<<<<<< 
HEAD:demo-notebooks/guided-demos/2_job_client.ipynb -======= - "id": "496139cc", - "metadata": {}, - "outputs": [], - "source": [ - "jobdef = DDPJobDefinition(\n", - " name=\"mnistjob\",\n", - " script=\"mnist.py\",\n", - " scheduler_args={\"namespace\": \"default\"},\n", - " j=\"1x1\",\n", - " gpu=0,\n", - " cpu=1,\n", - " memMB=8000,\n", - " mcad=True,\n", - " image=\"quay.io/project-codeflare/mnist-job-test:v0.0.1\"\n", - ")\n", - "job = jobdef.submit()" - ] - }, - { - "cell_type": "markdown", - "id": "0837e43b", - "metadata": {}, - "source": [ - "Once again, we can look at job status and logs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3d18d42c", - "metadata": {}, - "outputs": [], - "source": [ - "job.status()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "36d7ea97", - "metadata": {}, - "outputs": [], - "source": [ - "job.logs()" - ] - }, - { - "cell_type": "markdown", - "id": "aebf376a", - "metadata": {}, - "source": [ - "This time, once the pods complete, we can clean them up alongside any other associated resources. 
The following command can also be used to delete jobs early for both Ray and MCAD submission:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ebbb0674", - "metadata": {}, - "outputs": [], - "source": [ - "job.cancel()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d41b90e", ->>>>>>> 583747f (Added demo notebook for Kueue by default):demo-notebooks/guided-demos/preview_nbs/2_basic_jobs.ipynb "metadata": {}, "outputs": [], "source": [ From d04b6acbaf4feae42b3117c68e70cc3d1a137b39 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Mon, 22 Apr 2024 12:04:03 +0100 Subject: [PATCH 03/12] Ray mTLS for basic Interactive --- .../additional-demos/local_interactive.ipynb | 167 +++++++++++++++--- .../guided-demos/3_basic_interactive.ipynb | 19 +- .../3_basic_interactive.ipynb | 19 +- .../preview_nbs/3_basic_interactive.ipynb | 21 ++- 4 files changed, 190 insertions(+), 36 deletions(-) diff --git a/demo-notebooks/additional-demos/local_interactive.ipynb b/demo-notebooks/additional-demos/local_interactive.ipynb index bd2597563..31f1ad17c 100644 --- a/demo-notebooks/additional-demos/local_interactive.ipynb +++ b/demo-notebooks/additional-demos/local_interactive.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "9a44568b-61ef-41c7-8ad1-9a3b128f03a7", "metadata": { "tags": [] @@ -36,9 +36,7 @@ "source": [ "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image.\n", - "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" + "The example here is a community image." 
] }, { @@ -50,14 +48,11 @@ }, "outputs": [], "source": [ - "# Create our cluster and submit\n", - "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", + "# Create and submit our Ray Cluster\n", "namespace = \"default\"\n", "cluster_name = \"hfgputest-1\"\n", - "local_interactive = True\n", "\n", - "cluster = Cluster(ClusterConfiguration(local_interactive=local_interactive,\n", - " namespace=namespace,\n", + "cluster = Cluster(ClusterConfiguration(namespace=namespace,\n", " name=cluster_name,\n", " num_workers=1,\n", " min_cpus=1,\n", @@ -65,14 +60,12 @@ " min_memory=4,\n", " max_memory=4,\n", " num_gpus=0,\n", - " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", - " ))" + " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "69968140-15e6-482f-9529-82b0cd19524b", "metadata": { "tags": [] @@ -84,12 +77,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "e20f9982-f671-460b-8c22-3d62e101fed9", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Waiting for requested resources to be set up...\n", + "Requested cluster up and running!\n" + ] + } + ], "source": [ "cluster.wait_ready()" ] @@ -110,21 +112,89 @@ "outputs": [], "source": [ "from codeflare_sdk import generate_cert\n", - "\n", - "if local_interactive:\n", - " generate_cert.generate_tls_cert(cluster_name, namespace)\n", - " generate_cert.export_env(cluster_name, namespace)" + "# Export the environment variables to enable TLS\n", + "generate_cert.generate_tls_cert(cluster_name, namespace)\n", + "generate_cert.export_env(cluster_name, namespace)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": 
"9483bb98-33b3-4beb-9b15-163d7e76c1d7", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-06-27 19:14:16,088\tINFO client_builder.py:251 -- Passing the following kwargs to ray.init() on the server: logging_level\n", + "2023-06-27 19:14:16,100\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.IDLE\n", + "2023-06-27 19:14:16,308\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.CONNECTING\n", + "2023-06-27 19:14:16,434\tDEBUG worker.py:378 -- client gRPC channel state change: ChannelConnectivity.READY\n", + "2023-06-27 19:14:16,436\tDEBUG worker.py:807 -- Pinging server.\n", + "2023-06-27 19:14:18,634\tDEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000001000000\n", + "2023-06-27 19:14:18,635\tDEBUG worker.py:564 -- Scheduling task get_dashboard_url 0 b'\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x00\\x00\\x00\\x01\\x00\\x00\\x00'\n", + "2023-06-27 19:14:18,645\tDEBUG worker.py:640 -- Retaining c8ef45ccd0112571ffffffffffffffffffffffff0100000001000000\n", + "2023-06-27 19:14:19,454\tDEBUG worker.py:636 -- Releasing c8ef45ccd0112571ffffffffffffffffffffffff0100000001000000\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Ray

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "
Python version:3.8.13
Ray version: 2.1.0
Dashboard:http://10.254.20.41:8265
\n", + "
\n", + "
\n" + ], + "text/plain": [ + "ClientContext(dashboard_url='10.254.20.41:8265', python_version='3.8.13', ray_version='2.1.0', ray_commit='23f34d948dae8de9b168667ab27e6cf940b3ae85', protocol_version='2022-10-05', _num_clients=1, _context_to_restore=)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import ray\n", "\n", @@ -134,7 +204,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "3436eb4a-217c-4109-a3c3-309fda7e2442", "metadata": {}, "outputs": [], @@ -158,33 +228,72 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "5cca1874-2be3-4631-ae48-9adfa45e3af3", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-06-27 19:14:28,222\tDEBUG worker.py:640 -- Retaining 00ffffffffffffffffffffffffffffffffffffff0100000002000000\n", + "2023-06-27 19:14:28,222\tDEBUG worker.py:564 -- Scheduling task heavy_calculation 0 b'\\x00\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\xff\\x01\\x00\\x00\\x00\\x02\\x00\\x00\\x00'\n" + ] + } + ], "source": [ "ref = heavy_calculation.remote(3000)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "01172c29-e8bf-41ef-8db5-eccb07906111", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-06-27 19:14:29,202\tDEBUG worker.py:640 -- Retaining 16310a0f0a45af5cffffffffffffffffffffffff0100000001000000\n", + "2023-06-27 19:14:31,224\tDEBUG worker.py:439 -- Internal retry for get [ClientObjectRef(16310a0f0a45af5cffffffffffffffffffffffff0100000001000000)]\n" + ] + }, + { + "data": { + "text/plain": [ + "1789.4644387076714" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ray.get(ref)" ] }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 10, "id": "9e79b547-a457-4232-b77d-19147067b972", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-06-27 19:14:33,161\tDEBUG dataclient.py:287 -- Got unawaited response connection_cleanup {\n", + "}\n", + "\n", + "2023-06-27 19:14:34,460\tDEBUG dataclient.py:278 -- Shutting down data channel.\n" + ] + } + ], "source": [ "ray.cancel(ref)\n", "ray.shutdown()" @@ -192,7 +301,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "2c198f1f-68bf-43ff-a148-02b5cb000ff2", "metadata": {}, "outputs": [], diff --git a/demo-notebooks/guided-demos/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/3_basic_interactive.ipynb index 346d91890..b0aa44ba4 100644 --- a/demo-notebooks/guided-demos/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/3_basic_interactive.ipynb @@ -59,9 +59,11 @@ "source": [ "# Create and configure our cluster object\n", "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", + "namespace = \"default\"\n", + "cluster_name = \"interactivetest\"\n", "cluster = Cluster(ClusterConfiguration(\n", - " name='interactivetest',\n", - " namespace='default',\n", + " name=cluster_name,\n", + " namespace=namespace,\n", " num_workers=2,\n", " min_cpus=2,\n", " max_cpus=2,\n", @@ -126,6 +128,19 @@ "Now we can connect directly to our Ray cluster via the Ray python client:" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9436436", + "metadata": {}, + "outputs": [], + "source": [ + "from codeflare_sdk import generate_cert\n", + "# Export the environment variables to enable TLS\n", + "generate_cert.generate_tls_cert(cluster_name, namespace)\n", + "generate_cert.export_env(cluster_name, namespace)" + ] + }, { "cell_type": "code", "execution_count": null, diff --git 
a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb index de673ed48..33d6af109 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb @@ -67,9 +67,11 @@ "source": [ "# Create and configure our cluster object\n", "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", + "namespace = \"default\"\n", + "cluster_name = \"interactivetest\"\n", "cluster = Cluster(ClusterConfiguration(\n", - " name='interactivetest',\n", - " namespace='default',\n", + " name=cluster_name,\n", + " namespace=namespace,\n", " num_workers=2,\n", " min_cpus=2,\n", " max_cpus=2,\n", @@ -210,6 +212,19 @@ "Now we can connect directly to our Ray cluster via the Ray python client:" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "13eb52f6", + "metadata": {}, + "outputs": [], + "source": [ + "from codeflare_sdk import generate_cert\n", + "# Export the environment variables to enable TLS\n", + "generate_cert.generate_tls_cert(cluster_name, namespace)\n", + "generate_cert.export_env(cluster_name, namespace)" + ] + }, { "cell_type": "code", "execution_count": 6, diff --git a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb index 30665bbab..fb071611d 100644 --- a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb @@ -59,9 +59,11 @@ "source": [ "# Create and configure our cluster object\n", "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", + "namespace = \"default\"\n", + "cluster_name = \"interactivetest\"\n", "cluster = 
Cluster(ClusterConfiguration(\n", - " name='interactivetest',\n", - " namespace='default',\n", + " name=cluster_name,\n", + " namespace=namespace,\n", " num_workers=2,\n", " min_cpus=2,\n", " max_cpus=2,\n", @@ -126,6 +128,19 @@ "Now we can connect directly to our Ray cluster via the Ray python client:" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5308271", + "metadata": {}, + "outputs": [], + "source": [ + "from codeflare_sdk import generate_cert\n", + "# Export the environment variables to enable TLS\n", + "generate_cert.generate_tls_cert(cluster_name, namespace)\n", + "generate_cert.export_env(cluster_name, namespace)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -296,7 +311,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.18" }, "vscode": { "interpreter": { From 0d6b04fdbfa30d6371237c728c51ed17904e109c Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Wed, 24 Apr 2024 12:52:50 +0100 Subject: [PATCH 04/12] Added Cluster Ray Job Client demo --- .../ray_job_client.ipynb} | 2 +- ...lient.ipynb => 2_cluster_job_client.ipynb} | 80 +--- .../2_cluster_job_client.ipynb | 223 +++++++++ .../notebook-ex-outputs/2_job_client.ipynb | 430 ------------------ .../preview_nbs/2_cluster_job_client.ipynb | 223 +++++++++ 5 files changed, 457 insertions(+), 501 deletions(-) rename demo-notebooks/{guided-demos/2_job_client.ipynb => additional-demos/ray_job_client.ipynb} (98%) rename demo-notebooks/guided-demos/{preview_nbs/2_job_client.ipynb => 2_cluster_job_client.ipynb} (70%) create mode 100644 demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb delete mode 100644 demo-notebooks/guided-demos/notebook-ex-outputs/2_job_client.ipynb create mode 100644 demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb diff --git a/demo-notebooks/guided-demos/2_job_client.ipynb b/demo-notebooks/additional-demos/ray_job_client.ipynb similarity index 98% 
rename from demo-notebooks/guided-demos/2_job_client.ipynb rename to demo-notebooks/additional-demos/ray_job_client.ipynb index 7b3d619b4..d24c92c6b 100644 --- a/demo-notebooks/guided-demos/2_job_client.ipynb +++ b/demo-notebooks/additional-demos/ray_job_client.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this third demo we will go over the basics of the Ray Job Submission Client in the SDK" + "In this demo we will go over the basics of the RayJobClient in the SDK" ] }, { diff --git a/demo-notebooks/guided-demos/preview_nbs/2_job_client.ipynb b/demo-notebooks/guided-demos/2_cluster_job_client.ipynb similarity index 70% rename from demo-notebooks/guided-demos/preview_nbs/2_job_client.ipynb rename to demo-notebooks/guided-demos/2_cluster_job_client.ipynb index 7b3d619b4..8ec29cbe2 100644 --- a/demo-notebooks/guided-demos/preview_nbs/2_job_client.ipynb +++ b/demo-notebooks/guided-demos/2_cluster_job_client.ipynb @@ -14,7 +14,7 @@ "outputs": [], "source": [ "# Import pieces from codeflare-sdk\n", - "from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication, RayJobClient" + "from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication" ] }, { @@ -27,9 +27,8 @@ "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", "\n", - "auth_token = \"XXXXX\" # The auth_token is used later for the RayJobClient\n", "auth = TokenAuthentication(\n", - " token = auth_token,\n", + " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", " skip_tls=False\n", ")\n", @@ -80,14 +79,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Ray Job Submission - Authorized Ray Cluster" + "### Ray Job Submission" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "* Submit a job using an authorized Ray dashboard and the Job Submission Client\n", + "* Initialise the Cluster Job Client \n", 
"* Provide an entrypoint command directed to your job script\n", "* Set up your runtime environment" ] @@ -98,16 +97,11 @@ "metadata": {}, "outputs": [], "source": [ - "# Gather the dashboard URL\n", - "ray_dashboard = cluster.cluster_dashboard_uri()\n", - "\n", - "# Create the header for passing your bearer token\n", - "header = {\n", - " 'Authorization': f'Bearer {auth_token}'\n", - "}\n", - "\n", - "# Initialize the RayJobClient\n", - "client = RayJobClient(address=ray_dashboard, headers=header, verify=True)" + "# Initialize the Job Submission Client\n", + "\"\"\"\n", + "The SDK will automatically gather the dashboard address and authenticate using the Ray Job Submission Client\n", + "\"\"\"\n", + "client = cluster.job_client" ] }, { @@ -116,7 +110,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Submit an example mnist job using the RayJobClient\n", + "# Submit an example mnist job using the Job Submission Client\n", "submission_id = client.submit_job(\n", " entrypoint=\"python mnist.py\",\n", " runtime_env={\"working_dir\": \"./\",\"pip\": \"requirements.txt\"},\n", @@ -186,60 +180,6 @@ "client.delete_job(submission_id)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Unauthorized Ray Cluster with the Ray Job Client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "Initialise the RayJobClient with the Ray Dashboard\n", - "\"\"\"\n", - "ray_dashboard = cluster.cluster_dashboard_uri()\n", - "client = RayJobClient(address=ray_dashboard, verify=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Submit an example mnist job using the RayJobClient\n", - "submission_id = client.submit_job(\n", - " entrypoint=\"python mnist.py\",\n", - " runtime_env={\"working_dir\": \"./\",\"pip\": \"requirements.txt\"},\n", - ")\n", - "print(submission_id)" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Stop the job \n", - "client.stop_job(submission_id)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Delete the job\n", - "client.delete_job(submission_id)" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb new file mode 100644 index 000000000..8ec29cbe2 --- /dev/null +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb @@ -0,0 +1,223 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this third demo we will go over the basics of the Ray Job Submission Client in the SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import pieces from codeflare-sdk\n", + "from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", + "\n", + "auth = TokenAuthentication(\n", + " token = \"XXXXX\",\n", + " server = \"XXXXX\",\n", + " skip_tls=False\n", + ")\n", + "auth.login()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create and configure our cluster object\n", + "cluster = Cluster(ClusterConfiguration(\n", + " name='jobtest',\n", + " namespace='default',\n", + " num_workers=2,\n", + " min_cpus=1,\n", + " max_cpus=1,\n", + " min_memory=4,\n", + " max_memory=4,\n", + " num_gpus=0,\n", + " 
image=\"quay.io/project-codeflare/ray:latest-py39-cu118\"\n", + "))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Bring up the cluster\n", + "cluster.up()\n", + "cluster.wait_ready()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cluster.details()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ray Job Submission" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Initialise the Cluster Job Client \n", + "* Provide an entrypoint command directed to your job script\n", + "* Set up your runtime environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the Job Submission Client\n", + "\"\"\"\n", + "The SDK will automatically gather the dashboard address and authenticate using the Ray Job Submission Client\n", + "\"\"\"\n", + "client = cluster.job_client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Submit an example mnist job using the Job Submission Client\n", + "submission_id = client.submit_job(\n", + " entrypoint=\"python mnist.py\",\n", + " runtime_env={\"working_dir\": \"./\",\"pip\": \"requirements.txt\"},\n", + ")\n", + "print(submission_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the job's logs\n", + "client.get_job_logs(submission_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the job's status\n", + "client.get_job_status(submission_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get job related info\n", + "client.get_job_info(submission_id)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# List all existing jobs\n", + "client.list_jobs()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Iterate through the logs of a job \n", + "async for lines in client.tail_job_logs(submission_id):\n", + " print(lines, end=\"\") " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Delete a job\n", + "# Can run client.cancel_job(submission_id) first if job is still running\n", + "client.delete_job(submission_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cluster.down()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "auth.logout()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/2_job_client.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/2_job_client.ipynb deleted file mode 100644 index 75000ce46..000000000 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/2_job_client.ipynb +++ /dev/null @@ -1,430 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this third demo we will go over the basics of the Ray Job Submission Client in the SDK" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Import pieces from codeflare-sdk\n", - "from codeflare_sdk import Cluster, 
ClusterConfiguration, TokenAuthentication, RayJobClient" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create authentication object for user permissions\n", - "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", - "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", - "\n", - "auth_token = \"XXXXX\" # The auth_token is used later for the RayJobClient\n", - "auth = TokenAuthentication(\n", - " token = auth_token,\n", - " server = \"XXXXX\",\n", - " skip_tls=False\n", - ")\n", - "auth.login()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Yaml resources loaded for jobtest\n" - ] - } - ], - "source": [ - "# Create and configure our cluster object\n", - "cluster = Cluster(ClusterConfiguration(\n", - " name='jobtest',\n", - " namespace='default',\n", - " num_workers=2,\n", - " min_cpus=1,\n", - " max_cpus=1,\n", - " min_memory=4,\n", - " max_memory=4,\n", - " num_gpus=0,\n", - " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\"\n", - "))" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Waiting for requested resources to be set up...\n", - "Requested cluster is up and running!\n", - "Dashboard is ready!\n" - ] - } - ], - "source": [ - "# Bring up the cluster\n", - "cluster.up()\n", - "cluster.wait_ready()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                  🚀 CodeFlare Cluster Details 🚀                  \n",
-       "                                                                   \n",
-       " ╭───────────────────────────────────────────────────────────────╮ \n",
-       " │   Name                                                        │ \n",
-       " │   jobtest                                        Active ✅    │ \n",
-       " │                                                               │ \n",
-       " │   URI: ray://jobtest-head-svc.default.svc:10001               │ \n",
-       " │                                                               │ \n",
-       " │   Dashboard🔗                                                 │ \n",
-       " │                                                               │ \n",
-       " │                       Cluster Resources                       │ \n",
-       " │   ╭── Workers ──╮  ╭───────── Worker specs(each) ─────────╮   │ \n",
-       " │   │  # Workers  │  │  Memory      CPU         GPU         │   │ \n",
-       " │   │             │  │                                      │   │ \n",
-       " │   │  2          │  │  4~4         1           0           │   │ \n",
-       " │   │             │  │                                      │   │ \n",
-       " │   ╰─────────────╯  ╰──────────────────────────────────────╯   │ \n",
-       " ╰───────────────────────────────────────────────────────────────╯ \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m ๐Ÿš€ CodeFlare Cluster Details ๐Ÿš€\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ \n", - " โ”‚ \u001b[1;37;42mName\u001b[0m โ”‚ \n", - " โ”‚ \u001b[1;4mjobtest\u001b[0m Active โœ… โ”‚ \n", - " โ”‚ โ”‚ \n", - " โ”‚ \u001b[1mURI:\u001b[0m ray://jobtest-head-svc.default.svc:10001 โ”‚ \n", - " โ”‚ โ”‚ \n", - " โ”‚ \u001b]8;id=561347;https://ray-dashboard-jobtest-default.apps.rosa.mcampbel.af68.p3.openshiftapps.com\u001b\\\u001b[4;34mDashboard๐Ÿ”—\u001b[0m\u001b]8;;\u001b\\ โ”‚ \n", - " โ”‚ โ”‚ \n", - " โ”‚ \u001b[3m Cluster Resources \u001b[0m โ”‚ \n", - " โ”‚ โ•ญโ”€โ”€ Workers โ”€โ”€โ•ฎ โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Worker specs(each) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ โ”‚ \n", - " โ”‚ โ”‚ \u001b[1m \u001b[0m\u001b[1m# Workers\u001b[0m\u001b[1m \u001b[0m โ”‚ โ”‚ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m โ”‚ โ”‚ \n", - " โ”‚ โ”‚ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m โ”‚ โ”‚ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m โ”‚ โ”‚ \n", - " โ”‚ โ”‚ \u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m โ”‚ โ”‚ \u001b[36m \u001b[0m\u001b[36m4~4 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m0 \u001b[0m\u001b[35m \u001b[0m โ”‚ โ”‚ \n", - " โ”‚ โ”‚ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m โ”‚ โ”‚ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m 
\u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m โ”‚ โ”‚ \n", - " โ”‚ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚ \n", - " โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "RayCluster(name='jobtest', status=, head_cpus=2, head_mem=8, head_gpu=0, workers=2, worker_mem_min=4, worker_mem_max=4, worker_cpu=1, worker_gpu=0, namespace='default', dashboard='https://ray-dashboard-jobtest-default.apps.rosa.mcampbel.af68.p3.openshiftapps.com')" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cluster.details()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Ray Job Submission - Authorized Ray Cluster" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* Submit a job using an authorized Ray dashboard and the Job Submission Client\n", - "* Provide an entrypoint command directed to your job script\n", - "* Set up your runtime environment" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# Gather the dashboard URL\n", - "ray_dashboard = cluster.cluster_dashboard_uri()\n", - "\n", - "# Create the header for passing your bearer token\n", - "header = {\n", - " 'Authorization': f'Bearer {auth_token}'\n", - "}\n", - "\n", - "# Initialize the RayJobClient\n", - "client = RayJobClient(address=ray_dashboard, headers=header, verify=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", 
- "text": [ - "2024-04-03 12:16:07,112\tINFO dashboard_sdk.py:338 -- Uploading package gcs://_ray_pkg_431abdedbcc7e123.zip.\n", - "2024-04-03 12:16:07,115\tINFO packaging.py:518 -- Creating a file package for local directory './'.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "raysubmit_NvXkkh1QP1kdq4LG\n" - ] - } - ], - "source": [ - "# Submit an example mnist job using the RayJobClient\n", - "submission_id = client.submit_job(\n", - " entrypoint=\"python mnist.py\",\n", - " runtime_env={\"working_dir\": \"./\",\"pip\": \"requirements.txt\"},\n", - ")\n", - "print(submission_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "''" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Get the job's logs\n", - "client.get_job_logs(submission_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Get the job's status\n", - "client.get_job_status(submission_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "JobDetails(type=, job_id=None, submission_id='raysubmit_NvXkkh1QP1kdq4LG', driver_info=None, status=, entrypoint='python mnist.py', message='Job has not started yet. 
It may be waiting for the runtime environment to be set up.', error_type=None, start_time=1712142968879, end_time=None, metadata={}, runtime_env={'working_dir': 'gcs://_ray_pkg_431abdedbcc7e123.zip', 'pip': {'packages': ['pytorch_lightning==1.5.10', 'ray_lightning', 'torchmetrics==0.9.1', 'torchvision==0.12.0'], 'pip_check': False}, '_ray_commit': 'b4bba4717f5ba04ee25580fe8f88eed63ef0c5dc'}, driver_agent_http_address=None, driver_node_id=None)" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Get job related info\n", - "client.get_job_info(submission_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[JobDetails(type=, job_id=None, submission_id='raysubmit_NvXkkh1QP1kdq4LG', driver_info=None, status=, entrypoint='python mnist.py', message='Job has not started yet. It may be waiting for the runtime environment to be set up.', error_type=None, start_time=1712142968879, end_time=None, metadata={}, runtime_env={'working_dir': 'gcs://_ray_pkg_431abdedbcc7e123.zip', 'pip': {'packages': ['pytorch_lightning==1.5.10', 'ray_lightning', 'torchmetrics==0.9.1', 'torchvision==0.12.0'], 'pip_check': False}, '_ray_commit': 'b4bba4717f5ba04ee25580fe8f88eed63ef0c5dc'}, driver_agent_http_address=None, driver_node_id=None)]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# List all existing jobs\n", - "client.list_jobs()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Iterate through the logs of a job \n", - "async for lines in client.tail_job_logs(submission_id):\n", - " print(lines, end=\"\") " - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(True, 'Successfully deleted Job raysubmit_NvXkkh1QP1kdq4LG')" - ] - }, - 
"execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Delete a job\n", - "# Can run client.cancel_job(submission_id) first if job is still running\n", - "client.delete_job(submission_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Unauthorized Ray Cluster with the Ray Job Client" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "Initialise the RayJobClient with the Ray Dashboard\n", - "\"\"\"\n", - "ray_dashboard = cluster.cluster_dashboard_uri()\n", - "client = RayJobClient(address=ray_dashboard, verify=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Submit an example mnist job using the RayJobClient\n", - "submission_id = client.submit_job(\n", - " entrypoint=\"python mnist.py\",\n", - " runtime_env={\"working_dir\": \"./\",\"pip\": \"requirements.txt\"},\n", - ")\n", - "print(submission_id)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Stop the job \n", - "client.stop_job(submission_id)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Delete the job\n", - "client.delete_job(submission_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "cluster.down()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "auth.logout()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": 
"ipython3", - "version": "3.9.18" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb new file mode 100644 index 000000000..8ec29cbe2 --- /dev/null +++ b/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb @@ -0,0 +1,223 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this third demo we will go over the basics of the Ray Job Submission Client in the SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import pieces from codeflare-sdk\n", + "from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", + "\n", + "auth = TokenAuthentication(\n", + " token = \"XXXXX\",\n", + " server = \"XXXXX\",\n", + " skip_tls=False\n", + ")\n", + "auth.login()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create and configure our cluster object\n", + "cluster = Cluster(ClusterConfiguration(\n", + " name='jobtest',\n", + " namespace='default',\n", + " num_workers=2,\n", + " min_cpus=1,\n", + " max_cpus=1,\n", + " min_memory=4,\n", + " max_memory=4,\n", + " num_gpus=0,\n", + " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\"\n", + "))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Bring up the cluster\n", + "cluster.up()\n", + "cluster.wait_ready()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cluster.details()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ray Job Submission" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Initialise the Cluster Job Client \n", + "* Provide an entrypoint command directed to your job script\n", + "* Set up your runtime environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the Job Submission Client\n", + "\"\"\"\n", + "The SDK will automatically gather the dashboard address and authenticate using the Ray Job Submission Client\n", + "\"\"\"\n", + "client = cluster.job_client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Submit an example mnist job using the Job Submission Client\n", + "submission_id = client.submit_job(\n", + " entrypoint=\"python mnist.py\",\n", + " runtime_env={\"working_dir\": \"./\",\"pip\": \"requirements.txt\"},\n", + ")\n", + "print(submission_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the job's logs\n", + "client.get_job_logs(submission_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the job's status\n", + "client.get_job_status(submission_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get job related info\n", + "client.get_job_info(submission_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# List all existing jobs\n", + "client.list_jobs()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Iterate through the logs of a job \n", + "async 
for lines in client.tail_job_logs(submission_id):\n", + " print(lines, end=\"\") " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Delete a job\n", + "# Can run client.stop_job(submission_id) first if job is still running\n", + "client.delete_job(submission_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cluster.down()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "auth.logout()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 7d47f0cf0a7dd27d0ae75ade2f390de4da70cf92 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Wed, 24 Apr 2024 13:00:38 +0100 Subject: [PATCH 05/12] Added namespace reminder --- .../additional-demos/local_interactive.ipynb | 10 ++++++---- demo-notebooks/additional-demos/ray_job_client.ipynb | 2 +- demo-notebooks/guided-demos/0_basic_ray.ipynb | 2 +- demo-notebooks/guided-demos/1_basic_instascale.ipynb | 2 +- demo-notebooks/guided-demos/2_cluster_job_client.ipynb | 2 +- demo-notebooks/guided-demos/3_basic_interactive.ipynb | 2 +- .../guided-demos/notebook-ex-outputs/0_basic_ray.ipynb | 4 ++-- .../notebook-ex-outputs/1_basic_instascale.ipynb | 2 +- .../notebook-ex-outputs/2_cluster_job_client.ipynb | 2 +- .../notebook-ex-outputs/3_basic_interactive.ipynb | 2 +- .../guided-demos/preview_nbs/0_basic_ray.ipynb | 4 ++-- .../guided-demos/preview_nbs/1_basic_instascale.ipynb | 2 +- .../preview_nbs/2_cluster_job_client.ipynb | 2 +- 
.../guided-demos/preview_nbs/3_basic_interactive.ipynb | 2 +- 14 files changed, 21 insertions(+), 19 deletions(-) diff --git a/demo-notebooks/additional-demos/local_interactive.ipynb b/demo-notebooks/additional-demos/local_interactive.ipynb index 31f1ad17c..61dc25833 100644 --- a/demo-notebooks/additional-demos/local_interactive.ipynb +++ b/demo-notebooks/additional-demos/local_interactive.ipynb @@ -48,8 +48,8 @@ }, "outputs": [], "source": [ - "# Create and submit our Ray Cluster\n", - "namespace = \"default\"\n", + "# Create our cluster and submit appwrapper\n", + "namespace = \"default\" # Update to your namespace\n", "cluster_name = \"hfgputest-1\"\n", "\n", "cluster = Cluster(ClusterConfiguration(namespace=namespace,\n", @@ -60,7 +60,9 @@ " min_memory=4,\n", " max_memory=4,\n", " num_gpus=0,\n", - " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",))" + " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " instascale=False,\n", + " machine_types=[\"m5.xlarge\", \"p3.8xlarge\"]))" ] }, { @@ -112,7 +114,7 @@ "outputs": [], "source": [ "from codeflare_sdk import generate_cert\n", - "# Export the environment variables to enable TLS\n", + "\n", "generate_cert.generate_tls_cert(cluster_name, namespace)\n", "generate_cert.export_env(cluster_name, namespace)" ] diff --git a/demo-notebooks/additional-demos/ray_job_client.ipynb b/demo-notebooks/additional-demos/ray_job_client.ipynb index d24c92c6b..78978f9ce 100644 --- a/demo-notebooks/additional-demos/ray_job_client.ipynb +++ b/demo-notebooks/additional-demos/ray_job_client.ipynb @@ -45,7 +45,7 @@ "# Create and configure our cluster object\n", "cluster = Cluster(ClusterConfiguration(\n", " name='jobtest',\n", - " namespace='default',\n", + " namespace='default', # Update to your namespace\n", " num_workers=2,\n", " min_cpus=1,\n", " max_cpus=1,\n", diff --git a/demo-notebooks/guided-demos/0_basic_ray.ipynb b/demo-notebooks/guided-demos/0_basic_ray.ipynb index 9b6aace71..dabadfb43 100644 --- 
a/demo-notebooks/guided-demos/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/0_basic_ray.ipynb @@ -64,7 +64,7 @@ "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", "cluster = Cluster(ClusterConfiguration(\n", " name='raytest',\n", - " namespace='default',\n", + " namespace='default', # Update to your namespace\n", " num_workers=2,\n", " min_cpus=1,\n", " max_cpus=1,\n", diff --git a/demo-notebooks/guided-demos/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/1_basic_instascale.ipynb index 07983c6e6..6f34df3f8 100644 --- a/demo-notebooks/guided-demos/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/1_basic_instascale.ipynb @@ -62,7 +62,7 @@ "# Create and configure our cluster object (and appwrapper)\n", "cluster = Cluster(ClusterConfiguration(\n", " name='instascaletest',\n", - " namespace='default',\n", + " namespace='default', # Update to your namespace\n", " num_workers=2,\n", " min_cpus=2,\n", " max_cpus=2,\n", diff --git a/demo-notebooks/guided-demos/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/2_cluster_job_client.ipynb index 8ec29cbe2..ea194ca9d 100644 --- a/demo-notebooks/guided-demos/2_cluster_job_client.ipynb +++ b/demo-notebooks/guided-demos/2_cluster_job_client.ipynb @@ -44,7 +44,7 @@ "# Create and configure our cluster object\n", "cluster = Cluster(ClusterConfiguration(\n", " name='jobtest',\n", - " namespace='default',\n", + " namespace='default', # Update to your namespace\n", " num_workers=2,\n", " min_cpus=1,\n", " max_cpus=1,\n", diff --git a/demo-notebooks/guided-demos/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/3_basic_interactive.ipynb index b0aa44ba4..53caed005 100644 --- a/demo-notebooks/guided-demos/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/3_basic_interactive.ipynb @@ -59,7 +59,7 @@ "source": [ "# Create and configure our cluster object\n", "# The SDK will try to find the name of your default 
local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", - "namespace = \"default\"\n", + "namespace = \"default\" # Update to your namespace\n", "cluster_name = \"interactivetest\"\n", "cluster = Cluster(ClusterConfiguration(\n", " name=cluster_name,\n", diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb index ccc59cb04..a9df543db 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb @@ -72,7 +72,7 @@ "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", "cluster = Cluster(ClusterConfiguration(\n", " name='raytest',\n", - " namespace='default',\n", + " namespace='default', # Update to your namespace\n", " num_workers=2,\n", " min_cpus=1,\n", " max_cpus=1,\n", @@ -357,7 +357,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.undefined" + "version": "3.9.18" }, "vscode": { "interpreter": { diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb index ba8426dc0..345435979 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb @@ -70,7 +70,7 @@ "# Create and configure our cluster object (and appwrapper)\n", "cluster = Cluster(ClusterConfiguration(\n", " name='instascaletest',\n", - " namespace='default',\n", + " namespace='default', # Update to your namespace\n", " num_workers=2,\n", " min_cpus=2,\n", " max_cpus=2,\n", diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb index 8ec29cbe2..ea194ca9d 
100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb @@ -44,7 +44,7 @@ "# Create and configure our cluster object\n", "cluster = Cluster(ClusterConfiguration(\n", " name='jobtest',\n", - " namespace='default',\n", + " namespace='default', # Update to your namespace\n", " num_workers=2,\n", " min_cpus=1,\n", " max_cpus=1,\n", diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb index 33d6af109..d5c123562 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb @@ -67,7 +67,7 @@ "source": [ "# Create and configure our cluster object\n", "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", - "namespace = \"default\"\n", + "namespace = \"default\" # Update to your namespace\n", "cluster_name = \"interactivetest\"\n", "cluster = Cluster(ClusterConfiguration(\n", " name=cluster_name,\n", diff --git a/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb index 7ea8e9e48..dabadfb43 100644 --- a/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb @@ -64,7 +64,7 @@ "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", "cluster = Cluster(ClusterConfiguration(\n", " name='raytest',\n", - " namespace='default',\n", + " namespace='default', # Update to your namespace\n", " num_workers=2,\n", " min_cpus=1,\n", " max_cpus=1,\n", @@ -196,7 +196,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + 
"version": "3.9.18" }, "vscode": { "interpreter": { diff --git a/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb index 07983c6e6..6f34df3f8 100644 --- a/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb @@ -62,7 +62,7 @@ "# Create and configure our cluster object (and appwrapper)\n", "cluster = Cluster(ClusterConfiguration(\n", " name='instascaletest',\n", - " namespace='default',\n", + " namespace='default', # Update to your namespace\n", " num_workers=2,\n", " min_cpus=2,\n", " max_cpus=2,\n", diff --git a/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb index 8ec29cbe2..ea194ca9d 100644 --- a/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb @@ -44,7 +44,7 @@ "# Create and configure our cluster object\n", "cluster = Cluster(ClusterConfiguration(\n", " name='jobtest',\n", - " namespace='default',\n", + " namespace='default', # Update to your namespace\n", " num_workers=2,\n", " min_cpus=1,\n", " max_cpus=1,\n", diff --git a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb index fb071611d..169bb8b97 100644 --- a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb @@ -59,7 +59,7 @@ "source": [ "# Create and configure our cluster object\n", "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", - "namespace = \"default\"\n", + "namespace = \"default\" # Update to your namespace\n", "cluster_name = \"interactivetest\"\n", "cluster = Cluster(ClusterConfiguration(\n", " 
name=cluster_name,\n", From 5ed73b2e648e72251a02c68b96d95b5403cc365d Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Wed, 24 Apr 2024 14:19:54 +0100 Subject: [PATCH 06/12] Addressed review comments --- demo-notebooks/additional-demos/hf_interactive.ipynb | 2 +- demo-notebooks/additional-demos/local_interactive.ipynb | 3 ++- demo-notebooks/additional-demos/ray_job_client.ipynb | 2 +- demo-notebooks/guided-demos/0_basic_ray.ipynb | 4 ++-- demo-notebooks/guided-demos/1_basic_instascale.ipynb | 2 +- demo-notebooks/guided-demos/2_cluster_job_client.ipynb | 4 ++-- demo-notebooks/guided-demos/3_basic_interactive.ipynb | 6 +++--- .../guided-demos/notebook-ex-outputs/0_basic_ray.ipynb | 4 ++-- .../notebook-ex-outputs/1_basic_instascale.ipynb | 2 +- .../notebook-ex-outputs/2_cluster_job_client.ipynb | 4 ++-- .../notebook-ex-outputs/3_basic_interactive.ipynb | 4 ++-- demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb | 4 ++-- .../guided-demos/preview_nbs/1_basic_instascale.ipynb | 2 +- .../guided-demos/preview_nbs/2_cluster_job_client.ipynb | 4 ++-- .../guided-demos/preview_nbs/3_basic_interactive.ipynb | 4 ++-- 15 files changed, 26 insertions(+), 25 deletions(-) diff --git a/demo-notebooks/additional-demos/hf_interactive.ipynb b/demo-notebooks/additional-demos/hf_interactive.ipynb index 28441037e..72b3dba27 100644 --- a/demo-notebooks/additional-demos/hf_interactive.ipynb +++ b/demo-notebooks/additional-demos/hf_interactive.ipynb @@ -92,7 +92,7 @@ ], "source": [ "# Create our cluster and submit\n", - "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", "cluster = Cluster(ClusterConfiguration(name='hfgputest', \n", " namespace=\"default\",\n", " num_workers=1,\n", diff --git 
a/demo-notebooks/additional-demos/local_interactive.ipynb b/demo-notebooks/additional-demos/local_interactive.ipynb index 61dc25833..f8078e240 100644 --- a/demo-notebooks/additional-demos/local_interactive.ipynb +++ b/demo-notebooks/additional-demos/local_interactive.ipynb @@ -48,7 +48,8 @@ }, "outputs": [], "source": [ - "# Create our cluster and submit appwrapper\n", + "# Create and submit our cluster\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", "namespace = \"default\" # Update to your namespace\n", "cluster_name = \"hfgputest-1\"\n", "\n", diff --git a/demo-notebooks/additional-demos/ray_job_client.ipynb b/demo-notebooks/additional-demos/ray_job_client.ipynb index 78978f9ce..cd81423cc 100644 --- a/demo-notebooks/additional-demos/ray_job_client.ipynb +++ b/demo-notebooks/additional-demos/ray_job_client.ipynb @@ -26,7 +26,6 @@ "# Create authentication object for user permissions\n", "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", - "\n", "auth_token = \"XXXXX\" # The auth_token is used later for the RayJobClient\n", "auth = TokenAuthentication(\n", " token = auth_token,\n", @@ -43,6 +42,7 @@ "outputs": [], "source": [ "# Create and configure our cluster object\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", "cluster = Cluster(ClusterConfiguration(\n", " name='jobtest',\n", " namespace='default', # Update to your namespace\n", diff --git a/demo-notebooks/guided-demos/0_basic_ray.ipynb b/demo-notebooks/guided-demos/0_basic_ray.ipynb index dabadfb43..47884f0f6 100644 --- a/demo-notebooks/guided-demos/0_basic_ray.ipynb +++ 
b/demo-notebooks/guided-demos/0_basic_ray.ipynb @@ -5,7 +5,7 @@ "id": "8d4a42f6", "metadata": {}, "source": [ - "In this first notebook, we will go through the basics of using the SDK to:\n", + "In this notebook, we will go through the basics of using the SDK to:\n", " - Spin up a Ray cluster with our desired resources\n", " - View the status and specs of our Ray cluster\n", " - Take down the Ray cluster when finished" @@ -61,7 +61,7 @@ "outputs": [], "source": [ "# Create and configure our cluster object\n", - "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", "cluster = Cluster(ClusterConfiguration(\n", " name='raytest',\n", " namespace='default', # Update to your namespace\n", diff --git a/demo-notebooks/guided-demos/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/1_basic_instascale.ipynb index 6f34df3f8..00963886e 100644 --- a/demo-notebooks/guided-demos/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/1_basic_instascale.ipynb @@ -5,7 +5,7 @@ "id": "9865ee8c", "metadata": {}, "source": [ - "In this second notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments).\n", + "In this notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments).\n", "\n", "NOTE: The InstaScale and MCAD components are in Tech Preview" ] diff --git a/demo-notebooks/guided-demos/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/2_cluster_job_client.ipynb index ea194ca9d..65447ac34 100644 --- a/demo-notebooks/guided-demos/2_cluster_job_client.ipynb +++ 
b/demo-notebooks/guided-demos/2_cluster_job_client.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this third demo we will go over the basics of the Ray Job Submission Client in the SDK" + "In this demo we will go over the basics of the Ray Job Submission Client in the SDK" ] }, { @@ -26,7 +26,6 @@ "# Create authentication object for user permissions\n", "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", - "\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", @@ -42,6 +41,7 @@ "outputs": [], "source": [ "# Create and configure our cluster object\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", "cluster = Cluster(ClusterConfiguration(\n", " name='jobtest',\n", " namespace='default', # Update to your namespace\n", diff --git a/demo-notebooks/guided-demos/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/3_basic_interactive.ipynb index 53caed005..872298309 100644 --- a/demo-notebooks/guided-demos/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/3_basic_interactive.ipynb @@ -5,7 +5,7 @@ "id": "bbc21043", "metadata": {}, "source": [ - "In this fourth notebook, we will go over how to leverage the SDK to directly work interactively with a Ray Cluster during development." + "In this notebook, we will go over how to leverage the SDK to directly work interactively with a Ray Cluster during development." 
] }, { @@ -58,7 +58,7 @@ "outputs": [], "source": [ "# Create and configure our cluster object\n", - "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", "namespace = \"default\" # Update to your namespace\n", "cluster_name = \"interactivetest\"\n", "cluster = Cluster(ClusterConfiguration(\n", @@ -136,7 +136,7 @@ "outputs": [], "source": [ "from codeflare_sdk import generate_cert\n", - "# Export the environment variables to enable TLS\n", + "# Create required TLS cert and export the environment variables to enable TLS\n", "generate_cert.generate_tls_cert(cluster_name, namespace)\n", "generate_cert.export_env(cluster_name, namespace)" ] diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb index a9df543db..afadb55db 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb @@ -5,7 +5,7 @@ "id": "8d4a42f6", "metadata": {}, "source": [ - "In this first notebook, we will go through the basics of using the SDK to:\n", + "In this notebook, we will go through the basics of using the SDK to:\n", " - Spin up a Ray cluster with our desired resources\n", " - View the status and specs of our Ray cluster\n", " - Take down the Ray cluster when finished" @@ -69,7 +69,7 @@ ], "source": [ "# Create and configure our cluster object\n", - "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local 
queue manually below\n", "cluster = Cluster(ClusterConfiguration(\n", " name='raytest',\n", " namespace='default', # Update to your namespace\n", diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb index 345435979..ab10a5586 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb @@ -5,7 +5,7 @@ "id": "9865ee8c", "metadata": {}, "source": [ - "In this second notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments).\n", + "In this notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments).\n", "\n", "NOTE: The InstaScale and MCAD components are in Tech Preview" ] diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb index ea194ca9d..65447ac34 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this third demo we will go over the basics of the Ray Job Submission Client in the SDK" + "In this demo we will go over the basics of the Ray Job Submission Client in the SDK" ] }, { @@ -26,7 +26,6 @@ "# Create authentication object for user permissions\n", "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", - "\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", @@ 
-42,6 +41,7 @@ "outputs": [], "source": [ "# Create and configure our cluster object\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", "cluster = Cluster(ClusterConfiguration(\n", " name='jobtest',\n", " namespace='default', # Update to your namespace\n", diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb index d5c123562..1d3afdc65 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb @@ -66,7 +66,7 @@ ], "source": [ "# Create and configure our cluster object\n", - "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", "namespace = \"default\" # Update to your namespace\n", "cluster_name = \"interactivetest\"\n", "cluster = Cluster(ClusterConfiguration(\n", @@ -220,7 +220,7 @@ "outputs": [], "source": [ "from codeflare_sdk import generate_cert\n", - "# Export the environment variables to enable TLS\n", + "# Create required TLS cert and export the environment variables to enable TLS\n", "generate_cert.generate_tls_cert(cluster_name, namespace)\n", "generate_cert.export_env(cluster_name, namespace)" ] diff --git a/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb index dabadfb43..bf2bde96b 100644 --- a/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb @@ -5,7 +5,7 @@ "id": "8d4a42f6", "metadata": {}, 
"source": [ - "In this first notebook, we will go through the basics of using the SDK to:\n", + "In this notebook, we will go through the basics of using the SDK to:\n", " - Spin up a Ray cluster with our desired resources\n", " - View the status and specs of our Ray cluster\n", " - Take down the Ray cluster when finished" @@ -61,7 +61,7 @@ "outputs": [], "source": [ "# Create and configure our cluster object\n", - "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", "cluster = Cluster(ClusterConfiguration(\n", " name='raytest',\n", " namespace='default', # Update to your namespace\n", diff --git a/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb index 6f34df3f8..00963886e 100644 --- a/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb @@ -5,7 +5,7 @@ "id": "9865ee8c", "metadata": {}, "source": [ - "In this second notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments).\n", + "In this notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments).\n", "\n", "NOTE: The InstaScale and MCAD components are in Tech Preview" ] diff --git a/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb index ea194ca9d..65447ac34 100644 --- a/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb +++ 
b/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this third demo we will go over the basics of the Ray Job Submission Client in the SDK" + "In this demo we will go over the basics of the Ray Job Submission Client in the SDK" ] }, { @@ -26,7 +26,6 @@ "# Create authentication object for user permissions\n", "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", - "\n", "auth = TokenAuthentication(\n", " token = \"XXXXX\",\n", " server = \"XXXXX\",\n", @@ -42,6 +41,7 @@ "outputs": [], "source": [ "# Create and configure our cluster object\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", "cluster = Cluster(ClusterConfiguration(\n", " name='jobtest',\n", " namespace='default', # Update to your namespace\n", diff --git a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb index 169bb8b97..27ce71f28 100644 --- a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb @@ -58,7 +58,7 @@ "outputs": [], "source": [ "# Create and configure our cluster object\n", - "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\"\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", "namespace = \"default\" # Update to your namespace\n", "cluster_name = \"interactivetest\"\n", "cluster = Cluster(ClusterConfiguration(\n", @@ -136,7 +136,7 @@ "outputs": [], 
"source": [ "from codeflare_sdk import generate_cert\n", - "# Export the environment variables to enable TLS\n", + "# Create required TLS cert and export the environment variables to enable TLS\n", "generate_cert.generate_tls_cert(cluster_name, namespace)\n", "generate_cert.export_env(cluster_name, namespace)" ] From 75993efc917fffc8f240bf4a82638ede817061e9 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Wed, 24 Apr 2024 14:33:57 +0100 Subject: [PATCH 07/12] Added Tech Preview comment to MCAD references --- demo-notebooks/guided-demos/0_basic_ray.ipynb | 2 +- demo-notebooks/guided-demos/3_basic_interactive.ipynb | 2 +- .../guided-demos/notebook-ex-outputs/0_basic_ray.ipynb | 2 +- .../guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb | 2 +- demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb | 2 +- .../guided-demos/preview_nbs/3_basic_interactive.ipynb | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/demo-notebooks/guided-demos/0_basic_ray.ipynb b/demo-notebooks/guided-demos/0_basic_ray.ipynb index 47884f0f6..d5b7000aa 100644 --- a/demo-notebooks/guided-demos/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/0_basic_ray.ipynb @@ -50,7 +50,7 @@ "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. 
\n", "The example here is a community image.\n", "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use Tech Preview component MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { diff --git a/demo-notebooks/guided-demos/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/3_basic_interactive.ipynb index 872298309..6afde0bc8 100644 --- a/demo-notebooks/guided-demos/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/3_basic_interactive.ipynb @@ -47,7 +47,7 @@ "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", "The example here is a community image.\n", "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use Tech Preview component MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb index afadb55db..7d2dfac12 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb @@ -50,7 +50,7 @@ "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. 
\n", "The example here is a community image.\n", "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use Tech Preview component MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb index 1d3afdc65..78ace55e5 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb @@ -47,7 +47,7 @@ "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", "The example here is a community image.\n", "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use Tech Preview component MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { diff --git a/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb index bf2bde96b..f963fa5f0 100644 --- a/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb @@ -50,7 +50,7 @@ "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. 
\n", "The example here is a community image.\n", "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use Tech Preview component MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { diff --git a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb index 27ce71f28..06062e145 100644 --- a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb @@ -47,7 +47,7 @@ "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", "The example here is a community image.\n", "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" + "NOTE: By default the SDK uses Kueue as it's scheduling solution to use Tech Preview component MCAD set the `mcad=True` option in `ClusterConfiguration`" ] }, { From 46df14c2a867b725c5fddbd3d1b84db8999244e1 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Wed, 24 Apr 2024 15:23:51 +0100 Subject: [PATCH 08/12] Ommited MCAD references from notebooks except InstaScale NB --- demo-notebooks/additional-demos/hf_interactive.ipynb | 7 +++---- demo-notebooks/additional-demos/local_interactive.ipynb | 1 + demo-notebooks/additional-demos/ray_job_client.ipynb | 3 ++- demo-notebooks/guided-demos/0_basic_ray.ipynb | 5 ++--- demo-notebooks/guided-demos/1_basic_instascale.ipynb | 1 + demo-notebooks/guided-demos/2_cluster_job_client.ipynb | 3 ++- demo-notebooks/guided-demos/3_basic_interactive.ipynb | 5 ++--- .../guided-demos/notebook-ex-outputs/0_basic_ray.ipynb | 7 +++---- .../notebook-ex-outputs/1_basic_instascale.ipynb | 1 + .../notebook-ex-outputs/2_cluster_job_client.ipynb 
| 3 ++- .../notebook-ex-outputs/3_basic_interactive.ipynb | 5 ++--- demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb | 5 ++--- .../guided-demos/preview_nbs/1_basic_instascale.ipynb | 1 + .../guided-demos/preview_nbs/2_cluster_job_client.ipynb | 3 ++- .../guided-demos/preview_nbs/3_basic_interactive.ipynb | 5 ++--- 15 files changed, 28 insertions(+), 27 deletions(-) diff --git a/demo-notebooks/additional-demos/hf_interactive.ipynb b/demo-notebooks/additional-demos/hf_interactive.ipynb index 72b3dba27..9181f3aac 100644 --- a/demo-notebooks/additional-demos/hf_interactive.ipynb +++ b/demo-notebooks/additional-demos/hf_interactive.ipynb @@ -71,9 +71,7 @@ "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding Ray Cluster).\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image.\n", - "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use MCAD set the `mcad=True` option in `ClusterConfiguration`" + "The example here is a community image." 
] }, { @@ -94,7 +92,7 @@ "# Create our cluster and submit\n", "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", "cluster = Cluster(ClusterConfiguration(name='hfgputest', \n", - " namespace=\"default\",\n", + " namespace=\"default\", # Update to your namespace\n", " num_workers=1,\n", " min_cpus=8, \n", " max_cpus=8, \n", @@ -102,6 +100,7 @@ " max_memory=16, \n", " num_gpus=4,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", " ))" ] diff --git a/demo-notebooks/additional-demos/local_interactive.ipynb b/demo-notebooks/additional-demos/local_interactive.ipynb index f8078e240..9af9bc333 100644 --- a/demo-notebooks/additional-demos/local_interactive.ipynb +++ b/demo-notebooks/additional-demos/local_interactive.ipynb @@ -62,6 +62,7 @@ " max_memory=4,\n", " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", " instascale=False,\n", " machine_types=[\"m5.xlarge\", \"p3.8xlarge\"]))" ] diff --git a/demo-notebooks/additional-demos/ray_job_client.ipynb b/demo-notebooks/additional-demos/ray_job_client.ipynb index cd81423cc..e3d90cd39 100644 --- a/demo-notebooks/additional-demos/ray_job_client.ipynb +++ b/demo-notebooks/additional-demos/ray_job_client.ipynb @@ -52,7 +52,8 @@ " min_memory=4,\n", " max_memory=4,\n", " num_gpus=0,\n", - " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\"\n", + " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " write_to_file=False # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", "))" ] }, diff --git 
a/demo-notebooks/guided-demos/0_basic_ray.ipynb b/demo-notebooks/guided-demos/0_basic_ray.ipynb index d5b7000aa..bd2e66f0b 100644 --- a/demo-notebooks/guided-demos/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/0_basic_ray.ipynb @@ -48,9 +48,7 @@ "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image.\n", - "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use Tech Preview component MCAD set the `mcad=True` option in `ClusterConfiguration`" + "The example here is a community image." ] }, { @@ -72,6 +70,7 @@ " max_memory=4,\n", " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] diff --git a/demo-notebooks/guided-demos/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/1_basic_instascale.ipynb index 00963886e..072392c74 100644 --- a/demo-notebooks/guided-demos/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/1_basic_instascale.ipynb @@ -71,6 +71,7 @@ " num_gpus=1,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", " mcad=True, # Enable MCAD\n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", " instascale=True, # InstaScale now enabled, will scale OCP cluster to guarantee resource request\n", " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"] # Head, worker AWS machine types desired\n", "))" diff --git a/demo-notebooks/guided-demos/2_cluster_job_client.ipynb 
b/demo-notebooks/guided-demos/2_cluster_job_client.ipynb index 65447ac34..c8f1157a9 100644 --- a/demo-notebooks/guided-demos/2_cluster_job_client.ipynb +++ b/demo-notebooks/guided-demos/2_cluster_job_client.ipynb @@ -51,7 +51,8 @@ " min_memory=4,\n", " max_memory=4,\n", " num_gpus=0,\n", - " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\"\n", + " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " write_to_file=False # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", "))" ] }, diff --git a/demo-notebooks/guided-demos/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/3_basic_interactive.ipynb index 6afde0bc8..943425a89 100644 --- a/demo-notebooks/guided-demos/3_basic_interactive.ipynb +++ b/demo-notebooks/guided-demos/3_basic_interactive.ipynb @@ -45,9 +45,7 @@ "Once again, let's start by running through the same cluster setup as before:\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image.\n", - "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use Tech Preview component MCAD set the `mcad=True` option in `ClusterConfiguration`" + "The example here is a community image." 
] }, { @@ -71,6 +69,7 @@ " max_memory=8,\n", " num_gpus=1,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb index 7d2dfac12..6771e9d7a 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/0_basic_ray.ipynb @@ -48,9 +48,7 @@ "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image.\n", - "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use Tech Preview component MCAD set the `mcad=True` option in `ClusterConfiguration`" + "The example here is a community image." ] }, { @@ -80,6 +78,7 @@ " max_memory=4,\n", " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] @@ -89,7 +88,7 @@ "id": "12eef53c", "metadata": {}, "source": [ - "Next, we want to bring our cluster up, so we call the `up()` function below to submit our cluster AppWrapper yaml onto the MCAD queue, and begin the process of obtaining our resource cluster." 
+ "Next, we want to bring our cluster up, so we call the `up()` function below to submit our Ray Cluster onto the queue, and begin the process of obtaining our resource cluster." ] }, { diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb index ab10a5586..003cdb5f2 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb @@ -79,6 +79,7 @@ " num_gpus=1,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", " mcad=True, # Enable MCAD\n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", " instascale=True, # InstaScale now enabled, will scale OCP cluster to guarantee resource request\n", " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"] # Head, worker AWS machine types desired\n", "))" diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb index 65447ac34..c8f1157a9 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb @@ -51,7 +51,8 @@ " min_memory=4,\n", " max_memory=4,\n", " num_gpus=0,\n", - " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\"\n", + " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " write_to_file=False # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", "))" ] }, diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb index 78ace55e5..62d34f3f8 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb +++ 
b/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb @@ -45,9 +45,7 @@ "Once again, let's start by running through the same cluster setup as before:\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image.\n", - "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use Tech Preview component MCAD set the `mcad=True` option in `ClusterConfiguration`" + "The example here is a community image." ] }, { @@ -79,6 +77,7 @@ " max_memory=8,\n", " num_gpus=1,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] diff --git a/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb b/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb index f963fa5f0..6a3b37108 100644 --- a/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/0_basic_ray.ipynb @@ -48,9 +48,7 @@ "Here, we want to define our cluster by specifying the resources we require for our batch workload. Below, we define our cluster object (which generates a corresponding RayCluster).\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image.\n", - "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use Tech Preview component MCAD set the `mcad=True` option in `ClusterConfiguration`" + "The example here is a community image." 
] }, { @@ -72,6 +70,7 @@ " max_memory=4,\n", " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] diff --git a/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb index 00963886e..072392c74 100644 --- a/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb @@ -71,6 +71,7 @@ " num_gpus=1,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", " mcad=True, # Enable MCAD\n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", " instascale=True, # InstaScale now enabled, will scale OCP cluster to guarantee resource request\n", " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"] # Head, worker AWS machine types desired\n", "))" diff --git a/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb index 65447ac34..c8f1157a9 100644 --- a/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb @@ -51,7 +51,8 @@ " min_memory=4,\n", " max_memory=4,\n", " num_gpus=0,\n", - " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\"\n", + " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " write_to_file=False # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", "))" ] }, diff --git a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb index 06062e145..28e05a26a 100644 --- a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb +++ 
b/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb @@ -45,9 +45,7 @@ "Once again, let's start by running through the same cluster setup as before:\n", "\n", "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image.\n", - "\n", - "NOTE: By default the SDK uses Kueue as it's scheduling solution to use Tech Preview component MCAD set the `mcad=True` option in `ClusterConfiguration`" + "The example here is a community image." ] }, { @@ -71,6 +69,7 @@ " max_memory=8,\n", " num_gpus=1,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] From eef80d1f125423923a6325fa368f63fad4e109d6 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Wed, 24 Apr 2024 15:51:37 +0100 Subject: [PATCH 09/12] Removed InstaScale NB --- .../guided-demos/1_basic_instascale.ipynb | 183 ------------- ...lient.ipynb => 1_cluster_job_client.ipynb} | 0 ...active.ipynb => 2_basic_interactive.ipynb} | 0 .../1_basic_instascale.ipynb | 258 ------------------ ...lient.ipynb => 1_cluster_job_client.ipynb} | 0 ...active.ipynb => 2_basic_interactive.ipynb} | 0 .../preview_nbs/1_basic_instascale.ipynb | 183 ------------- ...lient.ipynb => 1_cluster_job_client.ipynb} | 0 ...active.ipynb => 2_basic_interactive.ipynb} | 0 9 files changed, 624 deletions(-) delete mode 100644 demo-notebooks/guided-demos/1_basic_instascale.ipynb rename demo-notebooks/guided-demos/{2_cluster_job_client.ipynb => 1_cluster_job_client.ipynb} (100%) rename demo-notebooks/guided-demos/{3_basic_interactive.ipynb => 2_basic_interactive.ipynb} (100%) delete mode 100644 demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb rename 
demo-notebooks/guided-demos/notebook-ex-outputs/{2_cluster_job_client.ipynb => 1_cluster_job_client.ipynb} (100%) rename demo-notebooks/guided-demos/notebook-ex-outputs/{3_basic_interactive.ipynb => 2_basic_interactive.ipynb} (100%) delete mode 100644 demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb rename demo-notebooks/guided-demos/preview_nbs/{2_cluster_job_client.ipynb => 1_cluster_job_client.ipynb} (100%) rename demo-notebooks/guided-demos/preview_nbs/{3_basic_interactive.ipynb => 2_basic_interactive.ipynb} (100%) diff --git a/demo-notebooks/guided-demos/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/1_basic_instascale.ipynb deleted file mode 100644 index 072392c74..000000000 --- a/demo-notebooks/guided-demos/1_basic_instascale.ipynb +++ /dev/null @@ -1,183 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "9865ee8c", - "metadata": {}, - "source": [ - "In this notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments).\n", - "\n", - "NOTE: The InstaScale and MCAD components are in Tech Preview" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a", - "metadata": {}, - "outputs": [], - "source": [ - "# Import pieces from codeflare-sdk\n", - "from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "614daa0c", - "metadata": {}, - "outputs": [], - "source": [ - "# Create authentication object for user permissions\n", - "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", - "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", - "auth = TokenAuthentication(\n", - " token = \"XXXXX\",\n", - " server = \"XXXXX\",\n", - " skip_tls=False\n", - ")\n", - "auth.login()" - ] - }, - { - "cell_type": 
"markdown", - "id": "bc27f84c", - "metadata": {}, - "source": [ - "This time, we are working in a cloud environment, and our OpenShift cluster does not have the resources needed for our desired workloads. We will use InstaScale to dynamically scale-up guaranteed resources based on our request (that will also automatically scale-down when we are finished working):\n", - "\n", - "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. \n", - "The example here is a community image.\n", - "\n", - "NOTE: This specific demo requires MCAD and InstaScale to be enabled on the Cluster" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f4bc870-091f-4e11-9642-cba145710159", - "metadata": {}, - "outputs": [], - "source": [ - "# Create and configure our cluster object (and appwrapper)\n", - "cluster = Cluster(ClusterConfiguration(\n", - " name='instascaletest',\n", - " namespace='default', # Update to your namespace\n", - " num_workers=2,\n", - " min_cpus=2,\n", - " max_cpus=2,\n", - " min_memory=8,\n", - " max_memory=8,\n", - " num_gpus=1,\n", - " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " mcad=True, # Enable MCAD\n", - " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", - " instascale=True, # InstaScale now enabled, will scale OCP cluster to guarantee resource request\n", - " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"] # Head, worker AWS machine types desired\n", - "))" - ] - }, - { - "cell_type": "markdown", - "id": "12eef53c", - "metadata": {}, - "source": [ - "Same as last time, we will bring the cluster up, wait for it to be ready, and confirm that the specs are as-requested:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200", - "metadata": {}, - "outputs": [], - "source": [ - "# Bring up the cluster\n", - "cluster.up()\n", - 
"cluster.wait_ready()" - ] - }, - { - "cell_type": "markdown", - "id": "6abfe904", - "metadata": {}, - "source": [ - "While the resources are being scaled, we can also go into the console and take a look at the InstaScale logs, as well as the new machines/nodes spinning up.\n", - "\n", - "Once the cluster is ready, we can confirm the specs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7fd45bc5-03c0-4ae5-9ec5-dd1c30f1a084", - "metadata": {}, - "outputs": [], - "source": [ - "cluster.details()" - ] - }, - { - "cell_type": "markdown", - "id": "5af8cd32", - "metadata": {}, - "source": [ - "Finally, we bring our resource cluster down and release/terminate the associated resources, bringing everything back to the way it was before our cluster was brought up." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57", - "metadata": {}, - "outputs": [], - "source": [ - "cluster.down()" - ] - }, - { - "cell_type": "markdown", - "id": "c883caea", - "metadata": {}, - "source": [ - "Once again, we can look at the machines/nodes and see that everything has been successfully scaled down!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d41b90e", - "metadata": {}, - "outputs": [], - "source": [ - "auth.logout()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/demo-notebooks/guided-demos/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/1_cluster_job_client.ipynb similarity index 100% rename from demo-notebooks/guided-demos/2_cluster_job_client.ipynb rename to demo-notebooks/guided-demos/1_cluster_job_client.ipynb diff --git a/demo-notebooks/guided-demos/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/2_basic_interactive.ipynb similarity index 100% rename from demo-notebooks/guided-demos/3_basic_interactive.ipynb rename to demo-notebooks/guided-demos/2_basic_interactive.ipynb diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb deleted file mode 100644 index 003cdb5f2..000000000 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/1_basic_instascale.ipynb +++ /dev/null @@ -1,258 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "9865ee8c", - "metadata": {}, - "source": [ - "In this notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud environments).\n", - "\n", - "NOTE: The InstaScale and MCAD components are in Tech Preview" - ] - }, - { - "cell_type": "code", - 
"execution_count": 1, - "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a", - "metadata": {}, - "outputs": [], - "source": [ - "# Import pieces from codeflare-sdk\n", - "from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "614daa0c", - "metadata": {}, - "outputs": [], - "source": [ - "# Create authentication object for user permissions\n", - "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", - "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", - "auth = TokenAuthentication(\n", - " token = \"XXXXX\",\n", - " server = \"XXXXX\",\n", - " skip_tls=False\n", - ")\n", - "auth.login()" - ] - }, - { - "cell_type": "markdown", - "id": "bc27f84c", - "metadata": {}, - "source": [ - "This time, we are working in a cloud environment, and our OpenShift cluster does not have the resources needed for our desired workloads. We will use InstaScale to dynamically scale-up guaranteed resources based on our request (that will also automatically scale-down when we are finished working):\n", - "\n", - "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. 
\n", - "The example here is a community image.\n", - "\n", - "NOTE: This specific demo requires MCAD and InstaScale to be enabled on the Cluster" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "0f4bc870-091f-4e11-9642-cba145710159", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Written to: instascaletest.yaml\n" - ] - } - ], - "source": [ - "# Create and configure our cluster object (and appwrapper)\n", - "cluster = Cluster(ClusterConfiguration(\n", - " name='instascaletest',\n", - " namespace='default', # Update to your namespace\n", - " num_workers=2,\n", - " min_cpus=2,\n", - " max_cpus=2,\n", - " min_memory=8,\n", - " max_memory=8,\n", - " num_gpus=1,\n", - " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " mcad=True, # Enable MCAD\n", - " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", - " instascale=True, # InstaScale now enabled, will scale OCP cluster to guarantee resource request\n", - " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"] # Head, worker AWS machine types desired\n", - "))" - ] - }, - { - "cell_type": "markdown", - "id": "12eef53c", - "metadata": {}, - "source": [ - "Same as last time, we will bring the cluster up, wait for it to be ready, and confirm that the specs are as-requested:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Waiting for requested resources to be set up...\n", - "Requested cluster up and running!\n" - ] - } - ], - "source": [ - "# Bring up the cluster\n", - "cluster.up()\n", - "cluster.wait_ready()" - ] - }, - { - "cell_type": "markdown", - "id": "6abfe904", - "metadata": {}, - "source": [ - "While the resources are being scaled, we can also go into the console and take a look at the InstaScale logs, as well as the new 
machines/nodes spinning up.\n", - "\n", - "Once the cluster is ready, we can confirm the specs:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "7fd45bc5-03c0-4ae5-9ec5-dd1c30f1a084", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
                     🚀 CodeFlare Cluster Details 🚀                     \n",
-       "                                                                         \n",
-       " ╭─────────────────────────────────────────────────────────────────────╮ \n",
-       " │   Name                                                              │ \n",
-       " │   instascaletest                                        Active ✅   │ \n",
-       " │                                                                     │ \n",
-       " │   URI: ray://instascaletest-head-svc.default.svc:10001              │ \n",
-       " │                                                                     │ \n",
-       " │   Dashboard🔗                                                       │ \n",
-       " │                                                                     │ \n",
-       " │                       Cluster Resources                             │ \n",
-       " │   ╭── Workers ──╮  ╭───────── Worker specs(each) ─────────╮         │ \n",
-       " │   │  # Workers  │  │  Memory      CPU         GPU         │         │ \n",
-       " │   │             │  │                                      │         │ \n",
-       " │   │  2          │  │  8~8         2           1           │         │ \n",
-       " │   │             │  │                                      │         │ \n",
-       " │   ╰─────────────╯  ╰──────────────────────────────────────╯         │ \n",
-       " ╰─────────────────────────────────────────────────────────────────────╯ \n",
-       "
 \n" - ], - "text/plain": [ - "\u001b[3m \u001b[0m\u001b[1;3m 🚀 CodeFlare Cluster Details 🚀\u001b[0m\u001b[3m \u001b[0m\n", - "\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\n", - " ╭─────────────────────────────────────────────────────────────────────╮ \n", - " │ \u001b[1;37;42mName\u001b[0m │ \n", - " │ \u001b[1;4minstascaletest\u001b[0m Active ✅ │ \n", - " │ │ \n", - " │ \u001b[1mURI:\u001b[0m ray://instascaletest-head-svc.default.svc:10001 │ \n", - " │ │ \n", - " │ \u001b]8;id=65933;http://ray-dashboard-instascaletest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org\u001b\\\u001b[4;34mDashboard🔗\u001b[0m\u001b]8;;\u001b\\ │ \n", - " │ │ \n", - " │ \u001b[3m Cluster Resources \u001b[0m │ \n", - " │ ╭── Workers ──╮ ╭───────── Worker specs(each) ─────────╮ │ \n", - " │ │ \u001b[1m \u001b[0m\u001b[1m# Workers\u001b[0m\u001b[1m \u001b[0m │ │ \u001b[1m \u001b[0m\u001b[1mMemory \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mCPU \u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m\u001b[1mGPU \u001b[0m\u001b[1m \u001b[0m │ │ \n", - " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m8~8 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m2 \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m1 \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ │ \u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \u001b[36m \u001b[0m\u001b[36m 
 \u001b[0m\u001b[36m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m\u001b[35m \u001b[0m │ │ \n", - " │ ╰─────────────╯ ╰──────────────────────────────────────╯ │ \n", - " ╰─────────────────────────────────────────────────────────────────────╯ \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "RayCluster(name='instascaletest', status=, workers=2, worker_mem_min=8, worker_mem_max=8, worker_cpu=2, worker_gpu=1, namespace='default', dashboard='http://ray-dashboard-instascaletest-default.apps.meyceoz-07122023.psap.aws.rhperfscale.org')" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cluster.details()" - ] - }, - { - "cell_type": "markdown", - "id": "5af8cd32", - "metadata": {}, - "source": [ - "Finally, we bring our resource cluster down and release/terminate the associated resources, bringing everything back to the way it was before our cluster was brought up." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57", - "metadata": {}, - "outputs": [], - "source": [ - "cluster.down()" - ] - }, - { - "cell_type": "markdown", - "id": "c883caea", - "metadata": {}, - "source": [ - "Once again, we can look at the machines/nodes and see that everything has been successfully scaled down!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d41b90e", - "metadata": {}, - "outputs": [], - "source": [ - "auth.logout()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.17" - }, - "vscode": { - "interpreter": { - "hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb similarity index 100% rename from demo-notebooks/guided-demos/notebook-ex-outputs/2_cluster_job_client.ipynb rename to demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_interactive.ipynb similarity index 100% rename from demo-notebooks/guided-demos/notebook-ex-outputs/3_basic_interactive.ipynb rename to demo-notebooks/guided-demos/notebook-ex-outputs/2_basic_interactive.ipynb diff --git a/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb b/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb deleted file mode 100644 index 072392c74..000000000 --- a/demo-notebooks/guided-demos/preview_nbs/1_basic_instascale.ipynb +++ /dev/null @@ -1,183 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "9865ee8c", - "metadata": {}, - "source": [ - "In this notebook, we will go over the basics of using InstaScale to scale up/down necessary resources that are not currently available on your OpenShift Cluster (in cloud 
environments).\n", - "\n", - "NOTE: The InstaScale and MCAD components are in Tech Preview" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a", - "metadata": {}, - "outputs": [], - "source": [ - "# Import pieces from codeflare-sdk\n", - "from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "614daa0c", - "metadata": {}, - "outputs": [], - "source": [ - "# Create authentication object for user permissions\n", - "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", - "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", - "auth = TokenAuthentication(\n", - " token = \"XXXXX\",\n", - " server = \"XXXXX\",\n", - " skip_tls=False\n", - ")\n", - "auth.login()" - ] - }, - { - "cell_type": "markdown", - "id": "bc27f84c", - "metadata": {}, - "source": [ - "This time, we are working in a cloud environment, and our OpenShift cluster does not have the resources needed for our desired workloads. We will use InstaScale to dynamically scale-up guaranteed resources based on our request (that will also automatically scale-down when we are finished working):\n", - "\n", - "NOTE: We must specify the `image` which will be used in our RayCluster, we recommend you bring your own image which suits your purposes. 
\n", - "The example here is a community image.\n", - "\n", - "NOTE: This specific demo requires MCAD and InstaScale to be enabled on the Cluster" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f4bc870-091f-4e11-9642-cba145710159", - "metadata": {}, - "outputs": [], - "source": [ - "# Create and configure our cluster object (and appwrapper)\n", - "cluster = Cluster(ClusterConfiguration(\n", - " name='instascaletest',\n", - " namespace='default', # Update to your namespace\n", - " num_workers=2,\n", - " min_cpus=2,\n", - " max_cpus=2,\n", - " min_memory=8,\n", - " max_memory=8,\n", - " num_gpus=1,\n", - " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " mcad=True, # Enable MCAD\n", - " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", - " instascale=True, # InstaScale now enabled, will scale OCP cluster to guarantee resource request\n", - " machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"] # Head, worker AWS machine types desired\n", - "))" - ] - }, - { - "cell_type": "markdown", - "id": "12eef53c", - "metadata": {}, - "source": [ - "Same as last time, we will bring the cluster up, wait for it to be ready, and confirm that the specs are as-requested:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200", - "metadata": {}, - "outputs": [], - "source": [ - "# Bring up the cluster\n", - "cluster.up()\n", - "cluster.wait_ready()" - ] - }, - { - "cell_type": "markdown", - "id": "6abfe904", - "metadata": {}, - "source": [ - "While the resources are being scaled, we can also go into the console and take a look at the InstaScale logs, as well as the new machines/nodes spinning up.\n", - "\n", - "Once the cluster is ready, we can confirm the specs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7fd45bc5-03c0-4ae5-9ec5-dd1c30f1a084", - "metadata": {}, - "outputs": [], - "source": [ - 
"cluster.details()" - ] - }, - { - "cell_type": "markdown", - "id": "5af8cd32", - "metadata": {}, - "source": [ - "Finally, we bring our resource cluster down and release/terminate the associated resources, bringing everything back to the way it was before our cluster was brought up." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5f36db0f-31f6-4373-9503-dc3c1c4c3f57", - "metadata": {}, - "outputs": [], - "source": [ - "cluster.down()" - ] - }, - { - "cell_type": "markdown", - "id": "c883caea", - "metadata": {}, - "source": [ - "Once again, we can look at the machines/nodes and see that everything has been successfully scaled down!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0d41b90e", - "metadata": {}, - "outputs": [], - "source": [ - "auth.logout()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "vscode": { - "interpreter": { - "hash": "f9f85f796d01129d0dd105a088854619f454435301f6ffec2fea96ecbd9be4ac" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb b/demo-notebooks/guided-demos/preview_nbs/1_cluster_job_client.ipynb similarity index 100% rename from demo-notebooks/guided-demos/preview_nbs/2_cluster_job_client.ipynb rename to demo-notebooks/guided-demos/preview_nbs/1_cluster_job_client.ipynb diff --git a/demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb b/demo-notebooks/guided-demos/preview_nbs/2_basic_interactive.ipynb similarity index 100% rename from demo-notebooks/guided-demos/preview_nbs/3_basic_interactive.ipynb rename to 
demo-notebooks/guided-demos/preview_nbs/2_basic_interactive.ipynb From a21c63e5325340923c8a8a2361eb99a9a27b7010 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Thu, 25 Apr 2024 09:28:05 +0100 Subject: [PATCH 10/12] Removed mention of InstaScale in NB --- .../additional-demos/local_interactive.ipynb | 3 +- .../notebook-ex-outputs/instascaletest.yaml | 185 ------------------ 2 files changed, 1 insertion(+), 187 deletions(-) delete mode 100644 demo-notebooks/guided-demos/notebook-ex-outputs/instascaletest.yaml diff --git a/demo-notebooks/additional-demos/local_interactive.ipynb b/demo-notebooks/additional-demos/local_interactive.ipynb index 9af9bc333..f0243d6e0 100644 --- a/demo-notebooks/additional-demos/local_interactive.ipynb +++ b/demo-notebooks/additional-demos/local_interactive.ipynb @@ -63,8 +63,7 @@ " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", - " instascale=False,\n", - " machine_types=[\"m5.xlarge\", \"p3.8xlarge\"]))" + " ))" ] }, { diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/instascaletest.yaml b/demo-notebooks/guided-demos/notebook-ex-outputs/instascaletest.yaml deleted file mode 100644 index 8cb96a794..000000000 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/instascaletest.yaml +++ /dev/null @@ -1,185 +0,0 @@ -apiVersion: workload.codeflare.dev/v1beta1 -kind: AppWrapper -metadata: - labels: - orderedinstance: m5.xlarge_g4dn.xlarge - name: instascaletest - namespace: default -spec: - priority: 9 - resources: - GenericItems: - - custompodresources: - - limits: - cpu: 2 - memory: 8G - nvidia.com/gpu: 0 - replicas: 1 - requests: - cpu: 2 - memory: 8G - nvidia.com/gpu: 0 - - limits: - cpu: 2 - memory: 8G - nvidia.com/gpu: 1 - replicas: 2 - requests: - cpu: 2 - memory: 8G - nvidia.com/gpu: 1 - generictemplate: - apiVersion: ray.io/v1 - kind: RayCluster - metadata: - labels: - 
appwrapper.mcad.ibm.com: instascaletest - controller-tools.k8s.io: '1.0' - name: instascaletest - namespace: default - spec: - autoscalerOptions: - idleTimeoutSeconds: 60 - imagePullPolicy: Always - resources: - limits: - cpu: 500m - memory: 512Mi - requests: - cpu: 500m - memory: 512Mi - upscalingMode: Default - enableInTreeAutoscaling: false - headGroupSpec: - rayStartParams: - block: 'true' - dashboard-host: 0.0.0.0 - num-gpus: '0' - serviceType: ClusterIP - template: - spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: instascaletest - operator: In - values: - - instascaletest - containers: - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: '0' - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - image: quay.io/project-codeflare/ray:latest-py39-cu118 - imagePullPolicy: Always - lifecycle: - preStop: - exec: - command: - - /bin/sh - - -c - - ray stop - name: ray-head - ports: - - containerPort: 6379 - name: gcs - - containerPort: 8265 - name: dashboard - - containerPort: 10001 - name: client - resources: - limits: - cpu: 2 - memory: 8G - nvidia.com/gpu: 0 - requests: - cpu: 2 - memory: 8G - nvidia.com/gpu: 0 - imagePullSecrets: [] - rayVersion: 2.1.0 - workerGroupSpecs: - - groupName: small-group-instascaletest - maxReplicas: 2 - minReplicas: 2 - rayStartParams: - block: 'true' - num-gpus: '1' - replicas: 2 - template: - metadata: - annotations: - key: value - labels: - key: value - spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: instascaletest - operator: In - values: - - instascaletest - containers: - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: 
status.podIP - - name: RAY_USE_TLS - value: '0' - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - image: quay.io/project-codeflare/ray:latest-py39-cu118 - lifecycle: - preStop: - exec: - command: - - /bin/sh - - -c - - ray stop - name: machine-learning - resources: - limits: - cpu: 2 - memory: 8G - nvidia.com/gpu: 1 - requests: - cpu: 2 - memory: 8G - nvidia.com/gpu: 1 - imagePullSecrets: [] - replicas: 1 - - generictemplate: - apiVersion: route.openshift.io/v1 - kind: Route - metadata: - labels: - odh-ray-cluster-service: instascaletest-head-svc - name: ray-dashboard-instascaletest - namespace: default - spec: - port: - targetPort: dashboard - to: - kind: Service - name: instascaletest-head-svc - replicas: 1 - Items: [] From a9e4ebd611f52eae89bb19883737e734cbfb4e6e Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Thu, 25 Apr 2024 09:49:48 +0100 Subject: [PATCH 11/12] Added local queue comment --- demo-notebooks/additional-demos/local_interactive.ipynb | 1 + demo-notebooks/guided-demos/1_cluster_job_client.ipynb | 3 ++- .../notebook-ex-outputs/1_cluster_job_client.ipynb | 3 ++- .../guided-demos/preview_nbs/1_cluster_job_client.ipynb | 3 ++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/demo-notebooks/additional-demos/local_interactive.ipynb b/demo-notebooks/additional-demos/local_interactive.ipynb index f0243d6e0..36adfb500 100644 --- a/demo-notebooks/additional-demos/local_interactive.ipynb +++ b/demo-notebooks/additional-demos/local_interactive.ipynb @@ -63,6 +63,7 @@ " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", " ))" ] }, diff --git 
a/demo-notebooks/guided-demos/1_cluster_job_client.ipynb b/demo-notebooks/guided-demos/1_cluster_job_client.ipynb index c8f1157a9..e46dc62a9 100644 --- a/demo-notebooks/guided-demos/1_cluster_job_client.ipynb +++ b/demo-notebooks/guided-demos/1_cluster_job_client.ipynb @@ -52,7 +52,8 @@ " max_memory=4,\n", " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " write_to_file=False # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] }, diff --git a/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb b/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb index c8f1157a9..e46dc62a9 100644 --- a/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb +++ b/demo-notebooks/guided-demos/notebook-ex-outputs/1_cluster_job_client.ipynb @@ -52,7 +52,8 @@ " max_memory=4,\n", " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " write_to_file=False # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] }, diff --git a/demo-notebooks/guided-demos/preview_nbs/1_cluster_job_client.ipynb b/demo-notebooks/guided-demos/preview_nbs/1_cluster_job_client.ipynb index c8f1157a9..b20f920bd 100644 --- a/demo-notebooks/guided-demos/preview_nbs/1_cluster_job_client.ipynb +++ b/demo-notebooks/guided-demos/preview_nbs/1_cluster_job_client.ipynb @@ -52,7 +52,8 @@ " max_memory=4,\n", " num_gpus=0,\n", " image=\"quay.io/project-codeflare/ray:latest-py39-cu118\",\n", - " write_to_file=False # When enabled Ray Cluster yaml files 
are written to /HOME/.codeflare/resources \n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources\n", + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", "))" ] }, From 67e8d78d2c39687e4f11662463698d91f0254c12 Mon Sep 17 00:00:00 2001 From: Mark Campbell Date: Thu, 25 Apr 2024 09:56:12 +0100 Subject: [PATCH 12/12] Update demo-notebooks/guided-demos/0_basic_ray.ipynb Co-authored-by: Fiona Waters --- demo-notebooks/guided-demos/0_basic_ray.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo-notebooks/guided-demos/0_basic_ray.ipynb b/demo-notebooks/guided-demos/0_basic_ray.ipynb index bd2e66f0b..6a3b37108 100644 --- a/demo-notebooks/guided-demos/0_basic_ray.ipynb +++ b/demo-notebooks/guided-demos/0_basic_ray.ipynb @@ -5,7 +5,7 @@ "id": "8d4a42f6", "metadata": {}, "source": [ - "In first notebook, we will go through the basics of using the SDK to:\n", + "In this notebook, we will go through the basics of using the SDK to:\n", " - Spin up a Ray cluster with our desired resources\n", " - View the status and specs of our Ray cluster\n", " - Take down the Ray cluster when finished"