Skip to content

Cli submit delete raycluster #257

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 13 additions & 12 deletions src/codeflare_sdk/cli/codeflare_cli.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
import click
import sys
import os

from codeflare_sdk.cli.cli_utils import load_auth

cmd_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "commands"))


class CodeflareContext:
def __init__(self, codeflare_path):
self.codeflare_path = codeflare_path
def __init__(self):
self.codeflare_path = _initialize_codeflare_folder()


def _initialize_codeflare_folder():
codeflare_folder = os.path.expanduser("~/.codeflare")
if not os.path.exists(codeflare_folder):
os.makedirs(codeflare_folder)
return codeflare_folder


class CodeflareCLI(click.MultiCommand):
Expand All @@ -31,18 +39,11 @@ def get_command(self, ctx, name):
return


def initialize_cli(ctx):
# Make .codeflare folder
codeflare_folder = os.path.expanduser("~/.codeflare")
if not os.path.exists(codeflare_folder):
os.makedirs(codeflare_folder)
ctx.obj = CodeflareContext(codeflare_folder)


@click.command(cls=CodeflareCLI)
@click.pass_context
def cli(ctx):
initialize_cli(ctx) # Ran on every command
load_auth()
ctx.obj = CodeflareContext() # Ran on every command
pass


Expand Down
23 changes: 23 additions & 0 deletions src/codeflare_sdk/cli/commands/delete.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import click

from codeflare_sdk.cluster.cluster import get_cluster


@click.group()
def cli():
"""
Delete a specified resource from the Kubernetes cluster
"""
pass


@cli.command()
@click.argument("name", type=str)
@click.option("--namespace", type=str, default="default")
def raycluster(name, namespace):
"""
Delete a specified RayCluster from the Kubernetes cluster
"""
cluster = get_cluster(name, namespace)
cluster.down()
click.echo(f"Cluster deleted successfully")
32 changes: 32 additions & 0 deletions src/codeflare_sdk/cli/commands/submit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import click

from codeflare_sdk.cluster.cluster import Cluster


@click.group()
def cli():
"""
Submit a defined resource to the Kubernetes cluster
"""
pass


@cli.command()
@click.argument("name", type=str)
@click.option("--wait", is_flag=True)
def raycluster(name, wait):
"""
Submit a defined RayCluster to the Kubernetes cluster
"""
cluster = Cluster.from_definition_yaml(name + ".yaml")
if not cluster:
click.echo(
"Error submitting RayCluster. Make sure the RayCluster is defined before submitting it"
)
return
if not wait:
cluster.up()
click.echo("Cluster submitted successfully")
return
cluster.up()
cluster.wait_ready()
59 changes: 56 additions & 3 deletions src/codeflare_sdk/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,8 @@ def torchx_config(
def from_k8_cluster_object(rc):
machine_types = (
rc["metadata"]["labels"]["orderedinstance"].split("_")
if "orderedinstance" in rc["metadata"]["labels"]
if "labels" in rc["metadata"]
and "orderedinstance" in rc["metadata"]["labels"]
else []
)
local_interactive = (
Expand Down Expand Up @@ -347,6 +348,58 @@ def from_k8_cluster_object(rc):
)
return Cluster(cluster_config)

def from_definition_yaml(yaml_path):
try:
with open(yaml_path) as yaml_file:
rc = yaml.load(yaml_file, Loader=yaml.FullLoader)
machine_types = (
rc["metadata"]["labels"]["orderedinstance"].split("_")
if "labels" in rc["metadata"]
and "orderedinstance" in rc["metadata"]["labels"]
else []
)
worker_group_specs = rc["spec"]["resources"]["GenericItems"][0][
"generictemplate"
]["spec"]["workerGroupSpecs"][0]
local_interactive = (
"volumeMounts"
in worker_group_specs["template"]["spec"]["containers"][0]
)
cluster_config = ClusterConfiguration(
name=rc["metadata"]["name"],
namespace=rc["metadata"]["namespace"],
machine_types=machine_types,
min_worker=worker_group_specs["minReplicas"],
max_worker=worker_group_specs["maxReplicas"],
min_cpus=worker_group_specs["template"]["spec"]["containers"][0][
"resources"
]["requests"]["cpu"],
max_cpus=worker_group_specs["template"]["spec"]["containers"][0][
"resources"
]["limits"]["cpu"],
min_memory=int(
worker_group_specs["template"]["spec"]["containers"][0][
"resources"
]["requests"]["memory"][:-1]
),
max_memory=int(
worker_group_specs["template"]["spec"]["containers"][0][
"resources"
]["limits"]["memory"][:-1]
),
gpu=worker_group_specs["template"]["spec"]["containers"][0][
"resources"
]["requests"]["nvidia.com/gpu"],
instascale=True if machine_types else False,
image=worker_group_specs["template"]["spec"]["containers"][0][
"image"
],
local_interactive=local_interactive,
)
return Cluster(cluster_config)
except IOError:
return None

def local_client_url(self):
if self.config.local_interactive == True:
ingress_domain = _get_ingress_domain()
Expand Down Expand Up @@ -406,8 +459,8 @@ def get_current_namespace(): # pragma: no cover

def get_cluster(cluster_name: str, namespace: str = "default"):
try:
config.load_kube_config()
api_instance = client.CustomObjectsApi()
config_check()
api_instance = client.CustomObjectsApi(api_config_handler())
rcs = api_instance.list_namespaced_custom_object(
group="ray.io",
version="v1alpha1",
Expand Down
6 changes: 3 additions & 3 deletions tests/cli-test-case.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ metadata:
labels:
orderedinstance: cpu.small_gpu.large
name: cli-test-cluster
namespace: ns
namespace: default
spec:
priority: 9
resources:
Expand Down Expand Up @@ -36,7 +36,7 @@ spec:
appwrapper.mcad.ibm.com: cli-test-cluster
controller-tools.k8s.io: '1.0'
name: cli-test-cluster
namespace: ns
namespace: default
spec:
autoscalerOptions:
idleTimeoutSeconds: 60
Expand Down Expand Up @@ -184,7 +184,7 @@ spec:
labels:
odh-ray-cluster-service: cli-test-cluster-head-svc
name: ray-dashboard-cli-test-cluster
namespace: ns
namespace: default
spec:
port:
targetPort: dashboard
Expand Down
46 changes: 42 additions & 4 deletions tests/unit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,13 @@ def test_cli_working():
assert result.exit_code == 0


def test_cluster_definition_cli():
def test_cluster_definition_cli(mocker):
mocker.patch.object(client, "ApiClient")
runner = CliRunner()
define_cluster_command = """
define raycluster
--name=cli-test-cluster
--namespace=ns
--namespace=default
--min_worker=1
--max_worker=2
--min_cpus=3
Expand All @@ -105,7 +106,10 @@ def test_cluster_definition_cli():
--image_pull_secrets='["cli-test-pull-secret"]'
"""
result = runner.invoke(cli, define_cluster_command)
assert result.output == "Written to: cli-test-cluster.yaml\n"
assert (
result.output
== "No authentication found, trying default kubeconfig\nWritten to: cli-test-cluster.yaml\n"
)
assert filecmp.cmp(
"cli-test-cluster.yaml", f"{parent}/tests/cli-test-case.yaml", shallow=True
)
Expand All @@ -120,7 +124,10 @@ def test_login_cli(mocker):
--token=testtoken
"""
login_result = runner.invoke(cli, k8s_login_command)
assert login_result.output == "Logged into 'testserver:6443'\n"
assert (
login_result.output
== "No authentication found, trying default kubeconfig\nLogged into 'testserver:6443'\n"
)
try:
auth_file_path = os.path.expanduser("~/.codeflare/auth")
with open(auth_file_path, "rb") as file:
Expand Down Expand Up @@ -170,6 +177,37 @@ def test_load_auth():
assert sdk_auth.api_client is not None


def test_cluster_submission_cli(mocker):
mocker.patch.object(client, "ApiClient")
runner = CliRunner()
submit_cluster_command = """
submit raycluster
cli-test-cluster
"""
result = runner.invoke(cli, submit_cluster_command)

assert result.exit_code == 0
assert "Cluster submitted successfully" in result.output


def test_cluster_deletion_cli(mocker):
mocker.patch.object(client, "ApiClient")
mocker.patch("kubernetes.config.load_kube_config", return_value="ignore")
mocker.patch(
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
side_effect=get_ray_obj,
)
runner = CliRunner()
delete_cluster_command = """
delete raycluster
quicktest
"""
result = runner.invoke(cli, delete_cluster_command)

assert result.exit_code == 0
assert "Cluster deleted successfully" in result.output


# For mocking openshift client results
fake_res = openshift.Result("fake")

Expand Down