Skip to content

Added support for ingress over routes on cluster creation #251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

106 changes: 57 additions & 49 deletions src/codeflare_sdk/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@

from .auth import config_check, api_config_handler
from ..utils import pretty_print
from ..utils.generate_yaml import generate_appwrapper
from ..utils.generate_yaml import (
generate_appwrapper,
)
from ..utils.kube_api_helpers import _kube_api_error_handling
from ..utils.openshift_oauth import (
create_openshift_oauth_objects,
Expand Down Expand Up @@ -175,6 +177,8 @@ def create_app_wrapper(self):
local_interactive = self.config.local_interactive
image_pull_secrets = self.config.image_pull_secrets
dispatch_priority = self.config.dispatch_priority
ingress_domain = self.config.ingress_domain
ingress_options = self.config.ingress_options
return generate_appwrapper(
name=name,
namespace=namespace,
Expand All @@ -198,6 +202,8 @@ def create_app_wrapper(self):
dispatch_priority=dispatch_priority,
priority_val=priority_val,
openshift_oauth=self.config.openshift_oauth,
ingress_domain=ingress_domain,
ingress_options=ingress_options,
)

# creates a new cluster with the provided or default spec
Expand Down Expand Up @@ -336,7 +342,7 @@ def is_dashboard_ready(self) -> bool:
timeout=5,
verify=self._client_verify_tls,
)
except requests.exceptions.SSLError:
except requests.exceptions.SSLError: # pragma no cover
# SSL exception occurs when oauth ingress has been created but cluster is not up
return False
if response.status_code == 200:
Expand Down Expand Up @@ -399,27 +405,24 @@ def cluster_dashboard_uri(self) -> str:
"""
try:
config_check()
api_instance = client.CustomObjectsApi(api_config_handler())
routes = api_instance.list_namespaced_custom_object(
group="route.openshift.io",
version="v1",
namespace=self.config.namespace,
plural="routes",
)
except Exception as e: # pragma: no cover
api_instance = client.NetworkingV1Api(api_config_handler())
ingresses = api_instance.list_namespaced_ingress(self.config.namespace)
except Exception as e: # pragma no cover
return _kube_api_error_handling(e)

for route in routes["items"]:
if route["metadata"][
"name"
] == f"ray-dashboard-{self.config.name}" or route["metadata"][
"name"
].startswith(
f"{self.config.name}-ingress"
for ingress in ingresses.items:
annotations = ingress.metadata.annotations
protocol = "http"
if (
ingress.metadata.name == f"ray-dashboard-{self.config.name}"
or ingress.metadata.name.startswith(f"{self.config.name}-ingress")
):
protocol = "https" if route["spec"].get("tls") else "http"
return f"{protocol}://{route['spec']['host']}"
return "Dashboard route not available yet, have you run cluster.up()?"
if annotations == None:
protocol = "http"
elif "route.openshift.io/termination" in annotations:
protocol = "https"
return f"{protocol}://{ingress.spec.rules[0].host}"
return "Dashboard ingress not available yet, have you run cluster.up()?"

def list_jobs(self) -> List:
"""
Expand Down Expand Up @@ -498,8 +501,8 @@ def from_k8_cluster_object(rc, mcad=True):

def local_client_url(self):
if self.config.local_interactive == True:
ingress_domain = _get_ingress_domain()
return f"ray://rayclient-{self.config.name}-{self.config.namespace}.{ingress_domain}"
ingress_domain = _get_ingress_domain(self)
return f"ray://{ingress_domain}"
else:
return "None"

Expand Down Expand Up @@ -655,16 +658,23 @@ def _check_aw_exists(name: str, namespace: str) -> bool:
return False


def _get_ingress_domain():
# Can't test this until get_current_namespace is fixed
def _get_ingress_domain(self): # pragma: no cover
try:
config_check()
api_client = client.CustomObjectsApi(api_config_handler())
ingress = api_client.get_cluster_custom_object(
"config.openshift.io", "v1", "ingresses", "cluster"
)
api_client = client.NetworkingV1Api(api_config_handler())
if self.config.namespace != None:
namespace = self.config.namespace
else:
namespace = get_current_namespace()
ingresses = api_client.list_namespaced_ingress(namespace)
except Exception as e: # pragma: no cover
return _kube_api_error_handling(e)
return ingress["spec"]["domain"]
domain = None
for ingress in ingresses.items:
if ingress.spec.rules[0].http.paths[0].backend.service.port.number == 10001:
domain = ingress.spec.rules[0].host
return domain


def _app_wrapper_status(name, namespace="default") -> Optional[AppWrapper]:
Expand Down Expand Up @@ -756,27 +766,25 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
status = RayClusterStatus(rc["status"]["state"].lower())
else:
status = RayClusterStatus.UNKNOWN

config_check()
api_instance = client.CustomObjectsApi(api_config_handler())
# UPDATE THIS
routes = api_instance.list_namespaced_custom_object(
group="route.openshift.io",
version="v1",
namespace=rc["metadata"]["namespace"],
plural="routes",
)
ray_route = None
for route in routes["items"]:
if route["metadata"][
"name"
] == f"ray-dashboard-{rc['metadata']['name']}" or route["metadata"][
"name"
].startswith(
f"{rc['metadata']['name']}-ingress"
try:
config_check()
api_instance = client.NetworkingV1Api(api_config_handler())
ingresses = api_instance.list_namespaced_ingress(rc["metadata"]["namespace"])
except Exception as e: # pragma no cover
return _kube_api_error_handling(e)
ray_ingress = None
for ingress in ingresses.items:
annotations = ingress.metadata.annotations
protocol = "http"
if (
ingress.metadata.name == f"ray-dashboard-{rc['metadata']['name']}"
or ingress.metadata.name.startswith(f"{rc['metadata']['name']}-ingress")
):
protocol = "https" if route["spec"].get("tls") else "http"
ray_route = f"{protocol}://{route['spec']['host']}"
if annotations == None:
protocol = "http"
elif "route.openshift.io/termination" in annotations:
protocol = "https"
ray_ingress = f"{protocol}://{ingress.spec.rules[0].host}"

return RayCluster(
name=rc["metadata"]["name"],
Expand All @@ -794,7 +802,6 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
]["resources"]["limits"]["cpu"],
worker_gpu=0, # hard to detect currently how many gpus, can override it with what the user asked for
namespace=rc["metadata"]["namespace"],
dashboard=ray_route,
head_cpus=rc["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][
"resources"
]["limits"]["cpu"],
Expand All @@ -804,6 +811,7 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
head_gpu=rc["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][
"resources"
]["limits"]["nvidia.com/gpu"],
dashboard=ray_ingress,
)


Expand Down
2 changes: 2 additions & 0 deletions src/codeflare_sdk/cluster/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,5 @@ class ClusterConfiguration:
image_pull_secrets: list = field(default_factory=list)
dispatch_priority: str = None
openshift_oauth: bool = False # NOTE: to use the user must have permission to create a RoleBinding for system:auth-delegator
ingress_options: dict = field(default_factory=dict)
ingress_domain: str = None
54 changes: 33 additions & 21 deletions src/codeflare_sdk/templates/base-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -289,38 +289,50 @@ spec:
emptyDir: {}
- replicas: 1
generictemplate:
kind: Route
apiVersion: route.openshift.io/v1
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: ray-dashboard-deployment-name
name: ray-dashboard-raytest
namespace: default
labels:
# allows me to return name of service that Ray operator creates
odh-ray-cluster-service: deployment-name-head-svc
annotations:
annotations-example:annotations-example
spec:
to:
kind: Service
name: deployment-name-head-svc
port:
targetPort: dashboard
ingressClassName: nginx
rules:
- http:
paths:
- backend:
service:
name: raytest-head-svc
port:
number: 8265
pathType: Prefix
path: /
host: ray-dashboard-raytest.<ingress-domain>
- replicas: 1
generictemplate:
apiVersion: route.openshift.io/v1
kind: Route
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: rayclient-deployment-name
namespace: default
annotations:
annotations-example:annotations-example
labels:
# allows me to return name of service that Ray operator creates
odh-ray-cluster-service: deployment-name-head-svc
spec:
port:
targetPort: client
tls:
termination: passthrough
to:
kind: Service
name: deployment-name-head-svc
ingressClassName: nginx
rules:
- http:
paths:
- backend:
service:
name: deployment-name-head-svc
port:
number: 10001
path: ''
pathType: ImplementationSpecific
host: rayclient-raytest.<ingress-domain>
- replicas: 1
generictemplate:
apiVersion: v1
Expand Down
Loading