diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py index 115ac25ec..127c6fade 100644 --- a/src/codeflare_sdk/cluster/cluster.py +++ b/src/codeflare_sdk/cluster/cluster.py @@ -33,10 +33,7 @@ ) from ..utils.kube_api_helpers import _kube_api_error_handling from ..utils.generate_yaml import is_openshift_cluster -from ..utils.openshift_oauth import ( - create_openshift_oauth_objects, - delete_openshift_oauth_objects, -) + from .config import ClusterConfiguration from .model import ( AppWrapper, @@ -86,14 +83,16 @@ def _client_headers(self): @property def _client_verify_tls(self): - return not self.config.openshift_oauth + if not is_openshift_cluster or not self.config.verify_tls: + return False + return True @property def job_client(self): k8client = api_config_handler() or client.ApiClient() if self._job_submission_client: return self._job_submission_client - if self.config.openshift_oauth: + if is_openshift_cluster(): print(k8client.configuration.get_api_key_with_prefix("authorization")) self._job_submission_client = JobSubmissionClient( self.cluster_dashboard_uri(), @@ -191,6 +190,7 @@ def create_app_wrapper(self): ingress_domain = self.config.ingress_domain ingress_options = self.config.ingress_options write_to_file = self.config.write_to_file + verify_tls = self.config.verify_tls return generate_appwrapper( name=name, namespace=namespace, @@ -213,10 +213,10 @@ def create_app_wrapper(self): image_pull_secrets=image_pull_secrets, dispatch_priority=dispatch_priority, priority_val=priority_val, - openshift_oauth=self.config.openshift_oauth, ingress_domain=ingress_domain, ingress_options=ingress_options, write_to_file=write_to_file, + verify_tls=verify_tls, ) # creates a new cluster with the provided or default spec @@ -226,10 +226,6 @@ def up(self): the MCAD queue. """ namespace = self.config.namespace - if self.config.openshift_oauth: - create_openshift_oauth_objects( - cluster_name=self.config.name, namespace=namespace - ) try: config_check() @@ -281,11 +277,6 @@ def down(self): except Exception as e: # pragma: no cover return _kube_api_error_handling(e) - if self.config.openshift_oauth: - delete_openshift_oauth_objects( - cluster_name=self.config.name, namespace=namespace - ) - def status( self, print_to_console: bool = True ) -> Tuple[CodeFlareClusterStatus, bool]: @@ -500,10 +491,14 @@ def torchx_config( return to_return def from_k8_cluster_object( - rc, mcad=True, ingress_domain=None, ingress_options={}, write_to_file=False + rc, + mcad=True, + ingress_domain=None, + ingress_options={}, + write_to_file=False, + verify_tls=True, ): config_check() - openshift_oauth = False if ( rc["metadata"]["annotations"]["sdk.codeflare.dev/local_interactive"] == "True" @@ -511,15 +506,6 @@ def from_k8_cluster_object( local_interactive = True else: local_interactive = False - if "codeflare.dev/oauth" in rc["metadata"]["annotations"]: - openshift_oauth = ( - rc["metadata"]["annotations"]["codeflare.dev/oauth"] == "True" - ) - else: - for container in rc["spec"]["headGroupSpec"]["template"]["spec"][ - "containers" - ]: - openshift_oauth = "oauth-proxy" in container["name"] machine_types = ( rc["metadata"]["labels"]["orderedinstance"].split("_") if "orderedinstance" in rc["metadata"]["labels"] @@ -570,7 +556,7 @@ def from_k8_cluster_object( ingress_domain=ingress_domain, ingress_options=ingress_options, write_to_file=write_to_file, - openshift_oauth=openshift_oauth, + verify_tls=verify_tls, ) return Cluster(cluster_config) @@ -655,7 +641,10 @@ def get_current_namespace(): # pragma: no cover def get_cluster( - cluster_name: str, namespace: str = "default", write_to_file: bool = False + cluster_name: str, + namespace: str = "default", + write_to_file: bool = False, + verify_tls: bool = True, ): try: config_check() @@ -729,6 +718,7 @@ def get_cluster( ingress_domain=ingress_domain, ingress_options=ingress_options, write_to_file=write_to_file, + verify_tls=verify_tls, ) raise FileNotFoundError( f"Cluster {cluster_name} is not found in {namespace} namespace" diff --git a/src/codeflare_sdk/cluster/config.py b/src/codeflare_sdk/cluster/config.py index 86d4252e8..195349ce3 100644 --- a/src/codeflare_sdk/cluster/config.py +++ b/src/codeflare_sdk/cluster/config.py @@ -52,7 +52,13 @@ class ClusterConfiguration: local_interactive: bool = False image_pull_secrets: list = field(default_factory=list) dispatch_priority: str = None - openshift_oauth: bool = False # NOTE: to use the user must have permission to create a RoleBinding for system:auth-delegator ingress_options: dict = field(default_factory=dict) ingress_domain: str = None write_to_file: bool = False + verify_tls: bool = True + + def __post_init__(self): + if not self.verify_tls: + print( + "Warning: TLS verification has been disabled - Endpoint checks will be bypassed" + ) diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py index da65defd7..00790ac21 100755 --- a/src/codeflare_sdk/utils/generate_yaml.py +++ b/src/codeflare_sdk/utils/generate_yaml.py @@ -227,6 +227,20 @@ def update_names(yaml, item, appwrapper_name, cluster_name, namespace): lower_meta["labels"]["workload.codeflare.dev/appwrapper"] = appwrapper_name lower_meta["name"] = cluster_name lower_meta["namespace"] = namespace + lower_spec = item.get("generictemplate", {}).get("spec") + if is_openshift_cluster(): + cookie_secret_env_var = { + "name": "COOKIE_SECRET", + "valueFrom": { + "secretKeyRef": { + "key": "cookie_secret", + "name": f"{cluster_name}-oauth-config", + } + }, + } + lower_spec["headGroupSpec"]["template"]["spec"]["containers"][0]["env"].append( + cookie_secret_env_var + ) def update_labels(yaml, instascale, instance_types): @@ -585,9 +599,6 @@ def enable_openshift_oauth(user_yaml, cluster_name, namespace): ) # allows for setting value of Cluster object when initializing object from an existing AppWrapper on cluster user_yaml["metadata"]["annotations"] = user_yaml["metadata"].get("annotations", {}) - user_yaml["metadata"]["annotations"][ - "codeflare-sdk-use-oauth" - ] = "true" # if the user gets an ray_headgroup_pod = user_yaml["spec"]["resources"]["GenericItems"][0][ "generictemplate" ]["spec"]["headGroupSpec"]["template"]["spec"] @@ -620,7 +631,7 @@ def _create_oauth_sidecar_object( "--upstream=http://localhost:8265", f"--tls-cert={tls_mount_location}/tls.crt", f"--tls-key={tls_mount_location}/tls.key", - f"--cookie-secret={b64encode(urandom(64)).decode('utf-8')}", # create random string for encrypting cookie + "--cookie-secret=$(COOKIE_SECRET)", f'--openshift-delegate-urls={{"/":{{"resource":"pods","namespace":"{namespace}","verb":"get"}}}}', ], image="registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366", @@ -696,10 +707,10 @@ def generate_appwrapper( image_pull_secrets: list, dispatch_priority: str, priority_val: int, - openshift_oauth: bool, ingress_domain: str, ingress_options: dict, write_to_file: bool, + verify_tls: bool, ): user_yaml = read_template(template) appwrapper_name, cluster_name = gen_names(name) @@ -757,7 +768,7 @@ def generate_appwrapper( delete_route_or_ingress(resources["resources"]) - if openshift_oauth: + if is_openshift_cluster(): enable_openshift_oauth(user_yaml, cluster_name, namespace) directory_path = os.path.expanduser("~/.codeflare/appwrapper/") diff --git a/src/codeflare_sdk/utils/openshift_oauth.py b/src/codeflare_sdk/utils/openshift_oauth.py deleted file mode 100644 index facd84117..000000000 --- a/src/codeflare_sdk/utils/openshift_oauth.py +++ /dev/null @@ -1,199 +0,0 @@ -from urllib3.util import parse_url -import yaml - -from ..cluster.auth import config_check, api_config_handler - -from kubernetes import client -from kubernetes import dynamic - - -def _route_api_getter(): - return dynamic.DynamicClient( - api_config_handler() or client.ApiClient() - ).resources.get(api_version="route.openshift.io/v1", kind="Route") - - -def create_openshift_oauth_objects(cluster_name, namespace): - config_check() - oauth_port = 8443 - oauth_sa_name = f"{cluster_name}-oauth-proxy" - tls_secret_name = _gen_tls_secret_name(cluster_name) - service_name = f"{cluster_name}-oauth" - port_name = "oauth-proxy" - - _create_or_replace_oauth_sa(namespace, oauth_sa_name, cluster_name) - _create_or_replace_oauth_service_obj( - cluster_name, namespace, oauth_port, tls_secret_name, service_name, port_name - ) - _create_or_replace_oauth_route_object( - cluster_name, - namespace, - service_name, - port_name, - ) - _create_or_replace_oauth_rb(cluster_name, namespace, oauth_sa_name) - - -def _create_or_replace_oauth_sa(namespace, oauth_sa_name, cluster_name): - oauth_sa = client.V1ServiceAccount( - api_version="v1", - kind="ServiceAccount", - metadata=client.V1ObjectMeta( - name=oauth_sa_name, - namespace=namespace, - annotations={ - "serviceaccounts.openshift.io/oauth-redirectreference.first": '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"' - + "ray-dashboard-" - + cluster_name - + '"}}' - }, - ), - ) - try: - client.CoreV1Api(api_config_handler()).create_namespaced_service_account( - namespace=namespace, body=oauth_sa - ) - except client.ApiException as e: - if e.reason == "Conflict": - client.CoreV1Api(api_config_handler()).replace_namespaced_service_account( - namespace=namespace, - body=oauth_sa, - name=oauth_sa_name, - ) - else: - raise e - - -def _create_or_replace_oauth_rb(cluster_name, namespace, oauth_sa_name): - oauth_crb = client.V1ClusterRoleBinding( - api_version="rbac.authorization.k8s.io/v1", - kind="ClusterRoleBinding", - metadata=client.V1ObjectMeta(name=f"{cluster_name}-rb"), - role_ref=client.V1RoleRef( - api_group="rbac.authorization.k8s.io", - kind="ClusterRole", - name="system:auth-delegator", - ), - subjects=[ - client.V1Subject( - kind="ServiceAccount", name=oauth_sa_name, namespace=namespace - ) - ], - ) - try: - client.RbacAuthorizationV1Api(api_config_handler()).create_cluster_role_binding( - body=oauth_crb - ) - except client.ApiException as e: - if e.reason == "Conflict": - client.RbacAuthorizationV1Api( - api_config_handler() - ).replace_cluster_role_binding(body=oauth_crb, name=f"{cluster_name}-rb") - else: - raise e - - -def _gen_tls_secret_name(cluster_name): - return f"{cluster_name}-proxy-tls-secret" - - -def delete_openshift_oauth_objects(cluster_name, namespace): - # NOTE: it might be worth adding error handling here, but shouldn't be necessary because cluster.down(...) checks - # for an existing cluster before calling this => the objects should never be deleted twice - oauth_sa_name = f"{cluster_name}-oauth-proxy" - service_name = f"{cluster_name}-oauth" - v1_routes = _route_api_getter() - client.CoreV1Api(api_config_handler()).delete_namespaced_service_account( - name=oauth_sa_name, namespace=namespace - ) - client.CoreV1Api(api_config_handler()).delete_namespaced_service( - name=service_name, namespace=namespace - ) - v1_routes.delete(name=f"ray-dashboard-{cluster_name}", namespace=namespace) - client.RbacAuthorizationV1Api(api_config_handler()).delete_cluster_role_binding( - name=f"{cluster_name}-rb" - ) - - -def _create_or_replace_oauth_service_obj( - cluster_name: str, - namespace: str, - oauth_port: int, - tls_secret_name: str, - service_name: str, - port_name: str, -) -> client.V1Service: - oauth_service = client.V1Service( - api_version="v1", - kind="Service", - metadata=client.V1ObjectMeta( - annotations={ - "service.beta.openshift.io/serving-cert-secret-name": tls_secret_name - }, - name=service_name, - namespace=namespace, - ), - spec=client.V1ServiceSpec( - ports=[ - client.V1ServicePort( - name=port_name, - protocol="TCP", - port=443, - target_port=oauth_port, - ) - ], - selector={ - "app.kubernetes.io/created-by": "kuberay-operator", - "app.kubernetes.io/name": "kuberay", - "ray.io/cluster": cluster_name, - "ray.io/identifier": f"{cluster_name}-head", - "ray.io/node-type": "head", - }, - ), - ) - try: - client.CoreV1Api(api_config_handler()).create_namespaced_service( - namespace=namespace, body=oauth_service - ) - except client.ApiException as e: - if e.reason == "Conflict": - client.CoreV1Api(api_config_handler()).replace_namespaced_service( - namespace=namespace, body=oauth_service, name=service_name - ) - else: - raise e - - -def _create_or_replace_oauth_route_object( - cluster_name: str, - namespace: str, - service_name: str, - port_name: str, -): - route = f""" - apiVersion: route.openshift.io/v1 - kind: Route - metadata: - name: ray-dashboard-{cluster_name} - namespace: {namespace} - spec: - port: - targetPort: {port_name} - tls: - termination: reencrypt - to: - kind: Service - name: {service_name} - """ - route_data = yaml.safe_load(route) - v1_routes = _route_api_getter() - try: - existing_route = v1_routes.get( - name=f"ray-dashboard-{cluster_name}", namespace=namespace - ) - route_data["metadata"]["resourceVersion"] = existing_route["metadata"][ - "resourceVersion" - ] - v1_routes.replace(body=route_data) - except dynamic.client.ApiException: - v1_routes.create(body=route_data) diff --git a/tests/e2e/mnist_raycluster_sdk_oauth_test.py b/tests/e2e/mnist_raycluster_sdk_oauth_test.py index 3e24d4653..708a389d5 100644 --- a/tests/e2e/mnist_raycluster_sdk_oauth_test.py +++ b/tests/e2e/mnist_raycluster_sdk_oauth_test.py @@ -51,7 +51,6 @@ def run_mnist_raycluster_sdk_oauth(self): num_gpus=0, instascale=False, image=ray_image, - openshift_oauth=True, write_to_file=True, ) ) diff --git a/tests/unit_test.py b/tests/unit_test.py index 3edadc63c..9a28d1c1a 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -39,7 +39,6 @@ get_cluster, _app_wrapper_status, _ray_cluster_status, - _get_ingress_domain, ) from codeflare_sdk.cluster.auth import ( TokenAuthentication, @@ -47,10 +46,6 @@ KubeConfigFileAuthentication, config_check, ) -from codeflare_sdk.utils.openshift_oauth import ( - create_openshift_oauth_objects, - delete_openshift_oauth_objects, -) from codeflare_sdk.utils.pretty_print import ( print_no_resources_found, print_app_wrappers_status, @@ -91,7 +86,6 @@ read_template, enable_local_interactive, ) -import codeflare_sdk.utils.openshift_oauth as sdk_oauth import openshift from openshift.selector import Selector @@ -113,7 +107,6 @@ def mock_routes_api(mocker): mocker.patch.object( - sdk_oauth, "_route_api_getter", return_value=MagicMock( resources=MagicMock( @@ -588,24 +581,6 @@ def test_rc_status(mocker): assert rc == None -def test_delete_openshift_oauth_objects(mocker): - mocker.patch.object(client.CoreV1Api, "delete_namespaced_service_account") - mocker.patch.object(client.CoreV1Api, "delete_namespaced_service") - mocker.patch.object(client.NetworkingV1Api, "delete_namespaced_ingress") - mocker.patch.object(client.RbacAuthorizationV1Api, "delete_cluster_role_binding") - mock_routes_api(mocker) - delete_openshift_oauth_objects("test-cluster", "test-namespace") - client.CoreV1Api.delete_namespaced_service_account.assert_called_with( - name="test-cluster-oauth-proxy", namespace="test-namespace" - ) - client.CoreV1Api.delete_namespaced_service.assert_called_with( - name="test-cluster-oauth", namespace="test-namespace" - ) - client.RbacAuthorizationV1Api.delete_cluster_role_binding.assert_called_with( - name="test-cluster-rb" - ) - - def test_cluster_uris(mocker): mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") @@ -3167,90 +3142,15 @@ def test_enable_local_interactive(mocker): } -def test_create_openshift_oauth(mocker: MockerFixture): - create_namespaced_service_account = MagicMock() - create_cluster_role_binding = MagicMock() - create_namespaced_service = MagicMock() - mocker.patch.object( - client.CoreV1Api, - "create_namespaced_service_account", - create_namespaced_service_account, - ) - mocker.patch.object( - client.RbacAuthorizationV1Api, - "create_cluster_role_binding", - create_cluster_role_binding, - ) - mocker.patch.object( - client.CoreV1Api, "create_namespaced_service", create_namespaced_service - ) - mock_routes_api(mocker) - create_openshift_oauth_objects("foo", "bar") - create_ns_sa_args = create_namespaced_service_account.call_args - create_crb_args = create_cluster_role_binding.call_args - create_ns_serv_args = create_namespaced_service.call_args - assert ( - create_ns_sa_args.kwargs["namespace"] == create_ns_serv_args.kwargs["namespace"] - ) - assert isinstance(create_ns_sa_args.kwargs["body"], client.V1ServiceAccount) - assert isinstance(create_crb_args.kwargs["body"], client.V1ClusterRoleBinding) - assert isinstance(create_ns_serv_args.kwargs["body"], client.V1Service) - - -def test_replace_openshift_oauth(mocker: MockerFixture): - # not_found_exception = client.ApiException(reason="Conflict") - create_namespaced_service_account = MagicMock( - side_effect=client.ApiException(reason="Conflict") - ) - create_cluster_role_binding = MagicMock( - side_effect=client.ApiException(reason="Conflict") - ) - create_namespaced_service = MagicMock( - side_effect=client.ApiException(reason="Conflict") - ) - mocker.patch.object( - client.CoreV1Api, - "create_namespaced_service_account", - create_namespaced_service_account, - ) - mocker.patch.object( - client.RbacAuthorizationV1Api, - "create_cluster_role_binding", - create_cluster_role_binding, - ) - mocker.patch.object( - client.CoreV1Api, "create_namespaced_service", create_namespaced_service - ) - mocker.patch.object(dynamic.ResourceList, "get", return_value=True) - replace_namespaced_service_account = MagicMock() - replace_cluster_role_binding = MagicMock() - replace_namespaced_service = MagicMock() - mocker.patch.object( - client.CoreV1Api, - "replace_namespaced_service_account", - replace_namespaced_service_account, - ) - mocker.patch.object( - client.RbacAuthorizationV1Api, - "replace_cluster_role_binding", - replace_cluster_role_binding, - ) - mocker.patch.object( - client.CoreV1Api, "replace_namespaced_service", replace_namespaced_service - ) - mock_routes_api(mocker) - create_openshift_oauth_objects("foo", "bar") - replace_namespaced_service_account.assert_called_once() - replace_cluster_role_binding.assert_called_once() - replace_namespaced_service.assert_called_once() - - def test_gen_app_wrapper_with_oauth(mocker: MockerFixture): mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch( "codeflare_sdk.cluster.cluster.get_current_namespace", return_value="opendatahub", ) + mocker.patch( + "codeflare_sdk.utils.generate_yaml.is_openshift_cluster", return_value=True + ) write_user_appwrapper = MagicMock() mocker.patch( "codeflare_sdk.utils.generate_yaml.write_user_appwrapper", write_user_appwrapper @@ -3258,7 +3158,6 @@ def test_gen_app_wrapper_with_oauth(mocker: MockerFixture): Cluster( ClusterConfiguration( "test_cluster", - openshift_oauth=True, image="quay.io/project-codeflare/ray:latest-py39-cu118", ingress_domain="apps.cluster.awsroute.org", write_to_file=True, diff --git a/tests/unit_test_support.py b/tests/unit_test_support.py index c4b7416de..36c25c69a 100644 --- a/tests/unit_test_support.py +++ b/tests/unit_test_support.py @@ -60,7 +60,6 @@ def createClusterWithConfig(mocker): return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, ) cluster = Cluster(createClusterConfig()) - cluster.config.image = "quay.io/project-codeflare/ray:latest-py39-cu118" return cluster