From b2077aef305f82804f44d73e404565093f62446b Mon Sep 17 00:00:00 2001 From: ted chang Date: Mon, 28 Aug 2023 19:35:38 -0700 Subject: [PATCH 1/4] Update mcad kuberay example --- .../kuberay/config/aw-raycluster-1.yaml | 153 ++++++++++++++++ .../kuberay/config/aw-raycluster.yaml | 167 +++++++++--------- doc/usage/examples/kuberay/kuberay-mcad.md | 56 +++++- 3 files changed, 284 insertions(+), 92 deletions(-) create mode 100644 doc/usage/examples/kuberay/config/aw-raycluster-1.yaml diff --git a/doc/usage/examples/kuberay/config/aw-raycluster-1.yaml b/doc/usage/examples/kuberay/config/aw-raycluster-1.yaml new file mode 100644 index 000000000..22293c1b5 --- /dev/null +++ b/doc/usage/examples/kuberay/config/aw-raycluster-1.yaml @@ -0,0 +1,153 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: raycluster-complete-1 + namespace: default +spec: + resources: + GenericItems: + - replicas: 1 + custompodresources: # Optional section that specifies resource requirements + # for non-standard k8s resources, follows same format as + # that of standard k8s resources. + - replicas: 2 # because AppWrappers are generic they must define the resultant pods that will be needed + # to fulfill a request as the request values cannot be reliably extracted from the + # generictemplate below + requests: + cpu: 8 + memory: 512Mi + limits: + cpu: 10 + memory: 1G + generictemplate: + # The resource requests and limits in this config are too small for production! + # For examples with more realistic resource configuration, see + # ray-cluster.complete.large.yaml and + # ray-cluster.autoscaler.large.yaml. + apiVersion: ray.io/v1alpha1 + kind: RayCluster + metadata: + labels: + controller-tools.k8s.io: "1.0" + # A unique identifier for the head node and workers of this cluster. + name: raycluster-complete-1 + spec: + rayVersion: '2.5.0' + # Ray head pod configuration + headGroupSpec: + # Kubernetes Service Type. This is an optional field, and the default value is ClusterIP. 
+ # Refer to https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types. + serviceType: ClusterIP + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. + rayStartParams: + dashboard-host: '0.0.0.0' + # pod template + template: + metadata: + # Custom labels. NOTE: To avoid conflicts with KubeRay operator, do not define custom labels start with `raycluster`. + # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + labels: {} + spec: + containers: + - name: ray-head + image: rayproject/ray:2.5.0 + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + lifecycle: + preStop: + exec: + command: ["/bin/sh","-c","ray stop"] + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + # The resource requests and limits in this config are too small for production! + # For an example with more realistic resource configuration, see + # ray-cluster.autoscaler.large.yaml. + # It is better to use a few large Ray pod than many small ones. + # For production, it is ideal to size each Ray pod to take up the + # entire Kubernetes node on which it is scheduled. + resources: + limits: + cpu: "1" + memory: "2G" + requests: + # For production use-cases, we recommend specifying integer CPU reqests and limits. + # We also recommend setting requests equal to limits for both CPU and memory. + # For this example, we use a 500m CPU request to accomodate resource-constrained local + # Kubernetes testing environments such as KinD and minikube. 
+ cpu: "500m" + memory: "2G" + volumes: + - name: ray-logs + emptyDir: {} + workerGroupSpecs: + # the pod replicas in this group typed worker + - replicas: 1 + minReplicas: 1 + maxReplicas: 10 + # logical group name, for this called small-group, also can be functional + groupName: small-group + # If worker pods need to be added, we can increment the replicas. + # If worker pods need to be removed, we decrement the replicas, and populate the workersToDelete list. + # The operator will remove pods from the list until the desired number of replicas is satisfied. + # If the difference between the current replica count and the desired replicas is greater than the + # number of entries in workersToDelete, random worker pods will be deleted. + #scaleStrategy: + # workersToDelete: + # - raycluster-complete-worker-small-group-bdtwh + # - raycluster-complete-worker-small-group-hv457 + # - raycluster-complete-worker-small-group-k8tj7 + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. + rayStartParams: {} + #pod template + template: + spec: + containers: + - name: ray-worker + image: rayproject/ray:2.5.0 + lifecycle: + preStop: + exec: + command: ["/bin/sh","-c","ray stop"] + # use volumeMounts.Optional. + # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + # The resource requests and limits in this config are too small for production! + # For an example with more realistic resource configuration, see + # ray-cluster.autoscaler.large.yaml. + # It is better to use a few large Ray pod than many small ones. + # For production, it is ideal to size each Ray pod to take up the + # entire Kubernetes node on which it is scheduled. 
+ resources: + limits: + cpu: "1" + memory: "1G" + # For production use-cases, we recommend specifying integer CPU reqests and limits. + # We also recommend setting requests equal to limits for both CPU and memory. + # For this example, we use a 500m CPU request to accomodate resource-constrained local + # Kubernetes testing environments such as KinD and minikube. + requests: + # For production use-cases, we recommend specifying integer CPU reqests and limits. + # We also recommend setting requests equal to limits for both CPU and memory. + # For this example, we use a 500m CPU request to accomodate resource-constrained local + # Kubernetes testing environments such as KinD and minikube. + cpu: "500m" + # For production use-cases, we recommend allocating at least 8Gb memory for each Ray container. + memory: "1G" + # use volumes + # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ + volumes: + - name: ray-logs + emptyDir: {} + diff --git a/doc/usage/examples/kuberay/config/aw-raycluster.yaml b/doc/usage/examples/kuberay/config/aw-raycluster.yaml index bbef20d0d..dc33b268f 100644 --- a/doc/usage/examples/kuberay/config/aw-raycluster.yaml +++ b/doc/usage/examples/kuberay/config/aw-raycluster.yaml @@ -1,25 +1,28 @@ apiVersion: mcad.ibm.com/v1beta1 kind: AppWrapper metadata: - name: raycluster-autoscaler + name: raycluster-complete namespace: default spec: resources: - Items: [] GenericItems: - replicas: 1 - custompodresources: - - replicas: 2 + custompodresources: # Optional section that specifies resource requirements + # for non-standard k8s resources, follows same format as + # that of standard k8s resources. 
+ - replicas: 2 # because AppWrappers are generic they must define the resultant pods that will be needed + # to fulfill a request as the request values cannot be reliably extracted from the + # generictemplate below requests: - cpu: 10 + cpu: 8 memory: 512Mi limits: cpu: 10 memory: 1G generictemplate: - # This config demonstrates KubeRay's Ray autoscaler integration. # The resource requests and limits in this config are too small for production! - # For an example with more realistic resource configuration, see + # For examples with more realistic resource configuration, see + # ray-cluster.complete.large.yaml and # ray-cluster.autoscaler.large.yaml. apiVersion: ray.io/v1alpha1 kind: RayCluster @@ -27,66 +30,29 @@ spec: labels: controller-tools.k8s.io: "1.0" # A unique identifier for the head node and workers of this cluster. - name: raycluster-autoscaler + name: raycluster-complete spec: - # The version of Ray you are using. Make sure all Ray containers are running this version of Ray. - rayVersion: '2.0.0' - # If enableInTreeAutoscaling is true, the autoscaler sidecar will be added to the Ray head pod. - # Ray autoscaler integration is supported only for Ray versions >= 1.11.0 - # Ray autoscaler integration is Beta with KubeRay >= 0.3.0 and Ray >= 2.0.0. - enableInTreeAutoscaling: true - # autoscalerOptions is an OPTIONAL field specifying configuration overrides for the Ray autoscaler. - # The example configuration shown below below represents the DEFAULT values. - # (You may delete autoscalerOptions if the defaults are suitable.) - autoscalerOptions: - # upscalingMode is "Default" or "Aggressive." - # Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster. - # Default: Upscaling is not rate-limited. - # Aggressive: An alias for Default; upscaling is not rate-limited. 
- upscalingMode: Default - # idleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources. - idleTimeoutSeconds: 60 - # image optionally overrides the autoscaler's container image. - # If instance.spec.rayVersion is at least "2.0.0", the autoscaler will default to the same image as - # the ray container. For older Ray versions, the autoscaler will default to using the Ray 2.0.0 image. - ## image: "my-repo/my-custom-autoscaler-image:tag" - # imagePullPolicy optionally overrides the autoscaler container's image pull policy. - imagePullPolicy: Always - # resources specifies optional resource request and limit overrides for the autoscaler container. - # For large Ray clusters, we recommend monitoring container resource usage to determine if overriding the defaults is required. - resources: - limits: - cpu: "500m" - memory: "512Mi" - requests: - cpu: "500m" - memory: "512Mi" - ######################headGroupSpec################################# - # head group template and specs, (perhaps 'group' is not needed in the name) + rayVersion: '2.5.0' + # Ray head pod configuration headGroupSpec: - # Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer' + # Kubernetes Service Type. This is an optional field, and the default value is ClusterIP. + # Refer to https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types. serviceType: ClusterIP - # logical group name, for this called head-group, also can be functional - # pod type head or worker - # rayNodeType: head # Not needed since it is under the headgroup - # the following params are used to complete the ray start: ray start --head --block ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. 
+ # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: - # Flag "no-monitor" will be automatically set when autoscaling is enabled. dashboard-host: '0.0.0.0' - block: 'true' - # num-cpus: '1' # can be auto-completed from the limits - # Use `resources` to optionally specify custom resource annotations for the Ray node. - # The value of `resources` is a string-integer mapping. - # Currently, `resources` must be provided in the specific format demonstrated below: - # resources: '"{\"Custom1\": 1, \"Custom2\": 5}"' - #pod template + # pod template template: + metadata: + # Custom labels. NOTE: To avoid conflicts with KubeRay operator, do not define custom labels start with `raycluster`. + # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + labels: {} spec: containers: - # The Ray head pod - name: ray-head - image: rayproject/ray:2.0.0 - imagePullPolicy: Always + image: rayproject/ray:2.5.0 ports: - containerPort: 6379 name: gcs @@ -98,59 +64,90 @@ spec: preStop: exec: command: ["/bin/sh","-c","ray stop"] + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + # The resource requests and limits in this config are too small for production! + # For an example with more realistic resource configuration, see + # ray-cluster.autoscaler.large.yaml. + # It is better to use a few large Ray pod than many small ones. + # For production, it is ideal to size each Ray pod to take up the + # entire Kubernetes node on which it is scheduled. resources: limits: cpu: "1" - memory: "1G" + memory: "2G" requests: + # For production use-cases, we recommend specifying integer CPU reqests and limits. + # We also recommend setting requests equal to limits for both CPU and memory. + # For this example, we use a 500m CPU request to accomodate resource-constrained local + # Kubernetes testing environments such as KinD and minikube. 
cpu: "500m" - memory: "512Mi" + memory: "2G" + volumes: + - name: ray-logs + emptyDir: {} workerGroupSpecs: # the pod replicas in this group typed worker - replicas: 1 minReplicas: 1 - maxReplicas: 300 + maxReplicas: 10 # logical group name, for this called small-group, also can be functional groupName: small-group - # if worker pods need to be added, we can simply increment the replicas - # if worker pods need to be removed, we decrement the replicas, and populate the podsToDelete list - # the operator will remove pods from the list until the number of replicas is satisfied - # when a pod is confirmed to be deleted, its name will be removed from the list below + # If worker pods need to be added, we can increment the replicas. + # If worker pods need to be removed, we decrement the replicas, and populate the workersToDelete list. + # The operator will remove pods from the list until the desired number of replicas is satisfied. + # If the difference between the current replica count and the desired replicas is greater than the + # number of entries in workersToDelete, random worker pods will be deleted. #scaleStrategy: # workersToDelete: # - raycluster-complete-worker-small-group-bdtwh # - raycluster-complete-worker-small-group-hv457 # - raycluster-complete-worker-small-group-k8tj7 - # the following params are used to complete the ray start: ray start --block ... - rayStartParams: - block: 'true' + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. 
+ rayStartParams: {} #pod template template: - metadata: - labels: - key: value - # annotations for pod - annotations: - key: value spec: - initContainers: - # the env var $RAY_IP is set by the operator if missing, with the value of the head service name - - name: init-myservice - image: busybox:1.28 - command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"] containers: - - name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' - image: rayproject/ray:2.0.0 - # environment variables to set in the container.Optional. - # Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/ + - name: ray-worker + image: rayproject/ray:2.5.0 lifecycle: preStop: exec: command: ["/bin/sh","-c","ray stop"] + # use volumeMounts.Optional. + # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + # The resource requests and limits in this config are too small for production! + # For an example with more realistic resource configuration, see + # ray-cluster.autoscaler.large.yaml. + # It is better to use a few large Ray pod than many small ones. + # For production, it is ideal to size each Ray pod to take up the + # entire Kubernetes node on which it is scheduled. resources: limits: cpu: "1" - memory: "512Mi" + memory: "1G" + # For production use-cases, we recommend specifying integer CPU reqests and limits. + # We also recommend setting requests equal to limits for both CPU and memory. + # For this example, we use a 500m CPU request to accomodate resource-constrained local + # Kubernetes testing environments such as KinD and minikube. requests: + # For production use-cases, we recommend specifying integer CPU reqests and limits. 
+ # We also recommend setting requests equal to limits for both CPU and memory. + # For this example, we use a 500m CPU request to accomodate resource-constrained local + # Kubernetes testing environments such as KinD and minikube. cpu: "500m" - memory: "256Mi" \ No newline at end of file + # For production use-cases, we recommend allocating at least 8Gb memory for each Ray container. + memory: "1G" + # use volumes + # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ + volumes: + - name: ray-logs + emptyDir: {} + diff --git a/doc/usage/examples/kuberay/kuberay-mcad.md b/doc/usage/examples/kuberay/kuberay-mcad.md index 922217e19..7aa7fbe80 100644 --- a/doc/usage/examples/kuberay/kuberay-mcad.md +++ b/doc/usage/examples/kuberay/kuberay-mcad.md @@ -4,13 +4,55 @@ This integration will help in queuing on [kuberay](https://github.com/ray-projec #### Prerequisites -- kubernetes or Openshift cluster -- Install MCAD using instructions present under `deployment` directory -- Make sure MCAD has clusterrole to create ray resources, please patch using configuration file present in `config` directory with name `xqueuejob-controller.yaml` +- Kubernetes(see [KinD](https://helm.sh/docs/intro/install/)) or Openshift cluster(see [OpenShift Local](https://developers.redhat.com/products/openshift-local/overview)) +- Kubernetes client tools such as [kubectl](https://kubernetes.io/docs/tasks/tools/) or [OpenShift CLI](https://docs.openshift.com/container-platform/4.13/cli_reference/openshift_cli/getting-started-cli.html) +- [Helm](https://helm.sh/docs/intro/install/) +- Install MCAD and KubeRay operators: + - KinD cluster: + + Install the stable release of MCAD operator from local charts + ```bash + git clone https://github.com/project-codeflare/multi-cluster-app-dispatcher + cd multi-cluster-app-dispatcher + helm install mcad --set image.repository=quay.io/project-codeflare/mcad-controller --set image.tag=stable deployment/mcad-controller + ``` + + Make sure MCAD has 
clusterrole to create ray resources, please patch using [xqueuejob-controller.yaml](doc/usage/examples/kuberay/config/xqueuejob-controller.yaml). For example: + ``` + kubectl apply -f doc/usage/examples/kuberay/config/xqueuejob-controller.yaml + ``` + + See [deployment.md](../../../../doc/deploy/deployment.md) for more options. + + Install kuberay operator using the [instructions](https://github.com/ray-project/kuberay#quick-start). For example, install kuberay v0.6.0 from remote helm repo: + ``` + helm repo add kuberay https://ray-project.github.io/kuberay-helm/ + helm install kuberay-operator kuberay/kuberay-operator --version 0.6.0 + ``` + + - OpenShift cluster: + + MCAD and KubeRay Operators are part of the CodeFlare stack which provides a simple, user-friendly abstraction for scaling, +queuing and resource management of distributed AI/ML and Python workloads. Please follow the `Distributed Workloads` [Quick-Start](https://github.com/opendatahub-io/distributed-workloads/blob/main/Quick-Start.md) for installation. 
+ #### Steps -- Install kuberay operator from [link](https://docs.ray.io/en/latest/cluster/kubernetes/getting-started.html#deploying-the-kuberay-operator) -- Submit ray cluster to MCAD as appwrapper using the config file `aw-raycluster.yaml` present in the `config` directory using command `kubectl create -f aw-raycluster.yaml` -- Check the status of the appwrapper using command `kubectl describe appwrapper ` -- Check running pods using command `kubectl get pods -n ` \ No newline at end of file + +- Submit the RayCluster custom resource to MCAD as AppWrapper using the [aw-raycluster.yaml](doc/usage/examples/kuberay/config/aw-raycluster.yaml) example: + ```bash + kubectl create -f doc/usage/examples/kuberay/config/aw-raycluster.yaml + ``` +- Check the status of the AppWrapper custom resource using command + ```bash + kubectl describe appwrapper raycluster-complete -n default + ``` +- Check the raycluster status is ready using command + ```bash + kubectl get raycluster -n default + ``` + Expect: + ``` + NAME DESIRED WORKERS AVAILABLE WORKERS STATUS AGE + raycluster-complete 1 1 ready 6m45s + ``` From 3a03149287995f872fcca793fca009d4fda67cdd Mon Sep 17 00:00:00 2001 From: ted chang Date: Thu, 31 Aug 2023 00:22:12 -0700 Subject: [PATCH 2/4] Update doc/usage/examples/kuberay/kuberay-mcad.md Co-authored-by: Anish Asthana --- doc/usage/examples/kuberay/kuberay-mcad.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/usage/examples/kuberay/kuberay-mcad.md b/doc/usage/examples/kuberay/kuberay-mcad.md index 7aa7fbe80..f133c8e20 100644 --- a/doc/usage/examples/kuberay/kuberay-mcad.md +++ b/doc/usage/examples/kuberay/kuberay-mcad.md @@ -32,8 +32,7 @@ This integration will help in queuing on [kuberay](https://github.com/ray-projec - OpenShift cluster: - MCAD and KubeRay Operators are part of the CodeFlare stack which provides a simple, user-friendly abstraction for scaling, -queuing and resource management of distributed AI/ML and Python workloads. 
Please follow the `Distributed Workloads` [Quick-Start](https://github.com/opendatahub-io/distributed-workloads/blob/main/Quick-Start.md) for installation. + On OpenShift, MCAD and KubeRay are already part of the Open Data Hub Distributed Workload Stack. The stack provides a simple, user-friendly abstraction for scaling, queuing and resource management of distributed AI/ML and Python workloads. Please follow the Quick Start in the [Distributed Workloads](https://github.com/opendatahub-io/distributed-workloads) for installation. #### Steps From 5eefdcd790d3127852b85dfa7d940d6309b3e2f5 Mon Sep 17 00:00:00 2001 From: ted chang Date: Thu, 31 Aug 2023 00:28:59 -0700 Subject: [PATCH 3/4] Update doc/usage/examples/kuberay/kuberay-mcad.md Co-authored-by: Kai-Hsun Chen --- doc/usage/examples/kuberay/kuberay-mcad.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/usage/examples/kuberay/kuberay-mcad.md b/doc/usage/examples/kuberay/kuberay-mcad.md index f133c8e20..a15eb1b1f 100644 --- a/doc/usage/examples/kuberay/kuberay-mcad.md +++ b/doc/usage/examples/kuberay/kuberay-mcad.md @@ -4,7 +4,7 @@ This integration will help in queuing on [kuberay](https://github.com/ray-projec #### Prerequisites -- Kubernetes(see [KinD](https://helm.sh/docs/intro/install/)) or Openshift cluster(see [OpenShift Local](https://developers.redhat.com/products/openshift-local/overview)) +- Kubernetes(see [KinD](https://kind.sigs.k8s.io/)) or Openshift cluster(see [OpenShift Local](https://developers.redhat.com/products/openshift-local/overview)) - Kubernetes client tools such as [kubectl](https://kubernetes.io/docs/tasks/tools/) or [OpenShift CLI](https://docs.openshift.com/container-platform/4.13/cli_reference/openshift_cli/getting-started-cli.html) - [Helm](https://helm.sh/docs/intro/install/) - Install MCAD and KubeRay operators: From 65abae1a4ff96655d4dcf1be057f69964d8f4fca Mon Sep 17 00:00:00 2001 From: ted chang Date: Thu, 31 Aug 2023 10:20:31 -0700 Subject: [PATCH 4/4] update 
appwrapper yamls --- .../kuberay/config/aw-raycluster-1.yaml | 55 +++++++++++-------- .../kuberay/config/aw-raycluster.yaml | 55 +++++++++++-------- 2 files changed, 62 insertions(+), 48 deletions(-) diff --git a/doc/usage/examples/kuberay/config/aw-raycluster-1.yaml b/doc/usage/examples/kuberay/config/aw-raycluster-1.yaml index 22293c1b5..ffb1f9701 100644 --- a/doc/usage/examples/kuberay/config/aw-raycluster-1.yaml +++ b/doc/usage/examples/kuberay/config/aw-raycluster-1.yaml @@ -7,18 +7,31 @@ spec: resources: GenericItems: - replicas: 1 - custompodresources: # Optional section that specifies resource requirements - # for non-standard k8s resources, follows same format as - # that of standard k8s resources. - - replicas: 2 # because AppWrappers are generic they must define the resultant pods that will be needed - # to fulfill a request as the request values cannot be reliably extracted from the - # generictemplate below + custompodresources: + # Optional section that specifies resource requirements + # for non-standard k8s resources, follows same format as + # that of standard k8s resources. + # Each item in the custompodresources stanza should include resources consumed by target Item. + # In this example, the 2 items correspond to 1 Ray head pod and 1 Ray worker pod + - replicas: 1 + limits: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 requests: - cpu: 8 - memory: 512Mi + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + # The replica should match the number of worker pods + - replicas: 1 limits: - cpu: 10 - memory: 1G + cpu: 8 + memory: 8G + nvidia.com/gpu: 0 + requests: + cpu: 8 + memory: 8G + nvidia.com/gpu: 0 generictemplate: # The resource requests and limits in this config are too small for production! # For examples with more realistic resource configuration, see @@ -75,15 +88,13 @@ spec: # entire Kubernetes node on which it is scheduled. 
resources: limits: - cpu: "1" - memory: "2G" + cpu: "2" + memory: "8G" requests: # For production use-cases, we recommend specifying integer CPU reqests and limits. # We also recommend setting requests equal to limits for both CPU and memory. - # For this example, we use a 500m CPU request to accomodate resource-constrained local - # Kubernetes testing environments such as KinD and minikube. - cpu: "500m" - memory: "2G" + cpu: "2" + memory: "8G" volumes: - name: ray-logs emptyDir: {} @@ -131,20 +142,16 @@ spec: # entire Kubernetes node on which it is scheduled. resources: limits: - cpu: "1" - memory: "1G" + cpu: "8" + memory: "8G" # For production use-cases, we recommend specifying integer CPU reqests and limits. # We also recommend setting requests equal to limits for both CPU and memory. - # For this example, we use a 500m CPU request to accomodate resource-constrained local - # Kubernetes testing environments such as KinD and minikube. requests: # For production use-cases, we recommend specifying integer CPU reqests and limits. # We also recommend setting requests equal to limits for both CPU and memory. - # For this example, we use a 500m CPU request to accomodate resource-constrained local - # Kubernetes testing environments such as KinD and minikube. - cpu: "500m" + cpu: "8" # For production use-cases, we recommend allocating at least 8Gb memory for each Ray container. 
- memory: "1G" + memory: "8G" # use volumes # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ volumes: diff --git a/doc/usage/examples/kuberay/config/aw-raycluster.yaml b/doc/usage/examples/kuberay/config/aw-raycluster.yaml index dc33b268f..932b004e9 100644 --- a/doc/usage/examples/kuberay/config/aw-raycluster.yaml +++ b/doc/usage/examples/kuberay/config/aw-raycluster.yaml @@ -7,18 +7,31 @@ spec: resources: GenericItems: - replicas: 1 - custompodresources: # Optional section that specifies resource requirements - # for non-standard k8s resources, follows same format as - # that of standard k8s resources. - - replicas: 2 # because AppWrappers are generic they must define the resultant pods that will be needed - # to fulfill a request as the request values cannot be reliably extracted from the - # generictemplate below + custompodresources: + # Optional section that specifies resource requirements + # for non-standard k8s resources, follows same format as + # that of standard k8s resources. + # Each item in the custompodresources stanza should include resources consumed by target Item. + # In this example, the 2 items correspond to 1 Ray head pod and 1 Ray worker pod + - replicas: 1 + limits: + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 requests: - cpu: 8 - memory: 512Mi + cpu: 2 + memory: 8G + nvidia.com/gpu: 0 + # The replica should match the number of worker pods + - replicas: 1 limits: - cpu: 10 - memory: 1G + cpu: 8 + memory: 8G + nvidia.com/gpu: 0 + requests: + cpu: 8 + memory: 8G + nvidia.com/gpu: 0 generictemplate: # The resource requests and limits in this config are too small for production! # For examples with more realistic resource configuration, see @@ -75,15 +88,13 @@ spec: # entire Kubernetes node on which it is scheduled. resources: limits: - cpu: "1" - memory: "2G" + cpu: "2" + memory: "8G" requests: # For production use-cases, we recommend specifying integer CPU reqests and limits. 
# We also recommend setting requests equal to limits for both CPU and memory. - # For this example, we use a 500m CPU request to accomodate resource-constrained local - # Kubernetes testing environments such as KinD and minikube. - cpu: "500m" - memory: "2G" + cpu: "2" + memory: "8G" volumes: - name: ray-logs emptyDir: {} @@ -131,20 +142,16 @@ spec: # entire Kubernetes node on which it is scheduled. resources: limits: - cpu: "1" - memory: "1G" + cpu: "8" + memory: "8G" # For production use-cases, we recommend specifying integer CPU reqests and limits. # We also recommend setting requests equal to limits for both CPU and memory. - # For this example, we use a 500m CPU request to accomodate resource-constrained local - # Kubernetes testing environments such as KinD and minikube. requests: # For production use-cases, we recommend specifying integer CPU reqests and limits. # We also recommend setting requests equal to limits for both CPU and memory. - # For this example, we use a 500m CPU request to accomodate resource-constrained local - # Kubernetes testing environments such as KinD and minikube. - cpu: "500m" + cpu: "8" # For production use-cases, we recommend allocating at least 8Gb memory for each Ray container. - memory: "1G" + memory: "8G" # use volumes # Refer to https://kubernetes.io/docs/concepts/storage/volumes/ volumes: