From 66afd55ae6e5bf85c452dac62c42a70ab5f47095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CShauna=20Diaz=E2=80=9D?= Date: Mon, 10 Mar 2025 13:26:24 -0400 Subject: [PATCH] [CP-4.19] OSDOCS-13287: adds RHOAI to MicroShift --- _attributes/attributes-microshift.adoc | 15 ++- _attributes/common-attributes.adoc | 71 ++++++----- _topic_maps/_topic_map_ms.yml | 37 +++--- microshift_ai/_attributes | 1 + microshift_ai/images | 1 + microshift_ai/microshift-rhoai.adoc | 93 ++++++++++++++ microshift_ai/modules | 1 + microshift_ai/snippets | 1 + .../microshift-configuring-routes.adoc | 2 +- ...roshift-inferenceservice-more-options.adoc | 29 +++++ modules/microshift-rhoai-con.adoc | 39 ++++++ modules/microshift-rhoai-create-ns.adoc | 41 ++++++ modules/microshift-rhoai-create-route.adoc | 45 +++++++ ...shift-rhoai-get-model-ready-inference.adoc | 44 +++++++ ...oshift-rhoai-get-model-server-metrics.adoc | 32 +++++ .../microshift-rhoai-inferenceservice-ex.adoc | 61 +++++++++ modules/microshift-rhoai-install.adoc | 81 ++++++++++++ .../microshift-rhoai-model-package-oci.adoc | 118 ++++++++++++++++++ ...roshift-rhoai-model-serving-rt-verify.adoc | 78 ++++++++++++ ...croshift-rhoai-override-kserve-config.adoc | 35 ++++++ modules/microshift-rhoai-query-model-con.adoc | 13 ++ modules/microshift-rhoai-query-model.adoc | 77 ++++++++++++ ...icroshift-rhoai-serving-ai-models-con.adoc | 40 ++++++ .../microshift-rhoai-servingruntimes-ex.adoc | 75 +++++++++++ modules/microshift-rhoai-supported-crds.adoc | 17 +++ ...oshift-rhoai-supported-mserv-runtimes.adoc | 26 ++++ ...croshift-rhoai-verify-model-connected.adoc | 79 ++++++++++++ modules/microshift-rhoai-workflow.adoc | 50 ++++++++ 28 files changed, 1149 insertions(+), 53 deletions(-) create mode 120000 microshift_ai/_attributes create mode 120000 microshift_ai/images create mode 100644 microshift_ai/microshift-rhoai.adoc create mode 120000 microshift_ai/modules create mode 120000 microshift_ai/snippets create mode 100644 modules/microshift-inferenceservice-more-options.adoc create mode 100644 modules/microshift-rhoai-con.adoc create mode 100644 modules/microshift-rhoai-create-ns.adoc create mode 100644 modules/microshift-rhoai-create-route.adoc create mode 100644 modules/microshift-rhoai-get-model-ready-inference.adoc create mode 100644 modules/microshift-rhoai-get-model-server-metrics.adoc create mode 100644 modules/microshift-rhoai-inferenceservice-ex.adoc create mode 100644 modules/microshift-rhoai-install.adoc create mode 100644 modules/microshift-rhoai-model-package-oci.adoc create mode 100644 modules/microshift-rhoai-model-serving-rt-verify.adoc create mode 100644 modules/microshift-rhoai-override-kserve-config.adoc create mode 100644 modules/microshift-rhoai-query-model-con.adoc create mode 100644 modules/microshift-rhoai-query-model.adoc create mode 100644 modules/microshift-rhoai-serving-ai-models-con.adoc create mode 100644 modules/microshift-rhoai-servingruntimes-ex.adoc create mode 100644 modules/microshift-rhoai-supported-crds.adoc create mode 100644 modules/microshift-rhoai-supported-mserv-runtimes.adoc create mode 100644 modules/microshift-rhoai-verify-model-connected.adoc create mode 100644 modules/microshift-rhoai-workflow.adoc diff --git a/_attributes/attributes-microshift.adoc b/_attributes/attributes-microshift.adoc index ef5c576b2ef0..4928f88994c7 100644 --- a/_attributes/attributes-microshift.adoc +++ b/_attributes/attributes-microshift.adoc @@ -6,23 +6,28 @@ :OCP: OpenShift Container Platform :ocp-version: 4.19 :oc-first: 
pass:quotes[OpenShift CLI (`oc`)] +:ai-first: artificial intelligence (AI) //OpenShift Kubernetes Engine :oke: OpenShift Kubernetes Engine -:product-title-first: Red Hat build of MicroShift (MicroShift) +:product-title-first: Red{nbsp}Hat build of MicroShift (MicroShift) :microshift-short: MicroShift :product-registry: OpenShift image registry :product-version: 4.19 :rhel-major: rhel-9 -:op-system-base-full: Red Hat Enterprise Linux (RHEL) +:rhoai-full: Red{nbsp}Hat OpenShift AI +:rhoai: RHOAI +:op-system-base-full: Red{nbsp}Hat Enterprise Linux (RHEL) :op-system-base: RHEL -:op-system-ostree-first: Red Hat Enterprise Linux for Edge (RHEL for Edge) +:op-system-ostree-first: Red{nbsp}Hat Enterprise Linux for Edge (RHEL for Edge) :op-system-ostree: RHEL for Edge -:op-system-rt-kernel: Red Hat Enterprise Linux for Real Time (real-time kernel) +:op-system-rt-kernel: Red{nbsp}Hat Enterprise Linux for Real Time (real-time kernel) :op-system-rtk: real-time kernel :op-system-image: image mode for RHEL :op-system-version: 9.6 :op-system-version-major: 9 -:op-system-bundle: Red Hat Device Edge +:op-system-bundle: Red{nbsp}Hat Device Edge +:ovms: OpenVINO Model Server +:ov: OVMS :rpm-repo-version: rhocp-4.19 :rhde-version: 4 :VirtProductName: OpenShift Virtualization diff --git a/_attributes/common-attributes.adoc b/_attributes/common-attributes.adoc index 8aa39a7f64b2..47a464db2c36 100644 --- a/_attributes/common-attributes.adoc +++ b/_attributes/common-attributes.adoc @@ -14,13 +14,14 @@ :ocp-nminus1: 4.17 :ocp-nplus1: 4.19 // Operating system attributes -:op-system-first: Red Hat Enterprise Linux CoreOS (RHCOS) +:op-system-first: Red{nbsp}Hat Enterprise Linux CoreOS (RHCOS) :op-system: RHCOS :op-system-lowercase: rhcos :op-system-base: RHEL -:op-system-base-full: Red Hat Enterprise Linux (RHEL) +:op-system-base-full: Red{nbsp}Hat Enterprise Linux (RHEL) :op-system-version: 9.x :op-system-version-9: 9 +:op-system-ai: Red{nbsp}Hat Enterprise Linux AI ifdef::openshift-origin[] :op-system-first: Fedora CoreOS (FCOS) :op-system: FCOS @@ -37,7 +38,7 @@ endif::[] :cluster-manager-url: link:https://console.redhat.com/openshift[OpenShift Cluster Manager] :cluster-manager-url-pull: link:https://console.redhat.com/openshift/install/pull-secret[pull secret from Red Hat OpenShift Cluster Manager] :insights-advisor-url: link:https://console.redhat.com/openshift/insights/advisor/[Insights Advisor] -:hybrid-console: Red Hat Hybrid Cloud Console +:hybrid-console: Red{nbsp}Hat Hybrid Cloud Console :hybrid-console-second: Hybrid Cloud Console :hybrid-console-url: link:https://console.redhat.com[Red Hat Hybrid Cloud Console] // OADP attributes @@ -53,8 +54,8 @@ endif::[] :product-mirror-registry: Mirror registry for Red Hat OpenShift :rh-storage-first: Red Hat OpenShift Data Foundation :rh-storage: OpenShift Data Foundation -:rh-rhacm-title: Red Hat Advanced Cluster Management -:rh-rhacm-first: Red Hat Advanced Cluster Management (RHACM) +:rh-rhacm-title: Red{nbsp}Hat Advanced Cluster Management +:rh-rhacm-first: Red{nbsp}Hat Advanced Cluster Management (RHACM) :rh-rhacm: RHACM :rh-rhacm-version: 2.13 :osc: OpenShift sandboxed containers @@ -87,12 +88,12 @@ ifdef::openshift-origin[] :builds-v1shortname: Builds v1 endif::[] //gitops -:gitops-title: Red Hat OpenShift GitOps +:gitops-title: Red{nbsp}Hat OpenShift GitOps :gitops-shortname: GitOps :gitops-ver: 1.1 :rh-app-icon: image:red-hat-applications-menu-icon.jpg[title="Red Hat applications"] //pipelines -:pipelines-title: Red Hat OpenShift Pipelines 
+:pipelines-title: Red{nbsp}Hat OpenShift Pipelines :pipelines-shortname: OpenShift Pipelines :pipelines-ver: pipelines-1.18 :pipelines-version-number: 1.18 @@ -123,22 +124,22 @@ ifdef::openshift-origin[] :CNVSubscriptionSpecName: community-kubevirt-hyperconverged endif::[] // openshift virtualization engine (ove) -:ove-first: Red Hat OpenShift Virtualization Engine +:ove-first: Red{nbsp}Hat OpenShift Virtualization Engine :ove: OpenShift Virtualization Engine //distributed tracing -:DTProductName: Red Hat OpenShift distributed tracing platform +:DTProductName: Red{nbsp}Hat OpenShift distributed tracing platform :DTShortName: distributed tracing platform :DTProductVersion: 3.1 -:JaegerName: Red Hat OpenShift distributed tracing platform (Jaeger) -:JaegerOperator: Red Hat OpenShift distributed tracing platform +:JaegerName: Red{nbsp}Hat OpenShift distributed tracing platform (Jaeger) +:JaegerOperator: Red{nbsp}Hat OpenShift distributed tracing platform :JaegerShortName: distributed tracing platform (Jaeger) -:JaegerOperator: Red Hat OpenShift distributed tracing platform +:JaegerOperator: Red{nbsp}Hat OpenShift distributed tracing platform :JaegerVersion: 1.53.0 -:OTELName: Red Hat build of OpenTelemetry -:OTELShortName: Red Hat build of OpenTelemetry -:OTELOperator: Red Hat build of OpenTelemetry Operator +:OTELName: Red{nbsp}Hat build of OpenTelemetry +:OTELShortName: Red{nbsp}Hat build of OpenTelemetry +:OTELOperator: Red{nbsp}Hat build of OpenTelemetry Operator :OTELVersion: 0.93.0 -:TempoName: Red Hat OpenShift distributed tracing platform (Tempo) +:TempoName: Red{nbsp}Hat OpenShift distributed tracing platform (Tempo) :TempoShortName: distributed tracing platform (Tempo) :TempoOperator: Tempo Operator :TempoVersion: 2.3.1 @@ -153,18 +154,18 @@ ifdef::telco-core[] :rds-caps: Telco core endif::[] //lightspeed -:ols-official: Red Hat OpenShift Lightspeed +:ols-official: Red{nbsp}Hat OpenShift Lightspeed :ols: OpenShift Lightspeed //logging :logging: logging :logging-uc: Logging -:for: for Red Hat OpenShift -:clo: Red Hat OpenShift Logging Operator +:for: for Red{nbsp}Hat OpenShift +:clo: Red{nbsp}Hat OpenShift Logging Operator :loki-op: Loki Operator :es-op: OpenShift Elasticsearch Operator :log-plug: logging Console plugin //observability -:ObservabilityLongName: Red Hat OpenShift Observability +:ObservabilityLongName: Red{nbsp}Hat OpenShift Observability :ObservabilityShortName: Observability // Cluster Monitoring Operator :cmo-first: Cluster Monitoring Operator (CMO) @@ -183,9 +184,9 @@ endif::[] :ServerlessOperatorName: OpenShift Serverless Operator :FunctionsProductName: OpenShift Serverless Functions //service mesh v2 -:product-dedicated: Red Hat OpenShift Dedicated -:product-rosa: Red Hat OpenShift Service on AWS -:SMProductName: Red Hat OpenShift Service Mesh +:product-dedicated: Red{nbsp}Hat OpenShift Dedicated +:product-rosa: Red{nbsp}Hat OpenShift Service on AWS +:SMProductName: Red{nbsp}Hat OpenShift Service Mesh :SMProductShortName: Service Mesh :SMProductVersion: 2.6.7 :MaistraVersion: 2.6 @@ -195,11 +196,11 @@ endif::[] //Service Mesh v1 :SMProductVersion1x: 1.1.18.2 //Windows containers -:productwinc: Red Hat OpenShift support for Windows Containers +:productwinc: Red{nbsp}Hat OpenShift support for Windows Containers // Red Hat Quay Container Security Operator :rhq-cso: Red Hat Quay Container Security Operator // Red Hat Quay -:quay: Red Hat Quay +:quay: Red{nbsp}Hat Quay :sno: single-node OpenShift :sno-caps: Single-node OpenShift :sno-okd: single-node OKD @@ -210,9 
+211,8 @@ endif::[] :cgu-operator: TALM :redfish-operator: Bare Metal Event Relay //Formerly known as CodeReady Containers and CodeReady Workspaces -:openshift-local-productname: Red Hat OpenShift Local -:openshift-dev-spaces-productname: Red Hat OpenShift Dev Spaces -// Factory-precaching-cli tool +:openshift-local-productname: Red{nbsp}Hat OpenShift Local +:openshift-dev-spaces-productname: Red{nbsp}Hat OpenShift Dev Spaces :factory-prestaging-tool: factory-precaching-cli tool :factory-prestaging-tool-caps: Factory-precaching-cli tool :openshift-networking: Red Hat OpenShift Networking @@ -254,8 +254,8 @@ endif::[] :coo-full: Cluster Observability Operator :coo-short: COO // ODF -:odf-first: Red Hat OpenShift Data Foundation (ODF) -:odf-full: Red Hat OpenShift Data Foundation +:odf-first: Red{nbsp}Hat OpenShift Data Foundation (ODF) +:odf-full: Red{nbsp}Hat OpenShift Data Foundation :odf-short: ODF :rh-dev-hub: Red Hat Developer Hub // IBU @@ -316,14 +316,14 @@ endif::[] :oci-pca-short: Private Cloud Appliance // Red Hat OpenStack Platform (RHOSP)/OpenStack ifndef::openshift-origin[] -:rh-openstack-first: Red Hat OpenStack Platform (RHOSP) +:rh-openstack-first: Red{nbsp}Hat OpenStack Platform (RHOSP) :rh-openstack: RHOSP endif::openshift-origin[] ifdef::openshift-origin[] :rh-openstack-first: OpenStack :rh-openstack: OpenStack endif::openshift-origin[] -:rhoso-first: Red Hat OpenStack Services on OpenShift (RHOSO) +:rhoso-first: Red{nbsp}Hat OpenStack Services on OpenShift (RHOSO) :rhoso: RHOSO // VMware vSphere :vmw-first: VMware vSphere @@ -375,3 +375,10 @@ endif::openshift-origin[] :hcp: hosted control planes :mce: multicluster engine for Kubernetes Operator :mce-short: multicluster engine Operator +//AI names; OpenShift AI can be used as the family name +:rhoai-full: Red{nbsp}Hat OpenShift AI +:rhoai: RHOAI +:rhoai-diy: Red{nbsp}Hat OpenShift AI Self-Managed +:rhoai-cloud: Red{nbsp}Hat OpenShift AI Cloud Service +:ai-first: artificial intelligence (AI) +//RHEL AI attribute listed with RHEL family \ No newline at end of file diff --git a/_topic_maps/_topic_map_ms.yml b/_topic_maps/_topic_map_ms.yml index c75992c36dec..bd7f6a5759ec 100644 --- a/_topic_maps/_topic_map_ms.yml +++ b/_topic_maps/_topic_map_ms.yml @@ -103,21 +103,6 @@ Topics: - Name: Listing update package contents File: microshift-list-update-contents --- -Name: Support -Dir: microshift_support -Distros: microshift -Topics: -- Name: The etcd service - File: microshift-etcd -- Name: The sos report tool - File: microshift-sos-report -- Name: Getting your cluster ID - File: microshift-getting-cluster-id -- Name: Getting support - File: microshift-getting-support -- Name: Remote health monitoring with a connected cluster - File: microshift-remote-cluster-monitoring ---- Name: Configuring Dir: microshift_configuring Distros: microshift @@ -209,6 +194,13 @@ Topics: - Name: Understanding storage migration File: microshift-storage-migration --- +Name: Using AI models +Dir: microshift_ai +Distros: microshift +Topics: +- Name: Using artificial intelligence with MicroShift + File: microshift-rhoai +--- Name: Running applications Dir: microshift_running_apps Distros: microshift @@ -250,6 +242,21 @@ Topics: - Name: Automated recovery from manual backups File: microshift-auto-recover-manual-backup --- +Name: Support +Dir: microshift_support +Distros: microshift +Topics: +- Name: The etcd service + File: microshift-etcd +- Name: The sos report tool + File: microshift-sos-report +- Name: Getting your cluster ID + File: 
microshift-getting-cluster-id +- Name: Getting support + File: microshift-getting-support +- Name: Remote health monitoring with a connected cluster + File: microshift-remote-cluster-monitoring +--- Name: Troubleshooting Dir: microshift_troubleshooting Distros: microshift diff --git a/microshift_ai/_attributes b/microshift_ai/_attributes new file mode 120000 index 000000000000..f27fd275ea6b --- /dev/null +++ b/microshift_ai/_attributes @@ -0,0 +1 @@ +../_attributes/ \ No newline at end of file diff --git a/microshift_ai/images b/microshift_ai/images new file mode 120000 index 000000000000..e4c5bd02a10a --- /dev/null +++ b/microshift_ai/images @@ -0,0 +1 @@ +../images/ \ No newline at end of file diff --git a/microshift_ai/microshift-rhoai.adoc b/microshift_ai/microshift-rhoai.adoc new file mode 100644 index 000000000000..bbc2b977ff0e --- /dev/null +++ b/microshift_ai/microshift-rhoai.adoc @@ -0,0 +1,93 @@ +:_mod-docs-content-type: ASSEMBLY +[id="microshift-rh-openshift-ai"] +include::_attributes/attributes-microshift.adoc[] += Using {rhoai-full} with {microshift-short} +:context: microshift-rh-openshift-ai + +toc::[] + +Learn how to serve artificial intelligence and machine learning (AI/ML) models with {ai-first} on your {microshift-short} edge deployments. + +:FeatureName: {rhoai-full} +include::snippets/technology-preview.adoc[leveloffset=+1] + +include::modules/microshift-rhoai-con.adoc[leveloffset=+1] + +include::modules/microshift-rhoai-workflow.adoc[leveloffset=+1] + +//additional resources for rhoai-workflow module +[role="_additional-resources"] +.Additional resources + +* xref:../microshift_networking/microshift-configuring-routes.adoc#microshift-configuring-routes[Configuring routes] + +include::modules/microshift-rhoai-install.adoc[leveloffset=+1] + +include::modules/microshift-rhoai-create-ns.adoc[leveloffset=+1] + +include::modules/microshift-rhoai-model-package-oci.adoc[leveloffset=+1] + +//additional resources for rhoai-oci module +[role="_additional-resources"] +.Additional resources + +* link:https://kserve.github.io/website/latest/modelserving/storage/oci/[Serving models with OCI images] (KServe documentation) + +include::modules/microshift-rhoai-serving-ai-models-con.adoc[leveloffset=+1] + +include::modules/microshift-rhoai-supported-crds.adoc[leveloffset=+2] + +include::modules/microshift-rhoai-supported-mserv-runtimes.adoc[leveloffset=+2] + +include::modules/microshift-rhoai-servingruntimes-ex.adoc[leveloffset=+1] + +//additional resources for serving runtimes procedure module +[role="_additional-resources"] +.Additional resources + +* link:https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/latest/html/serving_models/about-model-serving_about-model-serving#about-model-serving_about-model-serving[About model serving] ({rhoai-full} documentation) + +* link:https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/latest/html/serving_models/serving-large-models_serving-large-models#servingruntime[Model-serving runtimes] ({rhoai-full} documentation) + +* link:https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/latest/html/serving_models/serving-large-models_serving-large-models[Serving models on the single-model serving platform] ({rhoai-full} documentation) + +* link:https://docs.redhat.com/en/documentation/red_hat_openshift_ai_cloud_service/latest/html/serving_models/serving-large-models_serving-large-models#tested-verified-runtimes_serving-large-models[Tested and verified model-serving runtimes] 
({rhoai-full} documentation) +//the `2-latest` link is not working (2-latest in place of `1`) + +* link:https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/latest/html/serving_models/serving-large-models_serving-large-models#adding-a-tested-and-verified-model-serving-runtime-for-the-single-model-serving-platform_serving-large-models[Adding a tested and verified model-serving runtime for the single-model serving platform] ({rhoai-full} documentation) + +* link:https://kserve.github.io/website/0.8/modelserving/servingruntimes/[Serving Runtimes] (KServe documentation) + +* link:https://kserve.github.io/website/latest/modelserving/data_plane/v1_protocol/[V1 Inference Protocol] (KServe documentation) + +* link:https://kserve.github.io/website/latest/modelserving/data_plane/v2_protocol/[Open Inference Protocol (V2)] (KServe documentation) + +include::modules/microshift-rhoai-inferenceservice-ex.adoc[leveloffset=+1] + +include::modules/microshift-inferenceservice-more-options.adoc[leveloffset=+2] + +include::modules/microshift-rhoai-model-serving-rt-verify.adoc[leveloffset=+1] + +//additional resources for inferenceservice modules +.Additional resources + +* link:https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/latest/html/serving_models/serving-large-models_serving-large-models#inferenceservice[InferenceService] ({rhoai-full} documentation) + +include::modules/microshift-rhoai-create-route.adoc[leveloffset=+1] + +//additional resources for creating a route +.Additional resources + +* xref:../microshift_networking/microshift-configuring-routes.adoc#microshift-configuring-routes[Configuring routes] + +include::modules/microshift-rhoai-query-model-con.adoc[leveloffset=+1] + +include::modules/microshift-rhoai-verify-model-connected.adoc[leveloffset=+2] + +include::modules/microshift-rhoai-get-model-ready-inference.adoc[leveloffset=+2] + +include::modules/microshift-rhoai-query-model.adoc[leveloffset=+2] + +include::modules/microshift-rhoai-get-model-server-metrics.adoc[leveloffset=+2] + +include::modules/microshift-rhoai-override-kserve-config.adoc[leveloffset=+1] diff --git a/microshift_ai/modules b/microshift_ai/modules new file mode 120000 index 000000000000..43aab75b53c9 --- /dev/null +++ b/microshift_ai/modules @@ -0,0 +1 @@ +../modules/ \ No newline at end of file diff --git a/microshift_ai/snippets b/microshift_ai/snippets new file mode 120000 index 000000000000..9d58b92e5058 --- /dev/null +++ b/microshift_ai/snippets @@ -0,0 +1 @@ +../snippets/ \ No newline at end of file diff --git a/microshift_networking/microshift-configuring-routes.adoc b/microshift_networking/microshift-configuring-routes.adoc index 89242164b41f..4a4759c231e1 100644 --- a/microshift_networking/microshift-configuring-routes.adoc +++ b/microshift_networking/microshift-configuring-routes.adoc @@ -6,7 +6,7 @@ include::_attributes/attributes-microshift.adoc[] toc::[] -You can configure routes for {microshift-short} for clusters. +You can configure routes for services to have {microshift-short} cluster access. 
//OCP module, edit with care; Creating an insecure/http route include::modules/microshift-nw-create-http-based-route.adoc[leveloffset=+1] diff --git a/modules/microshift-inferenceservice-more-options.adoc b/modules/microshift-inferenceservice-more-options.adoc new file mode 100644 index 000000000000..1f949fa9060c --- /dev/null +++ b/modules/microshift-inferenceservice-more-options.adoc @@ -0,0 +1,29 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: CONCEPT +[id="microshift-rhoai-inferenceservice-more-options_{context}"] += More InferenceService CRD options + +The inference service YAML file can include many different options. For example, you can include a `resources` section that is passed first to the deployment and then to the pod, so that the model server gets access to your hardware through the device plugin. + +.Example NVIDIA device `resources` snippet in an `InferenceService` CR +[source,yaml] +---- +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: is-name +spec: + predictor: + model: + resources: + limits: + nvidia.com/gpu: 1 + requests: + nvidia.com/gpu: 1 +#... +---- + +For complete `InferenceService` specifications, see the link:https://kserve.github.io/website/latest/reference/api/[Control Plane API Reference] (KServe documentation). diff --git a/modules/microshift-rhoai-con.adoc b/modules/microshift-rhoai-con.adoc new file mode 100644 index 000000000000..2bfeb126b47b --- /dev/null +++ b/modules/microshift-rhoai-con.adoc @@ -0,0 +1,39 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: CONCEPT +[id="microshift-rhoai-con_{context}"] += How {rhoai-full} works in {microshift-short} + +Edge deployments are where data happens and decisions need to be made. You can use {rhoai-full} ({rhoai}) to integrate a fleet of {microshift-short}-driven edge devices into the artificial intelligence and machine learning (AI/ML) operations cycle. {microshift-short} is compatible with a single-model serving platform based on the KServe component of Kubernetes. KServe is a platform that orchestrates model serving. + +{rhoai} is a platform for data scientists and developers of AI/ML applications. First, use {rhoai} in the cloud or data center to develop, train, and test an AI model. Then, run your model in your edge deployments on {microshift-short}. + +After you deploy your AI model, application data can be sent to the model where the model can make data-driven decisions without a human user. This is an ideal scenario for edge applications where interaction with an administrator is naturally limited. + +Implemented with KServe:: + +The KServe component includes model-serving runtimes that implement the loading of various types of model servers. These runtimes are configured with custom resources (CRs). KServe custom resource definitions (CRDs) also define the lifecycle of the deployment object, storage access, and networking setup. + +Specifics of using {rhoai} with {microshift-short}:: + +As an edge-optimized Kubernetes deployment, {microshift-short} has the following limitations when using {rhoai}: + +* AI model serving on {microshift-short} is only available on the x86_64 architecture. + +* A subset of {rhoai} Operator components are supported on {microshift-short}. + +* As a single-node Kubernetes distribution, {microshift-short} does not support multi-model deployments. 
You must use the single-model serving platform. + +* You must develop the AI models you want to run on the {microshift-short} model-serving platform in the cloud or your data center. Using {microshift-short} as a development platform for AI models is not supported. + +* You must plan for any additional RAM, disk space, and storage configurations required to serve your AI model. + +* Not all model servers support the IPv6 networking protocol. Check each model server's documentation to verify that your networking configuration is supported. + +* You must secure the exposed model server endpoint, for example, with OAUTH2. + +* `ClusterServingRuntimes` CRDs are not supported by {rhoai}, which means that you must copy the `ServingRuntime` CR shipped within the `microshift-ai-model-serving` RPM to your workload namespace. + +* To administer model serving on {microshift-short}, you must use the CLI. The {rhoai} dashboard is not supported. diff --git a/modules/microshift-rhoai-create-ns.adoc b/modules/microshift-rhoai-create-ns.adoc new file mode 100644 index 000000000000..1ae8e7d3e58d --- /dev/null +++ b/modules/microshift-rhoai-create-ns.adoc @@ -0,0 +1,41 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: PROCEDURE +[id="microshift-rhoai-create-namespace_{context}"] += Creating a namespace for your AI model on {microshift-short} + +Create a namespace for your AI model and all other resources. + +.Prerequisites + +* You have root user access to your machine. +* The {oc-first} is installed. + +.Procedure + +* Create a new namespace by running the following command: ++ +[source,terminal,subs="+quotes"] +---- +$ oc create ns __ <1> +---- +<1> Replace `__` with the namespace name you want to use. In the following examples, `ai-demo` is used. + +.Verification + +* Verify that you created the desired namespace by running the following command: ++ +[source,terminal,subs="+quotes"] +---- +$ oc get ns __ <1> +---- +<1> Replace `__` with the namespace name you want to use. In the following examples, `ai-demo` is used. ++ +.Example output +[source,text] +---- +NAME STATUS AGE +ai-demo Active 1h +---- diff --git a/modules/microshift-rhoai-create-route.adoc b/modules/microshift-rhoai-create-route.adoc new file mode 100644 index 000000000000..57ed9e8b7835 --- /dev/null +++ b/modules/microshift-rhoai-create-route.adoc @@ -0,0 +1,45 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: PROCEDURE +[id="microshift-rhoai-create-route_{context}"] += Creating a route to use for AI queries in {microshift-short} + +Create a route so that your AI model can receive queries and give output. You can either use the `oc expose svc` command or create a definition in a YAML file and apply it. + +.Prerequisites + +* You have root user access to your machine. +* The {oc-first} is installed. 
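The procedure that follows uses the `oc expose svc` command. If you prefer the YAML-definition alternative mentioned in the introduction, a minimal sketch might look like the following example. The `Route` name and the `ovms-resnet50-predictor` service in the `ai-demo` namespace are assumptions carried over from the other examples in this document.

[source,terminal]
----
$ oc apply -n ai-demo -f - <<'EOF'
apiVersion: route.openshift.io/v1
kind: Route
metadata:
  name: ovms-resnet50-predictor  # hypothetical name; any valid Route name works
spec:
  to:
    kind: Service
    name: ovms-resnet50-predictor  # the service that KServe creates for the InferenceService
EOF
----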
+ +.Procedure + +* Create a route using the following command: ++ +[source,terminal] +---- +$ oc expose svc -n ai-demo ovms-resnet50-predictor +---- ++ +.Example output +[source,terminal] +---- +route.route.openshift.io/ovms-resnet50-predictor exposed +---- + +.Verification + +* Verify that the route you created exists by running the following command: ++ +[source,terminal] +---- +$ oc get route -n ai-demo +---- ++ +.Example output +[source,terminal] +---- +NAME HOST ADMITTED SERVICE TLS +ovms-resnet50-predictor ovms-resnet50-predictor-ai-demo.apps.example.com True ovms-resnet50-predictor +---- diff --git a/modules/microshift-rhoai-get-model-ready-inference.adoc b/modules/microshift-rhoai-get-model-ready-inference.adoc new file mode 100644 index 000000000000..723267b2e139 --- /dev/null +++ b/modules/microshift-rhoai-get-model-ready-inference.adoc @@ -0,0 +1,44 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: PROCEDURE +[id="microshift-rhoai-get-model-ready-inference_{context}"] += Getting your AI model ready for inference + +Before querying your AI model through the API, get the model ready to provide answers based on the training data. The following examples continue with the {ovms} example. + +.Prerequisites + +* The {microshift-short} cluster is running. +* You have the `xxd` utility, which is part of the `vim-common` package. +* You configured the model-serving runtime. +* You uploaded your AI model to {microshift-short}. + +.Procedure + +. Download an image of a bee from the {ovms} examples by running the following command: ++ +[source,terminal] +---- +$ curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/static/images/bee.jpeg +---- + +. Create the request data by running the following script: ++ +[source,bash] +---- +IMAGE=./bee.jpeg +REQ=./request.json + +# Add an inference header +echo -n '{"inputs" : [{"name": "0", "shape": [1], "datatype": "BYTES"}]}' > "${REQ}" +# Get the size of the inference header <1> +HEADER_LEN="$(stat -c %s "${REQ}")" +# Add size of the data (image) in binary format (4 bytes, little endian) <2> +printf "%08X" $(stat --format=%s "${IMAGE}") | sed 's/\(..\)/\1\n/g' | tac | tr -d '\n' | xxd -r -p >> "${REQ}" +# Add the data, that is, append the image to the request file +cat "${IMAGE}" >> "${REQ}" +---- +<1> The inference header size must be passed to {ovms} later in the form of an HTTP header. +<2> The {ovms} requires 4 bytes in little endian byte order. diff --git a/modules/microshift-rhoai-get-model-server-metrics.adoc b/modules/microshift-rhoai-get-model-server-metrics.adoc new file mode 100644 index 000000000000..46b370849906 --- /dev/null +++ b/modules/microshift-rhoai-get-model-server-metrics.adoc @@ -0,0 +1,32 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: PROCEDURE +[id="microshift-rhoai-get-model-server-metrics_{context}"] += Getting the model-server metrics + +After making a query, you can get the model server's metrics to identify bottlenecks, optimize resource allocation, and ensure efficient infrastructure utilization. + +.Prerequisites + +* The {microshift-short} cluster is running. +* There have been enough queries to provide the metrics data you want to see.
+ +.Procedure + +* Get the Prometheus-format metrics of the model server by making a request on the `/metrics` endpoint with the following command: ++ +[source,terminal] +---- +$ curl "${DOMAIN}/metrics" --connect-to "${DOMAIN}::${IP}:" +---- ++ +.Partial example output +[source,terminal] +---- +# HELP ovms_requests_success Number of successful requests to a model or a DAG. +# TYPE ovms_requests_success counter +ovms_requests_success{api="KServe",interface="REST",method="ModelReady",name="ovms-resnet50"} 4 +ovms_requests_success{api="KServe",interface="REST",method="ModelMetadata",name="ovms-resnet50",version="1"} 1 +---- diff --git a/modules/microshift-rhoai-inferenceservice-ex.adoc b/modules/microshift-rhoai-inferenceservice-ex.adoc new file mode 100644 index 000000000000..8ad5e1acf9ce --- /dev/null +++ b/modules/microshift-rhoai-inferenceservice-ex.adoc @@ -0,0 +1,61 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: PROCEDURE +[id="microshift-rhoai-inferenceservice-ex_{context}"] += Creating an InferenceService custom resource + +Create an `InferenceService` custom resource (CR) to instruct KServe how to create a deployment for serving your AI model. KServe uses the `ServingRuntime` based on the `modelFormat` value specified in the `InferenceService` CR. + +.Prerequisites + +* You configured the `ServingRuntimes` CR. +* You have root user access to your machine. +* The {oc-first} is installed. + +.Procedure + +. Create the `InferenceService` CR. ++ +.Example `InferenceService` object with an `openvino_ir` model format +[source,yaml] +---- +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: ovms-resnet50 +spec: + predictor: + model: + protocolVersion: v2 + modelFormat: + name: openvino_ir + storageUri: "oci://localhost/ovms-resnet50:test" + args: + - --layout=NHWC:NCHW # <1> +---- +<1> An additional argument to make {ovms} ({ov}) accept the request input data in a different layout than the model was originally exported with. Extra arguments are passed through to the {ov} container. + +. Save the `InferenceService` example to a file, then create it on the cluster by running the following command: ++ +[source,terminal,subs="+quotes"] +---- +$ oc create -n __ -f ./FILE.yaml <1> +---- +<1> Replace `__` with your namespace name. ++ +.Example output +[source,terminal] +---- +inferenceservice.serving.kserve.io/ovms-resnet50 created +---- ++ +[NOTE] +==== +A deployment and a pod are expected to appear in the specified namespace. Depending on the size of the image specified in the `ServingRuntime` CR and the size of the ModelCar OCI image, it might take several minutes for the pod to be ready. +==== + +.Next step + +* Verify that the model-serving runtime is ready. diff --git a/modules/microshift-rhoai-install.adoc b/modules/microshift-rhoai-install.adoc new file mode 100644 index 000000000000..d48f2ba7c376 --- /dev/null +++ b/modules/microshift-rhoai-install.adoc @@ -0,0 +1,81 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: PROCEDURE +[id="microshift-rhoai-install_{context}"] += Installing the {rhoai-full} RPM + +To use AI models in {microshift-short} deployments, use this procedure to install the {rhoai-full} ({rhoai}) RPM with a new {microshift-short} installation. You can also install the RPM on an existing {microshift-short} instance if you restart the system.
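If you are adding model serving to an existing {microshift-short} instance, you can first check whether the package is already present. This is a generic RPM query, not a {microshift-short}-specific step:

[source,terminal]
----
$ rpm -q microshift-ai-model-serving
----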
+ +:FeatureName: {rhoai-full} +include::snippets/technology-preview.adoc[leveloffset=+1] + +//[IMPORTANT] +//==== +//Installing the `microshift-ai-model-serving` RPM before running the `$ systemctl start microshift` command for the first time can cause {microshift-short} to fail to start. However, {microshift-short} automatically restarts successfully in this case. +//==== + +.Prerequisites + +* The system requirements for installing {microshift-short} have been met. +* You have root user access to your machine. +* The {oc-first} is installed. +* You configured your LVM VG with the capacity needed for the PVs of your workload. +* You have the RAM and disk space required for your AI model. +* You configured the required accelerators, hardware, operating system, and {microshift-short} to provide the resources your model needs. +* Your AI model is ready to use. + +[NOTE] +==== +The `microshift-ai-model-serving` RPM contains manifests that deploy `kserve`, with the raw deployment mode enabled, and `ServingRuntimes` objects in the `redhat-ods-applications` namespace. +==== + +.Procedure + +. Install the {microshift-short} AI-model-serving RPM package by running the following command: ++ +[source,terminal] +---- +$ sudo dnf install microshift-ai-model-serving +---- + +. As a root user, restart the {microshift-short} service by entering the following command: ++ +[source,terminal] +---- +$ sudo systemctl restart microshift +---- + +. Optional: Install the release information package by running the following command: ++ +[source,terminal] +---- +$ sudo dnf install microshift-ai-model-serving-release-info <1> +---- +<1> The release information package contains a JSON file with image references useful for offline procedures or for deploying a copy of a `ServingRuntime` to your namespace during a bootc image build. + +.Verification + +* Verify that the `kserve` pod is running in the `redhat-ods-applications` namespace by entering the following command: ++ +[source,terminal] +---- +$ oc get pods -n redhat-ods-applications +---- ++ +.Example output ++ +[source,text] +---- +NAME READY STATUS RESTARTS AGE +kserve-controller-manager-7fc9fc688-kttmm 1/1 Running 0 1h +---- + +.Next steps + +* Create a namespace for your AI model. +* Package your model into an OCI image. +* Configure a model-serving runtime. +* Verify that your model is ready for inferencing. +* Make requests against the model server. diff --git a/modules/microshift-rhoai-model-package-oci.adoc b/modules/microshift-rhoai-model-package-oci.adoc new file mode 100644 index 000000000000..0818efc5c18e --- /dev/null +++ b/modules/microshift-rhoai-model-package-oci.adoc @@ -0,0 +1,118 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: PROCEDURE +[id="microshift-rhoai-model-package-oci_{context}"] += Packaging your AI model into an OCI image + +You can package your model into an OCI image and use the ModelCar approach to help you set up offline environments. With the ModelCar approach, your model can be embedded just like any other container image. + +[NOTE] +==== +If you already have S3-compatible object storage or a configured persistent volume claim, you can upload your AI model to those resources, but only the ModelCar approach is tested and supported. +==== + +.Prerequisites + +* You have root user access to your machine. +* The {oc-first} is installed. +* Podman is installed. +* Your model is ready to use.
+* You understand the concepts in the "How to build a ModelCar container" section of the following article about building an OCI image suitable for an vLLM model server, link:https://developers.redhat.com/articles/2025/01/30/build-and-deploy-modelcar-container-openshift-ai[Build and deploy a ModelCar container in OpenShift AI]. ++ +[NOTE] +==== +The exact directory structure depends on the model server. The following example uses a Containerfile with a ResNet-50 model that is compatible with the {ovms} {ov}. {ov} generally does not require an additional hardware accelerator. +==== + +.Procedure + +. Prepare a Containerfile with a compatible model and model server. ++ +.Example Containerfile with a ResNet-50 model used with the OVMS ++ +[source,text] +---- +FROM registry.access.redhat.com/ubi9/ubi-minimal:latest +RUN microdnf install -y wget && microdnf clean all +RUN mkdir -p /models/1 && chmod -R 755 /models/1 +RUN wget -q -P /models/1 \ + https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.bin \ + https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.xml +---- + +. Set the `IMAGE_REF` environment variable to simplify your process by running the following command: ++ +[source,terminal,subs="+quotes"] +---- +$ IMAGE_REF=__ <1> +---- +<1> Replace `__` with the name of your image reference. In this example, the `__` format is used. Your image reference name is specific to your use case. + +. Build the Containerfile by running the following command: ++ +[source,terminal] +---- +$ sudo podman build -t $IMAGE_REF <1> +---- +<1> Because CRI-O and Podman share storage, using `sudo` is required to make the image part of the root's container storage and usable by {microshift-short}. ++ +.Example output: ++ +[source,text] +---- +STEP 1/4: FROM registry.access.redhat.com/ubi9/ubi-minimal:latest +Trying to pull registry.access.redhat.com/ubi9/ubi-minimal:latest... +Getting image source signatures +Checking if image destination supports signatures +Copying blob 533b69cfd644 done | +Copying blob 863e9a7e2102 done | +Copying config 098048e6f9 done | +Writing manifest to image destination +Storing signatures +STEP 2/4: RUN microdnf install -y wget && microdnf clean all +<< SNIP >> +--> 4c74352ad42e +STEP 3/4: RUN mkdir -p /models/1 && chmod -R 755 /models/1 +--> bfd31acb1e81 +STEP 4/4: RUN wget -q -P /models/1 https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.bin https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.xml +COMMIT ovms-resnet50:test +--> 375b265c1c4b +Successfully tagged localhost/ovms-resnet50:test +375b265c1c4bc6f0a059c8739fb2b3a46e1b563728f6d9c51f26f29bb2c87 +---- + +. Optional: Push the Containerfile to your registry by running the following command: ++ +[source,terminal] +---- +$ sudo podman push $IMAGE_REF +---- ++ +[IMPORTANT] +==== +For offline use cases, include a tag other than `latest`. If the `latest` tag is used, the container that fetches and sets up the model is configured with the `imagePullPolicy:` parameter set to `Always` and the local image is ignored. If you use any other tag than `latest`, the `imagePullPolicy:` parameter is set to `IfNotPresent`. 
+==== + +.Verification + +* Verify that the image exists by running the following command: ++ +[source,terminal] +---- +$ sudo podman images ovms-resnet50 +---- ++ +.Example output +[source,text] +---- +REPOSITORY TAG IMAGE ID CREATED SIZE +localhost/ovms-resnet50 test 375b265c1c4b 3 minutes ago 136 MB +---- + +.Next steps + +* Configure a model-serving runtime. +* Confirm that your AI model is ready for inferencing. +* Make requests against the model server. diff --git a/modules/microshift-rhoai-model-serving-rt-verify.adoc b/modules/microshift-rhoai-model-serving-rt-verify.adoc new file mode 100644 index 000000000000..784b3d9b2441 --- /dev/null +++ b/modules/microshift-rhoai-model-serving-rt-verify.adoc @@ -0,0 +1,78 @@ + +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: PROCEDURE +[id="microshift-rhoai-model-serving-rt-verify_{context}"] += Verifying that the model-serving runtime is ready + +Verify that your model-serving runtime is ready for use by checking that the downstream generation activities are complete. + +.Prerequisites + +* You configured the `ServingRuntimes` CR. +* You created the `InferenceService` CR. +* You have root user access to your machine. +* The {oc-first} is installed. + +.Procedure + +. Check that the AI model is deployed in your custom namespace by running the following command: ++ +[source,terminal] +---- +$ oc get -n ai-demo deployment +---- ++ +.Example output +[source,terminal] +---- +NAME READY UP-TO-DATE AVAILABLE AGE +ovms-resnet50-predictor 1/1 1 1 72s +---- + +. Confirm that your deployment is in progress by running the following command: ++ +[source,terminal] +---- +$ oc rollout status -n ai-demo deployment ovms-resnet50-predictor +---- ++ +.Example output +[source,terminal] +---- +deployment "ovms-resnet50-predictor" successfully rolled out +---- + +. Check that the AI model workload pod is deployed in your custom namespace by running the following command: ++ +[source,terminal] +---- +$ oc get -n ai-demo pod +---- ++ +.Example output +[source,terminal] +---- +NAME READY STATUS RESTARTS AGE +ovms-resnet50-predictor-6fdb566b7f-bc9k5 2/2 Running 1 (72s ago) 74s +---- + +. Check for the service KServe created by running the following command: ++ +[source,terminal] +---- +$ oc get svc -n ai-demo +---- ++ +.Example output +[source,terminal] +---- +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +ovms-resnet50-predictor ClusterIP None 80/TCP 119s +---- + +.Next step + +* Create a `Route` object so that your applications can reach the {microshift-short} cluster. diff --git a/modules/microshift-rhoai-override-kserve-config.adoc b/modules/microshift-rhoai-override-kserve-config.adoc new file mode 100644 index 000000000000..a4bd1e2bffbc --- /dev/null +++ b/modules/microshift-rhoai-override-kserve-config.adoc @@ -0,0 +1,35 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: CONCEPT +[id="microshift-rhoai-override-kserve-config_{context}"] += Overriding KServe configuration + +If you want to override KServe settings to customize your model-serving environment, you can follow the general steps for your operating system. + +Option 1:: + +. Make a copy of the existing `ConfigMap` file, `inferenceservice-config`, in the `redhat-ods-applications` namespace. + +. Edit the settings you want to change. + +. Overwrite the existing `ConfigMap` object. + +. 
Restart KServe either by deleting the pod or by scaling the `Deployment` replicas down to `0` and then back up to `1`. + +Option 2:: + +. Copy the `ConfigMap` file, `/usr/lib/microshift/manifests.d/010-microshift-ai-model-serving-kserve/inferenceservice-config-microshift-patch.yaml`. + +. Edit the settings you want to change. + +. Apply the `ConfigMap` object. + +. Restart KServe either by deleting the pod or by scaling the `Deployment` replicas down to `0` and then back up to `1`. + +For {op-system-ostree} and {op-system-image} systems:: + +. Create a new manifest with the `ConfigMap` file, based on either of the `/usr/lib/microshift/manifests.d/010-microshift-ai-model-serving-kserve/inferenceservice-config-microshift-patch.yaml` or `inferenceservice-config` files, in the `redhat-ods-applications` namespace. + +. Ensure that the new manifest is placed in the `/usr/lib/microshift/manifests.d/` directory. Starting with the prefix `011` is recommended so that your manifest is applied after the `/usr/lib/microshift/manifests.d/010-microshift-ai-model-serving-kserve/` directory contents. diff --git a/modules/microshift-rhoai-query-model-con.adoc b/modules/microshift-rhoai-query-model-con.adoc new file mode 100644 index 000000000000..4bbdc9caaa02 --- /dev/null +++ b/modules/microshift-rhoai-query-model-con.adoc @@ -0,0 +1,13 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: CONCEPT +[id="microshift-rhoai-query-model-con_{context}"] += About querying your AI model + +Querying your model through the API is also called model inferencing. Model inferencing is most often used to retrieve information, automate tasks, make predictions, provide data insights, or perform actions. + +In general, queries must be constructed using a format compatible with the AI model being used. A model-serving runtime formats queries automatically. The model processes the query according to the underlying training and data, then provides an output. The output is expected to align with the purpose of the model itself, whether that be to give an answer, make a prediction, or perform a task. + +The following examples outline general steps to make sure your model is ready for inferencing, and what you might expect in a query output from the serving runtime. diff --git a/modules/microshift-rhoai-query-model.adoc b/modules/microshift-rhoai-query-model.adoc new file mode 100644 index 000000000000..bfa6769162de --- /dev/null +++ b/modules/microshift-rhoai-query-model.adoc @@ -0,0 +1,77 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: PROCEDURE +[id="microshift-rhoai-query-model_{context}"] += Querying your AI model + +Make an inference request against the AI model server that is using the `ovms-resnet50` model. + +.Prerequisites + +* The {microshift-short} cluster is running. +* You configured the model-serving runtime. +* You uploaded your AI model to {microshift-short}.
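The following command reuses the `DOMAIN`, `IP`, and `HEADER_LEN` variables and the `request.json` file prepared in the previous procedures. If you are starting from a new shell session, a sketch like the following restores them; the `ai-demo` namespace and the `ovms-resnet50-predictor` route are assumptions carried over from the earlier examples:

[source,terminal]
----
$ IP=$(oc get nodes -o json | jq -r '.items[0].status.addresses[0].address')
$ DOMAIN=$(oc get route -n ai-demo ovms-resnet50-predictor -o=jsonpath="{ .status.ingress[0].host }")
# Must match the inference header that was written into request.json
$ HEADER_LEN="$(echo -n '{"inputs" : [{"name": "0", "shape": [1], "datatype": "BYTES"}]}' | wc -c)"
----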
+ +.Procedure + +* Make an inference request against the model server that is using the `ovms-resnet50` model by running the following command: ++ +[source,terminal] +---- +$ curl \ + --data-binary "@./request.json" \ + --header "Inference-Header-Content-Length: ${HEADER_LEN}" \ + "${DOMAIN}/v2/models/ovms-resnet50/infer" \ + --connect-to "${DOMAIN}::${IP}:" > response.json +---- ++ +.Example inferencing output, saved to a `response.json` ++ +[source,json] +---- +{ + "model_name": "ovms-resnet50", + "model_version": "1", + "outputs": [{ + "name": "1463", + "shape": [1, 1000], + "datatype": "FP32", + "data": [ ....... ] <1> + }] +} +---- +<1> The contents of `.outputs[0].data` were omitted from the example for brevity. + +.Verification + +. To determine the model's prediction, get the index of the highest element in `.outputs[0].data` by using the following Python script: ++ +[source,python] +---- +import json +with open('response.json') as f: + response = json.load(f) +data = response["outputs"][0]["data"] +argmax = data.index(max(data)) +print(argmax) +---- ++ +.Example output +[source,text] +---- +309 <1> +---- +<1> In this example, the element labeled `309` is the model's response. + +. Validate the output against link:https://github.com/openvinotoolkit/model_server/blob/main/client/common/resnet_input_images.txt[resnet's input data], for example: ++ +[source,text] +---- +../../../../demos/common/static/images/bee.jpeg 309 +---- + +.Next steps + +* Optional: Query the AI model using other images available in the resnet input data. diff --git a/modules/microshift-rhoai-serving-ai-models-con.adoc b/modules/microshift-rhoai-serving-ai-models-con.adoc new file mode 100644 index 000000000000..94d09fcdc0fe --- /dev/null +++ b/modules/microshift-rhoai-serving-ai-models-con.adoc @@ -0,0 +1,40 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: CONCEPT +[id="microshift-rhoai-serving-ai-models-con_{context}"] += Serving AI models on {microshift-short} + +You can serve models on the {rhoai} single-model serving platform in {microshift-short} by configuring a model-serving runtime using the `ServingRuntime` and `InferenceService` custom resources (CRs). + +Model-serving runtimes for AI models in {microshift-short}:: + +A model-serving runtime is an environment for deploying and managing an AI model, providing integration with a specified model server and the model frameworks that it supports. Creating the model-serving runtime means configuring the objects that select the right model format for the AI model and serve the queries, among other detailed functions specific to your deployment. + +The `ServingRuntime` custom resource:: + +The `ServingRuntime` CR is a YAML file that defines the templates for pods that can dynamically load and unload AI model formats and exposes a service endpoint for querying the model through the API. Each `ServingRuntime` CR contains the information needed to run AI models, including the container image of the runtime and a list of the model formats that the model-serving runtime supports. Other configuration settings for the model-serving runtime can be set with environment variables defined in the container specification. + +The `InferenceService` custom resource:: + +The `InferenceService` CR is a YAML file that creates a server or inference service to process inference queries, pass them to the model, then return the inference output.
In {microshift-short}, the output is returned in the CLI. This inference service configuration file can also include many other options, such as specifying a hardware accelerator. + +[IMPORTANT] +==== +As a single-node Kubernetes distribution, {microshift-short} does not support multi-model deployments. You must use the single-model serving platform. In each {microshift-short} deployment, you can use one AI model, but potentially more than one model runtime. +==== + +Workflow for configuring a model-serving runtime:: + +* Select the model-serving runtime that supports the format of your AI model. + +* Create the `ServingRuntime` CR in your workload namespace. +//CRD is shipped with product; the CR is what users are creating. + +* If the {microshift-short} cluster is already running, you can export the required `ServingRuntime` CR to a file and edit it. + +* If the {microshift-short} cluster is not running, or if you want to manually prepare a manifest, you can use the original definition on the disk, which is part of the `microshift-ai-model-serving` RPM. + +* Create the `InferenceService` CR in your workload namespace. +//CRD is shipped with product; the CR is what users are creating. diff --git a/modules/microshift-rhoai-servingruntimes-ex.adoc b/modules/microshift-rhoai-servingruntimes-ex.adoc new file mode 100644 index 000000000000..e7a3789f1066 --- /dev/null +++ b/modules/microshift-rhoai-servingruntimes-ex.adoc @@ -0,0 +1,75 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: PROCEDURE +[id="microshift-rhoai-servingruntimes-ex_{context}"] += Creating a ServingRuntime CR for use in {microshift-short} + +Create a `ServingRuntime` custom resource (CR) based on installed manifests and release information. The following steps are an example of reusing the included `microshift-ai-model-serving` manifest files to re-create the {ovms} ({ov}) model-serving runtime in the workload namespace. + +[NOTE] +==== +This approach does not require a live cluster, so it can be part of CI/CD automation. +==== + +.Prerequisites + +* Both the `microshift-ai-model-serving` and `microshift-ai-model-serving-release-info` RPMs are installed. +* You have root user access to your machine. +* The {oc-first} is installed. + +.Procedure + +. Extract the image reference of the `ServingRuntime` CR you want to use from the {microshift-short} release information file by running the following command: ++ +[source,terminal] +---- +$ OVMS_IMAGE="$(jq -r '.images | with_entries(select(.key == "ovms-image")) | .[]' /usr/share/microshift/release/release-ai-model-serving-"$(uname -i)".json)" <1> +---- +<1> In this example, the image reference for the {ov} model-serving runtime is extracted. + +. Copy the original `ServingRuntime` YAML file by running the following command: ++ +[source,terminal] +---- +$ cp /usr/lib/microshift/manifests.d/050-microshift-ai-model-serving-runtimes/ovms-kserve.yaml ./ovms-kserve.yaml +---- + +. Add the actual image reference to the `image:` parameter field value of the `ServingRuntime` YAML by running the following command: ++ +[source,terminal] +---- +$ sed -i "s,image: ovms-image,image: ${OVMS_IMAGE}," ./ovms-kserve.yaml +---- + +. Create the `ServingRuntime` object in a custom namespace using the YAML file by running the following command: ++ +[source,terminal,subs="+quotes"] +---- +$ oc create -n __ -f ./ovms-kserve.yaml <1> +---- +<1> Replace `__` with the name of your namespace.
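As a quick check, you can list the `ServingRuntime` objects in the namespace and confirm that the new entry is present. The `ai-demo` namespace is an assumption carried over from the other examples in this document:

[source,terminal]
----
$ oc get servingruntimes -n ai-demo
----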
+ +[IMPORTANT] +==== +If the `ServingRuntime` CR is part of a new manifest, set the namespace in the `kustomization.yaml` file, for example: + +.Example Kustomize manifest namespace value +[source,yaml] +---- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: ai-demo +resources: + - ovms-kserve.yaml +#... +---- +==== + +.Next steps + +* Create the `InferenceService` object. +* Verify that your model is ready for inferencing. +* Query the model. +* Optional: examine the model metrics. diff --git a/modules/microshift-rhoai-supported-crds.adoc b/modules/microshift-rhoai-supported-crds.adoc new file mode 100644 index 000000000000..9afed7d724ad --- /dev/null +++ b/modules/microshift-rhoai-supported-crds.adoc @@ -0,0 +1,17 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: REFERENCE +[id="microshift-rhoai-supported-crds_{context}"] += Supported {rhoai} custom resource definitions + +The following {rhoai} custom resource definitions (CRDs) are supported: + +* `InferenceServices` +* `TrainedModels` +* `ServingRuntimes` +* `InferenceGraphs` +* `ClusterStorageContainers` +* `ClusterLocalModels` +* `LocalModelNodeGroups` diff --git a/modules/microshift-rhoai-supported-mserv-runtimes.adoc b/modules/microshift-rhoai-supported-mserv-runtimes.adoc new file mode 100644 index 000000000000..7fcc00b7be96 --- /dev/null +++ b/modules/microshift-rhoai-supported-mserv-runtimes.adoc @@ -0,0 +1,26 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: REFERENCE +[id="microshift-rhoai-supported-models_{context}"] += Supported {rhoai} model-serving runtimes + +The following {rhoai} model-serving runtimes are verified for {microshift-short} deployments: + +* vLLM ServingRuntime for KServe +* {ovms} ++ +[IMPORTANT] +==== +The {ovms} does not support the IPv6 networking protocol. Check each model server before use to ensure that it supports your networking configuration. +==== + +The following runtimes are available for development purposes with {microshift-short}: + +* Caikit Text Generation Inference Server (Caikit-TGIS) ServingRuntime for KServe +* Caikit Standalone ServingRuntime for KServe +* Text Generation Inference Server (TGIS) Standalone ServingRuntime for KServe +* vLLM ServingRuntime with Gaudi accelerators support for KServe +* vLLM ROCm ServingRuntime for KServe +* Custom runtimes that you create and test diff --git a/modules/microshift-rhoai-verify-model-connected.adoc b/modules/microshift-rhoai-verify-model-connected.adoc new file mode 100644 index 000000000000..221a45ccbaec --- /dev/null +++ b/modules/microshift-rhoai-verify-model-connected.adoc @@ -0,0 +1,79 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: PROCEDURE +[id="microshift-rhoai-verify-model-connected_{context}"] += Verifying that your AI model is accessible + +Before querying the model through the API, you can check to be certain that the model is accessible and ready to provide answers based on the connected data. The following examples continue with the {ovms}. + +.Prerequisites + +* You configured the AI model-serving runtime. +* You uploaded your AI model to {microshift-short}. +* The {microshift-short} cluster is running. +* You installed {oc-first}. + +.Procedure + +. 
Get the IP address of the {microshift-short} cluster and assign it to the `IP` variable as the following example command shows: ++ +[source,terminal] +---- +$ IP=$(oc get nodes -o json | jq -r '.items[0].status.addresses[0].address') +---- + +. Identify the name of the route you created by running the following command: ++ +[source,terminal,subs="+quotes"] +---- +$ oc get route -n ai-demo __ -o yaml <1> +---- +<1> Replace `__` with the actual name of your route. + +. Extract and assign the `HOST` value of the route to the `DOMAIN` variable by running the following command: ++ +[source,terminal,subs="+quotes"] +---- +$ DOMAIN=$(oc get route -n ai-demo __ -o=jsonpath="{ .status.ingress[0].host }") <1> +---- +<1> Replace `__` with the actual name of your route. + +. Enable data transfer from the route to the {microshift-short} IP address by running the following command: ++ +[source,terminal] +---- +$ curl -i "${DOMAIN}/v2/models/ovms-resnet50/ready" --connect-to "${DOMAIN}::${IP}:" <1> +---- +<1> Instead of using the `--connect-to "${DOMAIN}::${IP}:"` flag, you can also use real DNS, or add the IP address and the domain to the `/etc/hosts` file. ++ +.Example output +[source,text] +---- +HTTP/1.1 200 OK +content-type: application/json +date: Wed, 12 Mar 2025 16:01:32 GMT +content-length: 0 +set-cookie: 56bb4b6df4f80f0b59f56aa0a5a91c1a=4af1408b4a1c40925456f73033d4a7d1; path=/; HttpOnly +---- + +. Query the model metadata by running the following command: ++ +[source,terminal] +---- +$ curl "${DOMAIN}/v2/models/ovms-resnet50" --connect-to "${DOMAIN}::${IP}:" +---- ++ +.Example output +[source,json] +---- +{"name":"ovms-resnet50","versions":["1"],"platform":"OpenVINO","inputs":[{"name":"0","datatype":"FP32","shape":[1,224,224,3]}],"outputs":[{"name":"1463","datatype":"FP32","shape":[1,1000]}]} +---- + +.Next steps + +* Verify that your model is ready for inferencing. +* Query the model. +* Verify the model response. +* Optional: Get the model server metrics. diff --git a/modules/microshift-rhoai-workflow.adoc b/modules/microshift-rhoai-workflow.adoc new file mode 100644 index 000000000000..0f93e1ed5630 --- /dev/null +++ b/modules/microshift-rhoai-workflow.adoc @@ -0,0 +1,50 @@ +// Module included in the following assemblies: +// +// * microshift_ai/microshift-rhoai.adoc + +:_mod-docs-content-type: CONCEPT +[id="microshift-rhoai-workflow_{context}"] += Workflow for using {rhoai} with {microshift-short} + +Using {rhoai} with {microshift-short} requires the following general workflow: + +Getting your AI model ready:: + +* Choose the artificial intelligence (AI) model that best aligns with your edge application and the decisions that need to be made at {microshift-short} deployment sites. +* In the cloud or data center, develop, train, and test your model. +* Plan for the system requirements and additional resources your AI model requires to run. + +Setting up the deployment environment:: + +* Configure your {op-system-bundle} for the specific hardware your deployment runs on, including driver and device plugins. + +* To enable GPU or other hardware accelerators for {microshift-short}, follow the guidance specific to your edge device about what you need to install. For example, to use an NVIDIA GPU accelerator, begin by reading the following NVIDIA documentation: link:https://docs.nvidia.com/datacenter/cloud-native/edge/latest/nvidia-gpu-with-device-edge.html#running-a-gpu-accelerated-workload-on-red-hat-device-edge[Running a GPU-Accelerated Workload on Red Hat Device Edge] (NVIDIA documentation).
+ +* For troubleshooting, consult the device documentation or product support. ++ +[TIP] +==== +Using only a driver and device plugin instead of an Operator might be more resource efficient. +==== + +Installing the {microshift-short} {rhoai} RPM:: + +* Install the `microshift-ai-model-serving` RPM package. + +* Restart {microshift-short} if you are adding the RPM while {microshift-short} is running. + +Getting ready to deploy:: + +* Package your AI model into an OCI image, otherwise known as the ModelCar format. If you already have S3-compatible storage or a persistent volume claim set up, you can skip this step, but only the ModelCar format is tested and supported for {microshift-short}. + +* Select a model-serving runtime, which acts as your model server. Configure the runtime with the serving runtime and inference service. + +** Copy the `ServingRuntime` custom resource (CR) from the default `redhat-ods-applications` namespace to your own namespace. + +** Create the `InferenceService` CR. + +* Optional: Create a `Route` object so that your model can connect outside the cluster. + +Using your model:: + +* Make requests against the model server. For example, another pod running in your {microshift-short} deployment that is attached to a camera can stream an image back to the model-serving runtime. The model-serving runtime prepares that image as data for model inferencing. If the model was trained in the binary identification of a bee, the AI model outputs the likelihood that the image data is a bee.
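As a preview of the querying procedures later in this document, a single inference request against the served model can be as compact as the following sketch. The `DOMAIN`, `IP`, and `HEADER_LEN` values and the `request.json` payload are prepared in those later procedures:

[source,terminal]
----
$ curl \
    --data-binary "@./request.json" \
    --header "Inference-Header-Content-Length: ${HEADER_LEN}" \
    "${DOMAIN}/v2/models/ovms-resnet50/infer" \
    --connect-to "${DOMAIN}::${IP}:" > response.json
----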