Skip to content

Commit 69d965a

Browse files
committed
Add makefile and cloudbuild file to build and push lora-syncer
Signed-off-by: Kunjan <[email protected]>
1 parent cd2fc96 commit 69d965a

File tree

3 files changed

+76
-64
lines changed

3 files changed

+76
-64
lines changed

site-src/guides/index.md

-64
Original file line numberDiff line numberDiff line change
@@ -19,70 +19,6 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
1919
kubectl create secret generic hf-token --from-literal=token=$HF_TOKEN # Your Hugging Face Token with access to Llama2
2020
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/vllm/deployment.yaml
2121
```
22-
**OPTIONALLY**: Enable dynamic loading of LoRA adapters.
23-
24-
[Deploy a sample vLLM deployment with dynamic LoRA adapter loading enabled and the LoRA syncer sidecar](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/tools/dynamic-lora-sidecar/deployment.yaml)
25-
26-
***Safely roll out the v2 adapter***
27-
28-
1. Update lora configmap
29-
30-
``` yaml
31-
32-
apiVersion: v1
33-
kind: ConfigMap
34-
metadata:
35-
name: dynamic-lora-config
36-
data:
37-
configmap.yaml: |
38-
vLLMLoRAConfig:
39-
ensureExist:
40-
models:
41-
- id: chatbot-v1
42-
source: gs://[TEAM-A-MODELS-BUCKET]/chatbot-v1
43-
- id: chatbot-v2
44-
source: gs://[TEAM-A-MODELS-BUCKET]/chatbot-v2
45-
```
46-
47-
2. Configure a canary rollout with traffic split using LLMService. In this example, 10% of traffic to the chatbot model will be sent to v2.
48-
49-
``` yaml
50-
model:
51-
name: chatbot
52-
targetModels:
53-
targetModelName: chatbot-v1
54-
weight: 90
55-
targetModelName: chatbot-v2
56-
weight: 10
57-
```
58-
59-
3. Finish the rollout by sending 100% of the traffic to the new version.
60-
```yaml
61-
model:
62-
name: chatbot
63-
targetModels:
64-
targetModelName: chatbot-v2
65-
weight: 100
66-
```
67-
68-
4. Remove v1 from dynamic lora configmap.
69-
```yaml
70-
apiVersion: v1
71-
kind: ConfigMap
72-
metadata:
73-
name: dynamic-lora-config
74-
data:
75-
configmap.yaml: |
76-
vLLMLoRAConfig:
77-
ensureExist:
78-
models:
79-
- id: chatbot-v2
80-
source: gs://[TEAM-A-MODELS-BUCKET]/chatbot-v2
81-
ensureNotExist: # Explicitly unregisters the adapter from model servers
82-
models:
83-
- id: chatbot-v1
84-
source: gs://[TEAM-A-MODELS-BUCKET]/chatbot-v1
85-
```
8622

8723

8824

tools/dynamic-lora-sidecar/Makefile

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Name of the image produced by this Makefile (fixed; not overridable with ?=).
IMAGE_NAME := lora-syncer
2+
# Registry prefix for the image. Assigned with ?=, so an existing value
# (environment or command line) takes precedence.
IMAGE_REGISTRY ?= us-central1-docker.pkg.dev/k8s-staging-images/llm-instance-gateway
3+
# Full repository path: <registry>/<image name>.
IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(IMAGE_NAME)
4+
5+
# Tag derived from `git describe --tags --dirty --always` unless GIT_TAG is
# already set. ?= keeps this recursively expanded, so the git command is run
# each time the value is expanded.
GIT_TAG ?= $(shell git describe --tags --dirty --always)
6+
# Secondary tag: the value of _PULL_BASE_REF when it is non-empty, else "main".
EXTRA_TAG ?= $(if $(_PULL_BASE_REF),$(_PULL_BASE_REF),main)
7+
# Fully qualified image references built from the repo and the two tags above.
IMAGE_TAG ?= $(IMAGE_REPO):$(GIT_TAG)
8+
EXTRA_IMAGE_TAG ?= $(IMAGE_REPO):$(EXTRA_TAG)
9+
10+
11+
# Value passed to the build command's --platform option.
PLATFORMS ?= linux/amd64
12+
13+
14+
# Command used to invoke buildx, and the build command derived from it.
DOCKER_BUILDX_CMD ?= docker buildx
15+
IMAGE_BUILD_CMD ?= $(DOCKER_BUILDX_CMD) build
16+
# Extra options appended to the image build command; empty by default.
IMAGE_BUILD_EXTRA_OPTS ?=
17+
18+
# --- Targets ---
19+
.PHONY: image-local-build
20+
# Build the image using a temporary buildx builder.
#
# The builder is created, used and removed inside ONE shell invocation (note
# the line continuations). Each recipe line otherwise runs in its own shell, so
# a BUILDER variable assigned on one line would be empty on the next one.
# The builder name is captured with shell command substitution ($$(...)) rather
# than make's $(shell ...), so it is created only when this recipe runs.
# The builder is removed even if the nested build fails, and the recipe exits
# with the nested build's status.
# PUSH is forwarded to the nested make so image-local-push can add --push.
image-local-build:
	BUILDER=$$($(DOCKER_BUILDX_CMD) create --use) && \
	{ $(MAKE) image-build PUSH=$(PUSH); status=$$?; \
	  $(DOCKER_BUILDX_CMD) rm "$$BUILDER"; exit $$status; }
24+
25+
.PHONY: image-local-push
26+
# Same as image-local-build, but with PUSH set to --push. The target-specific
# variable is inherited by the image-local-build prerequisite, whose recipe
# forwards PUSH to the nested image-build invocation.
image-local-push: PUSH=--push
27+
image-local-push: image-local-build
28+
29+
.PHONY: image-build
30+
# Build the image tagged $(IMAGE_TAG) for $(PLATFORMS) from the current
# directory.
#
# BASE_IMAGE / BUILDER_IMAGE are forwarded as build args only when they are
# non-empty. This Makefile does not define them, and an unconditional
# `--build-arg NAME=` would hand the build an empty value instead of leaving
# the Dockerfile's own default in place.
# $(PUSH) is empty for a plain build and `--push` when reached via image-push.
# $(IMAGE_BUILD_EXTRA_OPTS) lets callers append arbitrary build options.
# NOTE(review): EXTRA_IMAGE_TAG is defined in this file but is not applied as a
# tag here — confirm whether a second `-t` was intended.
image-build:
	$(IMAGE_BUILD_CMD) -t $(IMAGE_TAG) \
		--platform=$(PLATFORMS) \
		$(if $(strip $(BASE_IMAGE)),--build-arg BASE_IMAGE=$(BASE_IMAGE)) \
		$(if $(strip $(BUILDER_IMAGE)),--build-arg BUILDER_IMAGE=$(BUILDER_IMAGE)) \
		$(PUSH) \
		$(IMAGE_BUILD_EXTRA_OPTS) ./
37+
38+
.PHONY: image-push
39+
# Build and push: sets PUSH to --push for this target and its image-build
# prerequisite, whose recipe places $(PUSH) on the build command line.
image-push: PUSH=--push
39+
image-push: image-build
41+
42+
.PHONY: run
43+
# Start a container from $(IMAGE_TAG), mounting ./config (relative to the
# directory make runs in) at /config. The container user is selected by name
# (appuser).
run:
	docker run \
		--volume $(CURDIR)/config:/config \
		--user appuser \
		$(IMAGE_TAG)
45+
46+
.PHONY: clean
47+
# Remove the local images tagged $(IMAGE_TAG) and $(EXTRA_IMAGE_TAG).
# Best-effort: stderr is discarded and a failing `docker rmi` is ignored
# through `|| true`, so this target does not fail when an image is absent.
clean:
47+
docker rmi $(IMAGE_TAG) $(EXTRA_IMAGE_TAG) 2>/dev/null || true
49+
50+
.PHONY: clean-dangling
51+
# Remove dangling (untagged) images.
#
# The image list comes from shell command substitution, so the dollar sign is
# doubled: a single `$(docker images ...)` would be expanded by make as an
# undefined variable and `docker rmi` would receive no image IDs at all.
# Best-effort: errors (including "nothing to remove") are silenced.
clean-dangling:
	docker rmi $$(docker images -f "dangling=true" -q) 2>/dev/null || true
53+
54+
.PHONY: test
55+
# Run the Python unit tests using unittest's default discovery settings.
test:
55+
python -m unittest discover
57+
58+
.PHONY: all
59+
# Aggregate target: requires both `test` and `image-build`.
# NOTE(review): this is not the first target in the file, so a bare `make`
# does not select it unless a default goal is configured elsewhere.
all: test image-build
+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# See https://cloud.google.com/cloud-build/docs/build-config
2+
timeout: 3000s
3+
4+
steps:
5+
# Single build step: runs `make image-push` in the gcb-docker-gcloud image.
- name: gcr.io/k8s-testimages/gcb-docker-gcloud:v20220830-45cbff55bc
6+
entrypoint: make
7+
args:
8+
- image-push
9+
env:
10+
# Values are filled in from the substitutions declared in this file.
- GIT_TAG=$_GIT_TAG
11+
- EXTRA_TAG=$_PULL_BASE_REF
12+
# Points the buildx command at the entrypoint script; presumably shipped in
# the builder image — confirm.
- DOCKER_BUILDX_CMD=/buildx-entrypoint
13+
14+
substitutions:
15+
_GIT_TAG: '0.0.0' # Default value for Git tag
16+
_PULL_BASE_REF: 'main' # Default value for branch/tag
17+
# No options needed!

0 commit comments

Comments
 (0)