Commit 4de6fd3

chore(docker): reduce size between docker builds (#7571)
by adding a layer with all the PyTorch dependencies that don't change most of the time.

## Summary

Every time the [`main` docker images](https://github.com/invoke-ai/InvokeAI/pkgs/container/invokeai) rebuild and I pull `main-cuda`, it downloads another 3+ GB, which is far more than it should, since most things don't change from one commit on `main` to the next. This is an attempt to follow the guidance in [Using uv in Docker: Intermediate Layers](https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers), so that a single layer installs all the dependencies (including PyTorch with its bundled NVIDIA libraries) _before_ the project's own frequently-changing files are copied into the image. A distilled sketch of the layering pattern is included after the checklist below.

## Related Issues / Discussions

- [Improved docker layer cache with uv](https://discord.com/channels/1020123559063990373/1329975172022927370)
- [astral: Can `uv pip install` torch, but not `uv sync` it](https://discord.com/channels/1039017663004942429/1329986610770612347)

## QA Instructions

Hopefully the CI system building the docker images is sufficient. There is, however, one change to `pyproject.toml` related to xformers, so it is worth checking that `python -m xformers.info` still reports triton on the platforms that expect it.

## Merge Plan

I don't expect this to be a disruptive merge. (An earlier revision of this PR moved the venv, but I've reverted that change at ebr's recommendation.)

## Checklist

- [ ] _The PR has a short but descriptive title, suitable for a changelog_
- [ ] _Tests added / updated (if applicable)_
- [ ] _Documentation added / updated (if applicable)_
- [ ] _Updated `What's New` copy (if doing a release after this PR)_
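As referenced in the Summary, here is a minimal sketch of the uv intermediate-layers pattern this change follows. It is illustrative only, not the project's Dockerfile: the base image and cache path are assumptions, and the real `docker/Dockerfile` additionally runs as the `ubuntu` user and switches the PyTorch package index per platform.

```dockerfile
# Sketch of uv's intermediate-layers pattern (assumed base image and cache path;
# the real docker/Dockerfile differs in user handling and index selection).
FROM ubuntu:24.04
COPY --from=ghcr.io/astral-sh/uv:0.6.0 /uv /uvx /bin/
WORKDIR /opt/invokeai
RUN uv python install 3.11

# Layer 1: install dependencies only. pyproject.toml (and invokeai/version, which the
# real Dockerfile also bind-mounts) are bind-mounted rather than copied, so this
# multi-GB layer is rebuilt only when dependency metadata changes, not on every commit.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=invokeai/version,target=invokeai/version \
    uv sync --no-install-project

# Layer 2: copy the frequently-changing sources and install the project itself;
# the dependency layer above stays cached across commits.
COPY invokeai invokeai
COPY pyproject.toml .
RUN --mount=type=cache,target=/root/.cache/uv uv sync
```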
2 parents ea2320c + 3feb1a6 commit 4de6fd3

File tree

2 files changed (+34, -19 lines)


Diff for: docker/Dockerfile

+33 -17

@@ -13,48 +13,63 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
     git
 
 # Install `uv` for package management
-COPY --from=ghcr.io/astral-sh/uv:0.5.5 /uv /uvx /bin/
+COPY --from=ghcr.io/astral-sh/uv:0.6.0 /uv /uvx /bin/
 
 ENV VIRTUAL_ENV=/opt/venv
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 ENV INVOKEAI_SRC=/opt/invokeai
 ENV PYTHON_VERSION=3.11
+ENV UV_PYTHON=3.11
 ENV UV_COMPILE_BYTECODE=1
 ENV UV_LINK_MODE=copy
+ENV UV_PROJECT_ENVIRONMENT="$VIRTUAL_ENV"
+ENV UV_INDEX="https://download.pytorch.org/whl/cu124"
 
 ARG GPU_DRIVER=cuda
-ARG TARGETPLATFORM="linux/amd64"
 # unused but available
 ARG BUILDPLATFORM
 
 # Switch to the `ubuntu` user to work around dependency issues with uv-installed python
 RUN mkdir -p ${VIRTUAL_ENV} && \
     mkdir -p ${INVOKEAI_SRC} && \
-    chmod -R a+w /opt
+    chmod -R a+w /opt && \
+    mkdir ~ubuntu/.cache && chown ubuntu: ~ubuntu/.cache
 USER ubuntu
 
-# Install python and create the venv
-RUN uv python install ${PYTHON_VERSION} && \
-    uv venv --relocatable --prompt "invoke" --python ${PYTHON_VERSION} ${VIRTUAL_ENV}
+# Install python
+RUN --mount=type=cache,target=/home/ubuntu/.cache/uv,uid=1000,gid=1000 \
+    uv python install ${PYTHON_VERSION}
 
 WORKDIR ${INVOKEAI_SRC}
-COPY invokeai ./invokeai
-COPY pyproject.toml ./
 
-# Editable mode helps use the same image for development:
-# the local working copy can be bind-mounted into the image
-# at path defined by ${INVOKEAI_SRC}
+# Install project's dependencies as a separate layer so they aren't rebuilt every commit.
+# bind-mount instead of copy to defer adding sources to the image until next layer.
+#
 # NOTE: there are no pytorch builds for arm64 + cuda, only cpu
 # x86_64/CUDA is the default
 RUN --mount=type=cache,target=/home/ubuntu/.cache/uv,uid=1000,gid=1000 \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    --mount=type=bind,source=invokeai/version,target=invokeai/version \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ] || [ "$GPU_DRIVER" = "cpu" ]; then \
+        UV_INDEX="https://download.pytorch.org/whl/cpu"; \
+    elif [ "$GPU_DRIVER" = "rocm" ]; then \
+        UV_INDEX="https://download.pytorch.org/whl/rocm6.1"; \
+    fi && \
+    uv sync --no-install-project
+
+# Now that the bulk of the dependencies have been installed, copy in the project files that change more frequently.
+COPY invokeai invokeai
+COPY pyproject.toml .
+
+RUN --mount=type=cache,target=/home/ubuntu/.cache/uv,uid=1000,gid=1000 \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
     if [ "$TARGETPLATFORM" = "linux/arm64" ] || [ "$GPU_DRIVER" = "cpu" ]; then \
-        extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/cpu"; \
+        UV_INDEX="https://download.pytorch.org/whl/cpu"; \
     elif [ "$GPU_DRIVER" = "rocm" ]; then \
-        extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/rocm6.1"; \
-    else \
-        extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/cu124"; \
+        UV_INDEX="https://download.pytorch.org/whl/rocm6.1"; \
     fi && \
-    uv pip install --python ${PYTHON_VERSION} $extra_index_url_arg -e "."
+    uv sync
+
 
 #### Build the Web UI ------------------------------------
 
@@ -98,6 +113,7 @@ RUN apt update && apt install -y --no-install-recommends \
 
 ENV INVOKEAI_SRC=/opt/invokeai
 ENV VIRTUAL_ENV=/opt/venv
+ENV UV_PROJECT_ENVIRONMENT="$VIRTUAL_ENV"
 ENV PYTHON_VERSION=3.11
 ENV INVOKEAI_ROOT=/invokeai
 ENV INVOKEAI_HOST=0.0.0.0
@@ -109,7 +125,7 @@ ENV CONTAINER_GID=${CONTAINER_GID:-1000}
 # Install `uv` for package management
 # and install python for the ubuntu user (expected to exist on ubuntu >=24.x)
 # this is too tiny to optimize with multi-stage builds, but maybe we'll come back to it
-COPY --from=ghcr.io/astral-sh/uv:0.5.5 /uv /uvx /bin/
+COPY --from=ghcr.io/astral-sh/uv:0.6.0 /uv /uvx /bin/
 USER ubuntu
 RUN uv python install ${PYTHON_VERSION}
 USER root
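The index selection in the dependency layer above is driven by the `GPU_DRIVER` build argument, with the default `UV_INDEX` pointing at the cu124 wheels. As a rough illustration (these commands are not taken from the repository's scripts or CI), builds from the repository root might look like:

```sh
# Illustrative invocations; the tag names are arbitrary examples.
docker build -f docker/Dockerfile -t invokeai:cuda .   # default index: cu124
docker build -f docker/Dockerfile --build-arg GPU_DRIVER=rocm -t invokeai:rocm .
docker build -f docker/Dockerfile --build-arg GPU_DRIVER=cpu -t invokeai:cpu .
```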

Diff for: pyproject.toml

+1 -2

@@ -101,8 +101,7 @@ dependencies = [
 "xformers" = [
     # Core generation dependencies, pinned for reproducible builds.
     "xformers>=0.0.28.post1; sys_platform!='darwin'",
-    # Auxiliary dependencies, pinned only if necessary.
-    "triton; sys_platform=='linux'",
+    # torch 2.4+cu carries its own triton dependency
 ]
 "onnx" = ["onnxruntime"]
 "onnx-cuda" = ["onnxruntime-gpu"]
