diff --git a/.github/workflows/release_wheel.yml b/.github/workflows/release_wheel.yml index 8712de0d4..aa24e59c2 100644 --- a/.github/workflows/release_wheel.yml +++ b/.github/workflows/release_wheel.yml @@ -34,6 +34,8 @@ jobs: torch: "2.3" - python: "3.8" # torch 2.5+ drops python 3.8 torch: "2.5" + - cuda: "12.4" + torch: "2.5" runs-on: [self-hosted] steps: diff --git a/.github/workflows/release_wheel_sglang.yml b/.github/workflows/release_wheel_sglang.yml new file mode 100644 index 000000000..378f737d3 --- /dev/null +++ b/.github/workflows/release_wheel_sglang.yml @@ -0,0 +1,95 @@ +name: Release +on: + workflow_dispatch: + inputs: + tag_name: + required: true + type: string + workflow_call: + inputs: + tag_name: + required: true + type: string + secrets: + WHL_TOKEN: + required: true + +env: + TORCH_CUDA_ARCH_LIST: "7.5 8.0 8.9 9.0+PTX" + +jobs: + build: + strategy: + fail-fast: false + matrix: + cuda: ["12.4"] + torch: ["2.5"] + + runs-on: [self-hosted] + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - name: Build wheel + run: | + chown -R $CI_UID:$CI_GID "$GITHUB_WORKSPACE" + docker run --rm -t \ + -v "$CI_RUNNER_CACHE_DIR":/ci-cache \ + -v "$GITHUB_WORKSPACE":/app \ + -e FLASHINFER_CI_CACHE=/ci-cache \ + -e FLASHINFER_CI_CUDA_VERSION=${{ matrix.cuda }} \ + -e FLASHINFER_CI_TORCH_VERSION=${{ matrix.torch }} \ + -e TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" \ + -e MAX_JOBS=128 \ + --user $CI_UID:$CI_GID \ + pytorch/manylinux-builder:cuda${{ matrix.cuda }} \ + bash /app/scripts/run-ci-build-wheel.sh + timeout-minutes: 120 + - run: du -h dist/* + + - uses: actions/upload-artifact@v4 + with: + name: wheel-cuda${{ matrix.cuda }}-torch${{ matrix.torch }} + path: dist/* + + release: + needs: build + runs-on: [self-hosted] + steps: + - uses: actions/download-artifact@v4 + with: + path: dist/ + merge-multiple: true + pattern: wheel-* + + - run: ls -lah dist/ + + - uses: softprops/action-gh-release@v1 + with: + tag_name: ${{ inputs.tag_name }} + files: | + dist/flashinfer*.whl + + - uses: softprops/action-gh-release@v1 + with: + tag_name: ${{ inputs.tag_name }} + files: | + dist/flashinfer-*.tar.gz + + - name: Clone wheel index + run: git clone https://oauth2:${WHL_TOKEN}@github.com/flashinfer-ai/whl.git flashinfer-whl + env: + WHL_TOKEN: ${{ secrets.WHL_TOKEN }} + + - name: Update wheel index + run: python3 scripts/update_whl_index.py + + - name: Push wheel index + run: | + cd flashinfer-whl + git config --local user.name "github-actions[bot]" + git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add -A + git commit -m "update whl" + git push