diff --git a/.github/workflows/beta-build.yml b/.github/workflows/beta-build.yml
index c5c09dcb5..bdc277231 100644
--- a/.github/workflows/beta-build.yml
+++ b/.github/workflows/beta-build.yml
@@ -67,7 +67,7 @@ jobs:
cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
build-linux-x64:
- uses: ./.github/workflows/template-build-linux-x64.yml
+ uses: ./.github/workflows/template-build-linux.yml
secrets: inherit
needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
with:
@@ -79,6 +79,22 @@ jobs:
channel: beta
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ arch: amd64
+
+ build-linux-arm64:
+ uses: ./.github/workflows/template-build-linux.yml
+ secrets: inherit
+ needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
+ with:
+ ref: ${{ github.ref }}
+ public_provider: github
+ new_version: ${{ needs.get-update-version.outputs.new_version }}
+ runs-on: ubuntu-2004-arm64
+ cmake-flags: "-DCORTEX_VARIANT=beta -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
+ channel: beta
+ upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
+ cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ arch: arm64
build-docker-x64:
uses: ./.github/workflows/template-build-docker-x64.yml
@@ -111,7 +127,7 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
noti-discord:
- needs: [get-update-version, create-draft-release, build-macos, build-windows-x64, build-linux-x64, update_release]
+ needs: [get-update-version, create-draft-release, build-macos, build-windows-x64, build-linux-x64, build-linux-arm64, update_release]
runs-on: ubuntu-latest
permissions:
contents: write
@@ -136,9 +152,13 @@ jobs:
- Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-network-installer.pkg
- Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-local-installer.pkg
- Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal.tar.gz
- - Linux Deb:
+ - Linux amd64 Deb:
- Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-network-installer.deb
- Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-local-installer.deb
- Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64.tar.gz
+ - Linux arm64 Deb:
+ - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-arm64-network-installer.deb
+ - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-arm64-local-installer.deb
+ - Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-arm64.tar.gz
- Docker: menloltd/cortex:beta-${{ env.VERSION }}
- Github Release: https://github.com/janhq/cortex.cpp/releases/tag/v${{ env.VERSION }}
\ No newline at end of file
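
Note: the two Linux jobs above differ only in their runner label and the new arch input. GitHub Actions also allows calling a reusable workflow under a strategy matrix, so the same pair could be expressed as one job; a sketch, with the amd64 runner label assumed since it lies outside this hunk:

    build-linux:
      uses: ./.github/workflows/template-build-linux.yml
      secrets: inherit
      needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
      strategy:
        matrix:
          include:
            - arch: amd64
              runner: ubuntu-20-04        # assumed; not shown in this hunk
            - arch: arm64
              runner: ubuntu-2004-arm64
      with:
        runs-on: ${{ matrix.runner }}
        arch: ${{ matrix.arch }}
        # remaining inputs as in the jobs above
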
diff --git a/.github/workflows/cortex-cpp-quality-gate.yml b/.github/workflows/cortex-cpp-quality-gate.yml
index 8a76e4669..fd98930a1 100644
--- a/.github/workflows/cortex-cpp-quality-gate.yml
+++ b/.github/workflows/cortex-cpp-quality-gate.yml
@@ -20,6 +20,12 @@ jobs:
fail-fast: false
matrix:
include:
+ - os: "linux"
+ name: "arm64"
+ runs-on: "ubuntu-2004-arm64"
+ cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
+ build-deps-cmake-flags: ""
+ ccache-dir: ""
- os: "linux"
name: "amd64"
runs-on: "ubuntu-20-04-cuda-12-0"
@@ -52,6 +58,7 @@ jobs:
submodules: recursive
- name: use python
+ continue-on-error: true
uses: actions/setup-python@v5
with:
python-version: "3.10"
@@ -90,15 +97,38 @@
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
- name: Configure vcpkg
+ if: runner.os != 'Linux'
+ run: |
+ cd engine
+ make configure-vcpkg
+
+ - name: Configure vcpkg linux
+ if: runner.os == 'Linux'
run: |
cd engine
+ # Set env if arch is arm64
+ if [ "${{ matrix.name }}" == "arm64" ]; then
+ sudo apt install ninja-build pkg-config -y
+ export VCPKG_FORCE_SYSTEM_BINARIES=1
+ fi
make configure-vcpkg
- name: Build
+ if: runner.os != 'Linux'
run: |
cd engine
make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}"
+ - name: Build linux
+ if: runner.os == 'Linux'
+ run: |
+ cd engine
+ if [ "${{ matrix.name }}" == "arm64" ]; then
+ export VCPKG_FORCE_SYSTEM_BINARIES=1
+ fi
+ make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}"
+
+
- name: Run setup config
run: |
cd engine
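
vcpkg ships no prebuilt bootstrap binaries (cmake, ninja) for Linux arm64, so VCPKG_FORCE_SYSTEM_BINARIES=1 tells it to fall back to the system tools; that is why ninja-build and pkg-config are installed first. An export in one run: step does not carry over to later steps, which is why the Build step re-exports the variable. The equivalent setup on a local arm64 box, as a sketch:

    sudo apt install -y ninja-build pkg-config
    export VCPKG_FORCE_SYSTEM_BINARIES=1   # vcpkg: use system cmake/ninja on arm64
    cd engine
    make configure-vcpkg
    make build CMAKE_EXTRA_FLAGS="..." BUILD_DEPS_CMAKE_EXTRA_FLAGS="..."
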
diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml
index 9a31ef5ff..1f076dc97 100644
--- a/.github/workflows/nightly-build.yml
+++ b/.github/workflows/nightly-build.yml
@@ -74,7 +74,7 @@ jobs:
cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
build-linux-x64:
- uses: ./.github/workflows/template-build-linux-x64.yml
+ uses: ./.github/workflows/template-build-linux.yml
secrets: inherit
needs: [get-update-version, set-public-provider, get-cortex-llamacpp-latest-version]
with:
@@ -85,11 +85,26 @@ jobs:
cmake-flags: "-DCORTEX_VARIANT=nightly -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
channel: nightly
cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ arch: amd64
+
+ build-linux-arm64:
+ uses: ./.github/workflows/template-build-linux.yml
+ secrets: inherit
+ needs: [get-update-version, set-public-provider, get-cortex-llamacpp-latest-version]
+ with:
+ ref: ${{ needs.set-public-provider.outputs.ref }}
+ public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
+ new_version: ${{ needs.get-update-version.outputs.new_version }}
+ runs-on: ubuntu-2004-arm64
+ cmake-flags: "-DCORTEX_VARIANT=nightly -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
+ channel: nightly
+ cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ arch: arm64
update-latest-version:
runs-on: ubuntu-latest
if: needs.set-public-provider.outputs.public_provider == 'aws-s3'
- needs: [get-update-version, set-public-provider, build-linux-x64, build-macos, build-windows-x64, get-cortex-llamacpp-latest-version]
+ needs: [get-update-version, set-public-provider, build-linux-x64, build-linux-arm64, build-macos, build-windows-x64, get-cortex-llamacpp-latest-version]
steps:
- name: Update latest version
id: update-latest-version
@@ -100,9 +115,11 @@ jobs:
aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/mac-universal-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/mac-arm64/cortex-nightly.tar.gz
aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/mac-universal-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/mac-universal/cortex-nightly.tar.gz
aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/linux-amd64-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/linux-amd64/cortex-nightly.tar.gz
+ aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/linux-arm64-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/linux-arm64/cortex-nightly.tar.gz
aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/windows-amd64-cortex-nightly.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/windows-amd64/cortex-nightly.tar.gz
aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/cortex-mac-universal-network-installer.pkg s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/mac-universal/cortex-mac-universal-network-installer.pkg
aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/cortex-linux-amd64-network-installer.deb s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/linux-amd64/cortex-linux-amd64-network-installer.deb
+ aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/cortex-linux-arm64-network-installer.deb s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/linux-arm64/cortex-linux-arm64-network-installer.deb
aws s3 cp s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/cortex-windows-amd64-network-installer.exe s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/latest/windows-amd64/cortex-windows-amd64-network-installer.exe
env:
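
Taken together with the pre-existing copies, the nightly "latest" layout on S3 now covers five platform prefixes (reconstructed from the aws s3 cp destinations above):

    cortex/latest/
      mac-arm64/cortex-nightly.tar.gz
      mac-universal/cortex-nightly.tar.gz
      mac-universal/cortex-mac-universal-network-installer.pkg
      linux-amd64/cortex-nightly.tar.gz
      linux-amd64/cortex-linux-amd64-network-installer.deb
      linux-arm64/cortex-nightly.tar.gz
      linux-arm64/cortex-linux-arm64-network-installer.deb
      windows-amd64/cortex-nightly.tar.gz
      windows-amd64/cortex-windows-amd64-network-installer.exe
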
diff --git a/.github/workflows/python-script-package.yml b/.github/workflows/python-script-package.yml
new file mode 100644
index 000000000..5ea65be9c
--- /dev/null
+++ b/.github/workflows/python-script-package.yml
@@ -0,0 +1,72 @@
+name: Build and Package Python Code
+
+on:
+ workflow_dispatch:
+ inputs:
+ model_dir:
+ description: "Path to model directory in github repo"
+ required: true
+ repo_name:
+ description: "name of repo to be checked out"
+ required: true
+ branch_name:
+ description: "name of branch to be checked out"
+ required: true
+ default: main
+ hf_repo:
+ description: "name of huggingface repo to be pushed"
+ required: true
+ hf_prefix_branch:
+ description: "prefix of hf branch"
+ required: false
+
+env:
+ MODEL_DIR: ${{ inputs.model_dir }}
+ REPO_NAME: ${{ inputs.repo_name }}
+ BRANCH_NAME: ${{ inputs.branch_name }}
+ HF_REPO: ${{ inputs.hf_repo }}
+ HF_PREFIX_BRANCH: ${{ inputs.hf_prefix_branch }}
+
+jobs:
+ build-and-test:
+ runs-on: ${{ matrix.runs-on }}
+ timeout-minutes: 3600
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - os: "linux"
+ name: "amd64"
+ runs-on: "ubuntu-20-04-cuda-12-0"
+ - os: "mac"
+ name: "amd64"
+ runs-on: "macos-selfhosted-12"
+ - os: "mac"
+ name: "arm64"
+ runs-on: "macos-selfhosted-12-arm64"
+ - os: "windows"
+ name: "amd64"
+ runs-on: "windows-cuda-12-0"
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v3
+ with:
+ submodules: recursive
+ repository: ${{env.REPO_NAME}}
+ ref: ${{env.BRANCH_NAME}}
+ - name: use python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install hf-transfer huggingface_hub
+
+ - name: Upload Artifact
+ run: |
+ huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
+ cd ${{env.MODEL_DIR}} && huggingface-cli upload ${{env.HF_REPO}} . . --revision ${{env.HF_PREFIX_BRANCH}}-${{ matrix.os }}-${{ matrix.name }}
+ huggingface-cli logout
\ No newline at end of file
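
Each matrix leg publishes the model directory to a Hugging Face branch named <hf_prefix_branch>-<os>-<arch>. With hypothetical inputs hf_repo=org/repo and hf_prefix_branch=v0.1, the mac arm64 leg effectively runs:

    huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential
    huggingface-cli upload org/repo . . --revision v0.1-mac-arm64
    huggingface-cli logout

Since hf_prefix_branch is optional, leaving it unset yields a revision with a leading hyphen (e.g. -mac-arm64), so callers should always pass a prefix.
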
diff --git a/.github/workflows/python-venv-package.yml b/.github/workflows/python-venv-package.yml
new file mode 100644
index 000000000..8bed4eb97
--- /dev/null
+++ b/.github/workflows/python-venv-package.yml
@@ -0,0 +1,275 @@
+name: Build and Package Python Virtual Environment
+
+on:
+ workflow_dispatch:
+ inputs:
+ model_dir:
+ description: "Path to model directory in github repo"
+ required: true
+ model_name:
+ description: "name of model to be release"
+ required: true
+ repo_name:
+ description: "name of repo to be checked out"
+ required: true
+ branch_name:
+ description: "name of branch to be checked out"
+ required: true
+ default: main
+ hf_repo:
+ description: "name of huggingface repo to be pushed"
+ required: true
+ hf_prefix_branch:
+ description: "prefix of hf branch"
+ required: false
+
+
+
+env:
+ MODEL_DIR: ${{ inputs.model_dir }}
+ MODEL_NAME: ${{ inputs.model_name }}
+ REPO_NAME: ${{ inputs.repo_name }}
+ BRANCH_NAME: ${{ inputs.branch_name }}
+ HF_REPO: ${{ inputs.hf_repo }}
+ HF_PREFIX_BRANCH: ${{ inputs.hf_prefix_branch }}
+
+jobs:
+ build-and-test:
+ runs-on: ${{ matrix.runs-on }}
+ timeout-minutes: 3600
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - os: "linux"
+ name: "amd64"
+ runs-on: "ubuntu-20-04-cuda-12-0"
+ - os: "mac"
+ name: "amd64"
+ runs-on: "macos-selfhosted-12"
+ - os: "mac"
+ name: "arm64"
+ runs-on: "macos-selfhosted-12-arm64"
+ - os: "windows"
+ name: "amd64"
+ runs-on: "windows-cuda-12-0"
+ steps:
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v3
+ with:
+ submodules: recursive
+ repository: ${{env.REPO_NAME}}
+ ref: ${{env.BRANCH_NAME}}
+ - uses: conda-incubator/setup-miniconda@v3
+ if: runner.os != 'windows'
+ with:
+ auto-update-conda: true
+ python-version: 3.11
+ - name: use python
+ if: runner.os == 'windows'
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.11"
+
+ - name: Get Cer for code signing
+ if: runner.os == 'macOS'
+ run: base64 -d <<< "$CODE_SIGN_P12_BASE64" > /tmp/codesign.p12
+ shell: bash
+ env:
+ CODE_SIGN_P12_BASE64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
+
+ - uses: apple-actions/import-codesign-certs@v2
+ continue-on-error: true
+ if: runner.os == 'macOS'
+ with:
+ p12-file-base64: ${{ secrets.CODE_SIGN_P12_BASE64 }}
+ p12-password: ${{ secrets.CODE_SIGN_P12_PASSWORD }}
+
+ - name: Get Cer for code signing
+ if: runner.os == 'macOS'
+ run: base64 -d <<< "$NOTARIZE_P8_BASE64" > /tmp/notary-key.p8
+ shell: bash
+ env:
+ NOTARIZE_P8_BASE64: ${{ secrets.NOTARIZE_P8_BASE64 }}
+
+ - name: Install dependencies Windows
+ if: runner.os == 'windows'
+ shell: pwsh
+ run: |
+ python3 -m pip install fastapi
+ python3 -m pip freeze | % { python3 -m pip uninstall -y $_ }
+ python3 -m pip install --upgrade pip
+ python3 -m pip install -I -r ${{env.MODEL_DIR}}/requirements.cuda.txt
+ python3 -m pip install python-dotenv
+ - name: Install dependencies Linux
+ if: runner.os == 'linux'
+ run: |
+ conda create -y -n ${{env.MODEL_NAME}} python=3.11
+ source $HOME/miniconda3/bin/activate base
+ conda init
+ conda activate ${{env.MODEL_NAME}}
+ python -m pip install fastapi
+ python -m pip freeze | xargs python -m pip uninstall -y
+ python -m pip install --upgrade pip
+ python -m pip install -r ${{env.MODEL_DIR}}/requirements.cuda.txt
+ python -m pip install python-dotenv
+ - name: Install dependencies Mac
+ if: runner.os == 'macOS'
+ run: |
+ conda create -y -n ${{env.MODEL_NAME}} python=3.11
+ source $HOME/miniconda3/bin/activate base
+ conda init
+ conda activate ${{env.MODEL_NAME}}
+ python -m pip install fastapi
+ python -m pip freeze | xargs python -m pip uninstall -y
+ python -m pip install --upgrade pip
+ python -m pip install -r ${{env.MODEL_DIR}}/requirements.txt
+ python -m pip install python-dotenv
+
+ - name: prepare python package windows
+ if: runner.os == 'windows'
+ shell: pwsh
+ run: |
+ $pythonPath = where.exe python
+ echo "Python path (where.exe): $pythonPath"
+ $pythonFolder = Split-Path -Path "$pythonPath" -Parent
+ echo "PYTHON_FOLDER=$pythonFolder" >> $env:GITHUB_ENV
+ copy "$pythonFolder\python*.*" "$pythonFolder\Scripts\"
+
+ - name: prepare python package macos
+ if: runner.os == 'macOS'
+ run: |
+ source $HOME/miniconda3/bin/activate base
+ conda init
+ conda activate ${{env.MODEL_NAME}}
+ PYTHON_PATH=$(which python)
+ echo $PYTHON_PATH
+ PYTHON_FOLDER=$(dirname $(dirname "$PYTHON_PATH"))
+ echo "PYTHON_FOLDER=$PYTHON_FOLDER" >> $GITHUB_ENV
+ echo "github end PYTHON_FOLDER: ${{env.PYTHON_FOLDER}}"
+ - name: prepare python package linux
+ if: runner.os == 'linux'
+ run: |
+ source $HOME/miniconda3/bin/activate base
+ conda init
+ conda activate ${{env.MODEL_NAME}}
+ PYTHON_PATH=$(which python)
+ echo $PYTHON_PATH
+ PYTHON_FOLDER=$(dirname $(dirname "$PYTHON_PATH"))
+ rm -rf $PYTHON_FOLDER/lib/python3.1
+ echo "PYTHON_FOLDER=$PYTHON_FOLDER" >> $GITHUB_ENV
+ echo "github end PYTHON_FOLDER: ${{env.PYTHON_FOLDER}}"
+
+ - name: create plist file
+ if: runner.os == 'macOS'
+ run: |
+ cat << EOF > /tmp/entitlements.plist
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+ <plist version="1.0">
+ <dict>
+
+ <key>com.apple.security.cs.allow-jit</key>
+ <true/>
+ <key>com.apple.security.cs.allow-unsigned-executable-memory</key>
+ <true/>
+
+
+ <key>com.apple.security.app-sandbox</key>
+ <true/>
+ <key>com.apple.security.network.client</key>
+ <true/>
+ <key>com.apple.security.network.server</key>
+ <true/>
+ <key>com.apple.security.device.audio-input</key>
+ <true/>
+ <key>com.apple.security.device.microphone</key>
+ <true/>
+ <key>com.apple.security.device.camera</key>
+ <true/>
+ <key>com.apple.security.files.user-selected.read-write</key>
+ <true/>
+ <key>com.apple.security.cs.disable-library-validation</key>
+ <true/>
+ <key>com.apple.security.cs.allow-dyld-environment-variables</key>
+ <true/>
+ <key>com.apple.security.cs.allow-executable-memory</key>
+ <true/>
+ </dict>
+ </plist>
+ EOF
+
+ - name: Notary macOS Binary
+ if: runner.os == 'macOS'
+ run: |
+ codesign --force --entitlements="/tmp/entitlements.plist" -s "${{ secrets.DEVELOPER_ID }}" --options=runtime ${{env.PYTHON_FOLDER}}/bin/python
+ codesign --force --entitlements="/tmp/entitlements.plist" -s "${{ secrets.DEVELOPER_ID }}" --options=runtime ${{env.PYTHON_FOLDER}}/bin/python3
+ # Code sign all .so files and .dylib files
+
+ find ${{env.PYTHON_FOLDER}} -type f \( -name "*.so" -o -name "*.dylib" \) -exec codesign --force --entitlements="/tmp/entitlements.plist" -s "${{ secrets.DEVELOPER_ID }}" --options=runtime {} \;
+
+ curl -sSfL https://raw.githubusercontent.com/anchore/quill/main/install.sh | sudo sh -s -- -b /usr/local/bin
+ # Notarize the binary
+ quill notarize ${{env.PYTHON_FOLDER}}/bin/python
+ quill notarize ${{env.PYTHON_FOLDER}}/bin/python3
+ find ${{env.PYTHON_FOLDER}} -type f \( -name "*.so" -o -name "*.dylib" \) -exec quill notarize {} \;
+ env:
+ QUILL_NOTARY_KEY_ID: ${{ secrets.NOTARY_KEY_ID }}
+ QUILL_NOTARY_ISSUER: ${{ secrets.NOTARY_ISSUER }}
+ QUILL_NOTARY_KEY: "/tmp/notary-key.p8"
+
+
+ - name: Upload Artifact MacOS
+ if: runner.os == 'macOS'
+ run: |
+ brew install zip
+ cd ${{env.PYTHON_FOLDER}} && zip -r venv.zip *
+ conda create -y -n hf-upload python=3.11
+ source $HOME/miniconda3/bin/activate base
+ conda init
+ conda activate hf-upload
+ python -m pip install hf-transfer huggingface_hub
+ huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
+ huggingface-cli upload ${{env.HF_REPO}} venv.zip --revision ${{env.HF_PREFIX_BRANCH}}-${{ matrix.os }}-${{ matrix.name }}
+ rm -rf venv.zip
+ huggingface-cli logout
+
+ - name: Upload Artifact Linux
+ if: runner.os == 'linux'
+ run: |
+ sudo apt-get install -y zip
+ cd ${{env.PYTHON_FOLDER}} && zip -r venv.zip *
+ conda create -y -n hf-upload python=3.11
+ source $HOME/miniconda3/bin/activate base
+ conda init
+ conda activate hf-upload
+ python -m pip install hf-transfer huggingface_hub
+ huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
+ huggingface-cli upload ${{env.HF_REPO}} venv.zip --revision ${{env.HF_PREFIX_BRANCH}}-${{ matrix.os }}-${{ matrix.name }}
+ rm -rf venv.zip
+ huggingface-cli logout
+
+
+ - name: Upload Artifact Windows
+ if: runner.os == 'windows'
+ shell: pwsh
+ run: |
+ Compress-Archive -Path ${{env.PYTHON_FOLDER}}/* -DestinationPath venv.zip
+ python -m pip install hf-transfer huggingface_hub
+ huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
+ huggingface-cli upload ${{env.HF_REPO}} venv.zip --revision ${{env.HF_PREFIX_BRANCH}}-${{ matrix.os }}-${{ matrix.name }}
+ rm venv.zip
+ huggingface-cli logout
+
+
+ - name: Post Upload windows
+ if: runner.os == 'windows'
+ run: |
+ rm ${{env.PYTHON_FOLDER}}/Scripts/python*.*
+
+ - name: Remove Keychain
+ continue-on-error: true
+ if: always() && runner.os == 'macOS'
+ run: |
+ security delete-keychain signing_temp.keychain
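
The macOS leg signs the interpreter and every .so/.dylib with hardened-runtime entitlements, then notarizes each file with quill before zipping. To spot-check a signed artifact locally (a sketch; the interpreter path depends on the conda env):

    codesign --verify --strict --verbose=2 "$PYTHON_FOLDER/bin/python"
    codesign -d --entitlements - "$PYTHON_FOLDER/bin/python"   # dump the applied entitlements
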
diff --git a/.github/workflows/stable-build.yml b/.github/workflows/stable-build.yml
index 2b0523771..b05df983d 100644
--- a/.github/workflows/stable-build.yml
+++ b/.github/workflows/stable-build.yml
@@ -67,7 +67,7 @@ jobs:
cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
build-linux-x64:
- uses: ./.github/workflows/template-build-linux-x64.yml
+ uses: ./.github/workflows/template-build-linux.yml
secrets: inherit
needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
with:
@@ -79,6 +79,22 @@ jobs:
channel: stable
upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ arch: amd64
+
+ build-linux-arm64:
+ uses: ./.github/workflows/template-build-linux.yml
+ secrets: inherit
+ needs: [get-update-version, create-draft-release, get-cortex-llamacpp-latest-version]
+ with:
+ ref: ${{ github.ref }}
+ public_provider: github
+ new_version: ${{ needs.get-update-version.outputs.new_version }}
+ runs-on: ubuntu-2004-arm64
+ cmake-flags: "-DCORTEX_VARIANT=prod -DCORTEX_CPP_VERSION='v${{ needs.get-update-version.outputs.new_version }}' -DCMAKE_TOOLCHAIN_FILE=/home/runner/actions-runner/_work/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake"
+ channel: stable
+ upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
+ cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }}
+ arch: arm64
build-docker-x64:
uses: ./.github/workflows/template-build-docker-x64.yml
diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux.yml
similarity index 66%
rename from .github/workflows/template-build-linux-x64.yml
rename to .github/workflows/template-build-linux.yml
index d1ca73844..02cc3a187 100644
--- a/.github/workflows/template-build-linux-x64.yml
+++ b/.github/workflows/template-build-linux.yml
@@ -1,4 +1,4 @@
-name: build-linux-x64
+name: build-linux
on:
workflow_call:
inputs:
@@ -49,6 +49,11 @@ on:
type: string
default: '0.0.0'
description: 'The version of cortex-llamacpp to use for this job'
+ arch:
+ required: false
+ type: string
+ default: 'amd64'
+ description: 'The architecture to use for this job'
secrets:
DELTA_AWS_S3_BUCKET_NAME:
required: false
@@ -60,7 +65,7 @@ on:
required: false
jobs:
- build-linux-x64:
+ build-linux:
runs-on: ${{ inputs.runs-on }}
permissions:
contents: write
@@ -72,6 +77,7 @@ jobs:
submodules: 'recursive'
- name: use python 3.9
+ continue-on-error: true
uses: actions/setup-python@v4
with:
python-version: '3.9'
@@ -124,14 +130,24 @@ jobs:
- name: Configure vcpkg
run: |
cd engine
+ # Set env if arch is arm64
+ if [ "${{ inputs.arch }}" == "arm64" ]; then
+ sudo apt install ninja-build pkg-config -y
+ export VCPKG_FORCE_SYSTEM_BINARIES=1
+ fi
make configure-vcpkg
- name: Build
run: |
cd engine
+ # Set env if arch is arm64
+ if [ "${{ inputs.arch }}" == "arm64" ]; then
+ export VCPKG_FORCE_SYSTEM_BINARIES=1
+ fi
make build CMAKE_EXTRA_FLAGS="${{ inputs.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ inputs.build-deps-cmake-flags }}"
- name: Install Python
+ continue-on-error: true
uses: actions/setup-python@v4
with:
python-version: '3.10'
@@ -145,28 +161,32 @@ jobs:
shell: bash
run: |
cd engine
- make build-installer PACKAGE_NAME="${{ steps.set-output-params.outputs.package_name }}" SOURCE_BINARY_PATH="../../cortex/${{ steps.set-output-params.outputs.destination_binary_name }}" SOURCE_BINARY_SERVER_PATH="../../cortex/${{ steps.set-output-params.outputs.destination_binary_server_name }}" VERSION=${{ inputs.new_version }} DESTINATION_BINARY_NAME="${{ steps.set-output-params.outputs.destination_binary_name }}" DESTINATION_BINARY_SERVER_NAME="${{ steps.set-output-params.outputs.destination_binary_server_name }}" DATA_FOLDER_NAME="${{ steps.set-output-params.outputs.data_folder_name }}" CONFIGURATION_FILE_NAME="${{ steps.set-output-params.outputs.configuration_file_name }}" UNINSTALLER_FILE_NAME="${{ steps.set-output-params.outputs.uninstaller_file_name }}"
+ make build-installer PACKAGE_NAME="${{ steps.set-output-params.outputs.package_name }}" SOURCE_BINARY_PATH="../../cortex/${{ steps.set-output-params.outputs.destination_binary_name }}" SOURCE_BINARY_SERVER_PATH="../../cortex/${{ steps.set-output-params.outputs.destination_binary_server_name }}" VERSION=${{ inputs.new_version }} DESTINATION_BINARY_NAME="${{ steps.set-output-params.outputs.destination_binary_name }}" DESTINATION_BINARY_SERVER_NAME="${{ steps.set-output-params.outputs.destination_binary_server_name }}" DATA_FOLDER_NAME="${{ steps.set-output-params.outputs.data_folder_name }}" CONFIGURATION_FILE_NAME="${{ steps.set-output-params.outputs.configuration_file_name }}" UNINSTALLER_FILE_NAME="${{ steps.set-output-params.outputs.uninstaller_file_name }}" ARCH="${{ inputs.arch }}"
mv ${{ steps.set-output-params.outputs.package_name }}.deb ${{ steps.set-output-params.outputs.package_name }}-network.deb
- name: Build local Installers
run: |
mkdir -p engine/templates/linux/dependencies
cd engine/templates/linux/dependencies
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx-cuda-11-7.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx-cuda-12-0.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2-cuda-11-7.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2-cuda-12-0.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512-cuda-11-7.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512-cuda-12-0.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx-cuda-11-7.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx-cuda-12-0.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-vulkan.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-11-7-linux-amd64.tar.gz
- wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-12-0-linux-amd64.tar.gz
+ if [ "${{ inputs.arch }}" == "amd64" ]; then
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx-cuda-11-7.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx-cuda-12-0.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2-cuda-11-7.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2-cuda-12-0.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx2.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512-cuda-11-7.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512-cuda-12-0.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-avx512.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx-cuda-11-7.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx-cuda-12-0.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-noavx.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-amd64-vulkan.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-11-7-linux-amd64.tar.gz
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cuda-12-0-linux-amd64.tar.gz
+ else
+ wget https://github.com/janhq/cortex.llamacpp/releases/download/v${{ inputs.cortex-llamacpp-version }}/cortex.llamacpp-${{ inputs.cortex-llamacpp-version }}-linux-arm64.tar.gz
+ fi
cd ..
# Remove network package
@@ -174,7 +194,7 @@ jobs:
rm -rf ${{ steps.set-output-params.outputs.package_name }}
rm ${{ steps.set-output-params.outputs.package_name }}.deb
chmod +x create_deb_local.sh
- ./create_deb_local.sh ${{ steps.set-output-params.outputs.package_name }} ${{ inputs.new_version }} ../../cortex/${{ steps.set-output-params.outputs.destination_binary_name }} ../../cortex/${{ steps.set-output-params.outputs.destination_binary_server_name }} ${{ steps.set-output-params.outputs.destination_binary_name }} ${{ steps.set-output-params.outputs.destination_binary_server_name }} ${{ steps.set-output-params.outputs.data_folder_name }} ${{ steps.set-output-params.outputs.configuration_file_name }};
+ ./create_deb_local.sh ${{ steps.set-output-params.outputs.package_name }} ${{ inputs.new_version }} ../../cortex/${{ steps.set-output-params.outputs.destination_binary_name }} ../../cortex/${{ steps.set-output-params.outputs.destination_binary_server_name }} ${{ steps.set-output-params.outputs.destination_binary_name }} ${{ steps.set-output-params.outputs.destination_binary_server_name }} ${{ steps.set-output-params.outputs.data_folder_name }} ${{ steps.set-output-params.outputs.configuration_file_name }} ${{ inputs.arch }};
cp ${{ steps.set-output-params.outputs.package_name }}.deb ../../${{ steps.set-output-params.outputs.package_name }}-local.deb
- name: Package
@@ -185,30 +205,30 @@ jobs:
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
- name: cortex-${{ inputs.new_version }}-linux-amd64
+ name: cortex-${{ inputs.new_version }}-linux-${{ inputs.arch }}
path: ./engine/cortex
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
- name: cortex-${{ inputs.new_version }}-linux-amd64-network-installer
+ name: cortex-${{ inputs.new_version }}-linux-${{ inputs.arch }}-network-installer
path: ./engine/${{ steps.set-output-params.outputs.package_name }}-network.deb
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
- name: cortex-${{ inputs.new_version }}-linux-amd64-local-installer
+ name: cortex-${{ inputs.new_version }}-linux-${{ inputs.arch }}-local-installer
path: ./engine/${{ steps.set-output-params.outputs.package_name }}-local.deb
- name: upload to aws s3 if public provider is aws
if: inputs.public_provider == 'aws-s3'
run: |
- aws s3 cp ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/linux-amd64-cortex-nightly.tar.gz
- aws s3 cp ./engine/${{ steps.set-output-params.outputs.package_name }}-network.deb s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/cortex-linux-amd64-network-installer.deb
+ aws s3 cp ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/linux-${{ inputs.arch }}-cortex-nightly.tar.gz
+ aws s3 cp ./engine/${{ steps.set-output-params.outputs.package_name }}-network.deb s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/temp-latest/cortex-linux-${{ inputs.arch }}-network-installer.deb
- aws s3 cp ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/linux-amd64/cortex-nightly.tar.gz
- aws s3 cp ./engine/${{ steps.set-output-params.outputs.package_name }}-network.deb s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/linux-amd64/cortex-${{ inputs.new_version }}-linux-amd64-network-installer.deb
- aws s3 cp ./engine/${{ steps.set-output-params.outputs.package_name }}-local.deb s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/linux-amd64/cortex-${{ inputs.new_version }}-linux-amd64-local-installer.deb
+ aws s3 cp ./engine/cortex.tar.gz s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/linux-${{ inputs.arch }}/cortex-nightly.tar.gz
+ aws s3 cp ./engine/${{ steps.set-output-params.outputs.package_name }}-network.deb s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/linux-${{ inputs.arch }}/cortex-${{ inputs.new_version }}-linux-${{ inputs.arch }}-network-installer.deb
+ aws s3 cp ./engine/${{ steps.set-output-params.outputs.package_name }}-local.deb s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/cortex/v${{ inputs.new_version }}/linux-${{ inputs.arch }}/cortex-${{ inputs.new_version }}-linux-${{ inputs.arch }}-local-installer.deb
env:
AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }}
@@ -223,7 +243,7 @@ jobs:
with:
upload_url: ${{ inputs.upload_url }}
asset_path: ./engine/cortex.tar.gz
- asset_name: cortex-${{ inputs.new_version }}-linux-amd64.tar.gz
+ asset_name: cortex-${{ inputs.new_version }}-linux-${{ inputs.arch }}.tar.gz
asset_content_type: application/zip
- name: Upload release assert if public provider is github
@@ -234,7 +254,7 @@ jobs:
with:
upload_url: ${{ inputs.upload_url }}
asset_path: ./engine/${{ steps.set-output-params.outputs.package_name }}-network.deb
- asset_name: cortex-${{ inputs.new_version }}-linux-amd64-network-installer.deb
+ asset_name: cortex-${{ inputs.new_version }}-linux-${{ inputs.arch }}-network-installer.deb
asset_content_type: application/octet-stream
- name: Upload release assert if public provider is github
@@ -245,5 +265,5 @@ jobs:
with:
upload_url: ${{ inputs.upload_url }}
asset_path: ./engine/${{ steps.set-output-params.outputs.package_name }}-local.deb
- asset_name: cortex-${{ inputs.new_version }}-linux-amd64-local-installer.deb
+ asset_name: cortex-${{ inputs.new_version }}-linux-${{ inputs.arch }}-local-installer.deb
asset_content_type: application/octet-stream
\ No newline at end of file
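
The arch value is now forwarded as a ninth positional argument to create_deb.sh and create_deb_local.sh, presumably to populate the package's Architecture field; dpkg refuses to install a .deb whose declared architecture does not match the host. A sketch of the consuming end, assuming the conventional dpkg-deb layout (the actual scripts are not part of this diff):

    PACKAGE_NAME=$1; VERSION=$2
    # args 3-8: binary paths and names, data folder, config file (as in the callers)
    ARCH=${9:-amd64}                      # new trailing argument
    mkdir -p "$PACKAGE_NAME/DEBIAN"
    cat > "$PACKAGE_NAME/DEBIAN/control" <<EOF
    Package: $PACKAGE_NAME
    Version: $VERSION
    Architecture: $ARCH
    ...
    EOF
    dpkg-deb --build "$PACKAGE_NAME"
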
diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json
index a05f8b24e..2deb15e5e 100644
--- a/docs/static/openapi/cortex.json
+++ b/docs/static/openapi/cortex.json
@@ -5,77 +5,470 @@
"post": {
"operationId": "AssistantsController_create",
"summary": "Create assistant",
- "description": "Creates a new assistant.",
- "parameters": [],
+ "description": "Creates a new assistant with the specified configuration.",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/CreateAssistantDto"
+ "type": "object",
+ "properties": {
+ "model": {
+ "type": "string",
+ "description": "The model identifier to use for the assistant."
+ },
+ "name": {
+ "type": "string",
+ "description": "The name of the assistant."
+ },
+ "description": {
+ "type": "string",
+ "description": "The description of the assistant."
+ },
+ "instructions": {
+ "type": "string",
+ "description": "Instructions for the assistant's behavior."
+ },
+ "tools": {
+ "type": "array",
+ "description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "code_interpreter",
+ "file_search",
+ "function"
+ ]
+ }
+ }
+ }
+ },
+ "tool_resources": {
+ "type": "object",
+ "description": "Resources used by the assistant's tools.",
+ "properties": {
+ "code_interpreter": {
+ "type": "object"
+ },
+ "file_search": {
+ "type": "object"
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs for the assistant.",
+ "additionalProperties": true
+ },
+ "temperature": {
+ "type": "number",
+ "format": "float",
+ "description": "Temperature parameter for response generation."
+ },
+ "top_p": {
+ "type": "number",
+ "format": "float",
+ "description": "Top p parameter for response generation."
+ },
+ "response_format": {
+ "oneOf": [
+ {
+ "type": "string",
+ "enum": ["auto"]
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "required": ["model"]
}
}
}
},
"responses": {
- "201": {
- "description": "The assistant has been successfully created."
+ "200": {
+ "description": "Ok",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The unique identifier of the assistant."
+ },
+ "object": {
+ "type": "string",
+ "enum": ["assistant"],
+ "description": "The object type, which is always 'assistant'."
+ },
+ "created_at": {
+ "type": "integer",
+ "description": "Unix timestamp (in seconds) of when the assistant was created."
+ },
+ "model": {
+ "type": "string",
+ "description": "The model identifier used by the assistant."
+ },
+ "name": {
+ "type": "string",
+ "description": "The name of the assistant."
+ },
+ "description": {
+ "type": "string",
+ "description": "The description of the assistant."
+ },
+ "instructions": {
+ "type": "string",
+ "description": "Instructions for the assistant's behavior."
+ },
+ "tools": {
+ "type": "array",
+ "description": "A list of tools enabled on the assistant.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "code_interpreter",
+ "file_search",
+ "function"
+ ]
+ }
+ }
+ }
+ },
+ "tool_resources": {
+ "type": "object",
+ "description": "Resources used by the assistant's tools.",
+ "properties": {
+ "code_interpreter": {
+ "type": "object"
+ },
+ "file_search": {
+ "type": "object"
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs that can be attached to the assistant.",
+ "additionalProperties": true
+ },
+ "temperature": {
+ "type": "number",
+ "format": "float",
+ "description": "Temperature parameter for response generation."
+ },
+ "top_p": {
+ "type": "number",
+ "format": "float",
+ "description": "Top p parameter for response generation."
+ },
+ "response_format": {
+ "oneOf": [
+ {
+ "type": "string",
+ "enum": ["auto"]
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "required": [
+ "id",
+ "object",
+ "created_at",
+ "model",
+ "metadata"
+ ]
+ }
+ }
+ }
}
},
"tags": ["Assistants"]
},
- "get": {
- "operationId": "AssistantsController_findAll",
- "summary": "List assistants",
- "description": "Returns a list of assistants.",
+ "patch": {
+ "operationId": "AssistantsController_update",
+ "summary": "Update assistant",
+ "description": "Updates an assistant. Requires at least one modifiable field.",
"parameters": [
{
- "name": "limit",
- "required": false,
- "in": "query",
- "description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
- "schema": {
- "type": "number"
- }
- },
- {
- "name": "order",
- "required": false,
- "in": "query",
- "description": "Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.",
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "after",
- "required": false,
- "in": "query",
- "description": "A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.",
+ "name": "id",
+ "required": true,
+ "in": "path",
+ "description": "The unique identifier of the assistant.",
"schema": {
"type": "string"
}
},
{
- "name": "before",
- "required": false,
- "in": "query",
- "description": "A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.",
+ "name": "OpenAI-Beta",
+ "required": true,
+ "in": "header",
+ "description": "Beta feature header.",
"schema": {
- "type": "string"
+ "type": "string",
+ "enum": ["assistants=v2"]
}
}
],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "model": {
+ "type": "string",
+ "description": "The model identifier to use for the assistant."
+ },
+ "name": {
+ "type": "string",
+ "description": "The name of the assistant."
+ },
+ "description": {
+ "type": "string",
+ "description": "The description of the assistant."
+ },
+ "instructions": {
+ "type": "string",
+ "description": "Instructions for the assistant's behavior."
+ },
+ "tools": {
+ "type": "array",
+ "description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "code_interpreter",
+ "file_search",
+ "function"
+ ]
+ }
+ }
+ }
+ },
+ "tool_resources": {
+ "type": "object",
+ "description": "Resources used by the assistant's tools.",
+ "properties": {
+ "code_interpreter": {
+ "type": "object"
+ },
+ "file_search": {
+ "type": "object"
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs for the assistant.",
+ "additionalProperties": true
+ },
+ "temperature": {
+ "type": "number",
+ "format": "float",
+ "description": "Temperature parameter for response generation."
+ },
+ "top_p": {
+ "type": "number",
+ "format": "float",
+ "description": "Top p parameter for response generation."
+ },
+ "response_format": {
+ "oneOf": [
+ {
+ "type": "string",
+ "enum": ["auto"]
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "minProperties": 1
+ }
+ }
+ }
+ },
"responses": {
"200": {
"description": "Ok",
"content": {
"application/json": {
"schema": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/AssistantEntity"
- }
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The unique identifier of the assistant."
+ },
+ "object": {
+ "type": "string",
+ "enum": ["assistant"],
+ "description": "The object type, which is always 'assistant'."
+ },
+ "created_at": {
+ "type": "integer",
+ "description": "Unix timestamp (in seconds) of when the assistant was created."
+ },
+ "model": {
+ "type": "string",
+ "description": "The model identifier used by the assistant."
+ },
+ "name": {
+ "type": "string",
+ "description": "The name of the assistant."
+ },
+ "description": {
+ "type": "string",
+ "description": "The description of the assistant."
+ },
+ "instructions": {
+ "type": "string",
+ "description": "Instructions for the assistant's behavior."
+ },
+ "tools": {
+ "type": "array",
+ "description": "A list of tools enabled on the assistant.",
+ "items": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "enum": [
+ "code_interpreter",
+ "file_search",
+ "function"
+ ]
+ }
+ }
+ }
+ },
+ "tool_resources": {
+ "type": "object",
+ "description": "Resources used by the assistant's tools.",
+ "properties": {
+ "code_interpreter": {
+ "type": "object"
+ },
+ "file_search": {
+ "type": "object"
+ }
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs that can be attached to the assistant.",
+ "additionalProperties": true
+ },
+ "temperature": {
+ "type": "number",
+ "format": "float",
+ "description": "Temperature parameter for response generation."
+ },
+ "top_p": {
+ "type": "number",
+ "format": "float",
+ "description": "Top p parameter for response generation."
+ },
+ "response_format": {
+ "oneOf": [
+ {
+ "type": "string",
+ "enum": ["auto"]
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "required": [
+ "id",
+ "object",
+ "created_at",
+ "model",
+ "metadata"
+ ]
+ }
+ }
+ }
+ }
+ },
+ "tags": ["Assistants"]
+ },
+ "get": {
+ "operationId": "AssistantsController_list",
+ "summary": "List assistants",
+ "description": "Returns a list of assistants.",
+ "responses": {
+ "200": {
+ "description": "Ok",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "object": {
+ "type": "string",
+ "enum": ["list"],
+ "description": "The object type, which is always 'list' for a list response."
+ },
+ "data": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The unique identifier of the assistant."
+ },
+ "object": {
+ "type": "string",
+ "enum": ["assistant"],
+ "description": "The object type, which is always 'assistant'."
+ },
+ "created_at": {
+ "type": "integer",
+ "description": "Unix timestamp (in seconds) of when the assistant was created."
+ },
+ "model": {
+ "type": "string",
+ "description": "The model identifier used by the assistant."
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs that can be attached to the assistant.",
+ "additionalProperties": true
+ }
+ },
+ "required": [
+ "id",
+ "object",
+ "created_at",
+ "model",
+ "metadata"
+ ]
+ }
+ }
+ },
+ "required": ["object", "data"]
}
}
}
@@ -88,7 +481,7 @@
"get": {
"operationId": "AssistantsController_findOne",
"summary": "Get assistant",
- "description": "Retrieves a specific assistant defined by an assistant's `id`.",
+ "description": "Retrieves a specific assistant by ID.",
"parameters": [
{
"name": "id",
@@ -98,6 +491,16 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "OpenAI-Beta",
+ "required": true,
+ "in": "header",
+ "description": "Beta feature header.",
+ "schema": {
+ "type": "string",
+ "enum": ["assistants=v2"]
+ }
}
],
"responses": {
@@ -106,7 +509,38 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/AssistantEntity"
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The unique identifier of the assistant."
+ },
+ "object": {
+ "type": "string",
+ "enum": ["assistant"],
+ "description": "The object type, which is always 'assistant'."
+ },
+ "created_at": {
+ "type": "integer",
+ "description": "Unix timestamp (in seconds) of when the assistant was created."
+ },
+ "model": {
+ "type": "string",
+ "description": "The model identifier used by the assistant."
+ },
+ "metadata": {
+ "type": "object",
+ "description": "Set of key-value pairs attached to the assistant.",
+ "additionalProperties": true
+ }
+ },
+ "required": [
+ "id",
+ "object",
+ "created_at",
+ "model",
+ "metadata"
+ ]
}
}
}
@@ -117,7 +551,7 @@
"delete": {
"operationId": "AssistantsController_remove",
"summary": "Delete assistant",
- "description": "Deletes a specific assistant defined by an assistant's `id`.",
+ "description": "Deletes a specific assistant by ID.",
"parameters": [
{
"name": "id",
@@ -131,11 +565,28 @@
],
"responses": {
"200": {
- "description": "The assistant has been successfully deleted.",
+ "description": "Ok",
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/DeleteAssistantResponseDto"
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The unique identifier of the deleted assistant."
+ },
+ "object": {
+ "type": "string",
+ "enum": ["assistant.deleted"],
+ "description": "The object type for a deleted assistant."
+ },
+ "deleted": {
+ "type": "boolean",
+ "enum": [true],
+ "description": "Indicates the assistant was successfully deleted."
+ }
+ },
+ "required": ["id", "object", "deleted"]
}
}
}
@@ -2199,6 +2650,84 @@
"tags": ["Engines"]
}
},
+ "/v1/engines/{name}/releases/{version}": {
+ "get": {
+ "summary": "List variants for a specific engine version",
+ "description": "Lists all available variants (builds) for a specific version of an engine. Variants can include different CPU architectures (AVX, AVX2, AVX512), GPU support (CUDA, Vulkan), and operating systems (Windows, Linux, macOS).",
+ "parameters": [
+ {
+ "name": "name",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
+ "default": "llama-cpp"
+ },
+ "description": "The type of engine"
+ },
+ {
+ "name": "version",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string"
+ },
+ "description": "The version of the engine"
+ },
+ {
+ "name": "show",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string",
+ "enum": ["all", "compatible"],
+ "default": "all"
+ },
+ "description": "Filter the variants list. Use 'compatible' to show only variants compatible with the current system, or 'all' to show all available variants."
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successfully retrieved variants list",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "The name of the variant, including OS, architecture, and capabilities",
+ "example": "linux-amd64-avx-cuda-11-7"
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time",
+ "description": "Creation timestamp of the variant",
+ "example": "2024-11-13T04:51:16Z"
+ },
+ "size": {
+ "type": "integer",
+ "description": "Size of the variant in bytes",
+ "example": 151224604
+ },
+ "download_count": {
+ "type": "integer",
+ "description": "Number of times this variant has been downloaded",
+ "example": 0
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "tags": ["Engines"]
+ }
+ },
"/v1/engines/{name}/releases/latest": {
"get": {
"summary": "Get latest release",
@@ -3378,6 +3907,7 @@
"Files",
"Hardware",
"Events",
+ "Assistants",
"Threads",
"Messages",
"Pulling Models",
@@ -4858,8 +5388,8 @@
"engine",
"version",
"inference_params",
- "TransformReq",
- "TransformResp",
+ "transform_req",
+ "transform_resp",
"metadata"
],
"properties": {
@@ -4867,9 +5397,9 @@
"type": "string",
"description": "The identifier of the model."
},
- "api_key_template": {
+ "header_template": {
"type": "string",
- "description": "Template for the API key header."
+ "description": "Template for the header."
},
"engine": {
"type": "string",
@@ -4902,7 +5432,7 @@
}
}
},
- "TransformReq": {
+ "transform_req": {
"type": "object",
"properties": {
"get_models": {
@@ -4924,7 +5454,7 @@
}
}
},
- "TransformResp": {
+ "transform_resp": {
"type": "object",
"properties": {
"chat_completions": {
@@ -5632,9 +6162,9 @@
"description": "Number of GPU layers.",
"example": 33
},
- "api_key_template": {
+ "header_template": {
"type": "string",
- "description": "Template for the API key header."
+ "description": "Template for the header."
},
"version": {
"type": "string",
diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt
index 25c0783b1..024f015a8 100644
--- a/engine/CMakeLists.txt
+++ b/engine/CMakeLists.txt
@@ -142,9 +142,14 @@ file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cortex_openapi.h"
add_executable(${TARGET_NAME} main.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc
+
+ ${CMAKE_CURRENT_SOURCE_DIR}/extensions/template_renderer.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/extensions/python-engine/python_engine.cc
+
${CMAKE_CURRENT_SOURCE_DIR}/utils/dylib_path_manager.cc
+
${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc
- ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/template_renderer.cc
+
)
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
@@ -157,7 +162,7 @@ target_link_libraries(${TARGET_NAME} PRIVATE JsonCpp::JsonCpp Drogon::Drogon Ope
target_link_libraries(${TARGET_NAME} PRIVATE SQLiteCpp)
target_link_libraries(${TARGET_NAME} PRIVATE eventpp::eventpp)
target_link_libraries(${TARGET_NAME} PRIVATE lfreist-hwinfo::hwinfo)
-
+
# ##############################################################################
if(CMAKE_CXX_STANDARD LESS 17)
diff --git a/engine/Makefile b/engine/Makefile
index 8f27eebcc..a6a4b5a79 100644
--- a/engine/Makefile
+++ b/engine/Makefile
@@ -24,6 +24,7 @@ DESTINATION_BINARY_SERVER_NAME ?= cortex-server
DATA_FOLDER_NAME ?= .cortex
CONFIGURATION_FILE_NAME ?= .cortexrc
UNINSTALLER_FILE_NAME ?= cortex-uninstall.sh
+ARCH ?= amd64
# Default target, does nothing
all:
@@ -120,7 +121,7 @@ else ifeq ($(shell uname -s),Linux)
@echo "Building installer for linux"; \
cd templates/linux; \
chmod +x create_deb.sh; \
- ./create_deb.sh $(PACKAGE_NAME) $(VERSION) $(SOURCE_BINARY_PATH) $(SOURCE_BINARY_SERVER_PATH) $(DESTINATION_BINARY_NAME) $(DESTINATION_BINARY_SERVER_NAME) $(DATA_FOLDER_NAME) $(CONFIGURATION_FILE_NAME); \
+ ./create_deb.sh $(PACKAGE_NAME) $(VERSION) $(SOURCE_BINARY_PATH) $(SOURCE_BINARY_SERVER_PATH) $(DESTINATION_BINARY_NAME) $(DESTINATION_BINARY_SERVER_NAME) $(DATA_FOLDER_NAME) $(CONFIGURATION_FILE_NAME) $(ARCH); \
cp $(PACKAGE_NAME).deb ../../
else
@echo "Building installer for Macos"; \
diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt
index df4f1a76b..4ca734d6a 100644
--- a/engine/cli/CMakeLists.txt
+++ b/engine/cli/CMakeLists.txt
@@ -83,8 +83,12 @@ add_executable(${TARGET_NAME} main.cc
${CMAKE_CURRENT_SOURCE_DIR}/../services/model_service.cc
${CMAKE_CURRENT_SOURCE_DIR}/../services/inference_service.cc
${CMAKE_CURRENT_SOURCE_DIR}/../services/hardware_service.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/../services/database_service.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc
- ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/template_renderer.cc
+
+ ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/python-engine/python_engine.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/template_renderer.cc
+
${CMAKE_CURRENT_SOURCE_DIR}/utils/easywsclient.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/download_progress.cc
${CMAKE_CURRENT_SOURCE_DIR}/../utils/config_yaml_utils.cc
diff --git a/engine/cli/command_line_parser.cc b/engine/cli/command_line_parser.cc
index 825780895..b423a6896 100644
--- a/engine/cli/command_line_parser.cc
+++ b/engine/cli/command_line_parser.cc
@@ -49,8 +49,9 @@ CommandLineParser::CommandLineParser()
: app_("\nCortex.cpp CLI\n"),
download_service_{std::make_shared<DownloadService>()},
dylib_path_manager_{std::make_shared<cortex::DylibPathManager>()},
- engine_service_{std::make_shared<EngineService>(download_service_,
- dylib_path_manager_)} {
+ db_service_{std::make_shared<DatabaseService>()},
+ engine_service_{std::make_shared<EngineService>(
+ download_service_, dylib_path_manager_, db_service_)} {
supported_engines_ = engine_service_->GetSupportedEngineNames().value();
}
@@ -177,7 +178,7 @@ void CommandLineParser::SetupCommonCommands() {
return;
commands::RunCmd rc(cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort),
- cml_data_.model_id, engine_service_);
+ cml_data_.model_id, db_service_, engine_service_);
rc.Exec(cml_data_.run_detach, run_settings_);
});
}
@@ -216,9 +217,10 @@ void CommandLineParser::SetupModelCommands() {
CLI_LOG(model_start_cmd->help());
return;
};
- commands::ModelStartCmd().Exec(cml_data_.config.apiServerHost,
- std::stoi(cml_data_.config.apiServerPort),
- cml_data_.model_id, run_settings_);
+ commands::ModelStartCmd(db_service_)
+ .Exec(cml_data_.config.apiServerHost,
+ std::stoi(cml_data_.config.apiServerPort), cml_data_.model_id,
+ run_settings_);
});
auto stop_model_cmd =
@@ -906,6 +908,7 @@ void CommandLineParser::ModelUpdate(CLI::App* parent) {
"ngl",
"ctx_len",
"n_parallel",
+ "cpu_threads",
"engine",
"prompt_template",
"system_template",
diff --git a/engine/cli/command_line_parser.h b/engine/cli/command_line_parser.h
index 14e10e420..5b64f7f4d 100644
--- a/engine/cli/command_line_parser.h
+++ b/engine/cli/command_line_parser.h
@@ -45,6 +45,7 @@ class CommandLineParser {
CLI::App app_;
std::shared_ptr<DownloadService> download_service_;
std::shared_ptr<cortex::DylibPathManager> dylib_path_manager_;
+ std::shared_ptr<DatabaseService> db_service_;
std::shared_ptr<EngineService> engine_service_;
std::vector<std::string> supported_engines_;
diff --git a/engine/cli/commands/chat_completion_cmd.cc b/engine/cli/commands/chat_completion_cmd.cc
index 0067b1c08..77ee4fca3 100644
--- a/engine/cli/commands/chat_completion_cmd.cc
+++ b/engine/cli/commands/chat_completion_cmd.cc
@@ -50,17 +50,15 @@ size_t WriteCallback(char* ptr, size_t size, size_t nmemb, void* userdata) {
return data_length;
}
-
} // namespace
void ChatCompletionCmd::Exec(const std::string& host, int port,
const std::string& model_handle, std::string msg) {
namespace fs = std::filesystem;
namespace fmu = file_manager_utils;
- cortex::db::Models modellist_handler;
config::YamlHandler yaml_handler;
try {
- auto model_entry = modellist_handler.GetModelInfo(model_handle);
+ auto model_entry = db_service_->GetModelInfo(model_handle);
if (model_entry.has_error()) {
CLI_LOG("Error: " + model_entry.error());
return;
@@ -103,7 +101,7 @@ void ChatCompletionCmd::Exec(const std::string& host, int port,
return;
}
- std::string url = "http://" + address + "/v1/chat/completions";
+ auto url = "http://" + address + "/v1/chat/completions";
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_POST, 1L);
@@ -151,9 +149,10 @@ void ChatCompletionCmd::Exec(const std::string& host, int port,
json_data["model"] = model_handle;
json_data["stream"] = true;
- std::string json_payload = json_data.toStyledString();
-
- curl_easy_setopt(curl, CURLOPT_POSTFIELDS, json_payload.c_str());
+ auto json_str = json_data.toStyledString();
+ curl_easy_setopt(curl, CURLOPT_POSTFIELDS, json_str.c_str());
+ curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, json_str.length());
+ curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1L);
std::string ai_chat;
StreamingCallback callback;
@@ -161,8 +160,7 @@ void ChatCompletionCmd::Exec(const std::string& host, int port,
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &callback);
-
- CURLcode res = curl_easy_perform(curl);
+ auto res = curl_easy_perform(curl);
if (res != CURLE_OK) {
CLI_LOG("CURL request failed: " << curl_easy_strerror(res));
diff --git a/engine/cli/commands/chat_completion_cmd.h b/engine/cli/commands/chat_completion_cmd.h
index a784b4604..44de5d256 100644
--- a/engine/cli/commands/chat_completion_cmd.h
+++ b/engine/cli/commands/chat_completion_cmd.h
@@ -3,16 +3,20 @@
#include <string>
#include <vector>
#include "config/model_config.h"
+#include "services/database_service.h"
namespace commands {
class ChatCompletionCmd {
public:
+ explicit ChatCompletionCmd(std::shared_ptr<DatabaseService> db_service)
+ : db_service_(db_service) {}
void Exec(const std::string& host, int port, const std::string& model_handle,
std::string msg);
void Exec(const std::string& host, int port, const std::string& model_handle,
const config::ModelConfig& mc, std::string msg);
private:
+ std::shared_ptr<DatabaseService> db_service_;
std::vector<Json::Value> histories_;
};
} // namespace commands
diff --git a/engine/cli/commands/model_start_cmd.cc b/engine/cli/commands/model_start_cmd.cc
index 12aec944d..ef5d5c1f2 100644
--- a/engine/cli/commands/model_start_cmd.cc
+++ b/engine/cli/commands/model_start_cmd.cc
@@ -13,7 +13,7 @@ bool ModelStartCmd::Exec(
const std::unordered_map& options,
bool print_success_log) {
std::optional<std::string> model_id =
- SelectLocalModel(host, port, model_handle);
+ SelectLocalModel(host, port, model_handle, *db_service_);
if (!model_id.has_value()) {
return false;
diff --git a/engine/cli/commands/model_start_cmd.h b/engine/cli/commands/model_start_cmd.h
index 124ef463d..c69bfc32a 100644
--- a/engine/cli/commands/model_start_cmd.h
+++ b/engine/cli/commands/model_start_cmd.h
@@ -3,16 +3,23 @@
#include <string>
#include <unordered_map>
#include "json/json.h"
+#include "services/database_service.h"
namespace commands {
class ModelStartCmd {
public:
+ explicit ModelStartCmd(std::shared_ptr<DatabaseService> db_service)
+ : db_service_(db_service) {}
bool Exec(const std::string& host, int port, const std::string& model_handle,
const std::unordered_map& options,
bool print_success_log = true);
- private:
+
+ private:
bool UpdateConfig(Json::Value& data, const std::string& key,
const std::string& value);
+
+ private:
+ std::shared_ptr<DatabaseService> db_service_;
};
} // namespace commands
diff --git a/engine/cli/commands/model_upd_cmd.cc b/engine/cli/commands/model_upd_cmd.cc
index 6534d1fbd..1572581ec 100644
--- a/engine/cli/commands/model_upd_cmd.cc
+++ b/engine/cli/commands/model_upd_cmd.cc
@@ -228,6 +228,12 @@ void ModelUpdCmd::UpdateConfig(Json::Value& data, const std::string& key,
data["n_parallel"] = static_cast(f);
});
}},
+ {"cpu_threads",
+ [this](Json::Value &data, const std::string& k, const std::string& v) {
+ UpdateNumericField(k, v, [&data](float f) {
+ data["cpu_threads"] = static_cast(f);
+ });
+ }},
{"tp",
[this](Json::Value &data, const std::string& k, const std::string& v) {
UpdateNumericField(k, v, [&data](float f) {
diff --git a/engine/cli/commands/run_cmd.cc b/engine/cli/commands/run_cmd.cc
index 91a813d64..c01d3d806 100644
--- a/engine/cli/commands/run_cmd.cc
+++ b/engine/cli/commands/run_cmd.cc
@@ -14,12 +14,11 @@
namespace commands {
std::optional<std::string> SelectLocalModel(std::string host, int port,
- const std::string& model_handle) {
+ const std::string& model_handle,
+ DatabaseService& db_service) {
std::optional<std::string> model_id = model_handle;
- cortex::db::Models modellist_handler;
-
if (model_handle.empty()) {
- auto all_local_models = modellist_handler.LoadModelList();
+ auto all_local_models = db_service.LoadModelList();
if (all_local_models.has_error() || all_local_models.value().empty()) {
CLI_LOG("No local models available!");
return std::nullopt;
@@ -42,7 +41,7 @@ std::optional<std::string> SelectLocalModel(std::string host, int port,
CLI_LOG("Selected: " << selection.value());
}
} else {
- auto related_models_ids = modellist_handler.FindRelatedModel(model_handle);
+ auto related_models_ids = db_service.FindRelatedModel(model_handle);
if (related_models_ids.has_error() || related_models_ids.value().empty()) {
auto result = ModelPullCmd().Exec(host, port, model_handle);
if (!result) {
@@ -69,19 +68,18 @@ std::optional<std::string> SelectLocalModel(std::string host, int port,
void RunCmd::Exec(bool run_detach,
const std::unordered_map& options) {
std::optional<std::string> model_id =
- SelectLocalModel(host_, port_, model_handle_);
+ SelectLocalModel(host_, port_, model_handle_, *db_service_);
if (!model_id.has_value()) {
return;
}
- cortex::db::Models modellist_handler;
config::YamlHandler yaml_handler;
auto address = host_ + ":" + std::to_string(port_);
try {
namespace fs = std::filesystem;
namespace fmu = file_manager_utils;
- auto model_entry = modellist_handler.GetModelInfo(*model_id);
+ auto model_entry = db_service_->GetModelInfo(*model_id);
if (model_entry.has_error()) {
CLI_LOG("Error: " + model_entry.error());
return;
@@ -128,7 +126,7 @@ void RunCmd::Exec(bool run_detach,
mc.engine.find(kLlamaEngine) == std::string::npos) ||
!commands::ModelStatusCmd().IsLoaded(host_, port_, *model_id)) {
- auto res = commands::ModelStartCmd()
+ auto res = commands::ModelStartCmd(db_service_)
.Exec(host_, port_, *model_id, options,
false /*print_success_log*/);
if (!res) {
@@ -144,7 +142,7 @@ void RunCmd::Exec(bool run_detach,
<< commands::GetCortexBinary() << " run " << *model_id
<< "` for interactive chat shell");
} else {
- ChatCompletionCmd().Exec(host_, port_, *model_id, mc, "");
+ ChatCompletionCmd(db_service_).Exec(host_, port_, *model_id, mc, "");
}
}
} catch (const std::exception& e) {
diff --git a/engine/cli/commands/run_cmd.h b/engine/cli/commands/run_cmd.h
index b22b064f9..ec5c61fd3 100644
--- a/engine/cli/commands/run_cmd.h
+++ b/engine/cli/commands/run_cmd.h
@@ -2,20 +2,24 @@
#include
#include
+#include "services/database_service.h"
#include "services/engine_service.h"
namespace commands {
std::optional<std::string> SelectLocalModel(std::string host, int port,
- const std::string& model_handle);
+ const std::string& model_handle,
+ DatabaseService& db_service);
class RunCmd {
public:
explicit RunCmd(std::string host, int port, std::string model_handle,
+ std::shared_ptr<DatabaseService> db_service,
std::shared_ptr<EngineService> engine_service)
: host_{std::move(host)},
port_{port},
model_handle_{std::move(model_handle)},
+ db_service_(db_service),
engine_service_{engine_service} {};
void Exec(bool chat_flag,
@@ -25,6 +29,7 @@ class RunCmd {
std::string host_;
int port_;
std::string model_handle_;
+ std::shared_ptr<DatabaseService> db_service_;
std::shared_ptr<EngineService> engine_service_;
};
} // namespace commands
diff --git a/engine/cli/commands/server_start_cmd.cc b/engine/cli/commands/server_start_cmd.cc
index 3d6045cd5..4268f6362 100644
--- a/engine/cli/commands/server_start_cmd.cc
+++ b/engine/cli/commands/server_start_cmd.cc
@@ -114,7 +114,8 @@ bool ServerStartCmd::Exec(const std::string& host, int port,
// Some engines require the lib search path to be added before the process is created
auto download_srv = std::make_shared<DownloadService>();
auto dylib_path_mng = std::make_shared<cortex::DylibPathManager>();
- EngineService(download_srv, dylib_path_mng).RegisterEngineLibPath();
+ auto db_srv = std::make_shared<DatabaseService>();
+ EngineService(download_srv, dylib_path_mng, db_srv).RegisterEngineLibPath();
std::string p = cortex_utils::GetCurrentPath() + "/" + exe;
execl(p.c_str(), exe.c_str(), "--start-server", "--config_file_path",
diff --git a/engine/common/assistant.h b/engine/common/assistant.h
index e49147e9e..6210a0c2c 100644
--- a/engine/common/assistant.h
+++ b/engine/common/assistant.h
@@ -1,9 +1,13 @@
#pragma once
#include
+#include "common/assistant_code_interpreter_tool.h"
+#include "common/assistant_file_search_tool.h"
+#include "common/assistant_function_tool.h"
#include "common/assistant_tool.h"
-#include "common/thread_tool_resources.h"
+#include "common/tool_resources.h"
#include "common/variant_map.h"
+#include "utils/logging_utils.h"
#include "utils/result.hpp"
namespace OpenAi {
@@ -75,7 +79,49 @@ struct JanAssistant : JsonSerializable {
}
};
-struct Assistant {
+struct Assistant : JsonSerializable {
+ Assistant() = default;
+
+ ~Assistant() = default;
+
+ Assistant(const Assistant&) = delete;
+
+ Assistant& operator=(const Assistant&) = delete;
+
+ Assistant(Assistant&& other) noexcept
+ : id{std::move(other.id)},
+ object{std::move(other.object)},
+ created_at{other.created_at},
+ name{std::move(other.name)},
+ description{std::move(other.description)},
+ model(std::move(other.model)),
+ instructions(std::move(other.instructions)),
+ tools(std::move(other.tools)),
+ tool_resources(std::move(other.tool_resources)),
+ metadata(std::move(other.metadata)),
+ temperature{std::move(other.temperature)},
+ top_p{std::move(other.top_p)},
+ response_format{std::move(other.response_format)} {}
+
+ Assistant& operator=(Assistant&& other) noexcept {
+ if (this != &other) {
+ id = std::move(other.id);
+ object = std::move(other.object);
+ created_at = other.created_at;
+ name = std::move(other.name);
+ description = std::move(other.description);
+ model = std::move(other.model);
+ instructions = std::move(other.instructions);
+ tools = std::move(other.tools);
+ tool_resources = std::move(other.tool_resources);
+ metadata = std::move(other.metadata);
+ temperature = std::move(other.temperature);
+ top_p = std::move(other.top_p);
+ response_format = std::move(other.response_format);
+ }
+ return *this;
+ }
+
/**
* The identifier, which can be referenced in API endpoints.
*/
@@ -126,8 +172,7 @@ struct Assistant {
* requires a list of file IDs, while the file_search tool requires a list
* of vector store IDs.
*/
- std::optional<std::variant<ThreadCodeInterpreter, ThreadFileSearch>>
- tool_resources;
+ std::unique_ptr<ToolResources> tool_resources;
/**
* Set of 16 key-value pairs that can be attached to an object. This can be
@@ -153,5 +198,223 @@ struct Assistant {
* We generally recommend altering this or temperature but not both.
*/
std::optional<float> top_p;
+
+ std::variant<std::string, Json::Value> response_format;
+
+ cpp::result<Json::Value, std::string> ToJson() override {
+ try {
+ Json::Value root;
+
+ root["id"] = std::move(id);
+ root["object"] = "assistant";
+ root["created_at"] = created_at;
+ if (name.has_value()) {
+ root["name"] = name.value();
+ }
+ if (description.has_value()) {
+ root["description"] = description.value();
+ }
+ root["model"] = model;
+ if (instructions.has_value()) {
+ root["instructions"] = instructions.value();
+ }
+
+ Json::Value tools_jarr{Json::arrayValue};
+ for (auto& tool_ptr : tools) {
+ if (auto it = tool_ptr->ToJson(); it.has_value()) {
+ tools_jarr.append(it.value());
+ } else {
+ CTL_WRN("Failed to convert content to json: " + it.error());
+ }
+ }
+ root["tools"] = tools_jarr;
+ if (tool_resources) {
+ Json::Value tool_resources_json{Json::objectValue};
+
+ if (auto* code_interpreter =
+ dynamic_cast<CodeInterpreter*>(tool_resources.get())) {
+ auto result = code_interpreter->ToJson();
+ if (result.has_value()) {
+ tool_resources_json["code_interpreter"] = result.value();
+ } else {
+ CTL_WRN("Failed to convert code_interpreter to json: " +
+ result.error());
+ }
+ } else if (auto* file_search = dynamic_cast<FileSearch*>(
+ tool_resources.get())) {
+ auto result = file_search->ToJson();
+ if (result.has_value()) {
+ tool_resources_json["file_search"] = result.value();
+ } else {
+ CTL_WRN("Failed to convert file_search to json: " + result.error());
+ }
+ }
+
+ // Only add tool_resources to root if we successfully serialized some resources
+ if (!tool_resources_json.empty()) {
+ root["tool_resources"] = tool_resources_json;
+ }
+ }
+ Json::Value metadata_json{Json::objectValue};
+ for (const auto& [key, value] : metadata) {
+ if (std::holds_alternative<bool>(value)) {
+ metadata_json[key] = std::get<bool>(value);
+ } else if (std::holds_alternative<uint64_t>(value)) {
+ metadata_json[key] = std::get<uint64_t>(value);
+ } else if (std::holds_alternative<double>(value)) {
+ metadata_json[key] = std::get<double>(value);
+ } else {
+ metadata_json[key] = std::get<std::string>(value);
+ }
+ }
+ root["metadata"] = metadata_json;
+
+ if (temperature.has_value()) {
+ root["temperature"] = temperature.value();
+ }
+ if (top_p.has_value()) {
+ root["top_p"] = top_p.value();
+ }
+ return root;
+ } catch (const std::exception& e) {
+ return cpp::fail("ToJson failed: " + std::string(e.what()));
+ }
+ }
+
+ static cpp::result<Assistant, std::string> FromJson(Json::Value&& json) {
+ try {
+ Assistant assistant;
+
+ // Parse required fields
+ if (!json.isMember("id") || !json["id"].isString()) {
+ return cpp::fail("Missing or invalid 'id' field");
+ }
+ assistant.id = json["id"].asString();
+
+ if (!json.isMember("object") || !json["object"].isString() ||
+ json["object"].asString() != "assistant") {
+ return cpp::fail("Missing or invalid 'object' field");
+ }
+
+ if (!json.isMember("created_at") || !json["created_at"].isUInt64()) {
+ return cpp::fail("Missing or invalid 'created_at' field");
+ }
+ assistant.created_at = json["created_at"].asUInt64();
+
+ if (!json.isMember("model") || !json["model"].isString()) {
+ return cpp::fail("Missing or invalid 'model' field");
+ }
+ assistant.model = json["model"].asString();
+
+ // Parse optional fields
+ if (json.isMember("name") && json["name"].isString()) {
+ assistant.name = json["name"].asString();
+ }
+
+ if (json.isMember("description") && json["description"].isString()) {
+ assistant.description = json["description"].asString();
+ }
+
+ if (json.isMember("instructions") && json["instructions"].isString()) {
+ assistant.instructions = json["instructions"].asString();
+ }
+
+ // Parse tools array
+ if (json.isMember("tools") && json["tools"].isArray()) {
+ auto tools_array = json["tools"];
+ for (const auto& tool : tools_array) {
+ if (!tool.isMember("type") || !tool["type"].isString()) {
+ CTL_WRN("Tool missing type field or invalid type");
+ continue;
+ }
+
+ std::string tool_type = tool["type"].asString();
+ if (tool_type == "file_search") {
+ auto result = AssistantFileSearchTool::FromJson(tool);
+ if (result.has_value()) {
+ assistant.tools.push_back(
+ std::make_unique<AssistantFileSearchTool>(
+ std::move(result.value())));
+ } else {
+ CTL_WRN("Failed to parse file_search tool: " + result.error());
+ }
+ } else if (tool_type == "code_interpreter") {
+ auto result = AssistantCodeInterpreterTool::FromJson();
+ if (result.has_value()) {
+ assistant.tools.push_back(
+ std::make_unique<AssistantCodeInterpreterTool>(
+ std::move(result.value())));
+ } else {
+ CTL_WRN("Failed to parse code_interpreter tool: " +
+ result.error());
+ }
+ } else if (tool_type == "function") {
+ auto result = AssistantFunctionTool::FromJson(tool);
+ if (result.has_value()) {
+ assistant.tools.push_back(std::make_unique<AssistantFunctionTool>(
+ std::move(result.value())));
+ } else {
+ CTL_WRN("Failed to parse function tool: " + result.error());
+ }
+ } else {
+ CTL_WRN("Unknown tool type: " + tool_type);
+ }
+ }
+ }
+
+ if (json.isMember("tool_resources") &&
+ json["tool_resources"].isObject()) {
+ const auto& tool_resources_json = json["tool_resources"];
+
+ // Parse code interpreter resources
+ if (tool_resources_json.isMember("code_interpreter")) {
+ auto result = OpenAi::CodeInterpreter::FromJson(
+ tool_resources_json["code_interpreter"]);
+ if (result.has_value()) {
+ assistant.tool_resources =
+ std::make_unique<OpenAi::CodeInterpreter>(
+ std::move(result.value()));
+ } else {
+ CTL_WRN("Failed to parse code_interpreter resources: " +
+ result.error());
+ }
+ }
+
+ // Parse file search resources
+ if (tool_resources_json.isMember("file_search")) {
+ auto result =
+ OpenAi::FileSearch::FromJson(tool_resources_json["file_search"]);
+ if (result.has_value()) {
+ assistant.tool_resources =
+ std::make_unique<OpenAi::FileSearch>(std::move(result.value()));
+ } else {
+ CTL_WRN("Failed to parse file_search resources: " + result.error());
+ }
+ }
+ }
+
+ // Parse metadata
+ if (json.isMember("metadata") && json["metadata"].isObject()) {
+ auto res = Cortex::ConvertJsonValueToMap(json["metadata"]);
+ if (res.has_value()) {
+ assistant.metadata = res.value();
+ } else {
+ CTL_WRN("Failed to convert metadata to map: " + res.error());
+ }
+ }
+
+ if (json.isMember("temperature") && json["temperature"].isDouble()) {
+ assistant.temperature = json["temperature"].asFloat();
+ }
+
+ if (json.isMember("top_p") && json["top_p"].isDouble()) {
+ assistant.top_p = json["top_p"].asFloat();
+ }
+
+ return assistant;
+ } catch (const std::exception& e) {
+ return cpp::fail("FromJson failed: " + std::string(e.what()));
+ }
+ }
};
} // namespace OpenAi
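
For reference, the JSON shape `Assistant::FromJson` consumes, and the four fields it hard-requires; a small jsoncpp sketch with invented field values:

```cpp
#include <iostream>
#include <json/json.h>

int main() {
  // id, object, created_at and model are the fields FromJson treats as
  // mandatory; everything else is optional.
  Json::Value assistant;
  assistant["id"] = "asst_123";
  assistant["object"] = "assistant";
  assistant["created_at"] = Json::UInt64{1730000000};
  assistant["model"] = "tinyllama";
  assistant["temperature"] = 0.7;

  // Mirror of the guards in FromJson above.
  bool ok = assistant.isMember("id") && assistant["id"].isString() &&
            assistant["object"].asString() == "assistant" &&
            assistant["created_at"].isUInt64() &&
            assistant.isMember("model") && assistant["model"].isString();
  std::cout << (ok ? "parseable" : "rejected") << "\n";
}
```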
diff --git a/engine/common/assistant_code_interpreter_tool.h b/engine/common/assistant_code_interpreter_tool.h
new file mode 100644
index 000000000..43bfac47c
--- /dev/null
+++ b/engine/common/assistant_code_interpreter_tool.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include "common/assistant_tool.h"
+
+namespace OpenAi {
+struct AssistantCodeInterpreterTool : public AssistantTool {
+ AssistantCodeInterpreterTool() : AssistantTool("code_interpreter") {}
+
+ AssistantCodeInterpreterTool(const AssistantCodeInterpreterTool&) = delete;
+
+ AssistantCodeInterpreterTool& operator=(const AssistantCodeInterpreterTool&) =
+ delete;
+
+ AssistantCodeInterpreterTool(AssistantCodeInterpreterTool&&) = default;
+
+ AssistantCodeInterpreterTool& operator=(AssistantCodeInterpreterTool&&) =
+ default;
+
+ ~AssistantCodeInterpreterTool() = default;
+
+ static cpp::result<AssistantCodeInterpreterTool, std::string> FromJson() {
+ AssistantCodeInterpreterTool tool;
+ return std::move(tool);
+ }
+
+ cpp::result<Json::Value, std::string> ToJson() override {
+ Json::Value json;
+ json["type"] = type;
+ return json;
+ }
+};
+} // namespace OpenAi
diff --git a/engine/common/assistant_file_search_tool.h b/engine/common/assistant_file_search_tool.h
new file mode 100644
index 000000000..2abaa7f6e
--- /dev/null
+++ b/engine/common/assistant_file_search_tool.h
@@ -0,0 +1,151 @@
+#pragma once
+
+#include "common/assistant_tool.h"
+#include "common/json_serializable.h"
+
+namespace OpenAi {
+struct FileSearchRankingOption : public JsonSerializable {
+ /**
+ * The ranker to use for the file search. If not specified, the auto ranker is used.
+ */
+ std::string ranker;
+
+ /**
+ * The score threshold for the file search. All values must be a
+ * floating point number between 0 and 1.
+ */
+ float score_threshold;
+
+ FileSearchRankingOption(float score_threshold,
+ const std::string& ranker = "auto")
+ : ranker{ranker}, score_threshold{score_threshold} {}
+
+ FileSearchRankingOption(const FileSearchRankingOption&) = delete;
+
+ FileSearchRankingOption& operator=(const FileSearchRankingOption&) = delete;
+
+ FileSearchRankingOption(FileSearchRankingOption&&) = default;
+
+ FileSearchRankingOption& operator=(FileSearchRankingOption&&) = default;
+
+ ~FileSearchRankingOption() = default;
+
+ static cpp::result<FileSearchRankingOption, std::string> FromJson(
+ const Json::Value& json) {
+ if (!json.isMember("score_threshold")) {
+ return cpp::fail("score_threshold must be provided");
+ }
+
+ FileSearchRankingOption option{
+ json["score_threshold"].asFloat(),
+ std::move(json.get("ranker", "auto").asString())};
+ return option;
+ }
+
+ cpp::result<Json::Value, std::string> ToJson() override {
+ Json::Value json;
+ json["ranker"] = ranker;
+ json["score_threshold"] = score_threshold;
+ return json;
+ }
+};
+
+/**
+ * Overrides for the file search tool.
+ */
+struct AssistantFileSearch : public JsonSerializable {
+ /**
+ * The maximum number of results the file search tool should output.
+ * The default is 20 for gpt-4* models and 5 for gpt-3.5-turbo.
+ * This number should be between 1 and 50 inclusive.
+ *
+ * Note that the file search tool may output fewer than max_num_results results.
+ * See the file search tool documentation for more information.
+ */
+ int max_num_results;
+
+ /**
+ * The ranking options for the file search. If not specified,
+ * the file search tool will use the auto ranker and a score_threshold of 0.
+ *
+ * See the file search tool documentation for more information.
+ */
+ FileSearchRankingOption ranking_options;
+
+ AssistantFileSearch(int max_num_results,
+ FileSearchRankingOption&& ranking_options)
+ : max_num_results{max_num_results},
+ ranking_options{std::move(ranking_options)} {}
+
+ AssistantFileSearch(const AssistantFileSearch&) = delete;
+
+ AssistantFileSearch& operator=(const AssistantFileSearch&) = delete;
+
+ AssistantFileSearch(AssistantFileSearch&&) = default;
+
+ AssistantFileSearch& operator=(AssistantFileSearch&&) = default;
+
+ ~AssistantFileSearch() = default;
+
+ static cpp::result<AssistantFileSearch, std::string> FromJson(
+ const Json::Value& json) {
+ try {
+ AssistantFileSearch search{
+ json["max_num_results"].asInt(),
+ FileSearchRankingOption::FromJson(json["ranking_options"]).value()};
+ return search;
+ } catch (const std::exception& e) {
+ return cpp::fail(std::string("FromJson failed: ") + e.what());
+ }
+ }
+
+ cpp::result<Json::Value, std::string> ToJson() override {
+ Json::Value root;
+ root["max_num_results"] = max_num_results;
+ root["ranking_options"] = ranking_options.ToJson().value();
+ return root;
+ }
+};
+
+struct AssistantFileSearchTool : public AssistantTool {
+ AssistantFileSearch file_search;
+
+ AssistantFileSearchTool(AssistantFileSearch& file_search)
+ : AssistantTool("file_search"), file_search{std::move(file_search)} {}
+
+ AssistantFileSearchTool(const AssistantFileSearchTool&) = delete;
+
+ AssistantFileSearchTool& operator=(const AssistantFileSearchTool&) = delete;
+
+ AssistantFileSearchTool(AssistantFileSearchTool&&) = default;
+
+ AssistantFileSearchTool& operator=(AssistantFileSearchTool&&) = default;
+
+ ~AssistantFileSearchTool() = default;
+
+ static cpp::result<AssistantFileSearchTool, std::string> FromJson(
+ const Json::Value& json) {
+ try {
+ AssistantFileSearch search{json["file_search"]["max_num_results"].asInt(),
+ FileSearchRankingOption::FromJson(
+ json["file_search"]["ranking_options"])
+ .value()};
+ AssistantFileSearchTool tool{search};
+ return tool;
+ } catch (const std::exception& e) {
+ return cpp::fail(std::string("FromJson failed: ") + e.what());
+ }
+ }
+
+ cpp::result<Json::Value, std::string> ToJson() override {
+ try {
+ Json::Value root;
+ root["type"] = type;
+ root["file_search"] = file_search.ToJson().value();
+ return root;
+ } catch (const std::exception& e) {
+ return cpp::fail(std::string("ToJson failed: ") + e.what());
+ }
+ }
+};
+}  // namespace OpenAi
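
A sketch of the payload `AssistantFileSearchTool::FromJson` expects: `score_threshold` is mandatory, while `ranker` falls back to "auto" (the values here are invented):

```cpp
#include <iostream>
#include <json/json.h>

int main() {
  Json::Value tool;
  tool["type"] = "file_search";
  tool["file_search"]["max_num_results"] = 20;  // 1..50 inclusive
  tool["file_search"]["ranking_options"]["score_threshold"] = 0.5;
  // "ranker" omitted on purpose: FromJson defaults it to "auto".
  std::cout << tool.toStyledString();
}
```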
diff --git a/engine/common/assistant_function_tool.h b/engine/common/assistant_function_tool.h
new file mode 100644
index 000000000..7998cb8ff
--- /dev/null
+++ b/engine/common/assistant_function_tool.h
@@ -0,0 +1,130 @@
+#pragma once
+
+#include <optional>
+#include "common/assistant_tool.h"
+#include "common/json_serializable.h"
+
+namespace OpenAi {
+struct AssistantFunction : public JsonSerializable {
+ AssistantFunction(const std::string& description, const std::string& name,
+ const Json::Value& parameters,
+ const std::optional<bool>& strict)
+ : description{std::move(description)},
+ name{std::move(name)},
+ parameters{std::move(parameters)},
+ strict{strict} {}
+
+ AssistantFunction(const AssistantFunction&) = delete;
+
+ AssistantFunction& operator=(const AssistantFunction&) = delete;
+
+ AssistantFunction(AssistantFunction&&) = default;
+
+ AssistantFunction& operator=(AssistantFunction&&) = default;
+
+ ~AssistantFunction() = default;
+
+ /**
+ * A description of what the function does, used by the model to choose
+ * when and how to call the function.
+ */
+ std::string description;
+
+ /**
+ * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain
+ * underscores and dashes, with a maximum length of 64.
+ */
+ std::string name;
+
+ /**
+ * The parameters the functions accepts, described as a JSON Schema object.
+ * See the guide for examples, and the JSON Schema reference for documentation
+ * about the format.
+ *
+ * Omitting parameters defines a function with an empty parameter list.
+ */
+ Json::Value parameters;
+
+ /**
+ * Whether to enable strict schema adherence when generating the function call.
+ * If set to true, the model will follow the exact schema defined in the parameters
+ * field. Only a subset of JSON Schema is supported when strict is true.
+ *
+ * Learn more about Structured Outputs in the function calling guide.
+ */
+ std::optional<bool> strict;
+
+ static cpp::result<AssistantFunction, std::string> FromJson(
+ const Json::Value& json) {
+ if (json.empty()) {
+ return cpp::fail("Function json can't be empty");
+ }
+
+ if (!json.isMember("name") || json.get("name", "").asString().empty()) {
+ return cpp::fail("Function name can't be empty");
+ }
+
+ if (!json.isMember("description")) {
+ return cpp::fail("Function description is mandatory");
+ }
+
+ if (!json.isMember("parameters")) {
+ return cpp::fail("Function parameters are mandatory");
+ }
+
+ std::optional<bool> is_strict = std::nullopt;
+ if (json.isMember("strict")) {
+ is_strict = json["strict"].asBool();
+ }
+ AssistantFunction function{json["description"].asString(),
+ json["name"].asString(), json["parameters"],
+ is_strict};
+ function.parameters = json["parameters"];
+ return function;
+ }
+
+ cpp::result<Json::Value, std::string> ToJson() override {
+ Json::Value json;
+ json["description"] = description;
+ json["name"] = name;
+ if (strict.has_value()) {
+ json["strict"] = *strict;
+ }
+ json["parameters"] = parameters;
+ return json;
+ }
+};
+
+struct AssistantFunctionTool : public AssistantTool {
+ AssistantFunctionTool(AssistantFunction& function)
+ : AssistantTool("function"), function{std::move(function)} {}
+
+ AssistantFunctionTool(const AssistantFunctionTool&) = delete;
+
+ AssistantFunctionTool& operator=(const AssistantFunctionTool&) = delete;
+
+ AssistantFunctionTool(AssistantFunctionTool&&) = default;
+
+ AssistantFunctionTool& operator=(AssistantFunctionTool&&) = default;
+
+ ~AssistantFunctionTool() = default;
+
+ AssistantFunction function;
+
+ static cpp::result<AssistantFunctionTool, std::string> FromJson(
+ const Json::Value& json) {
+ auto function_res = AssistantFunction::FromJson(json["function"]);
+ if (function_res.has_error()) {
+ return cpp::fail("Failed to parse function: " + function_res.error());
+ }
+ return AssistantFunctionTool{function_res.value()};
+ }
+
+ cpp::result<Json::Value, std::string> ToJson() override {
+ Json::Value root;
+ root["type"] = type;
+ root["function"] = function.ToJson().value();
+ return root;
+ }
+};
+}  // namespace OpenAi
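
The corresponding wire shape for a function tool: `name`, `description`, and `parameters` are mandatory per `FromJson` above, `strict` is optional (the function itself is invented for the example):

```cpp
#include <iostream>
#include <json/json.h>

int main() {
  Json::Value tool;
  tool["type"] = "function";
  Json::Value& fn = tool["function"];
  fn["name"] = "get_weather";                      // a-z, A-Z, 0-9, _ and -, max 64
  fn["description"] = "Look up the weather for a location";
  fn["parameters"]["type"] = "object";             // JSON Schema object
  fn["strict"] = true;                             // optional
  std::cout << tool.toStyledString();
}
```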
diff --git a/engine/common/assistant_tool.h b/engine/common/assistant_tool.h
index 622721708..d02392392 100644
--- a/engine/common/assistant_tool.h
+++ b/engine/common/assistant_tool.h
@@ -1,91 +1,27 @@
#pragma once
-#include <optional>
#include <string>
+#include "common/json_serializable.h"
namespace OpenAi {
-struct AssistantTool {
+struct AssistantTool : public JsonSerializable {
std::string type;
AssistantTool(const std::string& type) : type{type} {}
- virtual ~AssistantTool() = default;
-};
-
-struct AssistantCodeInterpreterTool : public AssistantTool {
- AssistantCodeInterpreterTool() : AssistantTool{"code_interpreter"} {}
-
- ~AssistantCodeInterpreterTool() = default;
-};
-
-struct AssistantFileSearchTool : public AssistantTool {
- AssistantFileSearchTool() : AssistantTool("file_search") {}
-
- ~AssistantFileSearchTool() = default;
+ AssistantTool(const AssistantTool&) = delete;
- /**
- * The ranking options for the file search. If not specified,
- * the file search tool will use the auto ranker and a score_threshold of 0.
- *
- * See the file search tool documentation for more information.
- */
- struct RankingOption {
- /**
- * The ranker to use for the file search. If not specified will use the auto ranker.
- */
- std::string ranker;
+ AssistantTool& operator=(const AssistantTool&) = delete;
- /**
- * The score threshold for the file search. All values must be a
- * floating point number between 0 and 1.
- */
- float score_threshold;
- };
+ AssistantTool(AssistantTool&& other) noexcept : type{std::move(other.type)} {}
- /**
- * Overrides for the file search tool.
- */
- struct FileSearch {
- /**
- * The maximum number of results the file search tool should output.
- * The default is 20 for gpt-4* models and 5 for gpt-3.5-turbo.
- * This number should be between 1 and 50 inclusive.
- *
- * Note that the file search tool may output fewer than max_num_results results.
- * See the file search tool documentation for more information.
- */
- int max_num_result;
- };
-};
-
-struct AssistantFunctionTool : public AssistantTool {
- AssistantFunctionTool() : AssistantTool("function") {}
-
- ~AssistantFunctionTool() = default;
-
- struct Function {
- /**
- * A description of what the function does, used by the model to choose
- * when and how to call the function.
- */
- std::string description;
+ AssistantTool& operator=(AssistantTool&& other) noexcept {
+ if (this != &other) {
+ type = std::move(other.type);
+ }
+ return *this;
+ }
- /**
- * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain
- * underscores and dashes, with a maximum length of 64.
- */
- std::string name;
-
- // TODO: namh handle parameters
-
- /**
- * Whether to enable strict schema adherence when generating the function call.
- * If set to true, the model will follow the exact schema defined in the parameters
- * field. Only a subset of JSON Schema is supported when strict is true.
- *
- * Learn more about Structured Outputs in the function calling guide.
- */
- std::optional<bool> strict;
- };
+ virtual ~AssistantTool() = default;
};
} // namespace OpenAi
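
The reworked base class deletes copies and enables moves, so concrete tools are move-only; a stand-alone sketch of the pattern (stand-in types, not the cortex headers):

```cpp
#include <iostream>
#include <string>
#include <utility>

// Mirrors the copy-deleted, move-enabled base above.
struct AssistantTool {
  std::string type;
  explicit AssistantTool(const std::string& t) : type{t} {}
  AssistantTool(const AssistantTool&) = delete;
  AssistantTool& operator=(const AssistantTool&) = delete;
  AssistantTool(AssistantTool&& other) noexcept : type{std::move(other.type)} {}
  virtual ~AssistantTool() = default;
};

struct CodeInterpreterTool : AssistantTool {
  CodeInterpreterTool() : AssistantTool("code_interpreter") {}
  CodeInterpreterTool(CodeInterpreterTool&&) = default;
};

int main() {
  CodeInterpreterTool a;
  CodeInterpreterTool b{std::move(a)};  // moves the type tag; copying won't compile
  std::cout << b.type << "\n";
}
```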
diff --git a/engine/common/base.h b/engine/common/base.h
index 478cc7feb..b5de09059 100644
--- a/engine/common/base.h
+++ b/engine/common/base.h
@@ -5,7 +5,7 @@ using namespace drogon;
class BaseModel {
public:
- virtual ~BaseModel() {}
+ virtual ~BaseModel() = default;
// Model management
virtual void LoadModel(
@@ -27,7 +27,7 @@ class BaseModel {
class BaseChatCompletion {
public:
- virtual ~BaseChatCompletion() {}
+ virtual ~BaseChatCompletion() = default;
// General chat method
virtual void ChatCompletion(
@@ -37,7 +37,7 @@ class BaseChatCompletion {
class BaseEmbedding {
public:
- virtual ~BaseEmbedding() {}
+ virtual ~BaseEmbedding() = default;
// Implement embedding functionality specific to chat
virtual void Embedding(
@@ -46,3 +46,4 @@ class BaseEmbedding {
// The derived class can also override other methods if needed
};
+
diff --git a/engine/common/download_task.h b/engine/common/download_task.h
index 95e736394..53f1902c5 100644
--- a/engine/common/download_task.h
+++ b/engine/common/download_task.h
@@ -6,7 +6,14 @@
#include
#include
-enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit, Cortex };
+enum class DownloadType {
+ Model,
+ Engine,
+ Miscellaneous,
+ CudaToolkit,
+ Cortex,
+ Environments
+};
struct DownloadItem {
@@ -48,6 +55,8 @@ inline std::string DownloadTypeToString(DownloadType type) {
return "CudaToolkit";
case DownloadType::Cortex:
return "Cortex";
+ case DownloadType::Environments:
+ return "Environments";
default:
return "Unknown";
}
@@ -64,6 +73,8 @@ inline DownloadType DownloadTypeFromString(const std::string& str) {
return DownloadType::CudaToolkit;
} else if (str == "Cortex") {
return DownloadType::Cortex;
+ } else if (str == "Environments") {
+ return DownloadType::Environments;
} else {
return DownloadType::Miscellaneous;
}
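
A round-trip sketch for the new `Environments` value, mirroring the two mapping helpers above (only the new case is spelled out; unknown strings fall back to Miscellaneous exactly as in the header):

```cpp
#include <cassert>
#include <string>

enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit, Cortex, Environments };

std::string DownloadTypeToString(DownloadType type) {
  return type == DownloadType::Environments ? "Environments" : "Unknown";
}

DownloadType DownloadTypeFromString(const std::string& str) {
  return str == "Environments" ? DownloadType::Environments
                               : DownloadType::Miscellaneous;
}

int main() {
  assert(DownloadTypeFromString(DownloadTypeToString(
             DownloadType::Environments)) == DownloadType::Environments);
}
```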
diff --git a/engine/common/dto/assistant_create_dto.h b/engine/common/dto/assistant_create_dto.h
new file mode 100644
index 000000000..19d79b833
--- /dev/null
+++ b/engine/common/dto/assistant_create_dto.h
@@ -0,0 +1,211 @@
+#pragma once
+
+#include
+#include
+#include "common/assistant_code_interpreter_tool.h"
+#include "common/assistant_file_search_tool.h"
+#include "common/assistant_function_tool.h"
+#include "common/assistant_tool.h"
+#include "common/dto/base_dto.h"
+#include "common/tool_resources.h"
+#include "common/variant_map.h"
+#include "utils/logging_utils.h"
+
+namespace dto {
+struct CreateAssistantDto : public BaseDto<CreateAssistantDto> {
+ CreateAssistantDto() = default;
+
+ ~CreateAssistantDto() = default;
+
+ CreateAssistantDto(const CreateAssistantDto&) = delete;
+
+ CreateAssistantDto& operator=(const CreateAssistantDto&) = delete;
+
+ CreateAssistantDto(CreateAssistantDto&& other) noexcept
+ : model{std::move(other.model)},
+ name{std::move(other.name)},
+ description{std::move(other.description)},
+ instructions{std::move(other.instructions)},
+ tools{std::move(other.tools)},
+ tool_resources{std::move(other.tool_resources)},
+ metadata{std::move(other.metadata)},
+ temperature{std::move(other.temperature)},
+ top_p{std::move(other.top_p)},
+ response_format{std::move(other.response_format)} {}
+
+ CreateAssistantDto& operator=(CreateAssistantDto&& other) noexcept {
+ if (this != &other) {
+ model = std::move(other.model);
+ name = std::move(other.name);
+ description = std::move(other.description);
+ instructions = std::move(other.instructions);
+ tools = std::move(other.tools);
+ tool_resources = std::move(other.tool_resources);
+ metadata = std::move(other.metadata);
+ temperature = std::move(other.temperature);
+ top_p = std::move(other.top_p);
+ response_format = std::move(other.response_format);
+ }
+ return *this;
+ }
+
+ std::string model;
+
+ std::optional<std::string> name;
+
+ std::optional<std::string> description;
+
+ std::optional<std::string> instructions;
+
+ /**
+ * A list of tools enabled on the assistant. There can be a maximum of
+ * 128 tools per assistant. Tools can be of types code_interpreter,
+ * file_search, or function.
+ */
+ std::vector<std::unique_ptr<OpenAi::AssistantTool>> tools;
+
+ /**
+ * A set of resources that are used by the assistant's tools. The resources
+ * are specific to the type of tool. For example, the code_interpreter tool
+ * requires a list of file IDs, while the file_search tool requires a list
+ * of vector store IDs.
+ */
+ std::unique_ptr<OpenAi::ToolResources> tool_resources;
+
+ std::optional<Cortex::VariantMap> metadata;
+
+ std::optional<float> temperature;
+
+ std::optional<float> top_p;
+
+ std::optional<std::variant<std::string, Json::Value>> response_format;
+
+ cpp::result<void, std::string> Validate() const override {
+ if (model.empty()) {
+ return cpp::fail("Model is mandatory");
+ }
+
+ if (response_format.has_value()) {
+ const auto& variant_value = response_format.value();
+ if (std::holds_alternative<std::string>(variant_value)) {
+ if (std::get<std::string>(variant_value) != "auto") {
+ return cpp::fail("Invalid response_format");
+ }
+ }
+ }
+
+ return {};
+ }
+
+ static CreateAssistantDto FromJson(Json::Value&& root) {
+ if (root.empty()) {
+ throw std::runtime_error("Json passed in FromJson can't be empty");
+ }
+ CreateAssistantDto dto;
+ dto.model = std::move(root["model"].asString());
+ if (root.isMember("name")) {
+ dto.name = std::move(root["name"].asString());
+ }
+ if (root.isMember("description")) {
+ dto.description = std::move(root["description"].asString());
+ }
+ if (root.isMember("instructions")) {
+ dto.instructions = std::move(root["instructions"].asString());
+ }
+ if (root["metadata"].isObject() && !root["metadata"].empty()) {
+ auto res = Cortex::ConvertJsonValueToMap(root["metadata"]);
+ if (res.has_error()) {
+ CTL_WRN("Failed to convert metadata to map: " + res.error());
+ } else {
+ dto.metadata = std::move(res.value());
+ }
+ }
+ if (root.isMember("temperature")) {
+ dto.temperature = root["temperature"].asFloat();
+ }
+ if (root.isMember("top_p")) {
+ dto.top_p = root["top_p"].asFloat();
+ }
+ if (root.isMember("tools") && root["tools"].isArray()) {
+ auto tools_array = root["tools"];
+ for (const auto& tool : tools_array) {
+ if (!tool.isMember("type") || !tool["type"].isString()) {
+ CTL_WRN("Tool missing type field or invalid type");
+ continue;
+ }
+
+ std::string tool_type = tool["type"].asString();
+ if (tool_type == "file_search") {
+ auto result = OpenAi::AssistantFileSearchTool::FromJson(tool);
+ if (result.has_value()) {
+ dto.tools.push_back(
+ std::make_unique<OpenAi::AssistantFileSearchTool>(
+ std::move(result.value())));
+ } else {
+ CTL_WRN("Failed to parse file_search tool: " + result.error());
+ }
+ } else if (tool_type == "code_interpreter") {
+ auto result = OpenAi::AssistantCodeInterpreterTool::FromJson();
+ if (result.has_value()) {
+ dto.tools.push_back(
+ std::make_unique<OpenAi::AssistantCodeInterpreterTool>(
+ std::move(result.value())));
+ } else {
+ CTL_WRN("Failed to parse code_interpreter tool: " + result.error());
+ }
+ } else if (tool_type == "function") {
+ auto result = OpenAi::AssistantFunctionTool::FromJson(tool);
+ if (result.has_value()) {
+ dto.tools.push_back(std::make_unique<OpenAi::AssistantFunctionTool>(
+ std::move(result.value())));
+ } else {
+ CTL_WRN("Failed to parse function tool: " + result.error());
+ }
+ } else {
+ CTL_WRN("Unknown tool type: " + tool_type);
+ }
+ }
+ }
+ if (root.isMember("tool_resources") && root["tool_resources"].isObject()) {
+ const auto& tool_resources_json = root["tool_resources"];
+
+ // Parse code interpreter resources
+ if (tool_resources_json.isMember("code_interpreter")) {
+ auto result = OpenAi::CodeInterpreter::FromJson(
+ tool_resources_json["code_interpreter"]);
+ if (result.has_value()) {
+ dto.tool_resources = std::make_unique<OpenAi::CodeInterpreter>(
+ std::move(result.value()));
+ } else {
+ CTL_WRN("Failed to parse code_interpreter resources: " +
+ result.error());
+ }
+ }
+
+ // Parse file search resources
+ if (tool_resources_json.isMember("file_search")) {
+ auto result =
+ OpenAi::FileSearch::FromJson(tool_resources_json["file_search"]);
+ if (result.has_value()) {
+ dto.tool_resources =
+ std::make_unique<OpenAi::FileSearch>(std::move(result.value()));
+ } else {
+ CTL_WRN("Failed to parse file_search resources: " + result.error());
+ }
+ }
+ }
+ if (root.isMember("response_format")) {
+ const auto& response_format = root["response_format"];
+ if (response_format.isString()) {
+ dto.response_format = response_format.asString();
+ } else if (response_format.isObject()) {
+ dto.response_format = response_format;
+ } else {
+ throw std::runtime_error(
+ "response_format must be either a string or an object");
+ }
+ }
+ return dto;
+ }
+};
+} // namespace dto
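
A minimal request body this DTO accepts: "model" is the only field `Validate()` insists on, and a string `response_format` must be exactly "auto" (the metadata key is invented):

```cpp
#include <iostream>
#include <json/json.h>

int main() {
  Json::Value body;
  body["model"] = "tinyllama";
  body["response_format"] = "auto";   // string form must be exactly "auto"
  body["metadata"]["team"] = "demo";  // up to 16 key-value pairs
  std::cout << body.toStyledString();
}
```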
diff --git a/engine/common/dto/assistant_update_dto.h b/engine/common/dto/assistant_update_dto.h
new file mode 100644
index 000000000..01e5844d7
--- /dev/null
+++ b/engine/common/dto/assistant_update_dto.h
@@ -0,0 +1,201 @@
+#pragma once
+
+#include "common/assistant_code_interpreter_tool.h"
+#include "common/assistant_file_search_tool.h"
+#include "common/assistant_function_tool.h"
+#include "common/dto/base_dto.h"
+#include "common/tool_resources.h"
+#include "common/variant_map.h"
+#include "utils/logging_utils.h"
+
+namespace dto {
+struct UpdateAssistantDto : public BaseDto<UpdateAssistantDto> {
+ UpdateAssistantDto() = default;
+
+ ~UpdateAssistantDto() = default;
+
+ UpdateAssistantDto(const UpdateAssistantDto&) = delete;
+
+ UpdateAssistantDto& operator=(const UpdateAssistantDto&) = delete;
+
+ UpdateAssistantDto(UpdateAssistantDto&& other) noexcept
+ : model{std::move(other.model)},
+ name{std::move(other.name)},
+ description{std::move(other.description)},
+ instructions{std::move(other.instructions)},
+ tools{std::move(other.tools)},
+ tool_resources{std::move(other.tool_resources)},
+ metadata{std::move(other.metadata)},
+ temperature{std::move(other.temperature)},
+ top_p{std::move(other.top_p)},
+ response_format{std::move(other.response_format)} {}
+
+ UpdateAssistantDto& operator=(UpdateAssistantDto&& other) noexcept {
+ if (this != &other) {
+ model = std::move(other.model);
+ name = std::move(other.name);
+ description = std::move(other.description);
+ instructions = std::move(other.instructions);
+ tools = std::move(other.tools);
+ tool_resources = std::move(other.tool_resources);
+ metadata = std::move(other.metadata);
+ temperature = std::move(other.temperature);
+ top_p = std::move(other.top_p);
+ response_format = std::move(other.response_format);
+ }
+ return *this;
+ }
+ std::optional<std::string> model;
+
+ std::optional<std::string> name;
+
+ std::optional<std::string> description;
+
+ std::optional<std::string> instructions;
+
+ /**
+ * A list of tools enabled on the assistant. There can be a maximum of
+ * 128 tools per assistant. Tools can be of types code_interpreter,
+ * file_search, or function.
+ */
+ std::vector<std::unique_ptr<OpenAi::AssistantTool>> tools;
+
+ /**
+ * A set of resources that are used by the assistant's tools. The resources
+ * are specific to the type of tool. For example, the code_interpreter tool
+ * requires a list of file IDs, while the file_search tool requires a list
+ * of vector store IDs.
+ */
+ std::unique_ptr<OpenAi::ToolResources> tool_resources;
+
+ std::optional<Cortex::VariantMap> metadata;
+
+ std::optional<float> temperature;
+
+ std::optional<float> top_p;
+
+ std::optional<std::variant<std::string, Json::Value>> response_format;
+
+ cpp::result<void, std::string> Validate() const override {
+ if (!model.has_value() && !name.has_value() && !description.has_value() &&
+ !instructions.has_value() && !metadata.has_value() &&
+ !temperature.has_value() && !top_p.has_value() &&
+ !response_format.has_value()) {
+ return cpp::fail("At least one field must be provided");
+ }
+
+ return {};
+ }
+
+ static UpdateAssistantDto FromJson(Json::Value&& root) {
+ if (root.empty()) {
+ throw std::runtime_error("Json passed in FromJson can't be empty");
+ }
+ UpdateAssistantDto dto;
+ dto.model = std::move(root["model"].asString());
+ if (root.isMember("name")) {
+ dto.name = std::move(root["name"].asString());
+ }
+ if (root.isMember("description")) {
+ dto.description = std::move(root["description"].asString());
+ }
+ if (root.isMember("instruction")) {
+ dto.instructions = std::move(root["instruction"].asString());
+ }
+ if (root["metadata"].isObject() && !root["metadata"].empty()) {
+ auto res = Cortex::ConvertJsonValueToMap(root["metadata"]);
+ if (res.has_error()) {
+ CTL_WRN("Failed to convert metadata to map: " + res.error());
+ } else {
+ dto.metadata = std::move(res.value());
+ }
+ }
+ if (root.isMember("temperature")) {
+ dto.temperature = root["temperature"].asFloat();
+ }
+ if (root.isMember("top_p")) {
+ dto.top_p = root["top_p"].asFloat();
+ }
+ if (root.isMember("tools") && root["tools"].isArray()) {
+ auto tools_array = root["tools"];
+ for (const auto& tool : tools_array) {
+ if (!tool.isMember("type") || !tool["type"].isString()) {
+ CTL_WRN("Tool missing type field or invalid type");
+ continue;
+ }
+
+ std::string tool_type = tool["type"].asString();
+ if (tool_type == "file_search") {
+ auto result = OpenAi::AssistantFileSearchTool::FromJson(tool);
+ if (result.has_value()) {
+ dto.tools.push_back(
+ std::make_unique<OpenAi::AssistantFileSearchTool>(
+ std::move(result.value())));
+ } else {
+ CTL_WRN("Failed to parse file_search tool: " + result.error());
+ }
+ } else if (tool_type == "code_interpreter") {
+ auto result = OpenAi::AssistantCodeInterpreterTool::FromJson();
+ if (result.has_value()) {
+ dto.tools.push_back(
+ std::make_unique<OpenAi::AssistantCodeInterpreterTool>(
+ std::move(result.value())));
+ } else {
+ CTL_WRN("Failed to parse code_interpreter tool: " + result.error());
+ }
+ } else if (tool_type == "function") {
+ auto result = OpenAi::AssistantFunctionTool::FromJson(tool);
+ if (result.has_value()) {
+ dto.tools.push_back(std::make_unique<OpenAi::AssistantFunctionTool>(
+ std::move(result.value())));
+ } else {
+ CTL_WRN("Failed to parse function tool: " + result.error());
+ }
+ } else {
+ CTL_WRN("Unknown tool type: " + tool_type);
+ }
+ }
+ }
+ if (root.isMember("tool_resources") && root["tool_resources"].isObject()) {
+ const auto& tool_resources_json = root["tool_resources"];
+
+ // Parse code interpreter resources
+ if (tool_resources_json.isMember("code_interpreter")) {
+ auto result = OpenAi::CodeInterpreter::FromJson(
+ tool_resources_json["code_interpreter"]);
+ if (result.has_value()) {
+ dto.tool_resources = std::make_unique<OpenAi::CodeInterpreter>(
+ std::move(result.value()));
+ } else {
+ CTL_WRN("Failed to parse code_interpreter resources: " +
+ result.error());
+ }
+ }
+
+ // Parse file search resources
+ if (tool_resources_json.isMember("file_search")) {
+ auto result =
+ OpenAi::FileSearch::FromJson(tool_resources_json["file_search"]);
+ if (result.has_value()) {
+ dto.tool_resources =
+ std::make_unique<OpenAi::FileSearch>(std::move(result.value()));
+ } else {
+ CTL_WRN("Failed to parse file_search resources: " + result.error());
+ }
+ }
+ }
+ if (root.isMember("response_format")) {
+ const auto& response_format = root["response_format"];
+ if (response_format.isString()) {
+ dto.response_format = response_format.asString();
+ } else if (response_format.isObject()) {
+ dto.response_format = response_format;
+ } else {
+ throw std::runtime_error(
+ "response_format must be either a string or an object");
+ }
+ }
+ return dto;
+ }
+};
+} // namespace dto
diff --git a/engine/common/dto/base_dto.h b/engine/common/dto/base_dto.h
new file mode 100644
index 000000000..ed7460aa3
--- /dev/null
+++ b/engine/common/dto/base_dto.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <string>
+#include "utils/result.hpp"
+
+namespace dto {
+template <typename T>
+struct BaseDto {
+ virtual ~BaseDto() = default;
+
+ /**
+ * Validate itself.
+ */
+ virtual cpp::result<void, std::string> Validate() const = 0;
+};
+} // namespace dto
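
A compile-checkable sketch of the `BaseDto` contract, using a stand-in for `cpp::result` and assuming the `BaseDto<T>` reconstruction above:

```cpp
#include <iostream>
#include <string>

// Stand-in for cpp::result so the sketch builds without utils/result.hpp.
struct VoidResult {
  bool ok;
  std::string err;
};

template <typename T>
struct BaseDto {
  virtual ~BaseDto() = default;
  virtual VoidResult Validate() const = 0;
};

// Hypothetical DTO; mirrors CreateAssistantDto's "model is mandatory" rule.
struct CreateThingDto : BaseDto<CreateThingDto> {
  std::string model;
  VoidResult Validate() const override {
    if (model.empty()) return {false, "Model is mandatory"};
    return {true, ""};
  }
};

int main() {
  CreateThingDto dto;
  auto r = dto.Validate();
  std::cout << (r.ok ? "valid" : r.err) << "\n";  // Model is mandatory
}
```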
diff --git a/engine/common/engine_servicei.h b/engine/common/engine_servicei.h
index a4b0c8732..ceb9b2fec 100644
--- a/engine/common/engine_servicei.h
+++ b/engine/common/engine_servicei.h
@@ -25,12 +25,14 @@ struct EngineVariantResponse {
std::string name;
std::string version;
std::string engine;
+ std::string type;
Json::Value ToJson() const {
Json::Value root;
root["name"] = name;
root["version"] = version;
root["engine"] = engine;
+ root["type"] = type.empty() ? "local" : type;
return root;
}
};
@@ -57,7 +59,7 @@ class EngineServiceI {
virtual cpp::result
GetEngineByNameAndVariant(
const std::string& engine_name,
- const std::optional<std::string> variant = std::nullopt) = 0;
-
- virtual bool IsRemoteEngine(const std::string& engine_name) = 0;
+ const std::optional<std::string> variant = std::nullopt) const = 0;
+
+ virtual bool IsRemoteEngine(const std::string& engine_name) const = 0;
};
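
The new `type` field defaults to "local" when left unset, so existing local-engine responses keep serializing as before; a one-liner mirroring the ternary in `ToJson`:

```cpp
#include <iostream>
#include <json/json.h>
#include <string>

int main() {
  std::string type;  // never set by local engines
  Json::Value root;
  root["type"] = type.empty() ? "local" : type;
  std::cout << root["type"].asString() << "\n";  // local
}
```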
diff --git a/engine/common/message_attachment.h b/engine/common/message_attachment.h
index 767ec9bea..6a0fb02e9 100644
--- a/engine/common/message_attachment.h
+++ b/engine/common/message_attachment.h
@@ -4,22 +4,27 @@
#include "common/json_serializable.h"
namespace OpenAi {
-
// The tools to add this file to.
struct Tool {
std::string type;
Tool(const std::string& type) : type{type} {}
+
+ virtual ~Tool() = default;
};
// The type of tool being defined: code_interpreter
-struct CodeInterpreter : Tool {
- CodeInterpreter() : Tool{"code_interpreter"} {}
+struct MessageCodeInterpreter : Tool {
+ MessageCodeInterpreter() : Tool{"code_interpreter"} {}
+
+ ~MessageCodeInterpreter() = default;
};
// The type of tool being defined: file_search
-struct FileSearch : Tool {
- FileSearch() : Tool{"file_search"} {}
+struct MessageFileSearch : Tool {
+ MessageFileSearch() : Tool{"file_search"} {}
+
+ ~MessageFileSearch() = default;
};
// A list of files attached to the message, and the tools they were added to.
diff --git a/engine/common/model_metadata.h b/engine/common/model_metadata.h
new file mode 100644
index 000000000..af733665b
--- /dev/null
+++ b/engine/common/model_metadata.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <memory>
+#include "common/tokenizer.h"
+
+struct ModelMetadata {
+ uint32_t version;
+ uint64_t tensor_count;
+ uint64_t metadata_kv_count;
+ std::shared_ptr<Tokenizer> tokenizer;
+
+ std::string ToString() const {
+ std::ostringstream ss;
+ ss << "ModelMetadata {\n"
+ << "version: " << version << "\n"
+ << "tensor_count: " << tensor_count << "\n"
+ << "metadata_kv_count: " << metadata_kv_count << "\n"
+ << "tokenizer: ";
+
+ if (tokenizer) {
+ ss << "\n" << tokenizer->ToString();
+ } else {
+ ss << "null";
+ }
+
+ ss << "\n}";
+ return ss.str();
+ }
+};
diff --git a/engine/common/repository/assistant_repository.h b/engine/common/repository/assistant_repository.h
new file mode 100644
index 000000000..d0ff1908d
--- /dev/null
+++ b/engine/common/repository/assistant_repository.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "common/assistant.h"
+#include "utils/result.hpp"
+
+class AssistantRepository {
+ public:
+ virtual cpp::result<std::vector<OpenAi::Assistant>, std::string>
+ ListAssistants(uint8_t limit, const std::string& order,
+ const std::string& after, const std::string& before) const = 0;
+
+ virtual cpp::result<OpenAi::Assistant, std::string> CreateAssistant(
+ OpenAi::Assistant& assistant) = 0;
+
+ virtual cpp::result<OpenAi::Assistant, std::string> RetrieveAssistant(
+ const std::string assistant_id) const = 0;
+
+ virtual cpp::result<OpenAi::Assistant, std::string> ModifyAssistant(
+ OpenAi::Assistant& assistant) = 0;
+
+ virtual cpp::result<void, std::string> DeleteAssistant(
+ const std::string& assistant_id) = 0;
+
+ virtual ~AssistantRepository() = default;
+};
diff --git a/engine/common/thread.h b/engine/common/thread.h
index 2bd5d866b..dc57ba32d 100644
--- a/engine/common/thread.h
+++ b/engine/common/thread.h
@@ -4,7 +4,7 @@
#include
#include
#include "common/assistant.h"
-#include "common/thread_tool_resources.h"
+#include "common/tool_resources.h"
#include "common/variant_map.h"
#include "json_serializable.h"
#include "utils/logging_utils.h"
@@ -36,7 +36,7 @@ struct Thread : JsonSerializable {
* of tool. For example, the code_interpreter tool requires a list of
* file IDs, while the file_search tool requires a list of vector store IDs.
*/
- std::unique_ptr<ThreadToolResources> tool_resources;
+ std::unique_ptr<ToolResources> tool_resources;
/**
* Set of 16 key-value pairs that can be attached to an object.
@@ -65,7 +65,7 @@ struct Thread : JsonSerializable {
const auto& tool_json = json["tool_resources"];
if (tool_json.isMember("code_interpreter")) {
- auto code_interpreter = std::make_unique<ThreadCodeInterpreter>();
+ auto code_interpreter = std::make_unique<CodeInterpreter>();
const auto& file_ids = tool_json["code_interpreter"]["file_ids"];
if (file_ids.isArray()) {
for (const auto& file_id : file_ids) {
@@ -74,7 +74,7 @@ struct Thread : JsonSerializable {
}
thread.tool_resources = std::move(code_interpreter);
} else if (tool_json.isMember("file_search")) {
- auto file_search = std::make_unique<ThreadFileSearch>();
+ auto file_search = std::make_unique<FileSearch>();
const auto& store_ids = tool_json["file_search"]["vector_store_ids"];
if (store_ids.isArray()) {
for (const auto& store_id : store_ids) {
@@ -148,10 +148,10 @@ struct Thread : JsonSerializable {
Json::Value tool_json;
if (auto code_interpreter =
- dynamic_cast<ThreadCodeInterpreter*>(tool_resources.get())) {
+ dynamic_cast<CodeInterpreter*>(tool_resources.get())) {
tool_json["code_interpreter"] = tool_result.value();
} else if (auto file_search =
- dynamic_cast<ThreadFileSearch*>(tool_resources.get())) {
+ dynamic_cast<FileSearch*>(tool_resources.get())) {
tool_json["file_search"] = tool_result.value();
}
json["tool_resources"] = tool_json;
diff --git a/engine/common/thread_tool_resources.h b/engine/common/thread_tool_resources.h
deleted file mode 100644
index 3c22a4480..000000000
--- a/engine/common/thread_tool_resources.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#pragma once
-
-#include <string>
-#include <vector>
-#include "common/json_serializable.h"
-
-namespace OpenAi {
-
-struct ThreadToolResources : JsonSerializable {
- ~ThreadToolResources() = default;
-
- virtual cpp::result<Json::Value, std::string> ToJson() override = 0;
-};
-
-struct ThreadCodeInterpreter : ThreadToolResources {
- std::vector<std::string> file_ids;
-
- cpp::result<Json::Value, std::string> ToJson() override {
- try {
- Json::Value json;
- Json::Value file_ids_json{Json::arrayValue};
- for (auto& file_id : file_ids) {
- file_ids_json.append(file_id);
- }
- json["file_ids"] = file_ids_json;
- return json;
- } catch (const std::exception& e) {
- return cpp::fail(std::string("ToJson failed: ") + e.what());
- }
- }
-};
-
-struct ThreadFileSearch : ThreadToolResources {
- std::vector<std::string> vector_store_ids;
-
- cpp::result<Json::Value, std::string> ToJson() override {
- try {
- Json::Value json;
- Json::Value vector_store_ids_json{Json::arrayValue};
- for (auto& vector_store_id : vector_store_ids) {
- vector_store_ids_json.append(vector_store_id);
- }
- json["vector_store_ids"] = vector_store_ids_json;
- return json;
- } catch (const std::exception& e) {
- return cpp::fail(std::string("ToJson failed: ") + e.what());
- }
- }
-};
-} // namespace OpenAi
diff --git a/engine/common/tokenizer.h b/engine/common/tokenizer.h
new file mode 100644
index 000000000..33367f06b
--- /dev/null
+++ b/engine/common/tokenizer.h
@@ -0,0 +1,72 @@
+#pragma once
+
+#include <sstream>
+#include <string>
+
+struct Tokenizer {
+ std::string eos_token = "";
+ bool add_eos_token = true;
+
+ std::string bos_token = "";
+ bool add_bos_token = true;
+
+ std::string unknown_token = "";
+ std::string padding_token = "";
+
+ std::string chat_template = "";
+
+ bool add_generation_prompt = true;
+
+ // Helper function for common fields
+ std::string BaseToString() const {
+ std::ostringstream ss;
+ ss << "eos_token: \"" << eos_token << "\"\n"
+ << "add_eos_token: " << (add_eos_token ? "true" : "false") << "\n"
+ << "bos_token: \"" << bos_token << "\"\n"
+ << "add_bos_token: " << (add_bos_token ? "true" : "false") << "\n"
+ << "unknown_token: \"" << unknown_token << "\"\n"
+ << "padding_token: \"" << padding_token << "\"\n"
+ << "chat_template: \"" << chat_template << "\"\n"
+ << "add_generation_prompt: "
+ << (add_generation_prompt ? "true" : "false") << "\"";
+ return ss.str();
+ }
+
+ virtual ~Tokenizer() = default;
+
+ virtual std::string ToString() = 0;
+};
+
+struct GgufTokenizer : public Tokenizer {
+ std::string pre = "";
+
+ ~GgufTokenizer() override = default;
+
+ std::string ToString() override {
+ std::ostringstream ss;
+ ss << "GgufTokenizer {\n";
+ // Add base class members
+ ss << BaseToString() << "\n";
+ // Add derived class members
+ ss << "pre: \"" << pre << "\"\n";
+ ss << "}";
+ return ss.str();
+ }
+};
+
+struct SafeTensorTokenizer : public Tokenizer {
+ bool add_prefix_space = true;
+
+ ~SafeTensorTokenizer() = default;
+
+ std::string ToString() override {
+ std::ostringstream ss;
+ ss << "SafeTensorTokenizer {\n";
+ // Add base class members
+ ss << BaseToString() << "\n";
+ // Add derived class members
+ ss << "add_prefix_space: " << (add_prefix_space ? "true" : "false") << "\n";
+ ss << "}";
+ return ss.str();
+ }
+};
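
A usage sketch for the tokenizer hierarchy, assuming `common/tokenizer.h` is on the include path (the `pre` and template values are invented):

```cpp
#include <iostream>
#include "common/tokenizer.h"

int main() {
  GgufTokenizer tok;
  tok.pre = "llama-bpe";               // invented pre-tokenizer name
  tok.chat_template = "{{messages}}";  // invented template
  // Prints the base fields from BaseToString() plus the derived "pre" field.
  std::cout << tok.ToString() << "\n";
}
```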
diff --git a/engine/common/tool_resources.h b/engine/common/tool_resources.h
new file mode 100644
index 000000000..5aadb3f8b
--- /dev/null
+++ b/engine/common/tool_resources.h
@@ -0,0 +1,114 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include "common/json_serializable.h"
+
+namespace OpenAi {
+
+struct ToolResources : JsonSerializable {
+ ToolResources() = default;
+
+ ToolResources(const ToolResources&) = delete;
+
+ ToolResources& operator=(const ToolResources&) = delete;
+
+ ToolResources(ToolResources&&) noexcept = default;
+
+ ToolResources& operator=(ToolResources&&) noexcept = default;
+
+ virtual ~ToolResources() = default;
+
+ virtual cpp::result<Json::Value, std::string> ToJson() override = 0;
+};
+
+struct CodeInterpreter : ToolResources {
+ CodeInterpreter() = default;
+
+ ~CodeInterpreter() override = default;
+
+ CodeInterpreter(const CodeInterpreter&) = delete;
+
+ CodeInterpreter& operator=(const CodeInterpreter&) = delete;
+
+ CodeInterpreter(CodeInterpreter&& other) noexcept
+ : ToolResources(std::move(other)), file_ids(std::move(other.file_ids)) {}
+
+ CodeInterpreter& operator=(CodeInterpreter&& other) noexcept {
+ if (this != &other) {
+ ToolResources::operator=(std::move(other));
+ file_ids = std::move(other.file_ids);
+ }
+ return *this;
+ }
+
+ std::vector<std::string> file_ids;
+
+ static cpp::result<CodeInterpreter, std::string> FromJson(
+ const Json::Value& json) {
+ CodeInterpreter code_interpreter;
+ if (json.isMember("file_ids")) {
+ for (const auto& file_id : json["file_ids"]) {
+ code_interpreter.file_ids.push_back(file_id.asString());
+ }
+ }
+ return code_interpreter;
+ }
+
+ cpp::result<Json::Value, std::string> ToJson() override {
+ Json::Value json;
+ Json::Value file_ids_json{Json::arrayValue};
+ for (auto& file_id : file_ids) {
+ file_ids_json.append(file_id);
+ }
+ json["file_ids"] = file_ids_json;
+ return json;
+ }
+};
+
+struct FileSearch : ToolResources {
+ FileSearch() = default;
+
+ ~FileSearch() override = default;
+
+ FileSearch(const FileSearch&) = delete;
+
+ FileSearch& operator=(const FileSearch&) = delete;
+
+ FileSearch(FileSearch&& other) noexcept
+ : ToolResources(std::move(other)),
+ vector_store_ids{std::move(other.vector_store_ids)} {}
+
+ FileSearch& operator=(FileSearch&& other) noexcept {
+ if (this != &other) {
+ ToolResources::operator=(std::move(other));
+
+ vector_store_ids = std::move(other.vector_store_ids);
+ }
+ return *this;
+ }
+
+ std::vector<std::string> vector_store_ids;
+
+ static cpp::result<FileSearch, std::string> FromJson(
+ const Json::Value& json) {
+ FileSearch file_search;
+ if (json.isMember("vector_store_ids")) {
+ for (const auto& vector_store_id : json["vector_store_ids"]) {
+ file_search.vector_store_ids.push_back(vector_store_id.asString());
+ }
+ }
+ return file_search;
+ }
+
+ cpp::result<Json::Value, std::string> ToJson() override {
+ Json::Value json;
+ Json::Value vector_store_ids_json{Json::arrayValue};
+ for (auto& vector_store_id : vector_store_ids) {
+ vector_store_ids_json.append(vector_store_id);
+ }
+ json["vector_store_ids"] = vector_store_ids_json;
+ return json;
+ }
+};
+} // namespace OpenAi
diff --git a/engine/config/model_config.h b/engine/config/model_config.h
index a799adb27..1d51cfb01 100644
--- a/engine/config/model_config.h
+++ b/engine/config/model_config.h
@@ -1,14 +1,16 @@
#pragma once
#include
-#include
+#include
#include
+#include
+
+#include
#include
#include
#include
#include
#include
-#include "config/remote_template.h"
#include "utils/format_utils.h"
#include "utils/remote_models_utils.h"
@@ -16,15 +18,15 @@ namespace config {
struct RemoteModelConfig {
std::string model;
- std::string api_key_template;
+ std::string header_template;
std::string engine;
std::string version;
- std::size_t created;
+ size_t created;
std::string object = "model";
std::string owned_by = "";
Json::Value inference_params;
- Json::Value TransformReq;
- Json::Value TransformResp;
+ Json::Value transform_req;
+ Json::Value transform_resp;
Json::Value metadata;
void LoadFromJson(const Json::Value& json) {
if (!json.isObject()) {
@@ -33,8 +35,8 @@ struct RemoteModelConfig {
// Load basic string fields
model = json.get("model", model).asString();
- api_key_template =
- json.get("api_key_template", api_key_template).asString();
+ header_template =
+ json.get("header_template", header_template).asString();
engine = json.get("engine", engine).asString();
version = json.get("version", version).asString();
created =
@@ -44,31 +46,8 @@ struct RemoteModelConfig {
// Load JSON object fields directly
inference_params = json.get("inference_params", inference_params);
- TransformReq = json.get("TransformReq", TransformReq);
- // Use default template if it is empty, currently we only support 2 remote engines
- auto is_anthropic = [](const std::string& model) {
- return model.find("claude") != std::string::npos;
- };
- if (TransformReq["chat_completions"]["template"].isNull()) {
- if (is_anthropic(model)) {
- TransformReq["chat_completions"]["template"] =
- kAnthropicTransformReqTemplate;
- } else {
- TransformReq["chat_completions"]["template"] =
- kOpenAITransformReqTemplate;
- }
- }
- TransformResp = json.get("TransformResp", TransformResp);
- if (TransformResp["chat_completions"]["template"].isNull()) {
- if (is_anthropic(model)) {
- TransformResp["chat_completions"]["template"] =
- kAnthropicTransformRespTemplate;
- } else {
- TransformResp["chat_completions"]["template"] =
- kOpenAITransformRespTemplate;
- }
- }
-
+ transform_req = json.get("transform_req", transform_req);
+ transform_resp = json.get("transform_resp", transform_resp);
metadata = json.get("metadata", metadata);
}
@@ -77,7 +56,7 @@ struct RemoteModelConfig {
// Add basic string fields
json["model"] = model;
- json["api_key_template"] = api_key_template;
+ json["header_template"] = header_template;
json["engine"] = engine;
json["version"] = version;
json["created"] = static_cast(created);
@@ -86,8 +65,8 @@ struct RemoteModelConfig {
// Add JSON object fields directly
json["inference_params"] = inference_params;
- json["TransformReq"] = TransformReq;
- json["TransformResp"] = TransformResp;
+ json["transform_req"] = transform_req;
+ json["transform_resp"] = transform_resp;
json["metadata"] = metadata;
return json;
@@ -98,7 +77,7 @@ struct RemoteModelConfig {
// Convert basic fields
root["model"] = model;
- root["api_key_template"] = api_key_template;
+ root["header_template"] = header_template;
root["engine"] = engine;
root["version"] = version;
root["object"] = object;
@@ -108,8 +87,8 @@ struct RemoteModelConfig {
// Convert Json::Value to YAML::Node using utility function
root["inference_params"] =
remote_models_utils::jsonToYaml(inference_params);
- root["TransformReq"] = remote_models_utils::jsonToYaml(TransformReq);
- root["TransformResp"] = remote_models_utils::jsonToYaml(TransformResp);
+ root["transform_req"] = remote_models_utils::jsonToYaml(transform_req);
+ root["transform_resp"] = remote_models_utils::jsonToYaml(transform_resp);
root["metadata"] = remote_models_utils::jsonToYaml(metadata);
// Save to file
@@ -131,7 +110,7 @@ struct RemoteModelConfig {
// Load basic fields
model = root["model"].as("");
- api_key_template = root["api_key_template"].as("");
+ header_template = root["header_template"].as("");
engine = root["engine"].as("");
version = root["version"] ? root["version"].as() : "";
created = root["created"] ? root["created"].as() : 0;
@@ -141,8 +120,8 @@ struct RemoteModelConfig {
// Load complex fields using utility function
inference_params =
remote_models_utils::yamlToJson(root["inference_params"]);
- TransformReq = remote_models_utils::yamlToJson(root["TransformReq"]);
- TransformResp = remote_models_utils::yamlToJson(root["TransformResp"]);
+ transform_req = remote_models_utils::yamlToJson(root["transform_req"]);
+ transform_resp = remote_models_utils::yamlToJson(root["transform_resp"]);
metadata = remote_models_utils::yamlToJson(root["metadata"]);
}
};
@@ -161,6 +140,7 @@ struct ModelConfig {
int ngl = std::numeric_limits<int>::quiet_NaN();
int ctx_len = std::numeric_limits<int>::quiet_NaN();
int n_parallel = 1;
+ int cpu_threads = -1;
std::string engine;
std::string prompt_template;
std::string system_template;
@@ -269,6 +249,8 @@ struct ModelConfig {
ctx_len = json["ctx_len"].asInt();
if (json.isMember("n_parallel"))
n_parallel = json["n_parallel"].asInt();
+ if (json.isMember("cpu_threads"))
+ cpu_threads = json["cpu_threads"].asInt();
if (json.isMember("engine"))
engine = json["engine"].asString();
if (json.isMember("prompt_template"))
@@ -359,6 +341,9 @@ struct ModelConfig {
obj["ngl"] = ngl;
obj["ctx_len"] = ctx_len;
obj["n_parallel"] = n_parallel;
+ if (cpu_threads > 0) {
+ obj["cpu_threads"] = cpu_threads;
+ }
obj["engine"] = engine;
obj["prompt_template"] = prompt_template;
obj["system_template"] = system_template;
@@ -471,6 +456,8 @@ struct ModelConfig {
format_utils::MAGENTA);
oss << format_utils::print_kv("n_parallel", std::to_string(n_parallel),
format_utils::MAGENTA);
+ oss << format_utils::print_kv("cpu_threads", std::to_string(cpu_threads),
+ format_utils::MAGENTA);
if (ngl != std::numeric_limits<int>::quiet_NaN())
oss << format_utils::print_kv("ngl", std::to_string(ngl),
format_utils::MAGENTA);
@@ -482,4 +469,340 @@ struct ModelConfig {
}
};
+struct Endpoint {
+ std::string method;
+ std::string path;
+ std::string transform_request;
+ std::string transform_response;
+};
+
+struct PythonModelConfig {
+ // General Metadata
+ std::string id;
+ std::string model;
+ std::string name;
+ int version;
+
+ // Inference Parameters
+ Endpoint load_model;
+ Endpoint destroy;
+ Endpoint inference;
+ Endpoint health_check;
+ std::vector<Endpoint> extra_endpoints;
+
+ // Model Load Parameters
+ std::string port;
+ std::string script;
+ std::string log_path;
+ std::string log_level;
+ std::string environment;
+ std::vector<std::string> command; // New command field
+ std::vector<std::string> files;
+ std::vector<std::string> depends;
+ std::string engine;
+ Json::Value extra_params; // Accept dynamic extra parameters
+
+ // Method to convert C++ struct to YAML
+ void ToYaml(const std::string& filepath) const {
+ YAML::Emitter out;
+ out << YAML::BeginMap;
+
+ out << YAML::Key << "id" << YAML::Value << id;
+ out << YAML::Key << "model" << YAML::Value << model;
+ out << YAML::Key << "name" << YAML::Value << name;
+ out << YAML::Key << "version" << YAML::Value << version;
+
+ // Inference Parameters
+ out << YAML::Key << "load_model" << YAML::Value << YAML::BeginMap;
+ out << YAML::Key << "method" << YAML::Value << load_model.method;
+ out << YAML::Key << "path" << YAML::Value << load_model.path;
+ out << YAML::Key << "transform_request" << YAML::Value
+ << load_model.transform_request;
+ out << YAML::Key << "transform_response" << YAML::Value
+ << load_model.transform_response;
+ out << YAML::EndMap;
+
+ out << YAML::Key << "destroy" << YAML::Value << YAML::BeginMap;
+ out << YAML::Key << "method" << YAML::Value << destroy.method;
+ out << YAML::Key << "path" << YAML::Value << destroy.path;
+ out << YAML::EndMap;
+
+ out << YAML::Key << "inference" << YAML::Value << YAML::BeginMap;
+ out << YAML::Key << "method" << YAML::Value << inference.method;
+ out << YAML::Key << "path" << YAML::Value << inference.path;
+ out << YAML::EndMap;
+
+ out << YAML::Key << "extra_endpoints" << YAML::Value << YAML::BeginSeq;
+ for (const auto& endpoint : extra_endpoints) {
+ out << YAML::BeginMap;
+ out << YAML::Key << "method" << YAML::Value << endpoint.method;
+ out << YAML::Key << "path" << YAML::Value << endpoint.path;
+ out << YAML::EndMap;
+ }
+ out << YAML::EndSeq;
+
+ // Model Load Parameters
+ out << YAML::Key << "port" << YAML::Value << port;
+ out << YAML::Key << "script" << YAML::Value << script;
+ out << YAML::Key << "log_path" << YAML::Value << log_path;
+ out << YAML::Key << "log_level" << YAML::Value << log_level;
+ out << YAML::Key << "environment" << YAML::Value << environment;
+
+ // Serialize command as YAML list
+ out << YAML::Key << "command" << YAML::Value << YAML::BeginSeq;
+ for (const auto& cmd : command) {
+ out << cmd;
+ }
+ out << YAML::EndSeq;
+
+ // Serialize files as YAML list
+ out << YAML::Key << "files" << YAML::Value << YAML::BeginSeq;
+ for (const auto& file : files) {
+ out << file;
+ }
+ out << YAML::EndSeq;
+
+ // Serialize depends as YAML list
+ out << YAML::Key << "depends" << YAML::Value << YAML::BeginSeq;
+ for (const auto& depend : depends) {
+ out << depend;
+ }
+ out << YAML::EndSeq;
+
+ out << YAML::Key << "engine" << YAML::Value << engine;
+
+ // Serialize extra_params as YAML
+ out << YAML::Key << "extra_params" << YAML::Value << YAML::BeginMap;
+ for (Json::ValueConstIterator iter = extra_params.begin();
+ iter != extra_params.end(); ++iter) {
+ out << YAML::Key << iter.key().asString() << YAML::Value
+ << iter->asString();
+ }
+ out << YAML::EndMap;
+
+ std::ofstream fout(filepath);
+ if (!fout.is_open()) {
+ throw std::runtime_error("Failed to open file for writing: " + filepath);
+ }
+ fout << out.c_str();
+ }
+
+ // Method to populate struct from YAML file
+ void ReadFromYaml(const std::string& filePath) {
+ YAML::Node config = YAML::LoadFile(filePath);
+
+ if (config["id"])
+ id = config["id"].as();
+ if (config["model"])
+ model = config["model"].as();
+ if (config["name"])
+ name = config["name"].as();
+ if (config["version"])
+ version = config["version"].as();
+
+ // Inference Parameters
+
+ auto ip = config;
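+ // "ip" just aliases the root node (YAML::Node has reference semantics);
+ // the inference-parameter keys live at the top level of the YAML file.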
+ if (ip["load_model"]) {
+ load_model.method = ip["load_model"]["method"].as<std::string>();
+ load_model.path = ip["load_model"]["path"].as<std::string>();
+ load_model.transform_request =
+ ip["load_model"]["transform_request"].as<std::string>();
+ load_model.transform_response =
+ ip["load_model"]["transform_response"].as<std::string>();
+ }
+ if (ip["destroy"]) {
+ destroy.method = ip["destroy"]["method"].as();
+ destroy.path = ip["destroy"]["path"].as();
+ }
+ if (ip["inference"]) {
+ inference.method = ip["inference"]["method"].as();
+ inference.path = ip["inference"]["path"].as();
+ }
+ if (ip["extra_endpoints"] && ip["extra_endpoints"].IsSequence()) {
+ for (const auto& endpoint : ip["extra_endpoints"]) {
+ Endpoint e;
+ e.method = endpoint["method"].as();
+ e.path = endpoint["path"].as();
+ extra_endpoints.push_back(e);
+ }
+ }
+
+ // Model Load Parameters
+
+ auto mlp = config;
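+ // "mlp" likewise aliases the root node for the model-load-parameter keys.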
+ if (mlp["port"])
+ port = mlp["port"].as();
+ if (mlp["script"])
+ script = mlp["script"].as();
+ if (mlp["log_path"])
+ log_path = mlp["log_path"].as();
+ if (mlp["log_level"])
+ log_level = mlp["log_level"].as();
+ if (mlp["environment"])
+ environment = mlp["environment"].as();
+ if (mlp["engine"])
+ engine = mlp["engine"].as();
+
+ if (mlp["command"] && mlp["command"].IsSequence()) {
+ for (const auto& cmd : mlp["command"]) {
+ command.push_back(cmd.as<std::string>());
+ }
+ }
+
+ if (mlp["files"] && mlp["files"].IsSequence()) {
+ for (const auto& file : mlp["files"]) {
+ files.push_back(file.as<std::string>());
+ }
+ }
+
+ if (mlp["depends"] && mlp["depends"].IsSequence()) {
+ for (const auto& depend : mlp["depends"]) {
+ depends.push_back(depend.as<std::string>());
+ }
+ }
+
+ if (mlp["extra_params"]) {
+ for (YAML::const_iterator it = mlp["extra_params"].begin();
+ it != mlp["extra_params"].end(); ++it) {
+ extra_params[it->first.as<std::string>()] =
+ it->second.as<std::string>();
+ }
+ }
+ }
+
+ // Method to convert the struct to JSON
+ Json::Value ToJson() const {
+ Json::Value root;
+
+ root["id"] = id;
+ root["model"] = model;
+ root["name"] = name;
+ root["version"] = version;
+
+ // Inference Parameters
+ root["load_model"]["method"] = load_model.method;
+ root["load_model"]["path"] = load_model.path;
+ root["load_model"]["transform_request"] = load_model.transform_request;
+ root["load_model"]["transform_response"] = load_model.transform_response;
+
+ root["destroy"]["method"] = destroy.method;
+ root["destroy"]["path"] = destroy.path;
+
+ root["inference"]["method"] = inference.method;
+ root["inference"]["path"] = inference.path;
+
+ for (const auto& endpoint : extra_endpoints) {
+ Json::Value e;
+ e["method"] = endpoint.method;
+ e["path"] = endpoint.path;
+ root["extra_endpoints"].append(e);
+ }
+
+ // Model Load Parameters
+ root["port"] = port;
+ root["log_path"] = log_path;
+ root["log_level"] = log_level;
+ root["environment"] = environment;
+ root["script"] = script;
+
+ // Serialize command as JSON array
+ for (const auto& cmd : command) {
+ root["command"].append(cmd);
+ }
+
+ for (const auto& file : files) {
+ root["files"].append(file);
+ }
+
+ for (const auto& depend : depends) {
+ root["depends"].append(depend);
+ }
+
+ root["engine"] = engine;
+ root["extra_params"] = extra_params; // Serialize the JSON value directly
+
+ return root;
+ }
+
+ // Method to populate struct from JSON
+ void FromJson(const Json::Value& root) {
+
+ if (root.isMember("id"))
+ id = root["id"].asString();
+ if (root.isMember("model"))
+ model = root["model"].asString();
+ if (root.isMember("name"))
+ name = root["name"].asString();
+ if (root.isMember("version"))
+ version = root["version"].asInt();
+
+ // Inference Parameters
+
+ const Json::Value& ip = root;
+ if (ip.isMember("load_model")) {
+ load_model.method = ip["load_model"]["method"].asString();
+ load_model.path = ip["load_model"]["path"].asString();
+ load_model.transform_request =
+ ip["load_model"]["transform_request"].asString();
+ load_model.transform_response =
+ ip["load_model"]["transform_response"].asString();
+ }
+ if (ip.isMember("destroy")) {
+ destroy.method = ip["destroy"]["method"].asString();
+ destroy.path = ip["destroy"]["path"].asString();
+ }
+ if (ip.isMember("inference")) {
+ inference.method = ip["inference"]["method"].asString();
+ inference.path = ip["inference"]["path"].asString();
+ }
+ if (ip.isMember("extra_endpoints")) {
+ for (const auto& endpoint : ip["extra_endpoints"]) {
+ Endpoint e;
+ e.method = endpoint["method"].asString();
+ e.path = endpoint["path"].asString();
+ extra_endpoints.push_back(e);
+ }
+ }
+
+ // Model Load Parameters
+
+ const Json::Value& mlp = root;
+ if (mlp.isMember("port"))
+ port = mlp["port"].asString();
+ if (mlp.isMember("log_path"))
+ log_path = mlp["log_path"].asString();
+ if (mlp.isMember("log_level"))
+ log_level = mlp["log_level"].asString();
+ if (mlp.isMember("environment"))
+ environment = mlp["environment"].asString();
+ if (mlp.isMember("engine"))
+ engine = mlp["engine"].asString();
+ if (mlp.isMember("script"))
+ script = mlp["script"].asString();
+
+ if (mlp.isMember("command")) {
+ for (const auto& cmd : mlp["command"]) {
+ command.push_back(cmd.asString());
+ }
+ }
+
+ if (mlp.isMember("files")) {
+ for (const auto& file : mlp["files"]) {
+ files.push_back(file.asString());
+ }
+ }
+
+ if (mlp.isMember("depends")) {
+ for (const auto& depend : mlp["depends"]) {
+ depends.push_back(depend.asString());
+ }
+ }
+
+ if (mlp.isMember("extra_params")) {
+ extra_params = mlp["extra_params"]; // Directly assign the JSON value
+ }
+ }
+};
+
} // namespace config
diff --git a/engine/config/remote_template.h b/engine/config/remote_template.h
deleted file mode 100644
index 8a17aaa9a..000000000
--- a/engine/config/remote_template.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#include <string>
-
-namespace config {
-const std::string kOpenAITransformReqTemplate =
- R"({ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} })";
-const std::string kOpenAITransformRespTemplate =
- R"({ {%- set first = true -%} {%- for key, value in input_request -%} {%- if key == "id" or key == "choices" or key == "created" or key == "model" or key == "service_tier" or key == "system_fingerprint" or key == "object" or key == "usage" -%} {%- if not first -%},{%- endif -%} "{{ key }}": {{ tojson(value) }} {%- set first = false -%} {%- endif -%} {%- endfor -%} })";
-const std::string kAnthropicTransformReqTemplate =
- R"({
- {% for key, value in input_request %}
- {% if key == "messages" %}
- {% if input_request.messages.0.role == "system" %}
- "system": "{{ input_request.messages.0.content }}",
- "messages": [
- {% for message in input_request.messages %}
- {% if not loop.is_first %}
- {"role": "{{ message.role }}", "content": "{{ message.content }}" } {% if not loop.is_last %},{% endif %}
- {% endif %}
- {% endfor %}
- ]
- {% else %}
- "messages": [
- {% for message in input_request.messages %}
- {"role": " {{ message.role}}", "content": "{{ message.content }}" } {% if not loop.is_last %},{% endif %}
- {% endfor %}
- ]
- {% endif %}
- {% else if key == "system" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %}
- "{{ key }}": {{ tojson(value) }}
- {% endif %}
- {% if not loop.is_last %},{% endif %}
- {% endfor %} })";
-const std::string kAnthropicTransformRespTemplate = R"({
- "id": "{{ input_request.id }}",
- "created": null,
- "object": "chat.completion",
- "model": "{{ input_request.model }}",
- "choices": [
- {
- "index": 0,
- "message": {
- "role": "{{ input_request.role }}",
- "content": "{% if input_request.content and input_request.content.0.type == "text" %} {{input_request.content.0.text}} {% endif %}",
- "refusal": null
- },
- "logprobs": null,
- "finish_reason": "{{ input_request.stop_reason }}"
- }
- ],
- "usage": {
- "prompt_tokens": {{ input_request.usage.input_tokens }},
- "completion_tokens": {{ input_request.usage.output_tokens }},
- "total_tokens": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }},
- "prompt_tokens_details": {
- "cached_tokens": 0
- },
- "completion_tokens_details": {
- "reasoning_tokens": 0,
- "accepted_prediction_tokens": 0,
- "rejected_prediction_tokens": 0
- }
- },
- "system_fingerprint": "fp_6b68a8204b"
- })";
-
-} // namespace config
\ No newline at end of file
diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc
index bbe7f430c..57b2b3ecb 100644
--- a/engine/config/yaml_config.cc
+++ b/engine/config/yaml_config.cc
@@ -119,6 +119,8 @@ void YamlHandler::ModelConfigFromYaml() {
tmp.ctx_len = yaml_node_["ctx_len"].as();
if (yaml_node_["n_parallel"])
tmp.n_parallel = yaml_node_["n_parallel"].as();
+ if (yaml_node_["cpu_threads"])
+ tmp.cpu_threads = yaml_node_["cpu_threads"].as();
if (yaml_node_["tp"])
tmp.tp = yaml_node_["tp"].as();
if (yaml_node_["stream"])
@@ -224,6 +226,8 @@ void YamlHandler::UpdateModelConfig(ModelConfig new_model_config) {
yaml_node_["ctx_len"] = model_config_.ctx_len;
if (!std::isnan(static_cast<double>(model_config_.n_parallel)))
yaml_node_["n_parallel"] = model_config_.n_parallel;
+ if (!std::isnan(static_cast<double>(model_config_.cpu_threads)))
+ yaml_node_["cpu_threads"] = model_config_.cpu_threads;
if (!std::isnan(static_cast<double>(model_config_.tp)))
yaml_node_["tp"] = model_config_.tp;
if (!std::isnan(static_cast<double>(model_config_.stream)))
@@ -283,110 +287,112 @@ void YamlHandler::UpdateModelConfig(ModelConfig new_model_config) {
// Method to write all attributes to a YAML file
void YamlHandler::WriteYamlFile(const std::string& file_path) const {
try {
- std::ofstream outFile(file_path);
- if (!outFile) {
+ std::ofstream out_file(file_path);
+ if (!out_file) {
throw std::runtime_error("Failed to open output file.");
}
// Write GENERAL GGUF METADATA
- outFile << "# BEGIN GENERAL GGUF METADATA\n";
- outFile << format_utils::writeKeyValue(
+ out_file << "# BEGIN GENERAL GGUF METADATA\n";
+ out_file << format_utils::WriteKeyValue(
"id", yaml_node_["id"],
"Model ID unique between models (author / quantization)");
- outFile << format_utils::writeKeyValue(
+ out_file << format_utils::WriteKeyValue(
"model", yaml_node_["model"],
"Model ID which is used for request construct - should be "
"unique between models (author / quantization)");
- outFile << format_utils::writeKeyValue("name", yaml_node_["name"],
+ out_file << format_utils::WriteKeyValue("name", yaml_node_["name"],
"metadata.general.name");
if (yaml_node_["version"]) {
- outFile << "version: " << yaml_node_["version"].as() << "\n";
+ out_file << "version: " << yaml_node_["version"].as() << "\n";
}
if (yaml_node_["files"] && yaml_node_["files"].size()) {
- outFile << "files: # Can be relative OR absolute local file "
+ out_file << "files: # Can be relative OR absolute local file "
"path\n";
for (const auto& source : yaml_node_["files"]) {
- outFile << " - " << source << "\n";
+ out_file << " - " << source << "\n";
}
}
- outFile << "# END GENERAL GGUF METADATA\n";
- outFile << "\n";
+ out_file << "# END GENERAL GGUF METADATA\n";
+ out_file << "\n";
// Write INFERENCE PARAMETERS
- outFile << "# BEGIN INFERENCE PARAMETERS\n";
- outFile << "# BEGIN REQUIRED\n";
+ out_file << "# BEGIN INFERENCE PARAMETERS\n";
+ out_file << "# BEGIN REQUIRED\n";
if (yaml_node_["stop"] && yaml_node_["stop"].size()) {
- outFile << "stop: # tokenizer.ggml.eos_token_id\n";
+ out_file << "stop: # tokenizer.ggml.eos_token_id\n";
for (const auto& stop : yaml_node_["stop"]) {
- outFile << " - " << stop << "\n";
+ out_file << " - " << stop << "\n";
}
}
- outFile << "# END REQUIRED\n";
- outFile << "\n";
- outFile << "# BEGIN OPTIONAL\n";
- outFile << format_utils::writeKeyValue("size", yaml_node_["size"]);
- outFile << format_utils::writeKeyValue("stream", yaml_node_["stream"],
+ out_file << "# END REQUIRED\n";
+ out_file << "\n";
+ out_file << "# BEGIN OPTIONAL\n";
+ out_file << format_utils::WriteKeyValue("size", yaml_node_["size"]);
+ out_file << format_utils::WriteKeyValue("stream", yaml_node_["stream"],
"Default true?");
- outFile << format_utils::writeKeyValue("top_p", yaml_node_["top_p"],
+ out_file << format_utils::WriteKeyValue("top_p", yaml_node_["top_p"],
"Ranges: 0 to 1");
- outFile << format_utils::writeKeyValue(
+ out_file << format_utils::WriteKeyValue(
"temperature", yaml_node_["temperature"], "Ranges: 0 to 1");
- outFile << format_utils::writeKeyValue(
+ out_file << format_utils::WriteKeyValue(
"frequency_penalty", yaml_node_["frequency_penalty"], "Ranges: 0 to 1");
- outFile << format_utils::writeKeyValue(
+ out_file << format_utils::WriteKeyValue(
"presence_penalty", yaml_node_["presence_penalty"], "Ranges: 0 to 1");
- outFile << format_utils::writeKeyValue(
+ out_file << format_utils::WriteKeyValue(
"max_tokens", yaml_node_["max_tokens"],
"Should be default to context length");
- outFile << format_utils::writeKeyValue("seed", yaml_node_["seed"]);
- outFile << format_utils::writeKeyValue("dynatemp_range",
+ out_file << format_utils::WriteKeyValue("seed", yaml_node_["seed"]);
+ out_file << format_utils::WriteKeyValue("dynatemp_range",
yaml_node_["dynatemp_range"]);
- outFile << format_utils::writeKeyValue("dynatemp_exponent",
+ out_file << format_utils::WriteKeyValue("dynatemp_exponent",
yaml_node_["dynatemp_exponent"]);
- outFile << format_utils::writeKeyValue("top_k", yaml_node_["top_k"]);
- outFile << format_utils::writeKeyValue("min_p", yaml_node_["min_p"]);
- outFile << format_utils::writeKeyValue("tfs_z", yaml_node_["tfs_z"]);
- outFile << format_utils::writeKeyValue("typ_p", yaml_node_["typ_p"]);
- outFile << format_utils::writeKeyValue("repeat_last_n",
+ out_file << format_utils::WriteKeyValue("top_k", yaml_node_["top_k"]);
+ out_file << format_utils::WriteKeyValue("min_p", yaml_node_["min_p"]);
+ out_file << format_utils::WriteKeyValue("tfs_z", yaml_node_["tfs_z"]);
+ out_file << format_utils::WriteKeyValue("typ_p", yaml_node_["typ_p"]);
+ out_file << format_utils::WriteKeyValue("repeat_last_n",
yaml_node_["repeat_last_n"]);
- outFile << format_utils::writeKeyValue("repeat_penalty",
+ out_file << format_utils::WriteKeyValue("repeat_penalty",
yaml_node_["repeat_penalty"]);
- outFile << format_utils::writeKeyValue("mirostat", yaml_node_["mirostat"]);
- outFile << format_utils::writeKeyValue("mirostat_tau",
+ out_file << format_utils::WriteKeyValue("mirostat", yaml_node_["mirostat"]);
+ out_file << format_utils::WriteKeyValue("mirostat_tau",
yaml_node_["mirostat_tau"]);
- outFile << format_utils::writeKeyValue("mirostat_eta",
+ out_file << format_utils::WriteKeyValue("mirostat_eta",
yaml_node_["mirostat_eta"]);
- outFile << format_utils::writeKeyValue("penalize_nl",
+ out_file << format_utils::WriteKeyValue("penalize_nl",
yaml_node_["penalize_nl"]);
- outFile << format_utils::writeKeyValue("ignore_eos",
+ out_file << format_utils::WriteKeyValue("ignore_eos",
yaml_node_["ignore_eos"]);
- outFile << format_utils::writeKeyValue("n_probs", yaml_node_["n_probs"]);
- outFile << format_utils::writeKeyValue("min_keep", yaml_node_["min_keep"]);
- outFile << format_utils::writeKeyValue("grammar", yaml_node_["grammar"]);
- outFile << "# END OPTIONAL\n";
- outFile << "# END INFERENCE PARAMETERS\n";
- outFile << "\n";
+ out_file << format_utils::WriteKeyValue("n_probs", yaml_node_["n_probs"]);
+ out_file << format_utils::WriteKeyValue("min_keep", yaml_node_["min_keep"]);
+ out_file << format_utils::WriteKeyValue("grammar", yaml_node_["grammar"]);
+ out_file << "# END OPTIONAL\n";
+ out_file << "# END INFERENCE PARAMETERS\n";
+ out_file << "\n";
// Write MODEL LOAD PARAMETERS
- outFile << "# BEGIN MODEL LOAD PARAMETERS\n";
- outFile << "# BEGIN REQUIRED\n";
- outFile << format_utils::writeKeyValue("engine", yaml_node_["engine"],
+ out_file << "# BEGIN MODEL LOAD PARAMETERS\n";
+ out_file << "# BEGIN REQUIRED\n";
+ out_file << format_utils::WriteKeyValue("engine", yaml_node_["engine"],
"engine to run model");
- outFile << "prompt_template:";
- outFile << " " << yaml_node_["prompt_template"] << "\n";
- outFile << "# END REQUIRED\n";
- outFile << "\n";
- outFile << "# BEGIN OPTIONAL\n";
- outFile << format_utils::writeKeyValue(
+ out_file << "prompt_template:";
+ out_file << " " << yaml_node_["prompt_template"] << "\n";
+ out_file << "# END REQUIRED\n";
+ out_file << "\n";
+ out_file << "# BEGIN OPTIONAL\n";
+ out_file << format_utils::WriteKeyValue(
"ctx_len", yaml_node_["ctx_len"],
"llama.context_length | 0 or undefined = loaded from model");
- outFile << format_utils::writeKeyValue("n_parallel",
+ out_file << format_utils::WriteKeyValue("n_parallel",
yaml_node_["n_parallel"]);
- outFile << format_utils::writeKeyValue("ngl", yaml_node_["ngl"],
+ out_file << format_utils::WriteKeyValue("cpu_threads",
+ yaml_node_["cpu_threads"]);
+ out_file << format_utils::WriteKeyValue("ngl", yaml_node_["ngl"],
"Undefined = loaded from model");
- outFile << "# END OPTIONAL\n";
- outFile << "# END MODEL LOAD PARAMETERS\n";
+ out_file << "# END OPTIONAL\n";
+ out_file << "# END MODEL LOAD PARAMETERS\n";
- outFile.close();
+ out_file.close();
} catch (const std::exception& e) {
std::cerr << "Error writing to file: " << e.what() << std::endl;
throw;
diff --git a/engine/controllers/assistants.cc b/engine/controllers/assistants.cc
index 405d7ed3c..530e180a5 100644
--- a/engine/controllers/assistants.cc
+++ b/engine/controllers/assistants.cc
@@ -1,4 +1,6 @@
#include "assistants.h"
+#include "common/api-dto/delete_success_response.h"
+#include "common/dto/assistant_create_dto.h"
#include "utils/cortex_utils.h"
#include "utils/logging_utils.h"
@@ -6,7 +8,12 @@ void Assistants::RetrieveAssistant(
const HttpRequestPtr& req,
std::function<void(const HttpResponsePtr&)>&& callback,
const std::string& assistant_id) const {
- CTL_INF("RetrieveAssistant: " + assistant_id);
+ const auto& headers = req->headers();
+ auto it = headers.find(kOpenAiAssistantKeyV2);
+ if (it != headers.end() && it->second == kOpenAiAssistantValueV2) {
+ return RetrieveAssistantV2(req, std::move(callback), assistant_id);
+ }
+
auto res = assistant_service_->RetrieveAssistant(assistant_id);
if (res.has_error()) {
Json::Value ret;
@@ -33,6 +40,78 @@ void Assistants::RetrieveAssistant(
}
}
+void Assistants::RetrieveAssistantV2(
+ const HttpRequestPtr& req,
+ std::function<void(const HttpResponsePtr&)>&& callback,
+ const std::string& assistant_id) const {
+ auto res = assistant_service_->RetrieveAssistantV2(assistant_id);
+
+ if (res.has_error()) {
+ Json::Value ret;
+ ret["message"] = res.error();
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+ resp->setStatusCode(k400BadRequest);
+ callback(resp);
+ } else {
+ auto to_json_res = res->ToJson();
+ if (to_json_res.has_error()) {
+ CTL_ERR("Failed to convert assistant to json: " + to_json_res.error());
+ Json::Value ret;
+ ret["message"] = to_json_res.error();
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+ resp->setStatusCode(k400BadRequest);
+ callback(resp);
+ } else {
+ // TODO: namh need to use the text response because it contains model config
+ auto resp =
+ cortex_utils::CreateCortexHttpJsonResponse(res->ToJson().value());
+ resp->setStatusCode(k200OK);
+ callback(resp);
+ }
+ }
+}
+
+void Assistants::CreateAssistantV2(
+ const HttpRequestPtr& req,
+ std::function<void(const HttpResponsePtr&)>&& callback) {
+ auto json_body = req->getJsonObject();
+ if (json_body == nullptr) {
+ Json::Value ret;
+ ret["message"] = "Request body can't be empty";
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+ resp->setStatusCode(k400BadRequest);
+ callback(resp);
+ return;
+ }
+
+ auto dto = dto::CreateAssistantDto::FromJson(std::move(*json_body));
+ CTL_INF("CreateAssistantV2: " << dto.model);
+ auto validate_res = dto.Validate();
+ if (validate_res.has_error()) {
+ Json::Value ret;
+ ret["message"] = validate_res.error();
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+ resp->setStatusCode(k400BadRequest);
+ callback(resp);
+ return;
+ }
+
+ auto res = assistant_service_->CreateAssistantV2(dto);
+ if (res.has_error()) {
+ Json::Value ret;
+ ret["message"] = res.error();
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+ resp->setStatusCode(k400BadRequest);
+ callback(resp);
+ return;
+ }
+
+ auto to_json_res = res->ToJson();
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(to_json_res.value());
+ resp->setStatusCode(k200OK);
+ callback(resp);
+}
+
void Assistants::CreateAssistant(
const HttpRequestPtr& req,
std::function<void(const HttpResponsePtr&)>&& callback,
@@ -88,10 +167,55 @@ void Assistants::CreateAssistant(
callback(resp);
}
+void Assistants::ModifyAssistantV2(
+ const HttpRequestPtr& req,
+ std::function<void(const HttpResponsePtr&)>&& callback,
+ const std::string& assistant_id) {
+ auto json_body = req->getJsonObject();
+ if (json_body == nullptr) {
+ Json::Value ret;
+ ret["message"] = "Request body can't be empty";
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+ resp->setStatusCode(k400BadRequest);
+ callback(resp);
+ return;
+ }
+
+ auto dto = dto::UpdateAssistantDto::FromJson(std::move(*json_body));
+ auto validate_res = dto.Validate();
+ if (validate_res.has_error()) {
+ Json::Value ret;
+ ret["message"] = validate_res.error();
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+ resp->setStatusCode(k400BadRequest);
+ callback(resp);
+ return;
+ }
+
+ auto res = assistant_service_->ModifyAssistantV2(assistant_id, dto);
+ if (res.has_error()) {
+ Json::Value ret;
+ ret["message"] = res.error();
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+ resp->setStatusCode(k400BadRequest);
+ callback(resp);
+ return;
+ }
+
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(res->ToJson().value());
+ resp->setStatusCode(k200OK);
+ callback(resp);
+}
+
void Assistants::ModifyAssistant(
const HttpRequestPtr& req,
std::function<void(const HttpResponsePtr&)>&& callback,
const std::string& assistant_id) {
+ const auto& headers = req->headers();
+ auto it = headers.find(kOpenAiAssistantKeyV2);
+ if (it != headers.end() && it->second == kOpenAiAssistantValueV2) {
+ return ModifyAssistantV2(req, std::move(callback), assistant_id);
+ }
auto json_body = req->getJsonObject();
if (json_body == nullptr) {
Json::Value ret;
@@ -142,3 +266,62 @@ void Assistants::ModifyAssistant(
resp->setStatusCode(k200OK);
callback(resp);
}
+
+void Assistants::ListAssistants(
+ const HttpRequestPtr& req,
+ std::function<void(const HttpResponsePtr&)>&& callback,
+ std::optional<std::string> limit, std::optional<std::string> order,
+ std::optional<std::string> after, std::optional<std::string> before) const {
+
+ auto res = assistant_service_->ListAssistants(
+ std::stoi(limit.value_or("20")), order.value_or("desc"),
+ after.value_or(""), before.value_or(""));
+ if (res.has_error()) {
+ Json::Value root;
+ root["message"] = res.error();
+ auto response = cortex_utils::CreateCortexHttpJsonResponse(root);
+ response->setStatusCode(k400BadRequest);
+ callback(response);
+ return;
+ }
+
+ Json::Value assistant_list(Json::arrayValue);
+ for (auto& assistant : res.value()) {
+ if (auto it = assistant.ToJson(); it.has_value()) {
+ assistant_list.append(it.value());
+ } else {
+ CTL_WRN("Failed to convert assistant to json: " + it.error());
+ }
+ }
+
+ Json::Value root;
+ root["object"] = "list";
+ root["data"] = assistant_list;
+ auto response = cortex_utils::CreateCortexHttpJsonResponse(root);
+ response->setStatusCode(k200OK);
+ callback(response);
+}
+
+void Assistants::DeleteAssistant(
+ const HttpRequestPtr& req,
+ std::function<void(const HttpResponsePtr&)>&& callback,
+ const std::string& assistant_id) {
+ auto res = assistant_service_->DeleteAssistantV2(assistant_id);
+ if (res.has_error()) {
+ Json::Value ret;
+ ret["message"] = res.error();
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret);
+ resp->setStatusCode(k400BadRequest);
+ callback(resp);
+ return;
+ }
+
+ api_response::DeleteSuccessResponse response;
+ response.id = assistant_id;
+ response.object = "assistant.deleted";
+ response.deleted = true;
+ auto resp =
+ cortex_utils::CreateCortexHttpJsonResponse(response.ToJson().value());
+ resp->setStatusCode(k200OK);
+ callback(resp);
+}
diff --git a/engine/controllers/assistants.h b/engine/controllers/assistants.h
index 94ddd14b1..30111bb01 100644
--- a/engine/controllers/assistants.h
+++ b/engine/controllers/assistants.h
@@ -7,33 +7,72 @@
using namespace drogon;
class Assistants : public drogon::HttpController<Assistants> {
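+ // Requests carrying the header "openai-beta: assistants=v2" are routed
+ // to the V2 handlers declared below.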
+ constexpr static auto kOpenAiAssistantKeyV2 = "openai-beta";
+ constexpr static auto kOpenAiAssistantValueV2 = "assistants=v2";
+
public:
METHOD_LIST_BEGIN
+ ADD_METHOD_TO(
+ Assistants::ListAssistants,
+ "/v1/"
+ "assistants?limit={limit}&order={order}&after={after}&before={before}",
+ Get);
+
+ ADD_METHOD_TO(Assistants::DeleteAssistant, "/v1/assistants/{assistant_id}",
+ Options, Delete);
+
ADD_METHOD_TO(Assistants::RetrieveAssistant, "/v1/assistants/{assistant_id}",
Get);
ADD_METHOD_TO(Assistants::CreateAssistant, "/v1/assistants/{assistant_id}",
Options, Post);
+ ADD_METHOD_TO(Assistants::CreateAssistantV2, "/v1/assistants", Options, Post);
+
ADD_METHOD_TO(Assistants::ModifyAssistant, "/v1/assistants/{assistant_id}",
Options, Patch);
+
METHOD_LIST_END
explicit Assistants(std::shared_ptr<AssistantService> assistant_srv)
: assistant_service_{assistant_srv} {};
+ void ListAssistants(const HttpRequestPtr& req,
+ std::function<void(const HttpResponsePtr&)>&& callback,
+ std::optional<std::string> limit,
+ std::optional<std::string> order,
+ std::optional<std::string> after,
+ std::optional<std::string> before) const;
+
void RetrieveAssistant(const HttpRequestPtr& req,
std::function<void(const HttpResponsePtr&)>&& callback,
const std::string& assistant_id) const;
+ void RetrieveAssistantV2(
+ const HttpRequestPtr& req,
+ std::function<void(const HttpResponsePtr&)>&& callback,
+ const std::string& assistant_id) const;
+
+ void DeleteAssistant(const HttpRequestPtr& req,
+ std::function<void(const HttpResponsePtr&)>&& callback,
+ const std::string& assistant_id);
+
void CreateAssistant(const HttpRequestPtr& req,
std::function<void(const HttpResponsePtr&)>&& callback,
const std::string& assistant_id);
+ void CreateAssistantV2(
+ const HttpRequestPtr& req,
+ std::function<void(const HttpResponsePtr&)>&& callback);
+
void ModifyAssistant(const HttpRequestPtr& req,
std::function<void(const HttpResponsePtr&)>&& callback,
const std::string& assistant_id);
+ void ModifyAssistantV2(const HttpRequestPtr& req,
+ std::function<void(const HttpResponsePtr&)>&& callback,
+ const std::string& assistant_id);
+
private:
std::shared_ptr<AssistantService> assistant_service_;
};
diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc
index a92d6805f..8cf98785e 100644
--- a/engine/controllers/engines.cc
+++ b/engine/controllers/engines.cc
@@ -3,7 +3,9 @@
#include "utils/archive_utils.h"
#include "utils/cortex_utils.h"
#include "utils/engine_constants.h"
+#include "utils/http_util.h"
#include "utils/logging_utils.h"
+#include "utils/scope_exit.h"
#include "utils/string_utils.h"
namespace {
@@ -129,7 +131,8 @@ void Engines::GetEngineReleases(
void Engines::GetEngineVariants(
const HttpRequestPtr& req,
std::function<void(const HttpResponsePtr&)>&& callback,
- const std::string& engine, const std::string& version) const {
+ const std::string& engine, const std::string& version,
+ std::optional<std::string> show) const {
if (engine.empty()) {
Json::Value res;
res["message"] = "Engine name is required";
@@ -140,7 +143,18 @@ void Engines::GetEngineVariants(
return;
}
- auto result = engine_service_->GetEngineVariants(engine, version);
+ auto show_value = show.value_or("all");
+ if (show_value != "all" && show_value != "compatible") {
+ Json::Value res;
+ res["message"] = "Invalid show value. Can either be `all` or `compatible`";
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
+ resp->setStatusCode(k400BadRequest);
+ callback(resp);
+ return;
+ }
+
+ auto result = engine_service_->GetEngineVariants(engine, version,
+ show_value == "compatible");
auto normalize_version = string_utils::RemoveSubstring(version, "v");
Json::Value releases(Json::arrayValue);
@@ -173,21 +187,58 @@ void Engines::InstallEngine(
norm_version = version;
}
- if ((req->getJsonObject()) &&
- (*(req->getJsonObject())).get("type", "").asString() == "remote") {
- auto type = (*(req->getJsonObject())).get("type", "").asString();
- auto api_key = (*(req->getJsonObject())).get("api_key", "").asString();
- auto url = (*(req->getJsonObject())).get("url", "").asString();
+ auto result =
+ engine_service_->InstallEngineAsync(engine, norm_version, norm_variant);
+ if (result.has_error()) {
+ Json::Value res;
+ res["message"] = result.error();
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
+ resp->setStatusCode(k400BadRequest);
+ CTL_INF("Error: " << result.error());
+ callback(resp);
+ } else {
+ Json::Value res;
+ res["message"] = "Engine starts installing!";
+ auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
+ resp->setStatusCode(k200OK);
+ CTL_INF("Engine starts installing!");
+ callback(resp);
+ }
+}
+
+void Engines::InstallRemoteEngine(
+ const HttpRequestPtr& req,
+ std::function<void(const HttpResponsePtr&)>&& callback) {
+ if (!http_util::HasFieldInReq(req, callback, "engine")) {
+ return;
+ }
+ std::optional<std::string> norm_variant = std::nullopt;
+ std::string norm_version{"latest"};
+
+ if (req->getJsonObject() != nullptr) {
+ auto variant = (*(req->getJsonObject())).get("variant", "").asString();
+ auto version =
+ (*(req->getJsonObject())).get("version", "latest").asString();
+
+ if (!variant.empty()) {
+ norm_variant = variant;
+ }
+ norm_version = version;
+ }
+
+ std::string engine;
+ if (auto o = req->getJsonObject(); o) {
+ engine = (*o).get("engine", "").asString();
+ auto type = (*o).get("type", "").asString();
+ auto api_key = (*o).get("api_key", "").asString();
+ auto url = (*o).get("url", "").asString();
auto variant = norm_variant.value_or("all-platforms");
- auto status = (*(req->getJsonObject())).get("status", "Default").asString();
+ auto status = (*o).get("status", "Default").asString();
std::string metadata;
- if ((*(req->getJsonObject())).isMember("metadata") &&
- (*(req->getJsonObject()))["metadata"].isObject()) {
- metadata = (*(req->getJsonObject()))
- .get("metadata", Json::Value(Json::objectValue))
- .toStyledString();
- } else if ((*(req->getJsonObject())).isMember("metadata") &&
- !(*(req->getJsonObject()))["metadata"].isObject()) {
+ if ((*o).isMember("metadata") && (*o)["metadata"].isObject()) {
+ metadata =
+ (*o).get("metadata", Json::Value(Json::objectValue)).toStyledString();
+ } else if ((*o).isMember("metadata") && !(*o)["metadata"].isObject()) {
Json::Value res;
res["message"] = "metadata must be object";
auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
@@ -196,8 +247,7 @@ void Engines::InstallEngine(
return;
}
- auto get_models_url = (*(req->getJsonObject()))
- .get("metadata", Json::Value(Json::objectValue))
+ auto get_models_url = (*o).get("metadata", Json::Value(Json::objectValue))
.get("get_models_url", "")
.asString();
@@ -250,25 +300,6 @@ void Engines::InstallEngine(
resp->setStatusCode(k200OK);
callback(resp);
}
- return;
- }
-
- auto result =
- engine_service_->InstallEngineAsync(engine, norm_version, norm_variant);
- if (result.has_error()) {
- Json::Value res;
- res["message"] = result.error();
- auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
- resp->setStatusCode(k400BadRequest);
- CTL_INF("Error: " << result.error());
- callback(resp);
- } else {
- Json::Value res;
- res["message"] = "Engine starts installing!";
- auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
- resp->setStatusCode(k200OK);
- CTL_INF("Engine starts installing!");
- callback(resp);
}
}
@@ -276,6 +307,24 @@ void Engines::GetInstalledEngineVariants(
const HttpRequestPtr& req,
std::function