@@ -9,31 +9,31 @@ CORE_RANGE=${CORE_RANGE:-48-95}
9
9
NUMA_NODE=${NUMA_NODE:-1}
10
10
11
11
# Try building the docker image
12
- numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test -f Dockerfile.cpu .
13
- numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-avx2 -f Dockerfile.cpu .
12
+ numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f Dockerfile.cpu .
13
+ numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f Dockerfile.cpu .
14
14
15
15
# Setup cleanup
16
- remove_docker_container() { docker rm -f cpu-test-"$NUMA_NODE" cpu-test-avx2-"$NUMA_NODE" || true; }
16
+ remove_docker_container() { docker rm -f cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" || true; }
17
17
trap remove_docker_container EXIT
18
18
remove_docker_container
19
19
20
20
# Run the image, setting --shm-size=4g for tensor parallel.
21
21
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \
22
- --cpuset-mems="$NUMA_NODE" --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-"$NUMA_NODE" cpu-test
22
+ --cpuset-mems="$NUMA_NODE" --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"
23
23
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \
24
- --cpuset-mems="$NUMA_NODE" --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-avx2-"$NUMA_NODE" cpu-test-avx2
24
+ --cpuset-mems="$NUMA_NODE" --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2
25
25
26
26
function cpu_tests() {
27
27
set -e
28
28
export NUMA_NODE=$2
29
29
30
30
# offline inference
31
- docker exec cpu-test-avx2-"$NUMA_NODE" bash -c "
31
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" bash -c "
32
32
set -e
33
33
python3 examples/offline_inference.py"
34
34
35
35
# Run basic model test
36
- docker exec cpu-test-"$NUMA_NODE" bash -c "
36
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
37
37
set -e
38
38
pip install pytest pytest-asyncio \
39
39
decord einops librosa peft Pillow sentence-transformers soundfile \
@@ -46,26 +46,26 @@ function cpu_tests() {
46
46
pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"
47
47
48
48
# Run compressed-tensor test
49
- docker exec cpu-test-"$NUMA_NODE" bash -c "
49
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
50
50
set -e
51
51
pytest -s -v \
52
52
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_static_setup \
53
53
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_dynamic_per_token"
54
54
55
55
# Run AWQ test
56
- docker exec cpu-test-"$NUMA_NODE" bash -c "
56
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
57
57
set -e
58
58
pytest -s -v \
59
59
tests/quantization/test_ipex_quant.py"
60
60
61
61
# Run chunked-prefill and prefix-cache test
62
- docker exec cpu-test-"$NUMA_NODE" bash -c "
62
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
63
63
set -e
64
64
pytest -s -v -k cpu_model \
65
65
tests/basic_correctness/test_chunked_prefill.py"
66
66
67
67
# online inference
68
- docker exec cpu-test-"$NUMA_NODE" bash -c "
68
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
69
69
set -e
70
70
export VLLM_CPU_KVCACHE_SPACE=10
71
71
export VLLM_CPU_OMP_THREADS_BIND=$1
0 commit comments