diff --git a/vllm-benchmarks/cron.sh b/vllm-benchmarks/cron.sh index 5e5721f..08ec2e7 100755 --- a/vllm-benchmarks/cron.sh +++ b/vllm-benchmarks/cron.sh @@ -27,14 +27,14 @@ run() { NOT_EXIST=0 - S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${COMMIT}/benchmark_results.json" + S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${COMMIT}/${GPU_DEVICE}/benchmark_results.json" aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1 if [[ ${NOT_EXIST:-0} == "0" ]]; then echo "${COMMIT}" > commit echo "Mark ${COMMIT} as the latest commit that has been benchmarked on main" - S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/commit" + S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/${GPU_DEVICE}/commit" aws s3 cp commit "s3://ossci-benchmarks/${S3_PATH}" fi } @@ -47,18 +47,30 @@ run_benchmarks() { rm commit || true # Get the last green commit from S3 - S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/commit" - aws s3 cp "s3://ossci-benchmarks/${S3_PATH}" . - LAST_GREEN_COMMIT=$(cat commit) + S3_PATH="last-green-commits/vllm-project/vllm/${HEAD_BRANCH}/${GPU_DEVICE}/commit" + aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1 + + if [[ ${NOT_EXIST:-0} == "0" ]]; then + aws s3 cp "s3://ossci-benchmarks/${S3_PATH}" . + LAST_GREEN_COMMIT=$(cat commit) - if [[ "${LAST_GREEN_COMMIT}" == "${HEAD_SHA}" ]]; then - echo "Skip ${HEAD_BRANCH}/${HEAD_SHA} because all older commits have already been benchmarked" + if [[ "${LAST_GREEN_COMMIT}" == "${HEAD_SHA}" ]]; then + echo "Skip ${HEAD_BRANCH}/${HEAD_SHA} because all older commits have already been benchmarked" + else + COMMITS=$(python get_commits.py --repo vllm --from-commit ${LAST_GREEN_COMMIT}) + echo "${COMMITS}" | while IFS= read -r COMMIT ; do run ${COMMIT} ; done + fi else - COMMITS=$(python get_commits.py --repo vllm --from-commit ${LAST_GREEN_COMMIT}) - echo "${COMMITS}" | while IFS= read -r COMMIT ; do run ${COMMIT} ; done + run "${HEAD_SHA}" fi } +if command -v nvidia-smi; then + declare -g GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}') +elif command -v amd-smi; then + declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}') +fi + while : do pull_vllm diff --git a/vllm-benchmarks/run.sh b/vllm-benchmarks/run.sh index 680e187..6dd2980 100755 --- a/vllm-benchmarks/run.sh +++ b/vllm-benchmarks/run.sh @@ -12,9 +12,6 @@ cleanup() { if [[ "${CLEANUP_BENCHMARK_RESULTS:-1}" == "1" ]]; then rm -rf vllm/benchmarks/results fi - - # https://github.com/vllm-project/vllm/issues/13392 - rm -rf ~/.cache/vllm/torch_compile_cache } setup_vllm() { @@ -43,8 +40,15 @@ build_vllm() { # TODO (huydhn) I'll setup remote cache for this later SCCACHE_CACHE_SIZE=100G sccache --start-server || true # Build and install vLLM - pip install -r requirements-build.txt - pip install --editable . + if command -v nvidia-smi; then + pip install -r requirements/build.txt + pip install --editable . + elif command -v amd-smi; then + pip install -r requirements/rocm.txt + pip install -r requirements/rocm-build.txt + # https://docs.vllm.ai/en/latest/getting_started/installation/gpu/index.html?device=rocm + PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py develop + fi popd } @@ -65,19 +69,22 @@ run_benchmark() { upload_results() { if [[ "${UPLOAD_BENCHMARK_RESULTS:-1}" == "1" ]]; then # Upload the benchmark results - python upload_benchmark_results.py --vllm vllm --benchmark-results vllm/benchmarks/results + python upload_benchmark_results.py \ + --vllm vllm \ + --benchmark-results vllm/benchmarks/results \ + --device "${GPU_DEVICE}" pushd vllm if [[ -f benchmarks/results/benchmark_results.md ]]; then # Upload the markdown file - S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.md" + S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results.md" aws s3 cp --acl public-read \ benchmarks/results/benchmark_results.md "s3://ossci-benchmarks/${S3_PATH}" fi if [[ -f benchmarks.log ]]; then # Upload the logs - S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmarks.log" + S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmarks.log" aws s3 cp --acl public-read \ benchmarks.log "s3://ossci-benchmarks/${S3_PATH}" fi @@ -99,7 +106,13 @@ pushd vllm export HEAD_BRANCH=main export HEAD_SHA=$(git rev-parse --verify HEAD) -S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/benchmark_results.json" +if command -v nvidia-smi; then + declare -g GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}') +elif command -v amd-smi; then + declare -g GPU_DEVICE=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}') +fi + +S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results.json" aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1 if [[ ${NOT_EXIST:-0} == "0" && "${OVERWRITE_BENCHMARK_RESULTS:-0}" != "1" ]]; then diff --git a/vllm-benchmarks/upload_benchmark_results.py b/vllm-benchmarks/upload_benchmark_results.py index 0c4ab22..0f05b40 100755 --- a/vllm-benchmarks/upload_benchmark_results.py +++ b/vllm-benchmarks/upload_benchmark_results.py @@ -61,6 +61,12 @@ def parse_args() -> Any: default="ossci-benchmarks", help="the S3 bucket to upload the benchmark results", ) + parser.add_argument( + "--device", + type=str, + required=True, + help="the name of the GPU device coming from nvidia-smi or amd-smi", + ) parser.add_argument( "--dry-run", action="store_true", @@ -94,10 +100,13 @@ def get_benchmark_metadata(head_branch: str, head_sha: str) -> Dict[str, Any]: def get_runner_info() -> Dict[str, Any]: + if torch.cuda.is_available() and torch.version.hip: + name = "rocm" + elif torch.cuda.is_available() and torch.version.cuda: + name = "cuda" + return { - # TODO (huydhn): Figure out a better way to set the name here without - # hard coding it to cuda - "name": "cuda", + "name": name, "type": torch.cuda.get_device_name(), "cpu_info": platform.processor(), "cpu_count": psutil.cpu_count(), @@ -155,9 +164,10 @@ def upload_to_s3( head_branch: str, head_sha: str, aggregated_results: List[Dict[str, Any]], + device: str, dry_run: bool = True, ) -> None: - s3_path = f"v3/{REPO}/{head_branch}/{head_sha}/benchmark_results.json" + s3_path = f"v3/{REPO}/{head_branch}/{head_sha}/{device}/benchmark_results.json" info(f"Upload benchmark results to s3://{s3_bucket}/{s3_path}") if not dry_run: # Write in JSONEachRow format @@ -184,7 +194,12 @@ def main() -> None: # Extract and aggregate the benchmark results aggregated_results = aggregate(metadata, runner, load(args.benchmark_results)) upload_to_s3( - args.s3_bucket, head_branch, head_sha, aggregated_results, args.dry_run + args.s3_bucket, + head_branch, + head_sha, + aggregated_results, + args.device, + args.dry_run, )