Commit 29241ca

Merge remote-tracking branch 'upstream/main' into upstream_merge_2025_04_29

2 parents: 2c68ff9 + 4c33d67

209 files changed (+3,943 −1,561 lines)


.buildkite/test-pipeline.yaml

Lines changed: 48 additions & 48 deletions
@@ -301,6 +301,7 @@ steps:
   parallelism: 4

 - label: PyTorch Compilation Unit Tests
+  torch_nightly: true
   source_file_dependencies:
   - vllm/
   - tests/compile
@@ -310,6 +311,7 @@ steps:
   - pytest -v -s compile/test_sequence_parallelism.py

 - label: PyTorch Fullgraph Smoke Test # 9min
+  torch_nightly: true
   source_file_dependencies:
   - vllm/
   - tests/compile
@@ -320,6 +322,7 @@ steps:
   - pytest -v -s compile/piecewise/test_toy_llama.py

 - label: PyTorch Fullgraph Test # 18min
+  torch_nightly: true
   source_file_dependencies:
   - vllm/
   - tests/compile
@@ -401,12 +404,13 @@ steps:
   commands:
   - pytest -v -s benchmarks/

-- label: Quantization Test # 33min
+- label: Quantization Test
   source_file_dependencies:
   - csrc/
   - vllm/model_executor/layers/quantization
   - tests/quantization
-  command: VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization
+  commands:
+  - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization

 - label: LM Eval Small Models # 53min
   working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
@@ -447,88 +451,85 @@ steps:
 ##### models test #####

 - label: Basic Models Test # 24min
+  torch_nightly: true
   source_file_dependencies:
   - vllm/
   - tests/models
   commands:
   - pytest -v -s models/test_transformers.py
   - pytest -v -s models/test_registry.py
+  - pytest -v -s models/test_utils.py
+  - pytest -v -s models/test_vision.py
   # V1 Test: https://github.com/vllm-project/vllm/issues/14531
   - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'
   - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'llama4'
   - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2'

-- label: Language Models Test (Standard) # 32min
+- label: Language Models Test (Standard)
   #mirror_hardwares: [amd]
   source_file_dependencies:
   - vllm/
-  - tests/models/decoder_only/language
-  - tests/models/embedding/language
-  - tests/models/encoder_decoder/language
+  - tests/models/language
   commands:
   # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
   - pip install 'git+https://github.com/Dao-AILab/[email protected]'
-  - pytest -v -s models/decoder_only/language -m 'core_model or quant_model'
-  - pytest -v -s models/embedding/language -m core_model
+  - pytest -v -s models/language -m core_model

-- label: Language Models Test (Extended) # 1h10min
+- label: Language Models Test (Extended)
   optional: true
   source_file_dependencies:
   - vllm/
-  - tests/models/decoder_only/language
-  - tests/models/embedding/language
-  - tests/models/encoder_decoder/language
+  - tests/models/language
   commands:
   # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
-  - pip install causal-conv1d
-  - pytest -v -s models/decoder_only/language -m 'not core_model and not quant_model'
-  - pytest -v -s models/embedding/language -m 'not core_model'
+  - pip install 'git+https://github.com/Dao-AILab/[email protected]'
+  - pytest -v -s models/language -m 'not core_model'

-- label: Multi-Modal Models Test (Standard) # 40min
+- label: Multi-Modal Models Test (Standard)
   #mirror_hardwares: [amd]
   source_file_dependencies:
   - vllm/
-  - tests/models/decoder_only/audio_language
-  - tests/models/decoder_only/vision_language
-  - tests/models/embedding/vision_language
-  - tests/models/encoder_decoder/audio_language
-  - tests/models/encoder_decoder/vision_language
+  - tests/models/multimodal
   commands:
   - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/multimodal
-  - pytest -v -s models/decoder_only/audio_language -m 'core_model or quant_model'
-  - pytest -v -s models/decoder_only/vision_language -m 'core_model or quant_model'
-  - pytest -v -s models/embedding/vision_language -m core_model
-  - pytest -v -s models/encoder_decoder/audio_language -m core_model
-  - pytest -v -s models/encoder_decoder/language -m core_model
-  - pytest -v -s models/encoder_decoder/vision_language -m core_model
-  - pytest -v -s models/decoder_only/vision_language/test_interleaved.py
-
-- label: Multi-Modal Models Test (Extended) 1 # 48m
+  - pytest -v -s models/multimodal/processing
+  - pytest -v -s --ignore models/multimodal/generation/test_whisper.py models/multimodal -m core_model
+  - cd .. && pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model # Otherwise, mp_method="spawn" doesn't work
+
+- label: Multi-Modal Models Test (Extended) 1
   optional: true
   source_file_dependencies:
   - vllm/
-  - tests/models/decoder_only/audio_language
-  - tests/models/decoder_only/vision_language
-  - tests/models/embedding/vision_language
-  - tests/models/encoder_decoder/vision_language
+  - tests/models/multimodal
   commands:
   - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/decoder_only/audio_language -m 'not core_model and not quant_model'
-  - pytest -v -s models/decoder_only/vision_language/test_models.py -m 'split(group=0) and not core_model and not quant_model'
-  - pytest -v -s --ignore models/decoder_only/vision_language/test_models.py models/decoder_only/vision_language -m 'not core_model and not quant_model'
-  - pytest -v -s models/embedding/vision_language -m 'not core_model'
-  - pytest -v -s models/encoder_decoder/language -m 'not core_model'
-  - pytest -v -s models/encoder_decoder/vision_language -m 'not core_model'
-
-- label: Multi-Modal Models Test (Extended) 2 # 38m
+  - pytest -v -s --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/processing models/multimodal -m 'not core_model'
+
+- label: Multi-Modal Models Test (Extended) 2
   optional: true
   source_file_dependencies:
   - vllm/
-  - tests/models/decoder_only/vision_language
+  - tests/models/multimodal
   commands:
   - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-  - pytest -v -s models/decoder_only/vision_language/test_models.py -m 'split(group=1) and not core_model and not quant_model'
+  - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model'
+
+- label: Multi-Modal Models Test (Extended) 3
+  optional: true
+  source_file_dependencies:
+  - vllm/
+  - tests/models/multimodal
+  commands:
+  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+  - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'
+
+- label: Quantized Models Test
+  #mirror_hardwares: [amd]
+  source_file_dependencies:
+  - vllm/model_executor/layers/quantization
+  - tests/models/quantization
+  commands:
+  - pytest -v -s models/quantization

 # This test is used only in PR development phase to test individual models and should never run on main
 - label: Custom Models Test
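A note on the sharding above: Extended 2 and Extended 3 split models/multimodal/generation/test_common.py between two CI jobs via the marker expressions -m 'split(group=0) and not core_model' and -m 'split(group=1) and not core_model', while Extended 1 covers the remaining multimodal tests with --ignore flags. As a minimal, hypothetical sketch of how a split(group=N) marker can shard a suite (this is not vLLM's actual conftest; the real pipeline drives selection through -m, whereas this sketch assumes a custom --split-group option):

    # conftest.py -- illustrative sketch only; the --split-group option
    # and hook logic are assumptions, not vLLM's implementation.
    import pytest

    def pytest_addoption(parser):
        # Hypothetical CLI option standing in for the pipeline's `-m` selection.
        parser.addoption(
            "--split-group", type=int, default=None,
            help="only run tests marked with split(group=<this value>)",
        )

    def pytest_configure(config):
        # Register the marker so pytest does not warn about it.
        config.addinivalue_line(
            "markers", "split(group): assign a test to a CI shard",
        )

    def pytest_collection_modifyitems(config, items):
        group = config.getoption("--split-group")
        if group is None:
            return  # no sharding requested; run everything collected
        selected, deselected = [], []
        for item in items:
            marker = item.get_closest_marker("split")
            # Keep unmarked tests and tests whose group matches.
            if marker is None or marker.kwargs.get("group") == group:
                selected.append(item)
            else:
                deselected.append(item)
        if deselected:
            config.hook.pytest_deselected(items=deselected)
            items[:] = selected

Under this sketch, pytest models/multimodal/generation/test_common.py --split-group 0 would run roughly the shard the pipeline selects with -m 'split(group=0)'.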
@@ -598,9 +599,8 @@ steps:
   - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
   # Avoid importing model tests that cause CUDA reinitialization error
   - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
-  - pytest models/encoder_decoder/language/test_bart.py -v -s -m 'distributed(num_gpus=2)'
-  - pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m 'distributed(num_gpus=2)'
-  - pytest models/decoder_only/vision_language/test_models.py -v -s -m 'distributed(num_gpus=2)'
+  - pytest models/language -v -s -m 'distributed(num_gpus=2)'
+  - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)'
   # test sequence parallel
   - pytest -v -s distributed/test_sequence_parallel.py
   # this test fails consistently.
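For context, core_model and distributed(num_gpus=2) in the commands above are pytest marker filters. A hypothetical test file showing how such markers are applied (the marker names mirror the pipeline's filters, but the file and test bodies are placeholders, not vLLM code; both markers would need registration, as in the conftest sketch earlier, to avoid warnings):

    # test_example.py -- hypothetical file, illustrative only.
    import pytest

    @pytest.mark.core_model
    def test_generation_smoke():
        # Selected by `-m core_model` (Standard jobs); excluded from the
        # Extended jobs, which run with `-m 'not core_model'`.
        assert True

    @pytest.mark.distributed(num_gpus=2)
    def test_two_gpu_generation():
        # Matches the 2-GPU job's `-m 'distributed(num_gpus=2)'` filter.
        assert True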

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -242,6 +242,7 @@ set(VLLM_EXT_SRC
   "csrc/quantization/fp8/common.cu"
   "csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu"
   "csrc/quantization/gguf/gguf_kernel.cu"
+  "csrc/quantization/activation_kernels.cu"
   "csrc/cuda_utils_kernels.cu"
   "csrc/prepare_inputs/advance_step.cu"
   "csrc/custom_all_reduce.cu"
