
Commit 1092866
Fixing the working directories for all CI tests.
Alexei-V-Ivanov-AMD committed Jan 16, 2025
1 parent 5c36cb8 commit 1092866
Showing 1 changed file with 43 additions and 16 deletions.
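The edit applies one pattern across the whole pipeline: steps that already set a working_dir have it rewritten from the old /vllm-workspace prefix to /app/vllm, and steps that previously relied on the container default now pin it explicitly. A minimal sketch of the resulting step shape (the label and test target below are illustrative, not taken from the diff):

steps:
- label: Example Test                # hypothetical step name
  working_dir: "/app/vllm/tests"     # was "/vllm-workspace/tests", or previously unset
  source_file_dependencies:
  - vllm/
  commands:
  - pytest -v -s example_suite       # hypothetical test directory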
59 changes: 43 additions & 16 deletions .buildkite/test-pipeline.yaml
@@ -31,7 +31,7 @@ steps:
##### fast check tests #####

- label: Documentation Build # 2min
working_dir: "/vllm-workspace/test_docs/docs"
working_dir: "/app/vllm/test_docs/docs"
fast_check: true
no_gpu: True
commands:
@@ -41,6 +41,7 @@ steps:
- grep \"sig sig-object py\" build/html/api/inference_params.html

- label: Async Engine, Inputs, Utils, Worker Test # 24min
working_dir: "/app/vllm/tests"
fast_check: true
source_file_dependencies:
- vllm/
@@ -63,6 +64,7 @@ steps:
- pytest -v -s worker # Worker

- label: Python-only Installation Test
working_dir: "/app/vllm/tests"
source_file_dependencies:
- tests/standalone_tests/python_only_compile.sh
- setup.py
@@ -83,6 +85,7 @@ steps:
- VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py

- label: Chunked Prefill Test
working_dir: "/app/vllm/tests"
source_file_dependencies:
- vllm/
- tests/basic_correctness/test_chunked_prefill
@@ -91,6 +94,7 @@ steps:
- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py

- label: Core Test # 10min
working_dir: "/app/vllm/tests"
mirror_hardwares: [amd]
fast_check: true
source_file_dependencies:
@@ -101,7 +105,7 @@ steps:
- pytest -v -s core

- label: Entrypoints Test # 40min
working_dir: "/vllm-workspace/tests"
working_dir: "/app/vllm/tests"
fast_check: true
mirror_hardwares: [amd]
source_file_dependencies:
@@ -117,7 +121,7 @@ steps:
- pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests

- label: Distributed Tests (4 GPUs) # 10min
working_dir: "/vllm-workspace/tests"
working_dir: "/app/vllm/tests"
num_gpus: 4
fast_check: true
source_file_dependencies:
@@ -133,6 +137,7 @@ steps:
- pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py

- label: Metrics, Tracing Test # 10min
working_dir: "/app/vllm/tests"
num_gpus: 2
fast_check: true
source_file_dependencies:
@@ -152,16 +157,17 @@ steps:
##### 1 GPU test #####

- label: Regression Test # 5min
working_dir: "/app/vllm/tests"
mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
- tests/test_regression
commands:
- pip install modelscope
- pytest -v -s test_regression.py
working_dir: "/vllm-workspace/tests" # optional

- label: Engine Test # 10min
working_dir: "/app/vllm/tests"
mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
@@ -173,6 +179,7 @@ steps:
- pytest -v -s tokenization

- label: V1 Test
working_dir: "/app/vllm/tests"
#mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
@@ -181,7 +188,7 @@ steps:
- VLLM_USE_V1=1 pytest -v -s v1

- label: Examples Test # 25min
working_dir: "/vllm-workspace/examples"
working_dir: "/app/vllm/examples"
#mirror_hardwares: [amd]
source_file_dependencies:
- vllm/entrypoints
@@ -203,6 +210,7 @@ steps:
- python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2

- label: Prefix Caching Test # 9min
working_dir: "/app/vllm/tests"
mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
@@ -211,6 +219,7 @@ steps:
- pytest -v -s prefix_caching

- label: Samplers Test # 36min
working_dir: "/app/vllm/tests"
source_file_dependencies:
- vllm/model_executor/layers
- vllm/sampling_metadata.py
@@ -221,6 +230,7 @@ steps:
- VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers

- label: LogitsProcessor Test # 5min
working_dir: "/app/vllm/tests"
mirror_hardwares: [amd]
source_file_dependencies:
- vllm/model_executor/layers
@@ -232,6 +242,7 @@ steps:
- pytest -v -s model_executor/test_guided_processors.py

- label: Speculative decoding tests # 40min
working_dir: "/app/vllm/tests"
source_file_dependencies:
- vllm/spec_decode
- tests/spec_decode
@@ -242,6 +253,7 @@ steps:
- pytest -v -s spec_decode/e2e/test_eagle_correctness.py

- label: LoRA Test %N # 15min each
working_dir: "/app/vllm/tests"
mirror_hardwares: [amd]
source_file_dependencies:
- vllm/lora
@@ -261,13 +273,15 @@ steps:
- pytest -v -s compile/piecewise/test_toy_llama.py

- label: "PyTorch Fullgraph Test" # 18min
working_dir: "/app/vllm/tests"
source_file_dependencies:
- vllm/
- tests/compile
commands:
- pytest -v -s compile/test_full_graph.py

- label: Kernels Test %N # 1h each
working_dir: "/app/vllm/tests"
mirror_hardwares: [amd]
source_file_dependencies:
- csrc/
@@ -278,6 +292,7 @@ steps:
parallelism: 4

- label: Tensorizer Test # 11min
working_dir: "/app/vllm/tests"
mirror_hardwares: [amd]
soft_fail: true
source_file_dependencies:
@@ -289,22 +304,23 @@ steps:
- pytest -v -s tensorizer_loader

- label: Benchmarks # 9min
working_dir: "/vllm-workspace/.buildkite"
working_dir: "/app/vllm/.buildkite"
mirror_hardwares: [amd]
source_file_dependencies:
- benchmarks/
commands:
- bash run-benchmarks.sh

- label: Quantization Test # 33min
working_dir: "/app/vllm/tests"
source_file_dependencies:
- csrc/
- vllm/model_executor/layers/quantization
- tests/quantization
command: VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization

- label: LM Eval Small Models # 53min
working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
working_dir: "/app/vllm/.buildkite/lm-eval-harness"
source_file_dependencies:
- csrc/
- vllm/model_executor/layers/quantization
@@ -313,13 +329,15 @@ steps:
- bash ./run-tests.sh -c configs/models-small.txt -t 1

- label: Encoder Decoder tests # 5min
working_dir: "/app/vllm/tests"
source_file_dependencies:
- vllm/
- tests/encoder_decoder
commands:
- pytest -v -s encoder_decoder

- label: OpenAI-Compatible Tool Use # 20 min
working_dir: "/app/vllm/tests"
fast_check: false
mirror_hardwares: [ amd ]
source_file_dependencies:
@@ -331,6 +349,7 @@ steps:
##### models test #####

- label: Basic Models Test # 24min
working_dir: "/app/vllm/tests"
source_file_dependencies:
- vllm/
- tests/models
@@ -339,6 +358,7 @@ steps:
- pytest -v -s models/test_initialization.py

- label: Language Models Test (Standard) # 32min
working_dir: "/app/vllm/tests"
#mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
@@ -350,6 +370,7 @@ steps:
- pytest -v -s models/embedding/language -m core_model

- label: Language Models Test (Extended) # 1h10min
working_dir: "/app/vllm/tests"
optional: true
source_file_dependencies:
- vllm/
@@ -361,6 +382,7 @@ steps:
- pytest -v -s models/embedding/language -m 'not core_model'

- label: Multi-Modal Models Test (Standard) # 40min
working_dir: "/app/vllm/tests"
#mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
@@ -380,6 +402,7 @@ steps:
- pytest -v -s models/encoder_decoder/vision_language -m core_model

- label: Multi-Modal Models Test (Extended) 1 # 48m
working_dir: "/app/vllm/tests"
optional: true
source_file_dependencies:
- vllm/
@@ -400,6 +423,7 @@ steps:
- pytest -v -s models/encoder_decoder/vision_language -m 'not core_model'

- label: Multi-Modal Models Test (Extended) 2 # 38m
working_dir: "/app/vllm/tests"
optional: true
source_file_dependencies:
- vllm/
@@ -410,6 +434,7 @@ steps:

# This test is used only in PR development phase to test individual models and should never run on main
- label: Custom Models Test
working_dir: "/app/vllm/tests"
optional: true
commands:
- echo 'Testing custom models...'
@@ -421,7 +446,7 @@ steps:
##### multi gpus test #####

- label: Distributed Comm Ops Test # 7min
working_dir: "/vllm-workspace/tests"
working_dir: "/app/vllm/tests"
num_gpus: 2
source_file_dependencies:
- vllm/distributed
@@ -431,7 +456,7 @@ steps:
- pytest -v -s distributed/test_shm_broadcast.py

- label: 2 Node Tests (4 GPUs in total) # 16min
working_dir: "/vllm-workspace/tests"
working_dir: "/app/vllm/tests"
num_gpus: 2
num_nodes: 2
source_file_dependencies:
@@ -450,7 +475,7 @@ steps:

- label: Distributed Tests (2 GPUs) # 40min
#mirror_hardwares: [amd]
working_dir: "/vllm-workspace/tests"
working_dir: "/app/vllm/tests"
num_gpus: 2
source_file_dependencies:
- vllm/distributed/
@@ -476,7 +501,7 @@ steps:
- CUDA_VISIBLE_DEVICES=0,1 pytest -v -s kv_transfer/disagg_test.py

- label: Plugin Tests (2 GPUs) # 40min
working_dir: "/vllm-workspace/tests"
working_dir: "/app/vllm/tests"
num_gpus: 2
fast_check: true
source_file_dependencies:
@@ -495,7 +520,7 @@ steps:
- pytest -v -s models/test_oot_registration.py # it needs a clean process

- label: Multi-step Tests (4 GPUs) # 36min
working_dir: "/vllm-workspace/tests"
working_dir: "/app/vllm/tests"
num_gpus: 4
source_file_dependencies:
- vllm/model_executor/layers/sampler.py
@@ -513,7 +538,7 @@ steps:
- pytest -v -s multi_step/test_correctness_llm.py

- label: Pipeline Parallelism Test # 45min
working_dir: "/vllm-workspace/tests"
working_dir: "/app/vllm/tests"
num_gpus: 4
source_file_dependencies:
- vllm/distributed/
@@ -526,6 +551,7 @@ steps:
- pytest -v -s distributed/test_pipeline_parallel.py

- label: LoRA TP Test (Distributed)
working_dir: "/app/vllm/tests"
num_gpus: 4
source_file_dependencies:
- vllm/lora
@@ -544,7 +570,7 @@ steps:


- label: Weight Loading Multiple GPU Test # 33min
working_dir: "/vllm-workspace/tests"
working_dir: "/app/vllm/tests"
num_gpus: 2
source_file_dependencies:
- vllm/
Expand All @@ -553,7 +579,7 @@ steps:
- bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models.txt

- label: Weight Loading Multiple GPU Test - Large Models # optional
working_dir: "/vllm-workspace/tests"
working_dir: "/app/vllm/tests"
num_gpus: 2
gpu: a100
optional: true
@@ -568,6 +594,7 @@ steps:
##### A100 test #####

- label: Distributed Tests (A100) # optional
working_dir: "/app/vllm/tests"
gpu: a100
optional: true
num_gpus: 4
@@ -585,7 +612,7 @@ steps:
gpu: a100
optional: true
num_gpus: 4
working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
working_dir: "/app/vllm/.buildkite/lm-eval-harness"
source_file_dependencies:
- csrc/
- vllm/model_executor/layers/quantization
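With every step now pinning working_dir explicitly, a quick local check can confirm that no stale paths remain before pushing; for example (a sketch, not part of this commit):

# From the repository root: list any working_dir still using the old prefix.
grep -n 'working_dir' .buildkite/test-pipeline.yaml | grep -v '/app/vllm'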
