From ae34bc24d7aba747b4459bc93d38f71cdf5d2967 Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Thu, 11 Jul 2024 16:58:41 -0700
Subject: [PATCH] remove ray for launching api server

---
 tests/async_engine/test_openapi_server_ray.py | 14 ++------------
 tests/distributed/test_pipeline_parallel.py   |  6 ++----
 tests/entrypoints/openai/test_chat.py         | 14 ++------------
 tests/entrypoints/openai/test_completion.py   | 14 ++------------
 tests/entrypoints/openai/test_embedding.py    | 12 ++----------
 tests/entrypoints/openai/test_models.py       | 14 ++------------
 tests/entrypoints/openai/test_vision.py       | 10 +---------
 tests/tensorizer_loader/test_tensorizer.py    |  7 +------
 8 files changed, 14 insertions(+), 77 deletions(-)

diff --git a/tests/async_engine/test_openapi_server_ray.py b/tests/async_engine/test_openapi_server_ray.py
index cc05d79e56874..26d4e6a359644 100644
--- a/tests/async_engine/test_openapi_server_ray.py
+++ b/tests/async_engine/test_openapi_server_ray.py
@@ -1,24 +1,14 @@
 import openai  # use the official client for correctness check
 import pytest
-# using Ray for overall ease of process management, parallel requests,
-# and debugging.
-import ray
 
-from ..utils import VLLM_PATH, RemoteOpenAIServer
+from ..utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "facebook/opt-125m"
 
 
 @pytest.fixture(scope="module")
-def ray_ctx():
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-    yield
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def server(ray_ctx):
+def server():
     return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py
index 4dd3de9167758..1014174f5d2bf 100644
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -2,11 +2,8 @@
 
 import openai  # use the official client for correctness check
 import pytest
-# using Ray for overall ease of process management, parallel requests,
-# and debugging.
-import ray
 
-from ..utils import VLLM_PATH, RemoteOpenAIServer
+from ..utils import RemoteOpenAIServer
 
 # downloading lora to test lora requests
 
@@ -19,6 +16,7 @@
 
 pytestmark = pytest.mark.asyncio
 
+
 @pytest.fixture(scope="module")
 def server():
     args = [
diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py
index 3e80214f24dc5..f901031778b59 100644
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -6,15 +6,12 @@
 import jsonschema
 import openai  # use the official client for correctness check
 import pytest
-# using Ray for overall ease of process management, parallel requests,
-# and debugging.
-import ray
 import torch
 # downloading lora to test lora requests
 from huggingface_hub import snapshot_download
 from openai import BadRequestError
 
-from ...utils import VLLM_PATH, RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -76,14 +73,7 @@ def zephyr_lora_files():
 
 
 @pytest.fixture(scope="module")
-def ray_ctx():
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-    yield
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def server(zephyr_lora_files, ray_ctx):
+def server(zephyr_lora_files):
     return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
diff --git a/tests/entrypoints/openai/test_completion.py b/tests/entrypoints/openai/test_completion.py
index 52a848b7831d5..40195ae3009f8 100644
--- a/tests/entrypoints/openai/test_completion.py
+++ b/tests/entrypoints/openai/test_completion.py
@@ -6,9 +6,6 @@
 import jsonschema
 import openai  # use the official client for correctness check
 import pytest
-# using Ray for overall ease of process management, parallel requests,
-# and debugging.
-import ray
 import requests
 # downloading lora to test lora requests
 from huggingface_hub import snapshot_download
@@ -16,7 +13,7 @@
 
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
-from ...utils import VLLM_PATH, RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -78,14 +75,7 @@ def zephyr_lora_files():
 
 
 @pytest.fixture(scope="module")
-def ray_ctx():
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-    yield
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def server(zephyr_lora_files, ray_ctx):
+def server(zephyr_lora_files):
     return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
diff --git a/tests/entrypoints/openai/test_embedding.py b/tests/entrypoints/openai/test_embedding.py
index f8aa1c9143a3b..8865d32d417ad 100644
--- a/tests/entrypoints/openai/test_embedding.py
+++ b/tests/entrypoints/openai/test_embedding.py
@@ -3,22 +3,14 @@
 import numpy as np
 import openai
 import pytest
-import ray
 
-from ...utils import VLLM_PATH, RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
 
 
 @pytest.fixture(scope="module")
-def ray_ctx():
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-    yield
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def embedding_server(ray_ctx):
+def embedding_server():
     return RemoteOpenAIServer([
         "--model",
         EMBEDDING_MODEL_NAME,
diff --git a/tests/entrypoints/openai/test_models.py b/tests/entrypoints/openai/test_models.py
index 914ef6e19e109..da147756ac61f 100644
--- a/tests/entrypoints/openai/test_models.py
+++ b/tests/entrypoints/openai/test_models.py
@@ -1,12 +1,9 @@
 import openai  # use the official client for correctness check
 import pytest
-# using Ray for overall ease of process management, parallel requests,
-# and debugging.
-import ray
 # downloading lora to test lora requests
 from huggingface_hub import snapshot_download
 
-from ...utils import VLLM_PATH, RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -21,14 +18,7 @@ def zephyr_lora_files():
 
 
 @pytest.fixture(scope="module")
-def ray_ctx():
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-    yield
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def server(zephyr_lora_files, ray_ctx):
+def server(zephyr_lora_files):
     return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
diff --git a/tests/entrypoints/openai/test_vision.py b/tests/entrypoints/openai/test_vision.py
index b869717608d0f..ed47bd7094b60 100644
--- a/tests/entrypoints/openai/test_vision.py
+++ b/tests/entrypoints/openai/test_vision.py
@@ -3,7 +3,6 @@
 import openai
 import pytest
 import pytest_asyncio
-import ray
 
 from vllm.multimodal.utils import ImageFetchAiohttp, encode_image_base64
 
@@ -23,14 +22,7 @@
 
 
 @pytest.fixture(scope="module")
-def ray_ctx():
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-    yield
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def server(ray_ctx):
+def server():
     return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
diff --git a/tests/tensorizer_loader/test_tensorizer.py b/tests/tensorizer_loader/test_tensorizer.py
index b2ebcc15cd0fc..3e4ec0e116bc1 100644
--- a/tests/tensorizer_loader/test_tensorizer.py
+++ b/tests/tensorizer_loader/test_tensorizer.py
@@ -6,7 +6,6 @@
 
 import openai
 import pytest
-import ray
 import torch
 
 from tensorizer import EncryptionParams
@@ -22,7 +21,7 @@
                                                           tensorize_vllm_model)
 
 from ..conftest import VllmRunner, cleanup
-from ..utils import VLLM_PATH, RemoteOpenAIServer
+from ..utils import RemoteOpenAIServer
 
 # yapf conflicts with isort for this docstring
 
@@ -220,8 +219,6 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
         json.dumps(model_loader_extra_config),
     ]
 
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-
     server = RemoteOpenAIServer(openai_args)
     print("Server ready.")
 
@@ -282,7 +279,6 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
     base_model.model.llm_engine.model_executor.shutdown()
     del base_model
     cleanup()
-    ray.shutdown()
 
     # load model with two shards and serialize with encryption
     model_path = str(tmp_path / (model_ref + "-%02d.tensors"))
@@ -305,7 +301,6 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
     assert os.path.isfile(model_path % 0), "Serialization subprocess failed"
     assert os.path.isfile(model_path % 1), "Serialization subprocess failed"
     cleanup()
-    ray.shutdown()
 
     loaded_vllm_model = vllm_runner(
         model_ref,