remove ray for launching api server
youkaichao committed Jul 11, 2024
1 parent 196b94b commit ae34bc2
Showing 8 changed files with 14 additions and 77 deletions.
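
Every file below follows the same pattern: each test module used to boot a module-scoped Ray runtime (via a ray_ctx fixture) purely to manage the API server process, and the server fixture depended on it. This commit deletes the ray_ctx fixture and the import ray, so the server fixture constructs RemoteOpenAIServer directly. A minimal before/after sketch of the pattern, condensed from the diffs below (MODEL_NAME stands in for each module's model; the server arguments are abbreviated):

# Before: a module-scoped Ray runtime wrapped the server's lifetime.
import pytest
import ray

from ..utils import VLLM_PATH, RemoteOpenAIServer

@pytest.fixture(scope="module")
def ray_ctx():
    ray.init(runtime_env={"working_dir": VLLM_PATH})
    yield
    ray.shutdown()

@pytest.fixture(scope="module")
def server(ray_ctx):
    return RemoteOpenAIServer(["--model", MODEL_NAME])

# After: RemoteOpenAIServer is constructed directly. Ray is no longer
# involved in launching the API server (presumably it now runs as a
# plain subprocess; the change to RemoteOpenAIServer itself is not
# part of this diff).
import pytest

from ..utils import RemoteOpenAIServer

@pytest.fixture(scope="module")
def server():
    return RemoteOpenAIServer(["--model", MODEL_NAME])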
14 changes: 2 additions & 12 deletions tests/async_engine/test_openapi_server_ray.py
@@ -1,24 +1,14 @@
 import openai  # use the official client for correctness check
 import pytest
-# using Ray for overall ease of process management, parallel requests,
-# and debugging.
-import ray
 
-from ..utils import VLLM_PATH, RemoteOpenAIServer
+from ..utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "facebook/opt-125m"
 
 
 @pytest.fixture(scope="module")
-def ray_ctx():
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-    yield
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def server(ray_ctx):
+def server():
     return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
6 changes: 2 additions & 4 deletions tests/distributed/test_pipeline_parallel.py
@@ -2,11 +2,8 @@
 
 import openai  # use the official client for correctness check
 import pytest
-# using Ray for overall ease of process management, parallel requests,
-# and debugging.
-import ray
 
-from ..utils import VLLM_PATH, RemoteOpenAIServer
+from ..utils import RemoteOpenAIServer
 
 # downloading lora to test lora requests
 
@@ -19,6 +16,7 @@
 
 pytestmark = pytest.mark.asyncio
 
+
 @pytest.fixture(scope="module")
 def server():
     args = [
14 changes: 2 additions & 12 deletions tests/entrypoints/openai/test_chat.py
@@ -6,15 +6,12 @@
 import jsonschema
 import openai  # use the official client for correctness check
 import pytest
-# using Ray for overall ease of process management, parallel requests,
-# and debugging.
-import ray
 import torch
 # downloading lora to test lora requests
 from huggingface_hub import snapshot_download
 from openai import BadRequestError
 
-from ...utils import VLLM_PATH, RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -76,14 +73,7 @@ def zephyr_lora_files():
 
 
 @pytest.fixture(scope="module")
-def ray_ctx():
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-    yield
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def server(zephyr_lora_files, ray_ctx):
+def server(zephyr_lora_files):
     return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
14 changes: 2 additions & 12 deletions tests/entrypoints/openai/test_completion.py
@@ -6,17 +6,14 @@
 import jsonschema
 import openai  # use the official client for correctness check
 import pytest
-# using Ray for overall ease of process management, parallel requests,
-# and debugging.
-import ray
 import requests
 # downloading lora to test lora requests
 from huggingface_hub import snapshot_download
 from openai import BadRequestError
 
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
-from ...utils import VLLM_PATH, RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -78,14 +75,7 @@ def zephyr_lora_files():
 
 
 @pytest.fixture(scope="module")
-def ray_ctx():
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-    yield
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def server(zephyr_lora_files, ray_ctx):
+def server(zephyr_lora_files):
     return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
12 changes: 2 additions & 10 deletions tests/entrypoints/openai/test_embedding.py
@@ -3,22 +3,14 @@
 import numpy as np
 import openai
 import pytest
-import ray
 
-from ...utils import VLLM_PATH, RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
 
 
 @pytest.fixture(scope="module")
-def ray_ctx():
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-    yield
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def embedding_server(ray_ctx):
+def embedding_server():
     return RemoteOpenAIServer([
         "--model",
         EMBEDDING_MODEL_NAME,
14 changes: 2 additions & 12 deletions tests/entrypoints/openai/test_models.py
@@ -1,12 +1,9 @@
 import openai  # use the official client for correctness check
 import pytest
-# using Ray for overall ease of process management, parallel requests,
-# and debugging.
-import ray
 # downloading lora to test lora requests
 from huggingface_hub import snapshot_download
 
-from ...utils import VLLM_PATH, RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -21,14 +18,7 @@ def zephyr_lora_files():
 
 
 @pytest.fixture(scope="module")
-def ray_ctx():
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-    yield
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def server(zephyr_lora_files, ray_ctx):
+def server(zephyr_lora_files):
     return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
10 changes: 1 addition & 9 deletions tests/entrypoints/openai/test_vision.py
@@ -3,7 +3,6 @@
 import openai
 import pytest
 import pytest_asyncio
-import ray
 
 from vllm.multimodal.utils import ImageFetchAiohttp, encode_image_base64
 
@@ -23,14 +22,7 @@
 
 
 @pytest.fixture(scope="module")
-def ray_ctx():
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-    yield
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def server(ray_ctx):
+def server():
     return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
7 changes: 1 addition & 6 deletions tests/tensorizer_loader/test_tensorizer.py
@@ -6,7 +6,6 @@
 
 import openai
 import pytest
-import ray
 import torch
 from tensorizer import EncryptionParams
 
@@ -22,7 +21,7 @@
                              tensorize_vllm_model)
 
 from ..conftest import VllmRunner, cleanup
-from ..utils import VLLM_PATH, RemoteOpenAIServer
+from ..utils import RemoteOpenAIServer
 
 # yapf conflicts with isort for this docstring
 
@@ -220,8 +219,6 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
         json.dumps(model_loader_extra_config),
     ]
 
-    ray.init(runtime_env={"working_dir": VLLM_PATH})
-
     server = RemoteOpenAIServer(openai_args)
     print("Server ready.")
 
@@ -282,7 +279,6 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
     base_model.model.llm_engine.model_executor.shutdown()
     del base_model
     cleanup()
-    ray.shutdown()
 
     # load model with two shards and serialize with encryption
     model_path = str(tmp_path / (model_ref + "-%02d.tensors"))
@@ -305,7 +301,6 @@
     assert os.path.isfile(model_path % 0), "Serialization subprocess failed"
     assert os.path.isfile(model_path % 1), "Serialization subprocess failed"
     cleanup()
-    ray.shutdown()
 
     loaded_vllm_model = vllm_runner(
         model_ref,
