Skip to content

Commit

Permalink
rebase onto latest changes
Browse files Browse the repository at this point in the history
  • Loading branch information
dtrifiro committed Jan 20, 2025
1 parent e2cc727 commit 4121ba1
Show file tree
Hide file tree
Showing 13 changed files with 111 additions and 137 deletions.
2 changes: 1 addition & 1 deletion Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/li

RUN echo 'ulimit -c 0' >> ~/.bashrc

RUN pip install intel_extension_for_pytorch==2.5.0
RUN pip install intel_extension_for_pytorch==2.5.0 # FIXME: why is this installed here?

WORKDIR /workspace

Expand Down
26 changes: 9 additions & 17 deletions Dockerfile.neuron
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ ARG APP_MOUNT=/workspace
VOLUME [ ${APP_MOUNT} ]
WORKDIR ${APP_MOUNT}/vllm

COPY requirements-neuron.txt /app/vllm/
COPY requirements-common.txt /app/vllm/

# FIXME: needs some way to provide build dependencies
ENV PIP_EXTRA_INDEX_URL=https://pip.repos.neuron.amazonaws.com
RUN --mount=type=cache,target=/root/.cache/pip \
python3 -m pip install --upgrade pip && \
python3 -m pip install fastapi ninja tokenizers pandas && \
python3 -m pip install sentencepiece transformers==4.45.2 -U && \
python3 -m pip install transformers-neuronx -U && \
python3 -m pip install --pre neuronx-cc==2.16.345.0 -U
python3 -m pip install pytest -U && \
cd /app/vllm && \
python3 -m pip install --pre -U -r requirements-neuron.txt



Expand All @@ -36,18 +36,10 @@ ARG GIT_REPO_CHECK=0
RUN --mount=type=bind,source=.git,target=.git \
if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi

RUN --mount=type=cache,target=/root/.cache/pip \
cd /app/vllm && \
python3 -m pip install -U -r requirements-neuron.txt -r requirements-build.txt

ENV VLLM_TARGET_DEVICE=neuron
RUN --mount=type=bind,source=.git,target=.git \
pip install --no-build-isolation -v -e .

# install development dependencies (for testing)
RUN python3 -m pip install -e tests/vllm_test_utils

# overwrite entrypoint to run bash script
RUN echo "import subprocess; import sys; subprocess.check_call(sys.argv[1:])" > /usr/local/bin/dockerd-entrypoint.py
cd /app/vllm \
&& pip install --use-pep517 --no-build-isolation -v -e . \
&& cd ..

CMD ["/bin/bash"]
5 changes: 2 additions & 3 deletions Dockerfile.rocm
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,12 @@ RUN python3 -m pip install --upgrade pip
# TODO: implement sccache support across components
RUN apt-get purge -y sccache; python3 -m pip uninstall -y sccache; rm -f "$(which sccache)"

# Install torch == 2.6.0 on ROCm
RUN --mount=type=cache,target=/root/.cache/pip \
case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
*"rocm-6.2"*) \
python3 -m pip uninstall -y torch torchvision \
&& python3 -m pip install --pre \
torch \
'setuptools-scm>=8' \
torchvision \
--extra-index-url https://download.pytorch.org/whl/rocm6.2;; \
*) ;; esac
Expand Down Expand Up @@ -141,7 +139,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \
RUN --mount=type=cache,target=${CCACHE_DIR} \
--mount=type=bind,source=.git,target=.git \
--mount=type=cache,target=/root/.cache/pip \
VLLM_TARGET_DEVICE=rocm pip install -v --no-build-isolation -e .
VLLM_ROCM_VERSION=6.2 VLLM_TARGET_DEVICE=rocm \
pip install -v --no-build-isolation -e .

# Copy amdsmi wheel into final image
RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install \
Expand Down
11 changes: 4 additions & 7 deletions Dockerfile.tpu
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,12 @@ ARG GIT_REPO_CHECK=0
RUN --mount=type=bind,source=.git,target=.git \
if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh; fi

# Build vLLM.
# FIXME: needs some way of providing build dependencies

COPY . /workspace/vllm
ENV VLLM_TARGET_DEVICE="tpu"
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,source=.git,target=.git \
python3 -m pip install \
-r requirements-tpu.txt
RUN python3 setup.py develop

# install development dependencies (for testing)
RUN python3 -m pip install -e tests/vllm_test_utils
cd /workspace/vllm && pip install --no-build-isolation -v -e .

CMD ["/bin/bash"]
140 changes: 75 additions & 65 deletions _build_backend/vllm.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import os
from textwrap import dedent

from setuptools import build_meta as build_meta_orig
from setuptools.build_meta import *

from shutil import which

VLLM_TARGET_DEVICE = os.getenv(
"VLLM_TARGET_DEVICE",
"cuda", # match the default value in vllm/envs.py
Expand All @@ -13,24 +14,30 @@
"setuptools>=61",
"setuptools-scm>=8",
)
BUILD_REQUIREMENTS_EXTENSIONS = (
"cmake>=3.26",
"ninja",
"packaging",
"wheel",
)

with open("requirements-build.txt") as fh:
BUILD_REQUIREMENTS_EXTENSIONS = [
line.split("#")[0].strip() for line in fh.readlines()
if not line.strip().startswith("#")
]


def _check_for_extra_index_url(expected_value):
if which("uv"):
_check_for_env_var("UV_EXTRA_INDEX_URL", expected_value)

_check_for_env_var("PIP_EXTRA_INDEX_URL", expected_value)


def _check_for_env_var(key: str, expected_value: str):
""" Print a warning when the env var's value doesn't match the expected value. """ # noqa: E501
""" Print a warning when the env var's value doesn't match the expected value. """
value = os.getenv(key)
if value and value == expected_value:
return

warning = (
f"{key} is not defined, but might be required for this build." # noqa: E501
if value is None
else f"{key} is set to {value}, but {expected_value} is suggested." # noqa: E501
)
warning = (f"{key} is not defined, but might be required for this build."
if value is None else
f"{key} is set to {value}, but {expected_value} is suggested.")

msg = dedent(
"""
Expand All @@ -41,8 +48,7 @@ def _check_for_env_var(key: str, expected_value: str):
{key}={suggested_value}
in your environment before starting the build.
***""", # noqa: E501
)
***""", )

import warnings

Expand All @@ -51,83 +57,87 @@ def _check_for_env_var(key: str, expected_value: str):
stacklevel=2,
)


def get_requires_for_build_wheel( # type: ignore[no-redef]
config_settings=None,
):
config_settings=None, ):
requirements_extras = []
if VLLM_TARGET_DEVICE == "cpu" or VLLM_TARGET_DEVICE == "openvino":
_check_for_env_var("PIP_EXTRA_INDEX_URL", expected_value="https://download.pytorch.org/whl/cpu")
requirements_extras.append("torch==2.4.0+cpu")
_check_for_extra_index_url("https://download.pytorch.org/whl/cpu")
requirements_extras.append("torch==2.5.1")
# TODO: add intel extension for pytorch?
elif VLLM_TARGET_DEVICE == "cuda":
requirements_extras.append("torch==2.4.0")
requirements_extras.append("torch==2.5.1")
elif VLLM_TARGET_DEVICE == "rocm":
# TODO: ? add support for multiple ROCM versions (5.2 and?)
rocm_supported_versions = ("6.2",)
requested_version = os.getenv("ROCM_VERSION")
if not requested_version:
raise RuntimeError(
"Set ROCM_VERSION env var. "
f"Supported versions={rocm_supported_versions}"
)
if requested_version not in rocm_supported_versions:
raise ValueError(
"Invalid ROCM_VERSION. "
f"Supported versions={rocm_supported_versions}"
)

_check_for_env_var("PIP_EXTRA_INDEX_URL", expected_value=f"https://download.pytorch.org/whl/nightly/rocm{requested_version}")
requirements_extras.extend(
[
"torch==2.6.0.dev20240918",
"torchvision==0.20.0.dev20240918",
]
# TODO: ? add support for multiple ROCM versions (6.3?)
rocm_supported_versions = ("6.2", )
requested_rocm_version = os.getenv("VLLM_ROCM_VERSION")
if not requested_rocm_version:
raise RuntimeError("Set VLLM_ROCM_VERSION env var. "
f"Supported versions={rocm_supported_versions}")
if requested_rocm_version not in rocm_supported_versions:
raise ValueError("Invalid VLLM_ROCM_VERSION. "
f"Supported versions={rocm_supported_versions}")

_check_for_extra_index_url(
f"https://download.pytorch.org/whl/nightly/rocm{requested_rocm_version}"
)
requirements_extras.extend([
f"torch==2.5.1+rocm{requested_rocm_version}",
f"torchvision==0.20.1+rocm{requested_rocm_version}",
])
elif VLLM_TARGET_DEVICE == "neuron":
_check_for_env_var("PIP_EXTRA_INDEX_URL", expected_value="https://pip.repos.neuron.amazonaws.com")
requirements_extras.append("torch-neuronx>=2.1.2")
requirements_extras.append("neuronx-cc==2.15.*")
# note
_check_for_extra_index_url(
expected_value="https://pip.repos.neuron.amazonaws.com")
requirements_extras.extend([
"torch-neuronx>=2.1.2",
"neuronx-cc==2.15.*",
])
elif VLLM_TARGET_DEVICE == "tpu":
_check_for_env_var(
"PIP_FIND_LINKS",
expected_value="https://storage.googleapis.com/libtpu-releases/index.html https://storage.googleapis.com/jax-releases/jax_nightly_releases.html https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html", # noqa: E501
)
requirements_extras.extend(
[
"torch==2.5.0",
"torch_xla[tpu,pallas]",
]
expected_value=
"https://storage.googleapis.com/libtpu-releases/index.html https://storage.googleapis.com/jax-releases/jax_nightly_releases.html https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html",
)
torch_xla_base = "https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/"
requirements_extras.extend([
"torch==2.6.0.dev20241126+cpu", # has to match torch version in `requirements-tpu.txt`
"torch_xla[tpu,pallas]",
])
for python_version in ("3.11", "3.10", "3.9"):
pyv = python_version.replace(".", '')
torch_xla_version = "torch_xla-2.6.0.dev20241126" # has to match torch version in `requirements-tpu.txt`
req_str = f"torch_xla[tpu] @ {torch_xla_base}/{torch_xla_version}-cp{pyv}-cp{pyv}-linux_x86_64.whl ; python_version == \"{python_version}\""
requirements_extras.append(req_str)
elif VLLM_TARGET_DEVICE == "xpu":
_check_for_env_var("PIP_EXTRA_INDEX_URL", expected_value="https://pytorch-extension.intel.com/release-whl/stable/xpu/us/")
requirements_extras.append(
"torch @ https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/xpu/torch-2.1.0.post1%2Bcxx11.abi-cp310-cp310-linux_x86_64.whl",
)
_check_for_extra_index_url(
"https://pytorch-extension.intel.com/release-whl/stable/xpu/us/")
requirements_extras.extend([
"torch @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp310-cp310-linux_x86_64.whl",
"intel-extension-for-pytorch @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/intel_extension_for_pytorch-2.5.10%2Bgit9d489a8-cp310-cp310-linux_x86_64.whl",
"oneccl_bind_pt @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp310-cp310-linux_x86_64.whl",
])
elif VLLM_TARGET_DEVICE == "hpu": # noqa: SIM114
pass
elif VLLM_TARGET_DEVICE == "empty":
pass
else:
raise RuntimeError(f"Unknown runtime environment {VLLM_TARGET_DEVICE=}")

requirements = build_meta_orig.get_requires_for_build_wheel(config_settings)
raise RuntimeError(
f"Unknown runtime environment {VLLM_TARGET_DEVICE=}")

complete_requirements = [
*BASE_REQUIREMENTS,
*BUILD_REQUIREMENTS_EXTENSIONS,
*requirements,
*requirements_extras,
]
print(
f"vllm build-backend: resolved build dependencies to: {complete_requirements}" # noqa: E501
f"vllm build-backend: resolved build dependencies to: {complete_requirements}"
)
return complete_requirements


def get_requires_for_build_sdist( # type: ignore[no-redef]
config_settings=None,
):
requirements = build_meta_orig.get_requires_for_build_sdist(config_settings)

config_settings=None, ):
return [
*BASE_REQUIREMENTS,
*requirements,
# *requirements,
]
13 changes: 1 addition & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,4 @@
[build-system]
# Should be mirrored in requirements-build.txt
requires = [
"cmake>=3.26",
"ninja",
"packaging",
"setuptools>=61",
"setuptools-scm>=8.0",
"torch == 2.5.1",
"wheel",
"jinja2",
]
build-backend = "setuptools.build_meta"
requires = ["setuptools", "setuptools-scm"]
build-backend = "vllm"
# per-target build dependencies are defined dynamically in `build_backend/vllm.py`
Expand All @@ -35,6 +23,7 @@ exclude = [
[tool.ruff.lint.per-file-ignores]
"vllm/version.py" = ["F401"]
"vllm/_version.py" = ["ALL"]
"_build_backend/vllm.py" = ["E501"]

[tool.ruff.lint]
select = [
Expand Down
13 changes: 4 additions & 9 deletions requirements-build.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
# Should be mirrored in pyproject.toml
cmake>=3.26
ninja
packaging
setuptools>=61
setuptools-scm>=8
torch==2.5.1
wheel
jinja2
cmake>=3.26
ninja
packaging
wheel
2 changes: 0 additions & 2 deletions requirements-hpu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,4 @@ ray
triton
pandas
tabulate
setuptools>=61
setuptools-scm>=8
vllm-hpu-extension @ git+https://github.com/HabanaAI/vllm-hpu-extension.git@4312768
7 changes: 6 additions & 1 deletion requirements-neuron.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,9 @@
# Dependencies for Neuron devices
transformers-neuronx >= 0.13.0
torch-neuronx >= 2.5.0
neuronx-cc
neuronx-cc==2.16.345.0
fastapi
ninja
tokenizers
pandas
pytest
1 change: 1 addition & 0 deletions requirements-rocm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ ray >= 2.10.0
peft
pytest-asyncio
tensorizer>=2.9.0
torch
6 changes: 0 additions & 6 deletions requirements-tpu.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
# Common dependencies
-r requirements-common.txt

# Dependencies for TPU
cmake>=3.26
ninja
packaging
setuptools-scm>=8
wheel
jinja2
ray[default]

Expand Down
6 changes: 0 additions & 6 deletions requirements-xpu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,6 @@
-r requirements-common.txt

ray >= 2.9
cmake>=3.26
ninja
packaging
setuptools-scm>=8
wheel
jinja2

torch @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp310-cp310-linux_x86_64.whl
intel-extension-for-pytorch @ https://intel-optimized-pytorch.s3.cn-north-1.amazonaws.com.cn/ipex_dev/xpu/intel_extension_for_pytorch-2.5.10%2Bgit9d489a8-cp310-cp310-linux_x86_64.whl
Expand Down
Loading

0 comments on commit 4121ba1

Please sign in to comment.