From fb84bf808bd68d551db8d89b7c8f200628c63264 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= <dtrifiro@redhat.com>
Date: Mon, 20 Jan 2025 16:56:27 +0100
Subject: [PATCH] Clean up device Dockerfiles and build-time requirements

---
 Dockerfile.arm         |  5 +++--
 Dockerfile.cpu         |  2 +-
 Dockerfile.hpu         |  2 +-
 Dockerfile.neuron      |  6 ++----
 Dockerfile.openvino    |  2 +-
 Dockerfile.tpu         |  7 ++-----
 _build_backend/vllm.py | 12 +++++++++++-
 setup.py               |  4 ++--
 8 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/Dockerfile.arm b/Dockerfile.arm
index 093ee2209222f..25c937ded4dcc 100644
--- a/Dockerfile.arm
+++ b/Dockerfile.arm
@@ -51,7 +51,8 @@ ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
 RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=bind,source=.git,target=.git \
-    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
+    pip install build -r requirements-build.txt && \
+    VLLM_TARGET_DEVICE=cpu python3 -m build --no-isolation --wheel && \
     pip install dist/*.whl && \
     rm -rf dist
 
@@ -59,4 +60,4 @@ WORKDIR /workspace/
 
 RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
 
-ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
\ No newline at end of file
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
index 3856f41ec2c49..10fe41f08d70a 100644
--- a/Dockerfile.cpu
+++ b/Dockerfile.cpu
@@ -54,7 +54,7 @@ ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
 RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=bind,source=.git,target=.git \
-    VLLM_TARGET_DEVICE=cpu pip install -v .
+    VLLM_TARGET_DEVICE=cpu pip install -v --no-build-isolation .
 
 WORKDIR /workspace/
 
diff --git a/Dockerfile.hpu b/Dockerfile.hpu
index 66cf68c32f2ca..d95d3a6212675 100644
--- a/Dockerfile.hpu
+++ b/Dockerfile.hpu
@@ -9,7 +9,7 @@ RUN pip install -v -r requirements-hpu.txt
 ENV no_proxy=localhost,127.0.0.1
 ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true
 
-RUN VLLM_TARGET_DEVICE=hpu python3 setup.py install
+RUN VLLM_TARGET_DEVICE=hpu pip install .
 
 # install development dependencies (for testing)
 RUN python3 -m pip install -e tests/vllm_test_utils
diff --git a/Dockerfile.neuron b/Dockerfile.neuron
index e3c8b084d261e..f1bf9f3e01a14 100644
--- a/Dockerfile.neuron
+++ b/Dockerfile.neuron
@@ -20,15 +20,13 @@ ARG APP_MOUNT=/workspace
 VOLUME [ ${APP_MOUNT} ]
 WORKDIR ${APP_MOUNT}/vllm
 
+COPY requirements-build.txt /app/vllm/
 COPY requirements-neuron.txt /app/vllm/
 COPY requirements-common.txt /app/vllm/
 
-# FIXME: needs some way to provide build dependencies
 ENV PIP_EXTRA_INDEX_URL=https://pip.repos.neuron.amazonaws.com
 RUN --mount=type=cache,target=/root/.cache/pip \
-    cd /app/vllm && \
-    python3 -m pip install --pre -U -r requirements-neuron.txt
-
+    cd /app/vllm && python3 -m pip install --pre -U -r requirements-neuron.txt -r requirements-build.txt
 
 
 COPY . .
diff --git a/Dockerfile.openvino b/Dockerfile.openvino
index 4de3bcad05009..22d51d0d2b5d3 100644
--- a/Dockerfile.openvino
+++ b/Dockerfile.openvino
@@ -18,7 +18,7 @@ COPY . /workspace/vllm
 
 # build vLLM with OpenVINO backend
 RUN --mount=type=cache,target=/root/.cache/pip \
-    PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
+    PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" \
     VLLM_TARGET_DEVICE="openvino" \
     python3 -m pip install -v /workspace/vllm/
 
diff --git a/Dockerfile.tpu b/Dockerfile.tpu
index d6eb591dd6b62..e1e0bc21922a2 100644
--- a/Dockerfile.tpu
+++ b/Dockerfile.tpu
@@ -13,19 +13,16 @@ RUN --mount=type=cache,target=/var/cache/apt \
 RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
     --mount=type=bind,source=requirements-tpu.txt,target=requirements-tpu.txt \
-    pip install -r requirements-tpu.txt
+    pip install -r requirements-tpu.txt -r requirements-build.txt
 
 COPY . .
 ARG GIT_REPO_CHECK=0
 RUN --mount=type=bind,source=.git,target=.git \
     if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh; fi
 
-# FIXME: needs some way of providing build dependencies
-
 COPY . /workspace/vllm
-ENV VLLM_TARGET_DEVICE="tpu"
 RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=.git,target=.git \
-    cd /workspace/vllm && pip install --no-build-isolation -v -e .
+    VLLM_TARGET_DEVICE="tpu" pip install --no-build-isolation -v -e .
 
 CMD ["/bin/bash"]
diff --git a/_build_backend/vllm.py b/_build_backend/vllm.py
index 6c1f8b9ce3c9f..9843bc3a6957e 100644
--- a/_build_backend/vllm.py
+++ b/_build_backend/vllm.py
@@ -66,7 +66,17 @@ def get_requires_for_build_wheel(  # type: ignore[no-redef]
         requirements_extras.append("torch==2.5.1")
         # TODO: add intel extension for pytorch?
     elif VLLM_TARGET_DEVICE == "cuda":
-        requirements_extras.append("torch==2.5.1")
+        if os.getenv("TARGETPLATFORM") == "linux/arm64":
+            # FIXME: nightly pin, presumably for arm64 CUDA wheels; confirm
+            _check_for_extra_index_url(
+                "https://download.pytorch.org/whl/nightly/cu124")
+            requirements_extras.extend([
+                "torch==2.6.0.dev20241210+cu124",
+                "torchvision==0.22.0.dev20241215"
+            ])
+
+        else:
+            requirements_extras.append("torch==2.5.1")
     elif VLLM_TARGET_DEVICE == "rocm":
         # TODO: ? add support for multiple ROCM versions (6.3?)
         rocm_supported_versions = ("6.2", )
diff --git a/setup.py b/setup.py
index 2fc53e58d238b..b2a20722a4f02 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,6 @@
 
 from packaging.version import Version, parse
 from setuptools import Extension, find_packages, setup
-from setuptools import build_meta as _orig
 from setuptools.build_meta import *
 from setuptools.command.build_ext import build_ext
 from setuptools.errors import SetupError
@@ -352,7 +351,8 @@ def _no_device() -> bool:
 
 
 def _is_cuda() -> bool:
-    return VLLM_TARGET_DEVICE == "cuda" and not (_is_neuron() or _is_tpu() or _is_hpu())
+    return VLLM_TARGET_DEVICE == "cuda" and not (_is_neuron() or _is_tpu()
+                                                 or _is_hpu())
 
 
 def _is_hip() -> bool: