Skip to content

Commit

Permalink
Build: Update to use custom vllm and TRT version at build and model generation respectively (#7927)
Browse files Browse the repository at this point in the history

Co-authored-by: Misha Chornyi <[email protected]>
  • Loading branch information
pvijayakrish and mc-nv authored Jan 15, 2025
1 parent 6743fd9 commit 67f067b
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 15 deletions.
76 changes: 64 additions & 12 deletions build.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -72,11 +72,11 @@

DEFAULT_TRITON_VERSION_MAP = {
"release_version": "2.54.0dev",
"triton_container_version": "24.01dev",
"upstream_container_version": "24.12",
"triton_container_version": "25.01dev",
"upstream_container_version": "25.01",
"ort_version": "1.20.1",
"ort_openvino_version": "2024.4.0",
"standalone_openvino_version": "2024.4.0",
"ort_openvino_version": "2024.5.0",
"standalone_openvino_version": "2024.5.0",
"dcgm_version": "3.3.6",
"vllm_version": "0.6.3.post1",
"rhel_py_version": "3.12.3",
Expand Down Expand Up @@ -1048,6 +1048,8 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
# Install the windows- or linux-specific buildbase dependencies
if target_platform() == "windows":
df += """
RUN python3 -m pip install build
SHELL ["cmd", "/S", "/C"]
"""
else:
Expand Down Expand Up @@ -1465,12 +1467,31 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
"""

if "vllm" in backends:
df += """
# vLLM needed for vLLM backend
RUN pip3 install vllm=={}
""".format(
FLAGS.vllm_version
)
df += f"""
ARG BUILD_PUBLIC_VLLM="true"
ARG VLLM_INDEX_URL
ARG PYTORCH_TRITON_URL
RUN --mount=type=secret,id=req,target=/run/secrets/requirements \\
if [ "$BUILD_PUBLIC_VLLM" = "false" ]; then \\
pip3 install --no-cache-dir \\
mkl==2021.1.1 \\
mkl-include==2021.1.1 \\
mkl-devel==2021.1.1 \\
&& pip3 install --no-cache-dir --progress-bar on --index-url $VLLM_INDEX_URL -r /run/secrets/requirements \\
# Need to install in-house build of pytorch-triton to support triton_key definition used by torch 2.5.1
&& cd /tmp \\
&& wget $PYTORCH_TRITON_URL \\
&& pip install --no-cache-dir /tmp/pytorch_triton-*.whl \\
&& rm /tmp/pytorch_triton-*.whl; \\
else \\
# public vLLM needed for vLLM backend
pip3 install vllm=={DEFAULT_TRITON_VERSION_MAP["vllm_version"]}; \\
fi
ARG PYVER=3.12
ENV LD_LIBRARY_PATH /usr/local/lib:/usr/local/lib/python${{PYVER}}/dist-packages/torch/lib:${{LD_LIBRARY_PATH}}
"""

if "dali" in backends:
df += """
Expand Down Expand Up @@ -1838,13 +1859,21 @@ def create_docker_build_script(script_name, container_install_dir, container_ci_
finalargs = [
"docker",
"build",
]
if secrets != "":
finalargs += [
f"--secret id=req,src={requirements}",
f"--build-arg VLLM_INDEX_URL={vllm_index_url}",
f"--build-arg PYTORCH_TRITON_URL={pytorch_triton_url}",
f"--build-arg BUILD_PUBLIC_VLLM={build_public_vllm}",
]
finalargs += [
"-t",
"tritonserver",
"-f",
os.path.join(FLAGS.build_dir, "Dockerfile"),
".",
]

docker_script.cwd(THIS_SCRIPT_DIR)
docker_script.cmd(finalargs, check_exitcode=True)

Expand Down Expand Up @@ -2689,6 +2718,19 @@ def enable_all():
default=DEFAULT_TRITON_VERSION_MAP["rhel_py_version"],
help="This flag sets the Python version for RHEL platform of Triton Inference Server to be built. Default: the latest supported version.",
)
parser.add_argument(
"--build-secret",
action="append",
required=False,
nargs=2,
metavar=("key", "value"),
help="Add build secrets in the form of <key> <value>. These secrets are used during the build process for vllm. The secrets are passed to the Docker build step as `--secret id=<key>`. The following keys are expected and their purposes are described below:\n\n"
" - 'req': A file containing a list of dependencies for pip (e.g., requirements.txt).\n"
" - 'vllm_index_url': The index URL for the pip install.\n"
" - 'pytorch_triton_url': The location of the PyTorch wheel to download.\n"
" - 'build_public_vllm': A flag (default is 'true') indicating whether to build the public VLLM version.\n\n"
"Ensure that the required environment variables for these secrets are set before running the build.",
)
FLAGS = parser.parse_args()

if FLAGS.image is None:
Expand All @@ -2715,6 +2757,8 @@ def enable_all():
FLAGS.override_backend_cmake_arg = []
if FLAGS.extra_backend_cmake_arg is None:
FLAGS.extra_backend_cmake_arg = []
if FLAGS.build_secret is None:
FLAGS.build_secret = []

# if --enable-all is specified, then update FLAGS to enable all
# settings, backends, repo-agents, caches, file systems, endpoints, etc.
Expand Down Expand Up @@ -2808,6 +2852,14 @@ def enable_all():
)
backends["python"] = backends["vllm"]

secrets = dict(getattr(FLAGS, "build_secret", []))
if secrets is not None:
requirements = secrets.get("req", "")
vllm_index_url = secrets.get("vllm_index_url", "")
pytorch_triton_url = secrets.get("pytorch_triton_url", "")
build_public_vllm = secrets.get("build_public_vllm", "true")
log('Build Arg for BUILD_PUBLIC_VLLM: "{}"'.format(build_public_vllm))

# Initialize map of repo agents to build and repo-tag for each.
repoagents = {}
for be in FLAGS.repoagent:
Expand Down
11 changes: 8 additions & 3 deletions qa/common/gen_qa_model_repository
Original file line number Diff line number Diff line change
Expand Up @@ -500,9 +500,14 @@ chmod -R 777 $VOLUME_FORMATDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR
chmod -R 777 $VOLUME_DATADEPENDENTDIR
# Make shared library for custom Hardmax plugin.
(git clone -b release/${TENSORRT_VERSION} https://github.com/NVIDIA/TensorRT.git && \
cd /workspace/TensorRT/samples/python/onnx_custom_plugin && rm -rf build && mkdir build && \
cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $VOLUME_PLGDESTDIR/.)
if [ -d "/usr/src/tensorrt" ]; then
cd /usr/src/tensorrt/samples/python/onnx_custom_plugin
else
git clone -b release/${TENSORRT_VERSION} https://github.com/NVIDIA/TensorRT.git
cd /workspace/TensorRT/samples/python/onnx_custom_plugin
fi
rm -rf build && mkdir build && \
cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $VOLUME_PLGDESTDIR/.
LD_PRELOAD=$VOLUME_PLGDESTDIR/libcustomHardmaxPlugin.so python3 $VOLUME_SRCDIR/gen_qa_trt_plugin_models.py --models_dir=$VOLUME_PLGDESTDIR
chmod -R 777 $VOLUME_PLGDESTDIR
EOF
Expand Down

0 comments on commit 67f067b

Please sign in to comment.