Build: Update to use custom vllm and TRT version at build and model generation respectively #7927

Merged (20 commits, Jan 15, 2025). Changes shown from 14 commits.
build.py: 62 additions & 11 deletions
@@ -73,10 +73,10 @@
 DEFAULT_TRITON_VERSION_MAP = {
     "release_version": "2.53.0dev",
     "triton_container_version": "24.12dev",
-    "upstream_container_version": "24.11",
-    "ort_version": "1.19.2",
-    "ort_openvino_version": "2024.4.0",
-    "standalone_openvino_version": "2024.4.0",
+    "upstream_container_version": "25.01",
+    "ort_version": "1.20.1",
+    "ort_openvino_version": "2024.5.0",
+    "standalone_openvino_version": "2024.5.0",
     "dcgm_version": "3.3.6",
     "vllm_version": "0.6.3.post1",
     "rhel_py_version": "3.12.3",
@@ -1463,12 +1463,31 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
 """

     if "vllm" in backends:
-        df += """
-# vLLM needed for vLLM backend
-RUN pip3 install vllm=={}
-""".format(
-            FLAGS.vllm_version
-        )
+        df += f"""
+ARG BUILD_PUBLIC_VLLM="true"
+ARG VLLM_INDEX_URL
+ARG PYTORCH_TRITON_URL
+
+RUN --mount=type=secret,id=req,target=/run/secrets/requirements \\
+    if [ "$BUILD_PUBLIC_VLLM" = "false" ]; then \\
+        pip3 install --no-cache-dir \\
+            mkl==2021.1.1 \\
+            mkl-include==2021.1.1 \\
+            mkl-devel==2021.1.1 \\
+        && pip3 install --no-cache-dir --progress-bar on --index-url $VLLM_INDEX_URL -r /run/secrets/requirements \\
+        # Need to install in-house build of pytorch-triton to support triton_key definition used by torch 2.5.1
+        && cd /tmp \\
+        && wget $PYTORCH_TRITON_URL \\
+        && pip install --no-cache-dir /tmp/pytorch_triton-*.whl \\
+        && rm /tmp/pytorch_triton-*.whl; \\
+    else \\
+        # public vLLM needed for vLLM backend
+        pip3 install vllm=={DEFAULT_TRITON_VERSION_MAP["vllm_version"]}; \\
+    fi
+
+ARG PYVER=3.12
+ENV LD_LIBRARY_PATH /usr/local/lib:/usr/local/lib/python${{PYVER}}/dist-packages/torch/lib:${{LD_LIBRARY_PATH}}
+"""

     if "dali" in backends:
         df += """
@@ -1836,13 +1855,21 @@ def create_docker_build_script(script_name, container_install_dir, container_ci_
     finalargs = [
         "docker",
         "build",
+    ]
+    if secrets != "":
+        finalargs += [
+            f"--secret id=req,src={requirements}",
+            f"--build-arg VLLM_INDEX_URL={vllm_index_url}",
+            f"--build-arg PYTORCH_TRITON_URL={pytorch_triton_url}",
+            f"--build-arg BUILD_PUBLIC_VLLM={build_public_vllm}"
+        ]
+    finalargs += [
         "-t",
         "tritonserver",
         "-f",
         os.path.join(FLAGS.build_dir, "Dockerfile"),
         ".",
     ]

     docker_script.cwd(THIS_SCRIPT_DIR)
     docker_script.cmd(finalargs, check_exitcode=True)
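For reference, a sketch of the docker build command this script emits when secrets are supplied. The requirements path and URLs below are illustrative placeholders rather than values from this PR, <build_dir> stands in for FLAGS.build_dir, and the --secret flag requires BuildKit:

    docker build \
        --secret id=req,src=/path/to/requirements.txt \
        --build-arg VLLM_INDEX_URL=https://example.com/pypi/simple \
        --build-arg PYTORCH_TRITON_URL=https://example.com/wheels/pytorch_triton.whl \
        --build-arg BUILD_PUBLIC_VLLM=false \
        -t tritonserver \
        -f <build_dir>/Dockerfile .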

@@ -2683,6 +2710,19 @@ def enable_all():
         default=DEFAULT_TRITON_VERSION_MAP["rhel_py_version"],
         help="This flag sets the Python version for RHEL platform of Triton Inference Server to be built. Default: the latest supported version.",
     )
+    parser.add_argument(
+        "--build-secret",
+        action="append",
+        required=False,
+        nargs=2,
+        metavar=('key', 'value'),
+        help="Add build secrets in the form of <key> <value>. These secrets are used during the build process for vLLM. The secrets are passed to the Docker build step as `--secret id=<key>`. The following keys are expected and their purposes are described below:\n\n"
+        " - 'req': A file containing a list of dependencies for pip (e.g., requirements.txt).\n"
+        " - 'vllm_index_url': The index URL for the pip install.\n"
+        " - 'pytorch_triton_url': The location of the PyTorch wheel to download.\n"
+        " - 'build_public_vllm': A flag (default is 'true') indicating whether to build the public vLLM version.\n\n"
+        "Ensure that the required environment variables for these secrets are set before running the build."
+    )
     FLAGS = parser.parse_args()

     if FLAGS.image is None:
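A usage sketch for the new flag. All values below are placeholders, and --backend vllm is build.py's existing backend selector, not something added in this PR:

    python3 build.py --backend vllm \
        --build-secret req /path/to/requirements.txt \
        --build-secret vllm_index_url https://example.com/pypi/simple \
        --build-secret pytorch_triton_url https://example.com/wheels/pytorch_triton.whl \
        --build-secret build_public_vllm false

Because the argument uses action="append" with nargs=2, FLAGS.build_secret collects [key, value] pairs, which dict() later converts into the secrets mapping consumed by create_docker_build_script.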
@@ -2709,6 +2749,8 @@ def enable_all():
         FLAGS.override_backend_cmake_arg = []
     if FLAGS.extra_backend_cmake_arg is None:
         FLAGS.extra_backend_cmake_arg = []
+    if FLAGS.build_secret is None:
+        FLAGS.build_secret = []

     # if --enable-all is specified, then update FLAGS to enable all
     # settings, backends, repo-agents, caches, file systems, endpoints, etc.
@@ -2802,6 +2844,15 @@ def enable_all():
         )
         backends["python"] = backends["vllm"]

+    secrets = dict(getattr(FLAGS, 'build_secret', []))
+    if secrets is not None:
+        requirements = secrets.get('req','')
+        vllm_index_url = secrets.get('vllm_index_url','')
+        pytorch_triton_url = secrets.get('pytorch_triton_url','')
+        build_public_vllm = secrets.get('build_public_vllm','true')
+        log('Build Arg for BUILD_PUBLIC_VLLM: "{}"'.format(build_public_vllm))
+
+
     # Initialize map of repo agents to build and repo-tag for each.
     repoagents = {}
     for be in FLAGS.repoagent:
qa/common/gen_qa_model_repository: 8 additions & 3 deletions
@@ -500,9 +500,14 @@ chmod -R 777 $VOLUME_FORMATDESTDIR
 python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR
 chmod -R 777 $VOLUME_DATADEPENDENTDIR
 # Make shared library for custom Hardmax plugin.
-(git clone -b release/${TENSORRT_VERSION} https://github.com/NVIDIA/TensorRT.git && \
-    cd /workspace/TensorRT/samples/python/onnx_custom_plugin && rm -rf build && mkdir build && \
-    cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $VOLUME_PLGDESTDIR/.)
+if [ -d "/usr/src/tensorrt" ]; then
+    cd /usr/src/tensorrt/samples/python/onnx_custom_plugin
+else
+    git clone -b release/${TENSORRT_VERSION} https://github.com/NVIDIA/TensorRT.git
+    cd /workspace/TensorRT/samples/python/onnx_custom_plugin
+fi
+rm -rf build && mkdir build && \
+    cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $VOLUME_PLGDESTDIR/.
 LD_PRELOAD=$VOLUME_PLGDESTDIR/libcustomHardmaxPlugin.so python3 $VOLUME_SRCDIR/gen_qa_trt_plugin_models.py --models_dir=$VOLUME_PLGDESTDIR
 chmod -R 777 $VOLUME_PLGDESTDIR
 EOF
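A quick sanity check one might run after the plugin build (a sketch: it assumes the script's $VOLUME_PLGDESTDIR is set as above and that TensorRT's libraries are resolvable by the loader):

    # dlopen the plugin; a failed build or missing dependency raises OSError
    python3 -c "import ctypes; ctypes.CDLL('$VOLUME_PLGDESTDIR/libcustomHardmaxPlugin.so')" \
        && echo "customHardmaxPlugin loads OK"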