diff --git a/build.py b/build.py
index 1cd03f5e99..5e3c072247 100755
--- a/build.py
+++ b/build.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -72,11 +72,11 @@ DEFAULT_TRITON_VERSION_MAP = {
     "release_version": "2.54.0dev",
-    "triton_container_version": "24.01dev",
-    "upstream_container_version": "24.12",
+    "triton_container_version": "25.01dev",
+    "upstream_container_version": "25.01",
     "ort_version": "1.20.1",
-    "ort_openvino_version": "2024.4.0",
-    "standalone_openvino_version": "2024.4.0",
+    "ort_openvino_version": "2024.5.0",
+    "standalone_openvino_version": "2024.5.0",
     "dcgm_version": "3.3.6",
     "vllm_version": "0.6.3.post1",
     "rhel_py_version": "3.12.3",
 }
 
@@ -1048,6 +1048,8 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
     # Install the windows- or linux-specific buildbase dependencies
     if target_platform() == "windows":
         df += """
+RUN python3 -m pip install build
+
 SHELL ["cmd", "/S", "/C"]
 """
     else:
@@ -1465,12 +1467,31 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
 """
 
     if "vllm" in backends:
-        df += """
-# vLLM needed for vLLM backend
-RUN pip3 install vllm=={}
-""".format(
-            FLAGS.vllm_version
-        )
+        df += f"""
+ARG BUILD_PUBLIC_VLLM="true"
+ARG VLLM_INDEX_URL
+ARG PYTORCH_TRITON_URL
+
+RUN --mount=type=secret,id=req,target=/run/secrets/requirements \\
+    if [ "$BUILD_PUBLIC_VLLM" = "false" ]; then \\
+        pip3 install --no-cache-dir \\
+            mkl==2021.1.1 \\
+            mkl-include==2021.1.1 \\
+            mkl-devel==2021.1.1 \\
+        && pip3 install --no-cache-dir --progress-bar on --index-url $VLLM_INDEX_URL -r /run/secrets/requirements \\
+        # Need to install in-house build of pytorch-triton to support triton_key definition used by torch 2.5.1
+        && cd /tmp \\
+        && wget $PYTORCH_TRITON_URL \\
+        && pip install --no-cache-dir /tmp/pytorch_triton-*.whl \\
+        && rm /tmp/pytorch_triton-*.whl; \\
+    else \\
+        # public vLLM needed for vLLM backend
+        pip3 install vllm=={DEFAULT_TRITON_VERSION_MAP["vllm_version"]}; \\
+    fi
+
+ARG PYVER=3.12
+ENV LD_LIBRARY_PATH /usr/local/lib:/usr/local/lib/python${{PYVER}}/dist-packages/torch/lib:${{LD_LIBRARY_PATH}}
+"""
 
     if "dali" in backends:
         df += """
@@ -1838,13 +1859,21 @@ def create_docker_build_script(script_name, container_install_dir, container_ci_
 
     finalargs = [
         "docker",
         "build",
+    ]
+    if secrets != "":
+        finalargs += [
+            f"--secret id=req,src={requirements}",
+            f"--build-arg VLLM_INDEX_URL={vllm_index_url}",
+            f"--build-arg PYTORCH_TRITON_URL={pytorch_triton_url}",
+            f"--build-arg BUILD_PUBLIC_VLLM={build_public_vllm}",
+        ]
+    finalargs += [
         "-t",
         "tritonserver",
         "-f",
         os.path.join(FLAGS.build_dir, "Dockerfile"),
         ".",
     ]
-
     docker_script.cwd(THIS_SCRIPT_DIR)
     docker_script.cmd(finalargs, check_exitcode=True)
@@ -2689,6 +2718,19 @@ def enable_all():
         default=DEFAULT_TRITON_VERSION_MAP["rhel_py_version"],
         help="This flag sets the Python version for RHEL platform of Triton Inference Server to be built. Default: the latest supported version.",
     )
+    parser.add_argument(
+        "--build-secret",
+        action="append",
+        required=False,
+        nargs=2,
+        metavar=("key", "value"),
+        help="Add build secrets in the form of <key> <value>. These secrets are used during the build process for the vLLM backend. The 'req' secret is passed to the Docker build step as `--secret id=req`; the remaining keys are passed as `--build-arg` values. The following keys are expected and their purposes are described below:\n\n"
+        " - 'req': A file containing a list of dependencies for pip (e.g., requirements.txt).\n"
+        " - 'vllm_index_url': The index URL for the pip install.\n"
+        " - 'pytorch_triton_url': The location of the PyTorch wheel to download.\n"
+        " - 'build_public_vllm': A flag (default is 'true') indicating whether to build the public vLLM version.\n\n"
+        "Ensure that the required values for these secrets are set before running the build.",
+    )
 
     FLAGS = parser.parse_args()
     if FLAGS.image is None:
@@ -2715,6 +2757,8 @@ def enable_all():
         FLAGS.override_backend_cmake_arg = []
     if FLAGS.extra_backend_cmake_arg is None:
         FLAGS.extra_backend_cmake_arg = []
+    if FLAGS.build_secret is None:
+        FLAGS.build_secret = []
 
     # if --enable-all is specified, then update FLAGS to enable all
     # settings, backends, repo-agents, caches, file systems, endpoints, etc.
@@ -2808,6 +2852,14 @@ def enable_all():
         )
         backends["python"] = backends["vllm"]
 
+    secrets = dict(getattr(FLAGS, "build_secret", []))
+    if secrets is not None:
+        requirements = secrets.get("req", "")
+        vllm_index_url = secrets.get("vllm_index_url", "")
+        pytorch_triton_url = secrets.get("pytorch_triton_url", "")
+        build_public_vllm = secrets.get("build_public_vllm", "true")
+        log('Build Arg for BUILD_PUBLIC_VLLM: "{}"'.format(build_public_vllm))
+
     # Initialize map of repo agents to build and repo-tag for each.
     repoagents = {}
     for be in FLAGS.repoagent:
diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
index 93e4dc2dfd..edac18ccfe 100755
--- a/qa/common/gen_qa_model_repository
+++ b/qa/common/gen_qa_model_repository
@@ -500,9 +500,14 @@ chmod -R 777 $VOLUME_FORMATDESTDIR
 python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR
 chmod -R 777 $VOLUME_DATADEPENDENTDIR
 # Make shared library for custom Hardmax plugin.
-(git clone -b release/${TENSORRT_VERSION} https://github.com/NVIDIA/TensorRT.git && \
-cd /workspace/TensorRT/samples/python/onnx_custom_plugin && rm -rf build && mkdir build && \
-cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $VOLUME_PLGDESTDIR/.)
+if [ -d "/usr/src/tensorrt" ]; then
+    cd /usr/src/tensorrt/samples/python/onnx_custom_plugin
+else
+    git clone -b release/${TENSORRT_VERSION} https://github.com/NVIDIA/TensorRT.git
+    cd /workspace/TensorRT/samples/python/onnx_custom_plugin
+fi
+rm -rf build && mkdir build && \
+cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $VOLUME_PLGDESTDIR/.
 LD_PRELOAD=$VOLUME_PLGDESTDIR/libcustomHardmaxPlugin.so python3 $VOLUME_SRCDIR/gen_qa_trt_plugin_models.py --models_dir=$VOLUME_PLGDESTDIR
 chmod -R 777 $VOLUME_PLGDESTDIR
 EOF
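Usage note: the new --build-secret flag takes repeated <key> <value> pairs that build.py folds into a dict; 'req' is forwarded to docker build as a BuildKit secret, and the remaining keys become build args. Below is a minimal sketch of an invocation exercising the private-vLLM path; every path and URL in it is an illustrative placeholder, not a value taken from this patch.

    # Hypothetical build command (abridged; real builds typically pass
    # more flags). The 'req' file is mounted at build time via
    # --secret id=req, so it is never baked into an image layer.
    # build_public_vllm=false selects the private-index branch of the
    # RUN instruction added above.
    ./build.py --enable-gpu --backend vllm \
        --build-secret req ./requirements.txt \
        --build-secret vllm_index_url https://pypi.internal.example/simple \
        --build-secret pytorch_triton_url https://wheels.internal.example/pytorch_triton-3.1.0-cp312-cp312-linux_x86_64.whl \
        --build-secret build_public_vllm false

With build_public_vllm left at its default of "true", the generated RUN instead installs the public vllm==0.6.3.post1 wheel pinned in DEFAULT_TRITON_VERSION_MAP. Note that the generated docker build command uses --secret, which requires BuildKit.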