11 changes: 8 additions & 3 deletions docker/Makefile
@@ -45,6 +45,7 @@ PYTHON_VERSION ?=
NGC_STAGING_REPO ?= nvcr.io/nvstaging/tensorrt-llm
NGC_REPO ?= nvcr.io/nvidia/tensorrt-llm
NGC_USE_STAGING ?= 0
NGC_AUTO_REPO ?= $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO))

define add_local_user
docker build \
@@ -202,16 +203,20 @@ ngc-devel_%: IMAGE_TAG = $(TRT_LLM_VERSION)
ngc-devel_push: DOCKER_BUILD_ARGS = --push
ngc-devel_push: ngc-devel_build ;

ngc-devel_run: IMAGE_NAME = $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO))
ngc-devel_run: IMAGE_NAME = $(NGC_AUTO_REPO)
ngc-devel_pull: IMAGE_NAME = $(NGC_AUTO_REPO)

ngc-release_%: STAGE = release
ngc-release_%: DOCKER_BUILD_OPTS = --pull --load --platform linux/$(PLATFORM)
ngc-release_%: DEVEL_IMAGE = $(NGC_STAGING_REPO)/devel:$(TRT_LLM_VERSION)
ngc-release_%: IMAGE_NAME = $(NGC_STAGING_REPO)
ngc-release_%: IMAGE_TAG = $(TRT_LLM_VERSION)-$(PLATFORM)

ngc-release_run: IMAGE_NAME = $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO))
ngc-release_run: WORK_DIR = /app/tensorrt_llm
ngc-release_run: WORK_DIR = /app/tensorrt_llm
ngc-release_run: IMAGE_NAME = $(NGC_AUTO_REPO)
ngc-release_run: IMAGE_TAG = $(TRT_LLM_VERSION)
ngc-release_pull: IMAGE_NAME = $(NGC_AUTO_REPO)
ngc-release_pull: IMAGE_TAG = $(TRT_LLM_VERSION)

ngc-manifest_%: STAGE = release
ngc-manifest_%: IMAGE_NAME = $(NGC_STAGING_REPO)
92 changes: 92 additions & 0 deletions docker/develop.md
@@ -0,0 +1,92 @@
# Description

TensorRT-LLM provides users with an easy-to-use Python API to define Large Language Models (LLMs) and supports
state-of-the-art optimizations to perform inference efficiently on NVIDIA GPUs. TensorRT-LLM also contains components to
create Python and C++ runtimes that orchestrate the inference execution in a performant way.

# Overview

## TensorRT-LLM Develop Container

The TensorRT-LLM Develop container includes all necessary dependencies to build TensorRT-LLM from source. It is
specifically designed to be used alongside the source code cloned from the official TensorRT-LLM repository:

[GitHub Repository - NVIDIA TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM)

Full instructions for cloning the TensorRT-LLM repository can be found in
the [TensorRT-LLM Documentation](https://nvidia.github.io/TensorRT-LLM/installation/build-from-source-linux.html).
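
A typical clone sequence, following the linked documentation, looks like the sketch below; the submodule and Git LFS
steps are assumptions based on that guide, so verify them against the version you are building:

```bash
# Clone the repository and fetch submodules plus large files tracked with Git LFS.
git clone https://github.com/NVIDIA/TensorRT-LLM.git
cd TensorRT-LLM
git submodule update --init --recursive
git lfs install && git lfs pull
```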

### Running TensorRT-LLM Using Docker

With the top-level directory of the TensorRT-LLM repository cloned to your local machine, you can run the following
command to start the development container:

```bash
make -C docker ngc-devel_run LOCAL_USER=1 DOCKER_PULL=1 IMAGE_TAG=x.xx.x
```

where `x.xx.x` is the version of the TensorRT-LLM container to use. This command pulls the specified container from the
NVIDIA NGC registry, sets up the local user's account within the container, and launches it with full GPU support. The
local source code of TensorRT-LLM will be mounted inside the container at the path `/code/tensorrt_llm` for seamless
integration. Ensure that the image version matches the version of TensorRT-LLM in your current local git branch. If you
do not specify an `IMAGE_TAG`, the command will attempt to resolve it automatically, but not every intermediate release
is accompanied by a development container. In that case, use the latest version preceding the version of your
development branch.
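
If you are unsure which version your local branch corresponds to, you can print the version string from the repository;
this is a minimal sketch that assumes the version is declared in `tensorrt_llm/version.py`, as in recent releases:

```bash
# Show the TensorRT-LLM version declared in the local checkout (path assumed).
grep __version__ tensorrt_llm/version.py
```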

If you prefer launching the container directly with `docker`, you can use the following command:

```bash
docker run --rm -it --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \
--gpus=all \
--env "CCACHE_DIR=/code/tensorrt_llm/cpp/.ccache" \
--env "CCACHE_BASEDIR=/code/tensorrt_llm" \
--env "CONAN_HOME=/code/tensorrt_llm/cpp/.conan" \
--workdir /code/tensorrt_llm \
--tmpfs /tmp:exec \
--volume .:/code/tensorrt_llm \
nvcr.io/nvidia/tensorrt-llm/devel:x.xx.x
```

Note that this will start the container with the user `root`, which may leave files with root ownership in your local
checkout.
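
If you prefer plain `docker run` but want to avoid root-owned files, one option is to pass your host UID and GID. This
is a minimal sketch; unlike `LOCAL_USER=1`, it does not create a matching user entry inside the container, so tools that
look up the current user may complain:

```bash
docker run --rm -it --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 \
    --gpus=all \
    --user "$(id -u):$(id -g)" \
    --env "CCACHE_DIR=/code/tensorrt_llm/cpp/.ccache" \
    --env "CCACHE_BASEDIR=/code/tensorrt_llm" \
    --env "CONAN_HOME=/code/tensorrt_llm/cpp/.conan" \
    --workdir /code/tensorrt_llm \
    --tmpfs /tmp:exec \
    --volume .:/code/tensorrt_llm \
    nvcr.io/nvidia/tensorrt-llm/devel:x.xx.x
```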

### Building the TensorRT-LLM Wheel within the Container

You can build the TensorRT-LLM Python wheel inside the development container using the following command:

```bash
./scripts/build_wheel.py --clean --use_ccache --cuda_architectures=native
```

#### Explanation of Build Flags:

- `--clean`: Clears intermediate build artifacts from prior builds to ensure a fresh compilation.
- `--use_ccache`: Enables `ccache` to optimize and accelerate subsequent builds by caching compilation results.
- `--cuda_architectures=native`: Configures the build for the native architecture of your GPU. Omit this flag to build
the wheel for all supported architectures, or pin specific architectures as shown in the sketch after this list. For
additional details, refer to
the [CUDA Architectures Documentation](https://cmake.org/cmake/help/latest/prop_tgt/CUDA_ARCHITECTURES.html#prop_tgt:CUDA_ARCHITECTURES).
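
For example, to pin the build to specific GPU architectures rather than detecting them natively, you can pass an
explicit list. The values below are illustrative (Ampere and Hopper); check `./scripts/build_wheel.py --help` for the
exact format your version expects:

```bash
# Build only for SM 8.0, 8.6, and 9.0 GPUs; adjust the list to your target hardware.
./scripts/build_wheel.py --clean --use_ccache --cuda_architectures="80-real;86-real;90-real"
```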

For additional build options and their usage, refer to the help documentation by running:

```bash
./scripts/build_wheel.py --help
```

The wheel will be built in the `build` directory and can be installed using `pip install` like so:

```bash
pip install ./build/tensorrt_llm*.whl
```
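
As a quick sanity check that the wheel installed correctly, you can import the package and print its version (assuming
the package exposes `__version__`, as recent releases do):

```bash
python3 -c "import tensorrt_llm; print(tensorrt_llm.__version__)"
```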

For additional information on building the TensorRT-LLM wheel, refer to
the [official documentation on building from source](https://nvidia.github.io/TensorRT-LLM/installation/build-from-source-linux.html#option-1-full-build-with-c-compilation).

### Security CVEs

To review known CVEs on this image, refer to the Security Scanning tab on this page.

### License

By pulling and using the container, you accept the terms and conditions of
this [End User License Agreement](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/)
and [Product-Specific Terms](https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/).