18 changes: 9 additions & 9 deletions release_images_general.yml
@@ -58,24 +58,24 @@ release_images:
       public_registry: True
   5:
     framework: "vllm"
-    version: "0.11.0"
+    version: "0.11.1"
     arch_type: "x86"
     customer_type: "ec2"
     general:
       device_types: [ "gpu" ]
       python_versions: [ "py312" ]
       os_version: "ubuntu22.04"
-      cuda_version: "cu128"
+      cuda_version: "cu129"
       example: False
       disable_sm_tag: False
       force_release: False
       public_registry: True
       enable_soci: True
   6:
     framework: "vllm"
-    version: "0.10.2"
-    arch_type: "arm64"
-    customer_type: "ec2"
+    version: "0.11.1"
+    arch_type: "x86"
+    customer_type: "sagemaker"
     general:
       device_types: [ "gpu" ]
       python_versions: [ "py312" ]
@@ -88,14 +88,14 @@ release_images:
       enable_soci: True
   7:
     framework: "vllm"
-    version: "0.11.0"
-    arch_type: "x86"
-    customer_type: "sagemaker"
+    version: "0.10.2"
+    arch_type: "arm64"
+    customer_type: "ec2"
     general:
       device_types: [ "gpu" ]
       python_versions: [ "py312" ]
       os_version: "ubuntu22.04"
-      cuda_version: "cu128"
+      cuda_version: "cu129"
       example: False
       disable_sm_tag: False
       force_release: False
1 change: 1 addition & 0 deletions test/vllm/sagemaker/test_sm_endpoint.py
@@ -56,6 +56,7 @@ def deploy_endpoint(name, image_uri, role, instance_type):
         instance_type=instance_type,
         initial_instance_count=1,
         endpoint_name=name,
+        inference_ami_version="al2-ami-sagemaker-inference-gpu-3-1",
         wait=True,
     )
     print("Endpoint deployment completed successfully")
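For context on the new argument above, a minimal, hypothetical sketch of what the test's deployment could look like end to end, assuming a recent SageMaker Python SDK where Model.deploy() accepts inference_ami_version; the image URI, role, instance type, and endpoint name below are placeholders, not values from this repository.

# Hypothetical usage sketch (not the repository's actual test code).
import sagemaker
from sagemaker.model import Model

session = sagemaker.Session()
model = Model(
    image_uri="<vllm-dlc-image-uri>",        # image under test (placeholder)
    role="<sagemaker-execution-role-arn>",   # IAM execution role (placeholder)
    sagemaker_session=session,
)
predictor = model.deploy(
    instance_type="ml.g5.12xlarge",          # placeholder GPU instance type
    initial_instance_count=1,
    endpoint_name="vllm-endpoint-smoke-test",
    # As in the test change above: pin the GPU inference AMI for the endpoint hosts.
    inference_ami_version="al2-ami-sagemaker-inference-gpu-3-1",
    wait=True,
)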
6 changes: 3 additions & 3 deletions vllm/buildspec-sm.yml
@@ -2,7 +2,7 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
 prod_account_id: &PROD_ACCOUNT_ID 763104351884
 region: &REGION <set-$REGION-in-environment>
 framework: &FRAMEWORK vllm
-version: &VERSION "0.11.0"
+version: &VERSION "0.11.1"
 short_version: &SHORT_VERSION "0.11"
 arch_type: &ARCH_TYPE x86_64
 autopatch_build: "False"
@@ -35,7 +35,7 @@ images:
     <<: *BUILD_CONTEXT
     image_size_baseline: 26000
     device_type: &DEVICE_TYPE gpu
-    cuda_version: &CUDA_VERSION cu128
+    cuda_version: &CUDA_VERSION cu129
     python_version: &DOCKER_PYTHON_VERSION py3
     tag_python_version: &TAG_PYTHON_VERSION py312
     os_version: &OS_VERSION ubuntu22.04
@@ -50,4 +50,4 @@ images:
       - sanity
       - security
       - sagemaker
-      - eks
+      # - eks
34 changes: 17 additions & 17 deletions vllm/buildspec.yml
@@ -2,7 +2,7 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
 prod_account_id: &PROD_ACCOUNT_ID 763104351884
 region: &REGION <set-$REGION-in-environment>
 framework: &FRAMEWORK vllm
-version: &VERSION "0.11.0"
+version: &VERSION "0.11.1"
 short_version: &SHORT_VERSION "0.11"
 arch_type: &ARCH_TYPE x86_64
 autopatch_build: "False"
@@ -35,7 +35,7 @@ images:
     <<: *BUILD_CONTEXT
     image_size_baseline: 26000
     device_type: &DEVICE_TYPE gpu
-    cuda_version: &CUDA_VERSION cu128
+    cuda_version: &CUDA_VERSION cu129
     python_version: &DOCKER_PYTHON_VERSION py3
     tag_python_version: &TAG_PYTHON_VERSION py312
     os_version: &OS_VERSION ubuntu22.04
@@ -49,19 +49,19 @@ images:
     test_platforms:
       - sanity
       - security
-      - ec2
-      - eks
-    tests:
-      - platform: ec2-multi-node-efa
-        params:
-          instance_type: p4d.24xlarge
-          node_count: 2
-        run:
-          - python test/v2/ec2/vllm/test_ec2.py
+      # - ec2
+      # - eks
+    # tests:
+      # - platform: ec2-multi-node-efa
+        # params:
+          # instance_type: p4d.24xlarge
+          # node_count: 2
+        # run:
+          # - python test/v2/ec2/vllm/test_ec2.py

-      # - platform: eks
-        params:
-          cluster: dlc-vllm
-          namespace: vllm
-        run:
-          - python test/v2/eks/vllm/vllm_eks_test.py
+      # # - platform: eks
+        # params:
+          # cluster: dlc-vllm
+          # namespace: vllm
+        # run:
+          # - python test/v2/eks/vllm/vllm_eks_test.py
2 changes: 1 addition & 1 deletion vllm/x86_64/gpu/Dockerfile
@@ -1,4 +1,4 @@
-FROM docker.io/vllm/vllm-openai:v0.11.0 as base
+FROM docker.io/vllm/vllm-openai:v0.11.1 as base
 ARG PYTHON="python3"
 LABEL maintainer="Amazon AI"
 ARG EFA_VERSION="1.43.3"