18 changes: 9 additions & 9 deletions release_images_general.yml
@@ -58,24 +58,24 @@ release_images:
       public_registry: True
   5:
     framework: "vllm"
-    version: "0.11.0"
+    version: "0.11.1"
     arch_type: "x86"
     customer_type: "ec2"
     general:
       device_types: [ "gpu" ]
       python_versions: [ "py312" ]
       os_version: "ubuntu22.04"
-      cuda_version: "cu128"
+      cuda_version: "cu129"
       example: False
       disable_sm_tag: False
       force_release: False
       public_registry: True
       enable_soci: True
   6:
     framework: "vllm"
-    version: "0.10.2"
-    arch_type: "arm64"
-    customer_type: "ec2"
+    version: "0.11.1"
+    arch_type: "x86"
+    customer_type: "sagemaker"
     general:
       device_types: [ "gpu" ]
       python_versions: [ "py312" ]
@@ -88,14 +88,14 @@ release_images:
       enable_soci: True
   7:
     framework: "vllm"
-    version: "0.11.0"
-    arch_type: "x86"
-    customer_type: "sagemaker"
+    version: "0.10.2"
+    arch_type: "arm64"
+    customer_type: "ec2"
     general:
       device_types: [ "gpu" ]
       python_versions: [ "py312" ]
       os_version: "ubuntu22.04"
-      cuda_version: "cu128"
+      cuda_version: "cu129"
       example: False
       disable_sm_tag: False
       force_release: False
1 change: 1 addition & 0 deletions test/vllm/sagemaker/test_sm_endpoint.py
@@ -56,6 +56,7 @@ def deploy_endpoint(name, image_uri, role, instance_type):
         instance_type=instance_type,
         initial_instance_count=1,
         endpoint_name=name,
+        inference_ami_version="al2-ami-sagemaker-inference-gpu-3-1",
         wait=True,
     )
     print("Endpoint deployment completed successfully")
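For context on the new argument above, a minimal, hypothetical sketch of what the test's deployment could look like end to end, assuming a recent SageMaker Python SDK where Model.deploy() accepts inference_ami_version; the image URI, role, instance type, and endpoint name below are placeholders, not values from this repository.

# Hypothetical usage sketch (not the repository's actual test code).
import sagemaker
from sagemaker.model import Model

session = sagemaker.Session()
model = Model(
    image_uri="<vllm-dlc-image-uri>",        # image under test (placeholder)
    role="<sagemaker-execution-role-arn>",   # IAM execution role (placeholder)
    sagemaker_session=session,
)
predictor = model.deploy(
    instance_type="ml.g5.12xlarge",          # placeholder GPU instance type
    initial_instance_count=1,
    endpoint_name="vllm-endpoint-smoke-test",
    # As in the test change above: pin the GPU inference AMI for the endpoint hosts.
    inference_ami_version="al2-ami-sagemaker-inference-gpu-3-1",
    wait=True,
)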
6 changes: 3 additions & 3 deletions vllm/buildspec-sm.yml
@@ -2,7 +2,7 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
 prod_account_id: &PROD_ACCOUNT_ID 763104351884
 region: &REGION <set-$REGION-in-environment>
 framework: &FRAMEWORK vllm
-version: &VERSION "0.11.0"
+version: &VERSION "0.11.1"
 short_version: &SHORT_VERSION "0.11"
 arch_type: &ARCH_TYPE x86_64
 autopatch_build: "False"
@@ -35,7 +35,7 @@ images:
     <<: *BUILD_CONTEXT
     image_size_baseline: 26000
     device_type: &DEVICE_TYPE gpu
-    cuda_version: &CUDA_VERSION cu128
+    cuda_version: &CUDA_VERSION cu129
     python_version: &DOCKER_PYTHON_VERSION py3
     tag_python_version: &TAG_PYTHON_VERSION py312
     os_version: &OS_VERSION ubuntu22.04
@@ -50,4 +50,4 @@ images:
       - sanity
       - security
       - sagemaker
-      - eks
+      # - eks
34 changes: 17 additions & 17 deletions vllm/buildspec.yml
@@ -2,7 +2,7 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
 prod_account_id: &PROD_ACCOUNT_ID 763104351884
 region: &REGION <set-$REGION-in-environment>
 framework: &FRAMEWORK vllm
-version: &VERSION "0.11.0"
+version: &VERSION "0.11.1"
 short_version: &SHORT_VERSION "0.11"
 arch_type: &ARCH_TYPE x86_64
 autopatch_build: "False"
@@ -35,7 +35,7 @@ images:
     <<: *BUILD_CONTEXT
     image_size_baseline: 26000
     device_type: &DEVICE_TYPE gpu
-    cuda_version: &CUDA_VERSION cu128
+    cuda_version: &CUDA_VERSION cu129
     python_version: &DOCKER_PYTHON_VERSION py3
     tag_python_version: &TAG_PYTHON_VERSION py312
     os_version: &OS_VERSION ubuntu22.04
@@ -49,19 +49,19 @@ images:
     test_platforms:
       - sanity
       - security
-      - ec2
-      - eks
-    tests:
-      - platform: ec2-multi-node-efa
-        params:
-          instance_type: p4d.24xlarge
-          node_count: 2
-        run:
-          - python test/v2/ec2/vllm/test_ec2.py
+      # - ec2
+      # - eks
+    # tests:
+      # - platform: ec2-multi-node-efa
+        # params:
+          # instance_type: p4d.24xlarge
+          # node_count: 2
+        # run:
+          # - python test/v2/ec2/vllm/test_ec2.py

-      # - platform: eks
-        params:
-          cluster: dlc-vllm
-          namespace: vllm
-        run:
-          - python test/v2/eks/vllm/vllm_eks_test.py
+      # # - platform: eks
+        # params:
+          # cluster: dlc-vllm
+          # namespace: vllm
+        # run:
+          # - python test/v2/eks/vllm/vllm_eks_test.py
2 changes: 1 addition & 1 deletion vllm/x86_64/gpu/Dockerfile
@@ -1,4 +1,4 @@
-FROM docker.io/vllm/vllm-openai:v0.11.0 as base
+FROM docker.io/vllm/vllm-openai:v0.11.1 as base
 ARG PYTHON="python3"
 LABEL maintainer="Amazon AI"
 ARG EFA_VERSION="1.43.3"