# PR validation for the SGLang SageMaker image.
#
# Flow: check-changes -> build-sglang-image -> three independent test jobs
# (local benchmark, upstream frontend test suite, SageMaker endpoint test).
#
# Secrets, repository variables and job outputs are handed to shell steps
# through `env:` rather than interpolated into the script text, so their values
# are never parsed as shell code and the token never appears in the rendered
# script.
name: PR - SGLang

on:
  pull_request:
    branches:
      - main
    paths:
      - "docker/sglang/**"

permissions:
  contents: read

# A newer push to the same PR cancels the run that is still in progress.
concurrency:
  group: pr-sglang-${{ github.event.pull_request.number }}
  cancel-in-progress: true

# Workflow-level env is exported to every step, so shell steps read these as
# ordinary environment variables.
env:
  SGLANG_VERSION: "0.5.5"
  PYTHON_VERSION: "py312"
  CUDA_VERSION: "cu129"
  OS_VERSION: "ubuntu22.04"
  FORCE_COLOR: "1"

jobs:
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      sglang-sagemaker: ${{ steps.changes.outputs.sglang-sagemaker }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - uses: pre-commit/action@v3.0.1
        with:
          extra_args: --all-files
      - name: Detect file changes
        id: changes
        uses: dorny/paths-filter@v3
        with:
          filters: |
            sglang-sagemaker:
              - "docker/sglang/Dockerfile"

  build-sglang-image:
    needs: [check-changes]
    if: needs.check-changes.outputs.sglang-sagemaker == 'true'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-build-runner
    outputs:
      image-uri: ${{ steps.image-uri-build.outputs.IMAGE_URI }}
    steps:
      - uses: actions/checkout@v5
      - run: .github/scripts/runner_setup.sh
      - run: .github/scripts/buildkitd.sh

      - name: ECR login
        uses: ./.github/actions/ecr-authenticate
        with:
          aws_region: ${{ vars.AWS_REGION }}
          aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}

      - name: Resolve image URI for build
        id: image-uri-build
        env:
          ECR_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }}
          ECR_REGION: ${{ vars.AWS_REGION }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          IMAGE_URI="${ECR_ACCOUNT_ID}.dkr.ecr.${ECR_REGION}.amazonaws.com/ci:sglang-${SGLANG_VERSION}-gpu-${PYTHON_VERSION}-${CUDA_VERSION}-${OS_VERSION}-sagemaker-pr-${PR_NUMBER}"
          echo "Image URI to build: ${IMAGE_URI}"
          # GITHUB_ENV: later steps of this job; GITHUB_OUTPUT: the job output.
          echo "IMAGE_URI=${IMAGE_URI}" >> "${GITHUB_ENV}"
          echo "IMAGE_URI=${IMAGE_URI}" >> "${GITHUB_OUTPUT}"

      - name: Build image
        run: |
          # base image: https://hub.docker.com/r/lmsysorg/sglang/tags
          # CACHE_REFRESH changes once a day, invalidating the layers after it.
          docker buildx build --progress plain \
            --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
            --build-arg BASE_IMAGE="lmsysorg/sglang:v${SGLANG_VERSION}-${CUDA_VERSION}-amd64" \
            --cache-to=type=inline \
            --cache-from=type=registry,ref="${IMAGE_URI}" \
            --tag "${IMAGE_URI}" \
            --target sglang-sagemaker \
            -f docker/sglang/Dockerfile .

      - name: Container push
        run: |
          docker push "${IMAGE_URI}"
          docker rmi "${IMAGE_URI}"

  sglang-local-benchmark-test:
    needs: [build-sglang-image]
    if: needs.build-sglang-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5

      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws_region: ${{ vars.AWS_REGION }}
          aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
          image_uri: ${{ needs.build-sglang-image.outputs.image-uri }}

      - name: Setup for SGLang datasets
        run: |
          mkdir -p /tmp/sglang/dataset
          if [ ! -f /tmp/sglang/dataset/ShareGPT_V3_unfiltered_cleaned_split.json ]; then
            echo "Downloading ShareGPT dataset..."
            wget -P /tmp/sglang/dataset https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
          else
            echo "ShareGPT dataset already exists. Skipping download."
          fi

      - name: Start container
        env:
          IMAGE_URI: ${{ needs.build-sglang-image.outputs.image-uri }}
          HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
        run: |
          # `-e HF_TOKEN` (no value) forwards the variable from this step's
          # environment, keeping the token out of the command line.
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all \
            -v "${HOME}/.cache/huggingface:/root/.cache/huggingface" \
            -v /tmp/sglang/dataset:/dataset \
            -p 30000:30000 \
            -e SM_SGLANG_MODEL_PATH=Qwen/Qwen3-0.6B \
            -e SM_SGLANG_REASONING_PARSER=qwen3 \
            -e SM_SGLANG_HOST=127.0.0.1 \
            -e SM_SGLANG_PORT=30000 \
            -e HF_TOKEN \
            "${IMAGE_URI}")
          echo "CONTAINER_ID=${CONTAINER_ID}" >> "${GITHUB_ENV}"
          echo "Waiting for container startup ..."
          # Fixed wait; the logs printed next show how far start-up got.
          sleep 60s
          docker logs "${CONTAINER_ID}"

      - name: Run SGLang tests
        run: |
          docker exec "${CONTAINER_ID}" python3 -m sglang.bench_serving \
            --backend sglang \
            --host 127.0.0.1 --port 30000 \
            --num-prompts 1000 \
            --model Qwen/Qwen3-0.6B \
            --dataset-name sharegpt \
            --dataset-path /dataset/ShareGPT_V3_unfiltered_cleaned_split.json

      - name: Cleanup container and images
        if: always()
        uses: ./.github/actions/container-cleanup
        with:
          container_id: ${{ env.CONTAINER_ID }}

  sglang-lang-test:
    needs: [build-sglang-image]
    if: needs.build-sglang-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:x86-g6exl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5

      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws_region: ${{ vars.AWS_REGION }}
          aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
          image_uri: ${{ needs.build-sglang-image.outputs.image-uri }}

      - name: Checkout SGLang tests
        uses: actions/checkout@v5
        with:
          repository: sgl-project/sglang
          ref: v${{ env.SGLANG_VERSION }}
          path: sglang_source

      - name: Start container
        env:
          IMAGE_URI: ${{ needs.build-sglang-image.outputs.image-uri }}
          HF_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
        run: |
          # The entrypoint is overridden with bash so the container stays up for
          # the `docker exec` steps below; the upstream checkout is mounted at
          # /workdir.
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v "${HOME}/.cache/huggingface:/root/.cache/huggingface" \
            -v ./sglang_source:/workdir --workdir /workdir \
            -e HF_TOKEN \
            "${IMAGE_URI}")
          echo "CONTAINER_ID=${CONTAINER_ID}" >> "${GITHUB_ENV}"

      - name: Setup for SGLang tests
        run: |
          docker exec "${CONTAINER_ID}" sh -c '
            set -eux

            bash scripts/ci/ci_install_dependency.sh
          '

      - name: Run SGLang tests
        run: |
          docker exec "${CONTAINER_ID}" sh -c '
            set -eux
            nvidia-smi

            # Frontend Test
            cd /workdir/test/lang
            python3 run_suite.py --suite per-commit
          '

      - name: Cleanup container and images
        if: always()
        uses: ./.github/actions/container-cleanup
        with:
          container_id: ${{ env.CONTAINER_ID }}

  sglang-sagemaker-test:
    needs: [build-sglang-image]
    if: needs.build-sglang-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
        fleet:default-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5

      - name: Install test dependencies
        run: |
          uv venv
          source .venv/bin/activate

          uv pip install -r test/requirements.txt
          uv pip install -r test/sglang/sagemaker/requirements.txt

      - name: Run sagemaker tests
        env:
          IMAGE_URI: ${{ needs.build-sglang-image.outputs.image-uri }}
        run: |
          source .venv/bin/activate
          cd test/
          python3 -m pytest -vs -rA --image-uri "${IMAGE_URI}" sglang/sagemaker
------------------------------- 馃尦 Git Tools ------------------------------- # - - id: check-merge-conflict - name: "馃尦 git 路 Detect conflict markers" - - id: forbid-new-submodules - name: "馃尦 git 路 Prevent submodule creation" - - id: no-commit-to-branch - name: "馃尦 git 路 Protect main branches" - args: ["--branch", "main", "--branch", "master"] + - id: check-merge-conflict + name: "馃尦 git 路 Detect conflict markers" + - id: forbid-new-submodules + name: "馃尦 git 路 Prevent submodule creation" + - id: no-commit-to-branch + name: "馃尦 git 路 Protect main branches" + args: ["--branch", "main", "--branch", "master"] # ---------------------------- 馃搧 Filesystem Tools --------------------------- # - - id: check-executables-have-shebangs - name: "馃搧 filesystem/鈿欙笍 exec 路 Verify shebang presence" - - id: check-shebang-scripts-are-executable - name: "馃搧 filesystem/鈿欙笍 exec 路 Verify script permissions" - - id: check-symlinks - name: "馃搧 filesystem/馃敆 symlink 路 Check symlink validity" - - id: destroyed-symlinks - name: "馃搧 filesystem/馃敆 symlink 路 Detect broken symlinks" - - id: requirements-txt-fixer - name: "馃搧 filesystem/馃摑 file 路 Validate requirements.txt" - - id: trailing-whitespace - name: "馃搧 filesystem/馃摑 file 路 Validate empty whitespaces" + - id: check-executables-have-shebangs + name: "馃搧 filesystem/鈿欙笍 exec 路 Verify shebang presence" + - id: check-shebang-scripts-are-executable + name: "馃搧 filesystem/鈿欙笍 exec 路 Verify script permissions" + - id: check-symlinks + name: "馃搧 filesystem/馃敆 symlink 路 Check symlink validity" + - id: destroyed-symlinks + name: "馃搧 filesystem/馃敆 symlink 路 Detect broken symlinks" + - id: requirements-txt-fixer + name: "馃搧 filesystem/馃摑 file 路 Validate requirements.txt" + - id: trailing-whitespace + name: "馃搧 filesystem/馃摑 file 路 Validate empty whitespaces" # ----------------------------- 馃敀 Security Tools ---------------------------- # - - id: check-json - name: "馃搧 filesystem/馃敡 config 路 Validate JSON file" - - id: check-yaml - name: "馃搧 
filesystem/馃敡 config 路 Validate YAML file" - - id: detect-aws-credentials - name: "馃敀 security 路 Detect aws credentials" - args: - - --allow-missing-credentials - - id: detect-private-key - name: "馃敀 security 路 Detect private keys" + - id: check-json + name: "馃搧 filesystem/馃敡 config 路 Validate JSON file" + - id: check-yaml + name: "馃搧 filesystem/馃敡 config 路 Validate YAML file" + - id: detect-aws-credentials + name: "馃敀 security 路 Detect aws credentials" + args: + - --allow-missing-credentials + - id: detect-private-key + name: "馃敀 security 路 Detect private keys" - repo: https://github.com/gitleaks/gitleaks rev: v8.29.0 hooks: - - id: gitleaks - name: "馃敀 security 路 Detect hardcoded secrets" + - id: gitleaks + name: "馃敀 security 路 Detect hardcoded secrets" # --------------------------- 馃攳 Code Quality Tools -------------------------- # @@ -61,80 +61,78 @@ repos: - repo: https://github.com/crate-ci/typos rev: v1.38.1 hooks: - - id: typos - name: "鉁忥笍 typos 路 Checking typos" - args: [--force-exclude] + - id: typos + name: "鉁忥笍 typos 路 Checking typos" + args: [--force-exclude] ### Python Tools ### - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.14.3 hooks: - - id: ruff-format - name: "馃悕 python 路 Format python" - - id: ruff-check - name: "馃悕 python 路 Lint python" + - id: ruff-format + name: "馃悕 python 路 Format python" + - id: ruff-check + name: "馃悕 python 路 Lint python" + args: ["--fix"] + - id: ruff-check + name: "馃悕 python 路 Sort python imports" + args: ["check", "--select", "I", "--fix"] ### Workflow Validation ### - repo: https://github.com/python-jsonschema/check-jsonschema rev: 0.35.0 hooks: - - id: check-github-workflows - name: "馃悪 github-actions 路 Validate gh workflow files" - args: ["--verbose"] - - - repo: https://github.com/pycqa/isort - rev: 7.0.0 - hooks: - - id: isort - name: "馃悕 python 路 Sort python packages" + - id: check-github-workflows + name: "馃悪 github-actions 路 Validate gh workflow files" + args: ["--verbose"] - repo: 
https://github.com/rhysd/actionlint rev: v1.7.8 hooks: - - id: actionlint - name: "馃悪 github-actions 路 Lint gh workflow files" - args: ["-shellcheck=enable=all"] + - id: actionlint + name: "馃悪 github-actions 路 Lint gh workflow files" + args: ["-shellcheck=enable=all"] ### Markdown ### - repo: https://github.com/hukkin/mdformat rev: 1.0.0 hooks: - - id: mdformat - name: "馃摑 markdown 路 Format markdown" - additional_dependencies: - - mdformat-gfm - - mdformat-ruff - - ruff + - id: mdformat + name: "馃摑 markdown 路 Format markdown" + additional_dependencies: + - mdformat-gfm + - mdformat-ruff + - ruff - repo: https://github.com/igorshubovych/markdownlint-cli rev: v0.45.0 hooks: - - id: markdownlint - name: "馃摑 markdown 路 Lint markdown" - args: [--fix] + - id: markdownlint + name: "馃摑 markdown 路 Lint markdown" + args: [--fix] ### Docker Tools ### - repo: https://github.com/reteps/dockerfmt rev: v0.3.9 hooks: - - id: dockerfmt - name: "馃惓 docker 路 Format docker" - args: - - --indent=2 - - --write + - id: dockerfmt + name: "馃惓 docker 路 Format docker" + args: + - --indent=2 + - --write ### Shell ### - repo: https://github.com/scop/pre-commit-shfmt rev: v3.12.0-2 # Use the latest stable revision hooks: - - id: shfmt - name: "馃悮 shell 路 Format shell scripts" - args: ["-s"] + - id: shfmt + name: "馃悮 shell 路 Format shell scripts" + args: ["-s"] # --------------------------- 鉁嶏笍 Git Commit Quality -------------------------- # @@ -142,15 +140,15 @@ repos: ### Commit Message Standards ### - repo: local hooks: - - id: signoff-commit - name: Sign-off Commit - entry: bash - args: - - -c - - | - if ! 
grep -q "^Signed-off-by: $(git config user.name) <$(git config user.email)>" "$(git rev-parse --git-path COMMIT_EDITMSG)"; then - printf "\nSigned-off-by: $(git config user.name) <$(git config user.email)>\n" >> "$(git rev-parse --git-path COMMIT_EDITMSG)" - fi - language: system - verbose: true - stages: [commit-msg] + - id: signoff-commit + name: Sign-off Commit + entry: bash + args: + - -c + - | + if ! grep -q "^Signed-off-by: $(git config user.name) <$(git config user.email)>" "$(git rev-parse --git-path COMMIT_EDITMSG)"; then + printf "\nSigned-off-by: $(git config user.name) <$(git config user.email)>\n" >> "$(git rev-parse --git-path COMMIT_EDITMSG)" + fi + language: system + verbose: true + stages: [commit-msg] diff --git a/docker/sglang/Dockerfile b/docker/sglang/Dockerfile new file mode 100644 index 000000000000..8490362401a0 --- /dev/null +++ b/docker/sglang/Dockerfile @@ -0,0 +1,98 @@ +# Declare the argument as default to use as input +ARG BASE_IMAGE=lmsysorg/sglang:v0.5.5-cu129-amd64 + +# Use input argument as base image +FROM $BASE_IMAGE AS base + +# ==================================================== +# ====================== common ====================== +# ==================================================== + +ARG PYTHON="python3" +ARG EFA_VERSION="1.43.3" + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +ENV DEBIAN_FRONTEND=noninteractive \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + DLC_CONTAINER_TYPE=general \ + # Python won鈥檛 try to write .pyc or .pyo files on the import of source modules + # Force stdin, stdout and stderr to be totally unbuffered. 
Good for logging + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONIOENCODING=UTF-8 \ + LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \ + PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}" + +WORKDIR / + +# Copy artifacts +# =============== +COPY ./scripts/telemetry/deep_learning_container.py /usr/local/bin/deep_learning_container.py +COPY ./scripts/telemetry/bash_telemetry.sh /usr/local/bin/bash_telemetry.sh +COPY ./scripts/common/install_efa.sh install_efa.sh +COPY ./scripts/common/start_cuda_compat.sh /usr/local/bin/start_cuda_compat.sh + +RUN chmod +x /usr/local/bin/deep_learning_container.py \ + && chmod +x /usr/local/bin/bash_telemetry.sh \ + && chmod +x /usr/local/bin/start_cuda_compat.sh + +# Install EFA and remove vulnerable nvjpeg +# ========================================= +RUN bash install_efa.sh ${EFA_VERSION} \ + && rm install_efa.sh \ + && mkdir -p /tmp/nvjpeg \ + && cd /tmp/nvjpeg \ + # latest cu12 libnvjpeg available is cu124 + && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \ + && rm -rf /tmp/nvjpeg \ + # create symlink for python + && rm -rf /usr/bin/python \ + && ln -s /usr/bin/python3 /usr/bin/python \ + # remove cuobjdump and nvdisasm + && rm -rf /usr/local/cuda/bin/cuobjdump* \ + && rm -rf /usr/local/cuda/bin/nvdisasm* + +# Run OSS compliance script +# ========================== +RUN echo 'source 
/usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc \ + # OSS compliance - use Python zipfile instead of unzip + && HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && python3 -c "import zipfile, os; zipfile.ZipFile('/root/oss_compliance.zip').extractall('/root/'); os.remove('/root/oss_compliance.zip')" \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + # clean + && rm -rf ${HOME_DIR}/oss_compliance* \ + && rm -rf /tmp/tmp* \ + && rm -rf /tmp/uv* \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /root/.cache | true + +# ======================================================= +# ====================== sagemaker ====================== +# ======================================================= + +FROM base AS sglang-sagemaker + +ARG CACHE_REFRESH=0 +RUN dpkg -l | grep -E "cuda|nvidia|libnv" | awk '{print $2}' | xargs apt-mark hold \ + && apt-get update \ + && apt-get upgrade -y \ + && apt-get clean + +RUN rm -rf /tmp/* + +COPY ./scripts/sglang/sagemaker_entrypoint.sh /usr/bin/serve +RUN chmod +x /usr/bin/serve + +ENTRYPOINT ["/usr/bin/serve"] \ No newline at end of file diff --git a/scripts/common/start_cuda_compat.sh b/scripts/common/start_cuda_compat.sh new file mode 100755 index 000000000000..791d355c5abe --- /dev/null +++ b/scripts/common/start_cuda_compat.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +verlte() { + [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] +} + +COMPAT_FILE=/usr/local/cuda/compat/libcuda.so.1 +if [ -f $COMPAT_FILE ]; then + CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink $COMPAT_FILE | cut -d'.' 
#!/bin/bash
# SageMaker entrypoint for the SGLang image.
#
# Translates every SM_SGLANG_<NAME>=<value> environment variable into a
# `--<name> <value>` argument (name lower-cased, `_` -> `-`), fills in default
# --port / --host / --model-path when they were not supplied, and replaces this
# shell with `python3 -m sglang.launch_server`.
# A variable with an empty value becomes a bare `--<name>` flag.

# Telemetry is best-effort: output is discarded and failures are ignored.
bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true

if command -v nvidia-smi >/dev/null 2>&1 && command -v nvcc >/dev/null 2>&1; then
  # Sourced rather than run with `bash`: the script exports LD_LIBRARY_PATH,
  # and an export made in a child process would be lost before the server
  # starts.
  # shellcheck source=/dev/null
  source /usr/local/bin/start_cuda_compat.sh
fi

echo "Starting server"

PREFIX="SM_SGLANG_"
ARG_PREFIX="--"

ARGS=()

# With IFS='=' the first field is the variable name; `value` receives the rest
# of the line, so '=' characters inside the value are preserved.
while IFS='=' read -r key value; do
  arg_name=$(echo "${key#"${PREFIX}"}" | tr '[:upper:]' '[:lower:]' | tr '_' '-')

  ARGS+=("${ARG_PREFIX}${arg_name}")
  if [ -n "$value" ]; then
    ARGS+=("$value")
  fi
done < <(env | grep "^${PREFIX}")

# Succeeds when $1 is present in ARGS as an exact element.
has_arg() {
  local wanted="$1" arg
  for arg in "${ARGS[@]}"; do
    if [ "$arg" = "$wanted" ]; then
      return 0
    fi
  done
  return 1
}

# Add default port only if not already set
if ! has_arg "--port"; then
  ARGS+=(--port "${SM_SGLANG_PORT:-8080}")
fi

# Add default host only if not already set
if ! has_arg "--host"; then
  ARGS+=(--host "${SM_SGLANG_HOST:-0.0.0.0}")
fi

# Add default model-path only if not already set
if ! has_arg "--model-path"; then
  ARGS+=(--model-path "${SM_SGLANG_MODEL_PATH:-/opt/ml/model}")
fi

echo "Running command: exec python3 -m sglang.launch_server ${ARGS[*]}"
exec python3 -m sglang.launch_server "${ARGS[@]}"
"""Common pytest fixtures for all tests under module test/"""

import pytest
from test_utils.aws import AWSSessionManager
from test_utils.constants import DEFAULT_REGION


def pytest_addoption(parser):
    """Register the command-line options shared by the tests under test/.

    --image-uri: URI of the container image under test (no default).
    --region: AWS region used for the test session; defaults to DEFAULT_REGION.
    """
    parser.addoption("--image-uri", action="store", help="Image URI to be tested")
    parser.addoption(
        "--region", action="store", default=DEFAULT_REGION, help="AWS Region to test image on AWS"
    )


@pytest.fixture(scope="session")
def image_uri(request):
    """Return the value of --image-uri.

    The option has no default, so a test that needs an image but was started
    without one is failed here with a clear message instead of receiving None.
    Tests that never request this fixture are unaffected.
    """
    uri = request.config.getoption("--image-uri")
    if not uri:
        pytest.fail("--image-uri is required by tests that use the image_uri fixture", pytrace=False)
    return uri


@pytest.fixture(scope="session")
def region(request):
    """Return the value of --region."""
    return request.config.getoption("--region")


@pytest.fixture(scope="session")
def aws_session(region):
    """Return one AWSSessionManager for the whole test session, bound to `region`."""
    return AWSSessionManager(region)
"""Integration test for serving endpoint with SGLang DLC"""

import json
import logging
from pprint import pformat

import pytest
from botocore.exceptions import ClientError
from sagemaker import serializers
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from test_utils import clean_string, random_suffix_name, wait_for_status

# To enable debugging, change logging.INFO to logging.DEBUG
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.INFO)

# Passed to wait_for_status as (number of polls, seconds between polls).
ENDPOINT_WAIT_PERIOD = 60
ENDPOINT_WAIT_LENGTH = 30
ENDPOINT_INSERVICE = "InService"


def get_endpoint_status(sagemaker_client, endpoint_name):
    """Return the EndpointStatus string reported by describe_endpoint."""
    response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
    LOGGER.debug(f"Describe endpoint response: {pformat(response)}")
    return response["EndpointStatus"]


def get_hf_token(aws_session):
    """Read the HuggingFace token from the Secrets Manager secret "test/hf_token".

    The secret string is parsed as JSON and its "HF_TOKEN" entry is returned
    (None when the key is absent). A ClientError from Secrets Manager is logged
    and re-raised.
    """
    LOGGER.info("Retrieving HuggingFace token from AWS Secrets Manager...")
    token_path = "test/hf_token"

    try:
        get_secret_value_response = aws_session.secretsmanager.get_secret_value(SecretId=token_path)
        LOGGER.info("Successfully retrieved HuggingFace token")
    except ClientError as e:
        LOGGER.error(f"Failed to retrieve HuggingFace token: {e}")
        raise

    # Do not print secrets token in logs
    response = json.loads(get_secret_value_response["SecretString"])
    return response.get("HF_TOKEN")


def _delete_best_effort(description, delete_call, **kwargs):
    """Run one teardown delete call; log a ClientError instead of raising.

    Keeps a failed delete (for example of a resource that was never created
    because the deployment failed) from aborting the remaining teardown steps.
    """
    LOGGER.info(f"Deleting {description}")
    try:
        delete_call(**kwargs)
    except ClientError as e:
        LOGGER.warning(f"Could not delete {description}: {e}")


@pytest.fixture(scope="function")
def model_id(request):
    # Return the model_id given by the test parameter
    return request.param


@pytest.fixture(scope="function")
def instance_type(request):
    # Return the instance_type given by the test parameter
    return request.param


@pytest.fixture(scope="function")
def model_package(aws_session, image_uri, model_id):
    """Yield a sagemaker Model for `model_id` served from `image_uri`.

    The model name is built from the last path component of `model_id`; the
    container receives SM_SGLANG_MODEL_PATH and HF_TOKEN through its
    environment. The SageMaker model is deleted on teardown.
    """
    sagemaker_client = aws_session.sagemaker
    sagemaker_role = aws_session.iam_resource.Role("SageMakerRole").arn
    # Text after the last "/", so an id without an organisation prefix also works.
    cleaned_id = clean_string(model_id.rsplit("/", 1)[-1], "_./")
    model_name = random_suffix_name(f"sglang-{cleaned_id}-model-package", 50)

    LOGGER.debug(f"Using image: {image_uri}")
    LOGGER.debug(f"Model ID: {model_id}")

    LOGGER.info(f"Creating SageMaker model: {model_name}...")
    hf_token = get_hf_token(aws_session)
    model = Model(
        name=model_name,
        image_uri=image_uri,
        role=sagemaker_role,
        predictor_cls=Predictor,
        env={
            "SM_SGLANG_MODEL_PATH": model_id,
            "HF_TOKEN": hf_token,
        },
    )
    LOGGER.info("Model created successfully")

    yield model

    _delete_best_effort(
        f"model: {model_name}", sagemaker_client.delete_model, ModelName=model_name
    )


@pytest.fixture(scope="function")
def model_endpoint(aws_session, model_package, instance_type):
    """Deploy `model_package` on `instance_type` and yield the predictor.

    The endpoint and its endpoint configuration (both named `endpoint_name`)
    are deleted in a `finally` block, so they are cleaned up even when the
    deployment or the InService check fails before the test body runs.
    """
    sagemaker_client = aws_session.sagemaker
    model = model_package
    cleaned_instance = clean_string(instance_type, "_./")
    endpoint_name = random_suffix_name(f"sglang-{cleaned_instance}-endpoint", 50)

    LOGGER.debug(f"Using instance type: {instance_type}")

    try:
        LOGGER.info("Starting endpoint deployment (this may take 10-15 minutes)...")
        predictor = model.deploy(
            instance_type=instance_type,
            initial_instance_count=1,
            endpoint_name=endpoint_name,
            inference_ami_version="al2-ami-sagemaker-inference-gpu-3-1",
            serializer=serializers.JSONSerializer(),
            wait=True,
        )
        LOGGER.info("Endpoint deployment completed successfully")

        LOGGER.info(f"Waiting for endpoint {ENDPOINT_INSERVICE} status ...")
        assert wait_for_status(
            ENDPOINT_INSERVICE,
            ENDPOINT_WAIT_PERIOD,
            ENDPOINT_WAIT_LENGTH,
            get_endpoint_status,
            sagemaker_client,
            endpoint_name,
        ), f"Endpoint {endpoint_name} did not reach {ENDPOINT_INSERVICE}"

        yield predictor
    finally:
        _delete_best_effort(
            f"endpoint: {endpoint_name}",
            sagemaker_client.delete_endpoint,
            EndpointName=endpoint_name,
        )
        _delete_best_effort(
            f"endpoint configuration: {endpoint_name}",
            sagemaker_client.delete_endpoint_config,
            EndpointConfigName=endpoint_name,
        )


@pytest.mark.parametrize("instance_type", ["ml.g5.12xlarge"], indirect=True)
@pytest.mark.parametrize("model_id", ["Qwen/Qwen3-0.6B"], indirect=True)
def test_sglang_sagemaker_endpoint(model_endpoint, model_id):
    """Send one chat-style request to the endpoint and require a non-empty reply."""
    predictor = model_endpoint

    prompt = "Write a python script to calculate square of n"
    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 2400,
        "temperature": 0.01,
        "top_p": 0.9,
        "top_k": 50,
    }
    LOGGER.debug(f"Sending inference request with payload: {pformat(payload)}")

    response = predictor.predict(payload)
    LOGGER.info("Inference request invoked successfully")

    if isinstance(response, bytes):
        response = response.decode("utf-8")

    if isinstance(response, str):
        try:
            response = json.loads(response)
        except json.JSONDecodeError:
            # A non-JSON body is still checked below as a plain string.
            LOGGER.warning("Response is not valid JSON. Returning as string.")

    assert response, "Model response is empty, failing endpoint test!"

    LOGGER.info(f"Model response: {pformat(response)}")
    LOGGER.info("Inference test successful!")
"""Common utility functions for all tests under module test/
For test utility functions, please appropriately declare function argument types
and their output types for readability and reusability.
When necessary, use docstrings to explain the functions' mechanisms.
"""

import logging
import random
import string
import time
from collections.abc import Callable
from typing import Any

LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.INFO)

# Characters used for generated suffixes.
_SUFFIX_ALPHABET = string.ascii_lowercase + string.digits


def random_suffix_name(resource_name: str, max_length: int, delimiter: str = "-") -> str:
    """Append a random lowercase-alphanumeric suffix to `resource_name`.

    The result is `resource_name + delimiter + suffix` and is exactly
    `max_length` characters long.

    Raises:
        ValueError: if `resource_name` plus `delimiter` leaves no room for at
            least one suffix character. Without this check the result would be
            longer than `max_length` and end in a dangling delimiter.
    """
    rand_length = max_length - len(resource_name) - len(delimiter)
    if rand_length < 1:
        raise ValueError(
            f"'{resource_name}' is too long to fit a random suffix within {max_length} characters"
        )
    rand = "".join(random.choices(_SUFFIX_ALPHABET, k=rand_length))
    return f"{resource_name}{delimiter}{rand}"


def clean_string(text: str, symbols_to_remove: str, replacement: str = "-") -> str:
    """Replace every character of `symbols_to_remove` found in `text` with `replacement`."""
    for symbol in symbols_to_remove:
        text = text.replace(symbol, replacement)
    return text


def wait_for_status(
    expected_status: str,
    wait_periods: int,
    period_length: int,
    get_status_method: Callable[..., str],
    *method_args: Any,
) -> bool:
    """Poll `get_status_method(*method_args)` until it returns `expected_status`.

    The status is checked up to `wait_periods` times. The first check happens
    immediately; `period_length` seconds are slept between consecutive checks,
    so a resource that is already in the expected state returns without delay.

    Returns:
        True as soon as a check matches, False (after logging an error) when
        none of the checks matched.
    """
    actual_status = None
    for attempt in range(wait_periods):
        if attempt:
            time.sleep(period_length)
        LOGGER.debug(f"Time passed while waiting: {period_length * attempt}s.")
        actual_status = get_status_method(*method_args)
        if actual_status == expected_status:
            return True

    LOGGER.error(f"Wait for status: {expected_status} timed out. Actual status: {actual_status}")
    return False
# A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""AWS Session Manager for all AWS boto3 API resources"""

import boto3
from test_utils.constants import DEFAULT_REGION


class AWSSessionManager:
    """Hold one boto3 session together with the clients and resources built from it.

    Every client/resource listed in ``__init__`` is created eagerly when the
    manager is constructed and exposed as an instance attribute.
    """

    def __init__(self, region=DEFAULT_REGION, profile_name=None):
        """Create the session, then the service clients and resources.

        Args:
            region: Region name handed to ``boto3.Session``.
            profile_name: Optional profile; it is passed to the session only
                when truthy.
        """
        session_kwargs = {"region_name": region}
        if profile_name:
            session_kwargs["profile_name"] = profile_name
        self.session = boto3.Session(**session_kwargs)

        # Client API
        new_client = self.session.client
        self.cloudwatch = new_client("cloudwatch")
        self.codebuild = new_client("codebuild")
        self.codepipeline = new_client("codepipeline")
        self.ec2 = new_client("ec2")
        self.events = new_client("events")
        self.iam = new_client("iam")
        self.resource_groups = new_client("resource-groups")
        self.scheduler = new_client("scheduler")
        self.secretsmanager = new_client("secretsmanager")
        self.sts = new_client("sts")
        self.s3 = new_client("s3")
        self.sagemaker = new_client("sagemaker")

        # Resource API
        new_resource = self.session.resource
        self.iam_resource = new_resource("iam")
        self.s3_resource = new_resource("s3")


# diff --git a/test/test_utils/constants.py b/test/test_utils/constants.py
# new file mode 100644
# index 000000000000..eaa7babdfaf9
# --- /dev/null
# +++ b/test/test_utils/constants.py
# @@ -0,0 +1 @@
DEFAULT_REGION = "us-west-2"
# diff --git a/test/test_utils/logger.py b/test/test_utils/logger.py
# new file mode 100644
# index 000000000000..aa9725085c73
# --- /dev/null
# +++ b/test/test_utils/logger.py
# @@ -0,0 +1,41 @@
# Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Logging handler for nice formatted logs"""

import logging


# Custom formatter
class ColoredFormatter(logging.Formatter):
    """Formatter that wraps each field of a log line in ANSI escape sequences.

    The layout is fixed: ``asctime - name - levelname - message``. The
    timestamp uses the level's "bold" sequence, the level name its
    "underline" sequence, and the logger name and message its "text"
    sequence. A ``fmt`` passed to the constructor is not used by ``format``;
    a ``datefmt`` passed to the constructor controls the timestamp.
    """

    # Escape sequences per level name; "RESET" ends every colored field.
    COLORS = {
        "DEBUG": {"text": "\x1b[36;20m", "bold": "\x1b[1;36;20m", "underline": "\x1b[4;36;20m"},
        "INFO": {"text": "\x1b[38;20m", "bold": "\x1b[1;38;20m", "underline": "\x1b[4;38;20m"},
        "WARNING": {"text": "\x1b[33;20m", "bold": "\x1b[1;33;20m", "underline": "\x1b[4;33;20m"},
        "ERROR": {"text": "\x1b[31;20m", "bold": "\x1b[1;31;20m", "underline": "\x1b[4;31;20m"},
        "CRITICAL": {"text": "\x1b[31;1m", "bold": "\x1b[1;31;1m", "underline": "\x1b[4;31;1m"},
        "RESET": "\x1b[0m",
    }

    def format(self, record):
        """Return ``record`` rendered with the escape sequences of its level.

        Level names missing from ``COLORS`` are rendered with the DEBUG
        sequences.
        """
        colors = self.COLORS.get(record.levelname, self.COLORS["DEBUG"])
        reset = self.COLORS["RESET"]

        # Create formatted string with different styles
        format_str = (
            f"{colors['bold']}%(asctime)s{reset} - "
            f"{colors['text']}%(name)s{reset} - "
            f"{colors['underline']}%(levelname)s{reset} - "
            f"{colors['text']}%(message)s{reset}"
        )
        # Hand over this formatter's datefmt so a timestamp format given to
        # ColoredFormatter(...) is honoured instead of being dropped.
        formatter = logging.Formatter(format_str, datefmt=self.datefmt)
        return formatter.format(record)