58 commits
aeaa267
added pytorch DLCs
philschmid Apr 29, 2021
29a7346
added TF container and cleaned PT, moved build_artifacts
philschmid Apr 29, 2021
b4e67ca
moved build artifacts to separate folder
philschmid Apr 29, 2021
5a48fde
removed todos
philschmid Apr 29, 2021
69c36f8
added inference buildspec
philschmid Apr 29, 2021
392d693
added build spec for tf
philschmid Apr 29, 2021
d503b84
[test][huggingface_pytorch] Updated number of tests in smmp test to 5…
mansimane Apr 29, 2021
ec861ef
[tensorflow][build][test] update TF2.3 for pillow to 8.2.0 (#1072)
jeet4320 Apr 29, 2021
faa3383
[tensorflow, pytorch][build][sagemaker] Updated smdataparallel binary…
karan6181 Apr 29, 2021
6022443
update pillow (#1079)
jeet4320 Apr 29, 2021
b5931e9
[tensorflow] Release TF2.3 cuda110 training cpu and gpu (#1078)
jeet4320 Apr 29, 2021
b986d13
[test] Fix smclarify test flakiness (#1082)
saimidu Apr 30, 2021
80c6494
Fix to execute efa tests on mainline (#1083)
jeet4320 Apr 30, 2021
ba5df80
[sagemaker] Fix typo in sagemaker test code (#1085)
jeet4320 May 1, 2021
1d92ebd
Add back efa configs to SM tests (#1086)
jeet4320 May 2, 2021
caa2d9b
Skip temporarily to revert it (#1087)
jeet4320 May 2, 2021
e8b9565
[benchmark][efa][sagemaker] Fix TF2 SM benchmark and add EFA configs …
jeet4320 May 4, 2021
d6f0e97
[pytorch][tensorflow][build][test] TF2.4.1 PT1.8.1 Set RDMAV_FORK_SAF…
jeet4320 May 6, 2021
e2e8994
[test] Add remote override for tests (#1093)
saimidu May 7, 2021
33037e9
[pytorch][tensorflow][build][test] Build OpenMPI without libfabric su…
indhub May 7, 2021
25ad625
[tensorflow][pytorch][release][training] update release for TF2.4.1 a…
jeet4320 May 7, 2021
2cf5475
[release] release tf2.3.2 dlc images (#1100)
junpuf May 11, 2021
c1404e6
[release] tf 2.3.2 release wave 2 (#1102)
junpuf May 12, 2021
8469a38
[test][benchmark][sagemaker][tensorflow,mxnet] Fix log file names (#1…
saimidu May 12, 2021
7041cf1
[tensorflow, pytorch][build] Update TF 2.4 and PT 1.8 DLC to use smde…
ndodda-amazon May 12, 2021
1b38f43
[test][sagemaker][huggingface] Deriving version for transformers SMDP …
mansimane May 13, 2021
52ad9fa
[test][pytorch][ec2] Fix flakiness in NCCL Version test (#1107)
saimidu May 14, 2021
10c4632
[tensorflow, pytorch][build] Update TF 2.3 and PT 1.7 DLC to use smde…
ndodda-amazon May 14, 2021
15e3fc8
[build] patch openssl (#1106)
junpuf May 17, 2021
f9ee2fa
[build][pytorch][neuron] Upgrade Pillow version in PT Neuron DLC (#1094)
saimidu May 18, 2021
88dab73
[test] upgrade dependency check version (#1112)
tejaschumbalkar May 21, 2021
fe4864d
[huggingface_tensorflow, huggingface_pytorch] update for Transformers…
philschmid May 24, 2021
bb7976d
[pytorch][build] Unpin Pillow and upgrade to Pillow 8.x on PT 1.7 doc…
saimidu May 25, 2021
3b5a624
[release] Change transformer versions in HF TF2.4.1 and PT1.7.1 (#1121)
jeet4320 May 26, 2021
6dda696
[mxnet18][neuron] - Add neuron mxnet18 based dlc image (#1105)
aws-vrnatham May 28, 2021
13dd1e5
[pytorch][neuron] support pt1.7 and also use u18 (#1101)
aws-vrnatham May 28, 2021
45a7c29
update TS to 0.4.0 for inference PT1.8.1 (#1124)
lxning Jun 1, 2021
3355daf
[release] Release PT 1.7.1 for Neuron UL18 (#1131)
jeet4320 Jun 1, 2021
22afaf1
added beta version of toolkit
philschmid Jun 2, 2021
4b34bdf
[release] Add PT 1.6 GPU cu110 and PT 1.8 (#1136)
saimidu Jun 3, 2021
0afc92c
[test][sagemaker][pytorch] Run smddp_smdmp test on correct region (#1…
saimidu Jun 3, 2021
a0c30df
[test][sagemaker][pytorch] Add us-east-2 to no_p3 regions (#1140)
saimidu Jun 3, 2021
da71db7
[huggingface_pytorch] Safety check on PT 1.6 (#1133)
saimidu Jun 4, 2021
9d40d45
[test][ec2][pytorch] Run NCCL version test only on PT 1.7 or above DL…
saimidu Jun 4, 2021
cd17e8b
[test][sagemaker][huggingface] Add placeholder tests for inference (#…
saimidu Jun 4, 2021
7097ce8
[test][sagemaker] Add ap-northeast-2 to NO_PR_REGIONS (#1142)
saimidu Jun 7, 2021
48c0e36
[release] Add PT 1.6 HF 4.6.1 to release images (#1143)
saimidu Jun 7, 2021
9da07a4
[test][sagemaker] Add ap-northeast-1 to NO_P3_REGIONS (#1144)
saimidu Jun 8, 2021
90dfb6f
added pytorch DLCs
philschmid Apr 29, 2021
71a1d46
added TF container and cleaned PT, moved build_artifacts
philschmid Apr 29, 2021
a5cd5dd
moved build artifacts to separate folder
philschmid Apr 29, 2021
9bde308
removed todos
philschmid Apr 29, 2021
c1bfe80
added inference buildspec
philschmid Apr 29, 2021
ec6c250
added build spec for tf
philschmid Apr 29, 2021
03e2dc7
added beta version of toolkit
philschmid Jun 2, 2021
bbdb15d
rebasing
philschmid Jun 8, 2021
a7e8eee
local_serving test pytorch
philschmid Jun 8, 2021
dcd0e21
cpu and gpu tests
philschmid Jun 8, 2021
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
"python.pythonPath": "/Users/philipp/anaconda3/envs/sm/bin/python"
}
5 changes: 5 additions & 0 deletions huggingface/build_artifacts/inference/config.properties
@@ -0,0 +1,5 @@
vmargs=-XX:+UseContainerSupport -XX:InitialRAMPercentage=8.0 -XX:MaxRAMPercentage=10.0 -XX:-UseLargePages -XX:+UseG1GC -XX:+ExitOnOutOfMemoryError
Contributor:
-XX:-UseContainerSupport? Did we test this configuration?

Contributor:
Could we address this comment?

Contributor Author:
Yes, I have done all the current testing with this configuration so far.

Contributor Author:
Can you add the suggestion?

Contributor:
Replace the option -XX:+UseContainerSupport above with -XX:-UseContainerSupport (the + changes to -).
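
Applied to the vmargs line above, the suggested change would read as follows (a sketch of the proposal only; the merged file may differ):

vmargs=-XX:-UseContainerSupport -XX:InitialRAMPercentage=8.0 -XX:MaxRAMPercentage=10.0 -XX:-UseLargePages -XX:+UseG1GC -XX:+ExitOnOutOfMemoryError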

model_store=/opt/ml/model
load_models=ALL
inference_address=http://0.0.0.0:8080
management_address=http://0.0.0.0:8081
28 changes: 28 additions & 0 deletions huggingface/build_artifacts/inference/mms-entrypoint.py
@@ -0,0 +1,28 @@
# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import shlex
import subprocess
import sys


if sys.argv[1] == "serve":
from sagemaker_huggingface_inference_toolkit import serving

serving.main()
else:
subprocess.check_call(shlex.split(" ".join(sys.argv[1:])))

# prevent docker exit
subprocess.call(["tail", "-f", "/dev/null"])
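
A sketch of how this entrypoint behaves at runtime; the image tag is hypothetical, and the ports follow the inference_address and management_address values in config.properties above:

# "serve" hands control to the Hugging Face inference toolkit, which starts multi-model-server on ports 8080/8081
docker run -p 8080:8080 -p 8081:8081 hf-pytorch-inference:cpu serve

# any other argument is executed as a shell command, after which tail -f /dev/null keeps the container alive
docker run hf-pytorch-inference:cpu python --version

# liveness check against the inference address
curl http://localhost:8080/ping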
4 changes: 2 additions & 2 deletions huggingface/pytorch/buildspec-1-6.yml
@@ -23,8 +23,8 @@ images:
tag_python_version: &TAG_PYTHON_VERSION py36
cuda_version: &CUDA_VERSION cu110
os_version: &OS_VERSION ubuntu18.04
transformers_version: &TRANSFORMERS_VERSION 4.5.0
datasets_version: &DATASETS_VERSION 1.5.0
transformers_version: &TRANSFORMERS_VERSION 4.6.1
datasets_version: &DATASETS_VERSION 1.6.2
tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-',
*CUDA_VERSION, '-', *OS_VERSION ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /,
52 changes: 50 additions & 2 deletions huggingface/pytorch/buildspec.yml
@@ -12,6 +12,21 @@ repository_info:
repository_name: &REPOSITORY_NAME !join ["pr", "-", "huggingface", "-", *BASE_FRAMEWORK, "-", *TRAINING_IMAGE_TYPE]
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/,
*REPOSITORY_NAME ]
inference_repository: &INFERENCE_REPOSITORY
image_type: &INFERENCE_IMAGE_TYPE inference
root: !join [ "huggingface/", *BASE_FRAMEWORK, "/", *INFERENCE_IMAGE_TYPE ]
repository_name: &REPOSITORY_NAME !join ["pr", "-", "huggingface", "-", *BASE_FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE]
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/,
*REPOSITORY_NAME ]

context:
inference_context: &INFERENCE_CONTEXT
mms-entrypoint:
source: ../build_artifacts/inference/mms-entrypoint.py
target: mms-entrypoint.py
config:
source: ../build_artifacts/inference/config.properties
target: config.properties

images:
BuildHuggingFacePytorchGpuPy37Cu110TrainingDockerImage:
@@ -23,9 +38,42 @@ images:
tag_python_version: &TAG_PYTHON_VERSION py36
cuda_version: &CUDA_VERSION cu110
os_version: &OS_VERSION ubuntu18.04
transformers_version: &TRANSFORMERS_VERSION 4.5.0
datasets_version: &DATASETS_VERSION 1.5.0
transformers_version: &TRANSFORMERS_VERSION 4.6.1
datasets_version: &DATASETS_VERSION 1.6.2
tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-',
*CUDA_VERSION, '-', *OS_VERSION ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /,
*CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
BuildHuggingFacePytorchCPUPTInferencePy3DockerImage:
<<: *INFERENCE_REPOSITORY
build: &HUGGINGFACE_PYTORCH_CPU_INFERENCE_PY3 false
image_size_baseline: 4899
device_type: &DEVICE_TYPE cpu
python_version: &DOCKER_PYTHON_VERSION py3
tag_python_version: &TAG_PYTHON_VERSION py36
os_version: &OS_VERSION ubuntu18.04
transformers_version: &TRANSFORMERS_VERSION 4.6.0
inference_toolkit_version: &INFERENCE_TOOLKIT_VERSION 0.0.1.dev0
tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-',
*CUDA_VERSION, '-', *OS_VERSION ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /,
*CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
context:
<<: *INFERENCE_CONTEXT
BuildHuggingFacePytorchGpuPy37Cu110InferenceDockerImage:
<<: *INFERENCE_REPOSITORY
build: &HUGGINGFACE_PYTORCH_GPU_INFERENCE_PY3 false
image_size_baseline: 14000
device_type: &DEVICE_TYPE gpu
python_version: &DOCKER_PYTHON_VERSION py3
tag_python_version: &TAG_PYTHON_VERSION py36
cuda_version: &CUDA_VERSION cu111
os_version: &OS_VERSION ubuntu18.04
transformers_version: &TRANSFORMERS_VERSION 4.6.0
inference_toolkit_version: &INFERENCE_TOOLKIT_VERSION 0.0.1.dev0
tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-',
*CUDA_VERSION, '-', *OS_VERSION ]
docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /,
*CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ]
context:
<<: *INFERENCE_CONTEXT
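
The buildspec leans on YAML anchors plus a custom !join tag that concatenates the items of a sequence into a single string, which is how the image tags and Dockerfile paths above are assembled. A minimal PyYAML sketch of such a constructor, assuming the repository's actual loader behaves equivalently:

import yaml

def join_constructor(loader, node):
    # Concatenate every item of the !join sequence into one string
    seq = loader.construct_sequence(node)
    return "".join(str(item) for item in seq)

yaml.SafeLoader.add_constructor("!join", join_constructor)

doc = """
version: &VERSION 1.7.1
device_type: &DEVICE_TYPE gpu
tag: !join [ *VERSION, '-', 'transformers', '-', *DEVICE_TYPE ]
"""
print(yaml.safe_load(doc)["tag"])  # -> 1.7.1-transformers-gpu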
134 changes: 134 additions & 0 deletions huggingface/pytorch/inference/docker/1.7/py3/Dockerfile.cpu
@@ -0,0 +1,134 @@
FROM ubuntu:18.04

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"

# Specify accept-bind-to-port LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT
# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
# Specify multi-models LABEL to indicate container is capable of loading and serving multiple models concurrently
# https://docs.aws.amazon.com/sagemaker/latest/dg/build-multi-model-build-container.html
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true

ARG MMS_VERSION=1.1.2
ARG PYTHON=python3
ARG PYTHON_VERSION=3.6.13
ARG OPEN_MPI_VERSION=4.0.1
# HF ARGS
ARG PT_INFERENCE_URL=https://aws-pytorch-binaries.s3-us-west-2.amazonaws.com/r1.7.1_inference/20210112-183245/c1130f2829b03c0997b9813211a7c0f600fc569a/cpu/torch-1.7.1-cp36-cp36m-manylinux1_x86_64.whl
ARG TRANSFORMERS_VERSION
ARG HF_INFERENCE_TOOLKIT_VERSION


ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
LD_LIBRARY_PATH="/opt/conda/lib/:${LD_LIBRARY_PATH}:/usr/local/lib" \
PYTHONIOENCODING=UTF-8 \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \
TEMP=/home/model-server/tmp \
DEBIAN_FRONTEND=noninteractive

ENV PATH /opt/conda/bin:$PATH

RUN apt-get update \
&& apt-get install -y --no-install-recommends \
ca-certificates \
build-essential \
openssl \
openjdk-8-jdk-headless \
vim \
wget \
curl \
unzip \
git \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
&& chmod +x ~/miniconda.sh \
&& ~/miniconda.sh -b -p /opt/conda \
&& rm ~/miniconda.sh \
&& /opt/conda/bin/conda update conda \
&& /opt/conda/bin/conda install -c conda-forge \
python=$PYTHON_VERSION \
&& /opt/conda/bin/conda install -y \
# conda 4.10.0 requires ruamel_yaml to be installed. Currently pinned at latest.
ruamel_yaml==0.15.100 \
cython==0.29.12 \
mkl-include==2019.4 \
mkl==2019.4 \
botocore \
&& /opt/conda/bin/conda clean -ya

RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
&& ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
&& pip install packaging==20.4 \
enum-compat==0.0.3 \
"cryptography>3.2"

RUN wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-$OPEN_MPI_VERSION.tar.gz \
&& gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \
&& cd openmpi-$OPEN_MPI_VERSION \
&& ./configure --prefix=/home/.openmpi \
&& make all install \
&& cd .. \
&& rm openmpi-$OPEN_MPI_VERSION.tar.gz \
&& rm -rf openmpi-$OPEN_MPI_VERSION

# The ENV variables declared below are changed in the previous section
# Grouping these ENV variables in the first section causes
# ompi_info to fail. This is only observed in CPU containers
ENV PATH="$PATH:/home/.openmpi/bin"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/"
RUN ompi_info --parsable --all | grep mpi_built_with_cuda_support:value

WORKDIR /

RUN pip install --no-cache-dir \
multi-model-server==$MMS_VERSION \
sagemaker-inference

RUN useradd -m model-server \
&& mkdir -p /home/model-server/tmp \
&& chown -R model-server /home/model-server

COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY config.properties /etc/sagemaker-mms.properties

RUN chmod +x /usr/local/bin/dockerd-entrypoint.py

ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py

RUN chmod +x /usr/local/bin/deep_learning_container.py

RUN HOME_DIR=/root \
&& curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
&& unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
&& cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
&& chmod +x /usr/local/bin/testOSSCompliance \
&& chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
&& ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
&& rm -rf ${HOME_DIR}/oss_compliance*


#################################
# Hugging Face specific section #
#################################


RUN curl https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.7/license.txt -o /license.txt

# Uninstall the default torch and install the AWS-built PyTorch inference wheel from PT_INFERENCE_URL
RUN pip uninstall -y torch \
&& pip install --no-cache-dir -U $PT_INFERENCE_URL

# Install the Hugging Face libraries and their dependencies
RUN pip install --no-cache-dir \
transformers[sentencepiece]==${TRANSFORMERS_VERSION} \
protobuf==3.12.0 \
sagemaker-huggingface-inference-toolkit==${HF_INFERENCE_TOOLKIT_VERSION}

EXPOSE 8080 8081
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["serve"]
128 changes: 128 additions & 0 deletions huggingface/pytorch/inference/docker/1.7/py3/cu110/Dockerfile.gpu
@@ -0,0 +1,128 @@
FROM nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"

# Specify accept-bind-to-port LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT
# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
# Specify multi-models LABEL to indicate container is capable of loading and serving multiple models concurrently
# https://docs.aws.amazon.com/sagemaker/latest/dg/build-multi-model-build-container.html
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true

ARG MMS_VERSION=1.1.2
ARG PYTHON=python3
ARG PYTHON_VERSION=3.6.13
ARG OPEN_MPI_VERSION=4.0.1
# HF ARGS
ARG PT_INFERENCE_URL=https://aws-pytorch-binaries.s3-us-west-2.amazonaws.com/r1.7.1_inference/20210112-183245/c1130f2829b03c0997b9813211a7c0f600fc569a/gpu/torch-1.7.1-cp36-cp36m-manylinux1_x86_64.whl
ARG TRANSFORMERS_VERSION
ARG HF_INFERENCE_TOOLKIT_VERSION

ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
LD_LIBRARY_PATH="/opt/conda/lib/:${LD_LIBRARY_PATH}:/usr/local/lib" \
PYTHONIOENCODING=UTF-8 \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \
TEMP=/home/model-server/tmp \
DEBIAN_FRONTEND=noninteractive

ENV PATH /opt/conda/bin:$PATH

RUN apt-get update \
&& apt-get install -y --no-install-recommends \
ca-certificates \
build-essential \
openssl \
openjdk-8-jdk-headless \
vim \
wget \
curl \
unzip \
git \
libnuma1 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
&& chmod +x ~/miniconda.sh \
&& ~/miniconda.sh -b -p /opt/conda \
&& rm ~/miniconda.sh \
&& /opt/conda/bin/conda update conda \
&& /opt/conda/bin/conda install -c conda-forge \
python=$PYTHON_VERSION \
&& /opt/conda/bin/conda install -y \
# conda 4.10.0 requires ruamel_yaml to be installed. Currently pinned at latest.
ruamel_yaml==0.15.100 \
cython==0.29.12 \
botocore \
mkl-include==2019.4 \
mkl==2019.4 \
&& /opt/conda/bin/conda clean -ya

RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
&& ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
&& pip install packaging==20.4 \
enum-compat==0.0.3 \
"cryptography>3.2"

RUN wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-$OPEN_MPI_VERSION.tar.gz \
&& gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \
&& cd openmpi-$OPEN_MPI_VERSION \
&& ./configure --prefix=/home/.openmpi \
&& make all install \
&& cd .. \
&& rm openmpi-$OPEN_MPI_VERSION.tar.gz \
&& rm -rf openmpi-$OPEN_MPI_VERSION

ENV PATH="$PATH:/home/.openmpi/bin"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/"

WORKDIR /

RUN pip install --no-cache-dir \
multi-model-server==$MMS_VERSION \
sagemaker-inference

RUN useradd -m model-server \
&& mkdir -p /home/model-server/tmp \
&& chown -R model-server /home/model-server

COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY config.properties /etc/sagemaker-mms.properties

RUN chmod +x /usr/local/bin/dockerd-entrypoint.py

ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py

RUN chmod +x /usr/local/bin/deep_learning_container.py

RUN HOME_DIR=/root \
&& curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
&& unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
&& cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
&& chmod +x /usr/local/bin/testOSSCompliance \
&& chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
&& ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
&& rm -rf ${HOME_DIR}/oss_compliance*

#################################
# Hugging Face specific section #
#################################

RUN curl https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.7/license.txt -o /license.txt

# Uninstall the default torch and install the AWS-built PyTorch inference wheel from PT_INFERENCE_URL
RUN pip uninstall -y torch \
&& pip install --no-cache-dir -U $PT_INFERENCE_URL

# Install the Hugging Face libraries and their dependencies
RUN pip install --no-cache-dir \
transformers[sentencepiece]==${TRANSFORMERS_VERSION} \
protobuf==3.12.0 \
sagemaker-huggingface-inference-toolkit==${HF_INFERENCE_TOOLKIT_VERSION}

EXPOSE 8080 8081
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["serve"]