From e6716515924c933e990184a8e39c9115f0fb0218 Mon Sep 17 00:00:00 2001 From: Vamshidhar Dantu Date: Fri, 16 Apr 2021 13:50:20 -0700 Subject: [PATCH 1/3] Add new base HuggingFace inference containers --- .../base/inference/artifacts/__init__.py | 0 .../inference/artifacts/config.properties | 5 + .../inference/artifacts/mms-entrypoint.py | 26 ++++ .../base/inference/docker/Dockerfile.cpu | 131 ++++++++++++++++++ huggingface/base/inference/docker/__init__.py | 0 .../inference/docker/cu110/Dockerfile.gpu | 131 ++++++++++++++++++ 6 files changed, 293 insertions(+) create mode 100644 huggingface/base/inference/artifacts/__init__.py create mode 100644 huggingface/base/inference/artifacts/config.properties create mode 100644 huggingface/base/inference/artifacts/mms-entrypoint.py create mode 100644 huggingface/base/inference/docker/Dockerfile.cpu create mode 100644 huggingface/base/inference/docker/__init__.py create mode 100644 huggingface/base/inference/docker/cu110/Dockerfile.gpu diff --git a/huggingface/base/inference/artifacts/__init__.py b/huggingface/base/inference/artifacts/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/huggingface/base/inference/artifacts/config.properties b/huggingface/base/inference/artifacts/config.properties new file mode 100644 index 000000000000..8f7d753388bf --- /dev/null +++ b/huggingface/base/inference/artifacts/config.properties @@ -0,0 +1,5 @@ +vmargs=-XX:+UseContainerSupport -XX:InitialRAMPercentage=8.0 -XX:MaxRAMPercentage=10.0 -XX:-UseLargePages -XX:+UseG1GC -XX:+ExitOnOutOfMemoryError +model_store=/opt/ml/model +load_models=ALL +inference_address=http://0.0.0.0:8080 +management_address=http://0.0.0.0:8081 diff --git a/huggingface/base/inference/artifacts/mms-entrypoint.py b/huggingface/base/inference/artifacts/mms-entrypoint.py new file mode 100644 index 000000000000..1c244c987729 --- /dev/null +++ b/huggingface/base/inference/artifacts/mms-entrypoint.py @@ -0,0 +1,26 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+ +import shlex +import subprocess +import sys +import os.path + +if sys.argv[1] == 'serve': + from sagemaker_mxnet_serving_container import serving + serving.main() +else: + subprocess.check_call(shlex.split(' '.join(sys.argv[1:]))) + +# prevent docker exit +subprocess.call(['tail', '-f', '/dev/null']) diff --git a/huggingface/base/inference/docker/Dockerfile.cpu b/huggingface/base/inference/docker/Dockerfile.cpu new file mode 100644 index 000000000000..8163f8fadc13 --- /dev/null +++ b/huggingface/base/inference/docker/Dockerfile.cpu @@ -0,0 +1,131 @@ +FROM ubuntu:18.04 + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +# Specify accept-bind-to-port LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT +# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html +LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true +# Specify multi-models LABEL to indicate container is capable of loading and serving multiple models concurrently +# https://docs.aws.amazon.com/sagemaker/latest/dg/build-multi-model-build-container.html +LABEL com.amazonaws.sagemaker.capabilities.multi-models=true + +ARG MMS_VERSION=1.1.2 +ARG PYTHON=python3 +ARG PYTHON_VERSION=3.7.10 +ARG PT_INFERENCE_URL=https://aws-pytorch-binaries.s3-us-west-2.amazonaws.com/r1.7.1_inference/20210112-183245/c1130f2829b03c0997b9813211a7c0f600fc569a/cpu/torch-1.7.1-cp36-cp36m-manylinux1_x86_64.whl +ARG PT_TORCHVISION_URL=https://torchvision-build.s3-us-west-2.amazonaws.com/1.7.1/cpu/torchvision-0.8.2-cp36-cp36m-linux_x86_64.whl + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + LD_LIBRARY_PATH="/opt/conda/lib/:${LD_LIBRARY_PATH}:/usr/local/lib" \ + PYTHONIOENCODING=UTF-8 \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + TEMP=/home/model-server/tmp \ + DEBIAN_FRONTEND=noninteractive + +ENV PATH /opt/conda/bin:$PATH + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + curl \ + emacs \ + git \ + libopencv-dev \ + openjdk-8-jdk-headless \ + openjdk-8-jdk \ + openjdk-8-jre \ + vim \ + wget \ + unzip \ + zlib1g-dev \ + libreadline-gplv2-dev \ + libncursesw5-dev \ + libssl-dev \ + libsqlite3-dev \ + libgdbm-dev \ + libc6-dev \ + libbz2-dev \ + tk-dev \ + libffi-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && chmod +x ~/miniconda.sh \ + && ~/miniconda.sh -b -p /opt/conda \ + && rm ~/miniconda.sh \ + && /opt/conda/bin/conda update conda \ + && /opt/conda/bin/conda install -c conda-forge \ + python=$PYTHON_VERSION \ + && /opt/conda/bin/conda install -y \ + # conda 4.10.0 requires ruamel_yaml to be installed. Currently pinned at latest. 
+ ruamel_yaml==0.15.100 \ + cython==0.29.12 \ + ipython==7.7.0 \ + mkl-include==2019.4 \ + mkl==2019.4 \ + numpy==1.19.1 \ + parso==0.8.0 \ + scipy==1.3.0 \ + typing==3.6.4 \ + && /opt/conda/bin/conda clean -ya + +RUN /opt/conda/bin/conda install -c \ + conda-forge \ + opencv==4.0.1 \ + && /opt/conda/bin/conda install -y \ + scikit-learn==0.21.2 \ + pandas==0.25.0 \ + h5py==2.9.0 \ + requests==2.22.0 \ + && /opt/conda/bin/conda clean -ya \ + && pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \ + && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \ + && pip install packaging==20.4 \ + enum-compat==0.0.3 \ + "cryptography>3.2" + +WORKDIR / + +RUN pip install --no-cache-dir \ + "awscli<2" \ + boto3 \ + multi-model-server==$MMS_VERSION \ + sagemaker-mxnet-inference + +# This is here to make our installed version of OpenCV work. +# https://stackoverflow.com/questions/29274638/opencv-libdc1394-error-failed-to-initialize-libdc1394 +# TODO: Should we be installing OpenCV in our image like this? Is there another way we can fix this? +RUN ln -s /dev/null /dev/raw1394 + +RUN useradd -m model-server \ + && mkdir -p /home/model-server/tmp \ + && chown -R model-server /home/model-server + +COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py +COPY config.properties /home/model-server + +RUN chmod +x /usr/local/bin/dockerd-entrypoint.py + +ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py + +RUN chmod +x /usr/local/bin/deep_learning_container.py + +RUN HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + && rm -rf ${HOME_DIR}/oss_compliance* + +RUN curl https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.7/license.txt -o /license.txt + +EXPOSE 8080 8081 +ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"] +CMD ["multi-model-server", "--start", "--mms-config", "/home/model-server/config.properties"] diff --git a/huggingface/base/inference/docker/__init__.py b/huggingface/base/inference/docker/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/huggingface/base/inference/docker/cu110/Dockerfile.gpu b/huggingface/base/inference/docker/cu110/Dockerfile.gpu new file mode 100644 index 000000000000..941382870de4 --- /dev/null +++ b/huggingface/base/inference/docker/cu110/Dockerfile.gpu @@ -0,0 +1,131 @@ +FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +# Specify accept-bind-to-port LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT +# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html +LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true +# Specify multi-models LABEL to indicate container is capable of loading and serving multiple models concurrently +# https://docs.aws.amazon.com/sagemaker/latest/dg/build-multi-model-build-container.html +LABEL com.amazonaws.sagemaker.capabilities.multi-models=true + +ARG MMS_VERSION=1.1.2 +ARG PYTHON=python3 +ARG PYTHON_VERSION=3.7.10 +ARG 
PT_INFERENCE_URL=https://aws-pytorch-binaries.s3-us-west-2.amazonaws.com/r1.7.1_inference/20210112-183245/c1130f2829b03c0997b9813211a7c0f600fc569a/cpu/torch-1.7.1-cp36-cp36m-manylinux1_x86_64.whl +ARG PT_TORCHVISION_URL=https://torchvision-build.s3-us-west-2.amazonaws.com/1.7.1/cpu/torchvision-0.8.2-cp36-cp36m-linux_x86_64.whl + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + LD_LIBRARY_PATH="/opt/conda/lib/:${LD_LIBRARY_PATH}:/usr/local/lib" \ + PYTHONIOENCODING=UTF-8 \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + TEMP=/home/model-server/tmp \ + DEBIAN_FRONTEND=noninteractive + +ENV PATH /opt/conda/bin:$PATH + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + curl \ + emacs \ + git \ + libopencv-dev \ + openjdk-8-jdk-headless \ + openjdk-8-jdk \ + openjdk-8-jre \ + vim \ + wget \ + unzip \ + zlib1g-dev \ + libreadline-gplv2-dev \ + libncursesw5-dev \ + libssl-dev \ + libsqlite3-dev \ + libgdbm-dev \ + libc6-dev \ + libbz2-dev \ + tk-dev \ + libffi-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && chmod +x ~/miniconda.sh \ + && ~/miniconda.sh -b -p /opt/conda \ + && rm ~/miniconda.sh \ + && /opt/conda/bin/conda update conda \ + && /opt/conda/bin/conda install -c conda-forge \ + python=$PYTHON_VERSION \ + && /opt/conda/bin/conda install -y \ + # conda 4.10.0 requires ruamel_yaml to be installed. Currently pinned at latest. + ruamel_yaml==0.15.100 \ + cython==0.29.12 \ + ipython==7.7.0 \ + mkl-include==2019.4 \ + mkl==2019.4 \ + numpy==1.19.1 \ + parso==0.8.0 \ + scipy==1.3.0 \ + typing==3.6.4 \ + && /opt/conda/bin/conda clean -ya + +RUN /opt/conda/bin/conda install -c \ + conda-forge \ + opencv==4.0.1 \ + && /opt/conda/bin/conda install -y \ + scikit-learn==0.21.2 \ + pandas==0.25.0 \ + h5py==2.9.0 \ + requests==2.22.0 \ + && /opt/conda/bin/conda clean -ya \ + && pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \ + && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \ + && pip install packaging==20.4 \ + enum-compat==0.0.3 \ + "cryptography>3.2" + +WORKDIR / + +RUN pip install --no-cache-dir \ + "awscli<2" \ + boto3 \ + multi-model-server==$MMS_VERSION \ + sagemaker-mxnet-inference + +# This is here to make our installed version of OpenCV work. +# https://stackoverflow.com/questions/29274638/opencv-libdc1394-error-failed-to-initialize-libdc1394 +# TODO: Should we be installing OpenCV in our image like this? Is there another way we can fix this? 
+RUN ln -s /dev/null /dev/raw1394
+
+RUN useradd -m model-server \
+ && mkdir -p /home/model-server/tmp \
+ && chown -R model-server /home/model-server
+
+COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
+COPY config.properties /home/model-server
+
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.py
+
+ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py
+
+RUN HOME_DIR=/root \
+ && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
+ && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
+ && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
+ && chmod +x /usr/local/bin/testOSSCompliance \
+ && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
+ && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
+ && rm -rf ${HOME_DIR}/oss_compliance*
+
+RUN curl https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.7/license.txt -o /license.txt
+
+EXPOSE 8080 8081
+ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
+CMD ["multi-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]
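
Note on patch 1: config.properties binds the MMS inference API to port 8080 and the management API to 8081, and mms-entrypoint.py either hands a "serve" argument to the serving stack or runs the given command, then tails /dev/null so the container stays alive. A minimal host-side smoke test for a locally built image could look like the sketch below; the huggingface-inference:cpu tag and the fixed startup wait are illustrative assumptions, not anything the patch defines.

# smoke_test.py - illustrative sketch; assumes the CPU image was built locally
# and tagged "huggingface-inference:cpu" (hypothetical tag).
import subprocess
import time
import urllib.request

IMAGE = "huggingface-inference:cpu"

# The default CMD starts multi-model-server against the baked-in
# /home/model-server/config.properties (ports 8080/8081).
container_id = subprocess.check_output(
    ["docker", "run", "-d", "-p", "8080:8080", "-p", "8081:8081", IMAGE],
    text=True,
).strip()

try:
    time.sleep(10)  # crude wait for MMS to finish starting
    # /ping is the MMS health check on the inference port.
    with urllib.request.urlopen("http://localhost:8080/ping") as resp:
        print("ping:", resp.status, resp.read().decode())
    # /models on the management port lists registered models (empty store here).
    with urllib.request.urlopen("http://localhost:8081/models") as resp:
        print("models:", resp.read().decode())
finally:
    subprocess.run(["docker", "rm", "-f", container_id], check=True)

The same check applies to the GPU image when started with the NVIDIA container runtime.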
From 8be4671a65d2b4ad3757140e5b155fb54dda4d54 Mon Sep 17 00:00:00 2001
From: Vamshidhar Dantu
Date: Tue, 20 Apr 2021 22:37:58 -0700
Subject: [PATCH 2/3] Prune packages

---
 .../base/inference/artifacts/__init__.py | 0
 .../inference/artifacts/mms-entrypoint.py | 5 +-
 .../base/inference/docker/Dockerfile.cpu | 69 +++++++------------
 huggingface/base/inference/docker/__init__.py | 0
 .../inference/docker/cu110/Dockerfile.gpu | 68 ++++++------------
 5 files changed, 48 insertions(+), 94 deletions(-)
 delete mode 100644 huggingface/base/inference/artifacts/__init__.py
 delete mode 100644 huggingface/base/inference/docker/__init__.py

diff --git a/huggingface/base/inference/artifacts/__init__.py b/huggingface/base/inference/artifacts/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/huggingface/base/inference/artifacts/mms-entrypoint.py b/huggingface/base/inference/artifacts/mms-entrypoint.py
index 1c244c987729..f9252aace31f 100644
--- a/huggingface/base/inference/artifacts/mms-entrypoint.py
+++ b/huggingface/base/inference/artifacts/mms-entrypoint.py
@@ -1,4 +1,4 @@
-# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
@@ -14,10 +14,9 @@
import shlex
import subprocess
import sys
-import os.path

if sys.argv[1] == 'serve':
-    from sagemaker_mxnet_serving_container import serving
+    from sagemaker_huggingface_serving_container import serving
    serving.main()
else:
    subprocess.check_call(shlex.split(' '.join(sys.argv[1:])))
diff --git a/huggingface/base/inference/docker/Dockerfile.cpu b/huggingface/base/inference/docker/Dockerfile.cpu
index 8163f8fadc13..b471cd3f8bf5 100644
--- a/huggingface/base/inference/docker/Dockerfile.cpu
+++ b/huggingface/base/inference/docker/Dockerfile.cpu
@@ -12,9 +12,8 @@ LABEL com.amazonaws.sagemaker.capabilities.multi-models=true

ARG MMS_VERSION=1.1.2
ARG PYTHON=python3
-ARG PYTHON_VERSION=3.7.10
-ARG PT_INFERENCE_URL=https://aws-pytorch-binaries.s3-us-west-2.amazonaws.com/r1.7.1_inference/20210112-183245/c1130f2829b03c0997b9813211a7c0f600fc569a/cpu/torch-1.7.1-cp36-cp36m-manylinux1_x86_64.whl
-ARG PT_TORCHVISION_URL=https://torchvision-build.s3-us-west-2.amazonaws.com/1.7.1/cpu/torchvision-0.8.2-cp36-cp36m-linux_x86_64.whl
+ARG PYTHON_VERSION=3.6.13
+ARG OPEN_MPI_VERSION=4.0.1

ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
@@ -29,28 +28,14 @@ ENV PATH /opt/conda/bin:$PATH

RUN apt-get update \
&& apt-get install -y --no-install-recommends \
- build-essential \
ca-certificates \
- curl \
- emacs \
- git \
- libopencv-dev \
+ build-essential \
+ openssl \
openjdk-8-jdk-headless \
- openjdk-8-jdk \
- openjdk-8-jre \
vim \
wget \
+ curl \
unzip \
- zlib1g-dev \
- libreadline-gplv2-dev \
- libncursesw5-dev \
- libssl-dev \
- libsqlite3-dev \
- libgdbm-dev \
- libc6-dev \
- libbz2-dev \
- tk-dev \
- libffi-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
@@ -65,42 +50,38 @@ RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-lat
# conda 4.10.0 requires ruamel_yaml to be installed. Currently pinned at latest.
ruamel_yaml==0.15.100 \
cython==0.29.12 \
- ipython==7.7.0 \
mkl-include==2019.4 \
mkl==2019.4 \
- numpy==1.19.1 \
- parso==0.8.0 \
- scipy==1.3.0 \
- typing==3.6.4 \
+ botocore \
&& /opt/conda/bin/conda clean -ya

-RUN /opt/conda/bin/conda install -c \
- conda-forge \
- opencv==4.0.1 \
- && /opt/conda/bin/conda install -y \
- scikit-learn==0.21.2 \
- pandas==0.25.0 \
- h5py==2.9.0 \
- requests==2.22.0 \
- && /opt/conda/bin/conda clean -ya \
- && pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
+RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
&& ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
&& pip install packaging==20.4 \
enum-compat==0.0.3 \
"cryptography>3.2"

+RUN wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-$OPEN_MPI_VERSION.tar.gz \
+ && gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \
+ && cd openmpi-$OPEN_MPI_VERSION \
+ && ./configure --prefix=/home/.openmpi \
+ && make all install \
+ && cd .. \
+ && rm openmpi-$OPEN_MPI_VERSION.tar.gz \
+ && rm -rf openmpi-$OPEN_MPI_VERSION
+
+# The ENV variables declared below are changed in the previous section
+# Grouping these ENV variables in the first section causes
+# ompi_info to fail.
This is only observed in CPU containers +ENV PATH="$PATH:/home/.openmpi/bin" +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/" +RUN ompi_info --parsable --all | grep mpi_built_with_cuda_support:value + WORKDIR / RUN pip install --no-cache-dir \ - "awscli<2" \ - boto3 \ multi-model-server==$MMS_VERSION \ - sagemaker-mxnet-inference - -# This is here to make our installed version of OpenCV work. -# https://stackoverflow.com/questions/29274638/opencv-libdc1394-error-failed-to-initialize-libdc1394 -# TODO: Should we be installing OpenCV in our image like this? Is there another way we can fix this? -RUN ln -s /dev/null /dev/raw1394 + sagemaker-inference RUN useradd -m model-server \ && mkdir -p /home/model-server/tmp \ @@ -124,8 +105,6 @@ RUN HOME_DIR=/root \ && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ && rm -rf ${HOME_DIR}/oss_compliance* -RUN curl https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.7/license.txt -o /license.txt - EXPOSE 8080 8081 ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"] CMD ["multi-model-server", "--start", "--mms-config", "/home/model-server/config.properties"] diff --git a/huggingface/base/inference/docker/__init__.py b/huggingface/base/inference/docker/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/huggingface/base/inference/docker/cu110/Dockerfile.gpu b/huggingface/base/inference/docker/cu110/Dockerfile.gpu index 941382870de4..6a19f9fa94c9 100644 --- a/huggingface/base/inference/docker/cu110/Dockerfile.gpu +++ b/huggingface/base/inference/docker/cu110/Dockerfile.gpu @@ -1,4 +1,4 @@ -FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 +FROM nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04 LABEL maintainer="Amazon AI" LABEL dlc_major_version="1" @@ -12,9 +12,8 @@ LABEL com.amazonaws.sagemaker.capabilities.multi-models=true ARG MMS_VERSION=1.1.2 ARG PYTHON=python3 -ARG PYTHON_VERSION=3.7.10 -ARG PT_INFERENCE_URL=https://aws-pytorch-binaries.s3-us-west-2.amazonaws.com/r1.7.1_inference/20210112-183245/c1130f2829b03c0997b9813211a7c0f600fc569a/cpu/torch-1.7.1-cp36-cp36m-manylinux1_x86_64.whl -ARG PT_TORCHVISION_URL=https://torchvision-build.s3-us-west-2.amazonaws.com/1.7.1/cpu/torchvision-0.8.2-cp36-cp36m-linux_x86_64.whl +ARG PYTHON_VERSION=3.6.13 +ARG OPEN_MPI_VERSION=4.0.1 ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 \ @@ -29,28 +28,15 @@ ENV PATH /opt/conda/bin:$PATH RUN apt-get update \ && apt-get install -y --no-install-recommends \ - build-essential \ ca-certificates \ - curl \ - emacs \ - git \ - libopencv-dev \ + build-essential \ + openssl \ openjdk-8-jdk-headless \ - openjdk-8-jdk \ - openjdk-8-jre \ vim \ wget \ + curl \ unzip \ - zlib1g-dev \ - libreadline-gplv2-dev \ - libncursesw5-dev \ - libssl-dev \ - libsqlite3-dev \ - libgdbm-dev \ - libc6-dev \ - libbz2-dev \ - tk-dev \ - libffi-dev \ + libnuma1 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* @@ -65,42 +51,34 @@ RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-lat # conda 4.10.0 requires ruamel_yaml to be installed. Currently pinned at latest. 
ruamel_yaml==0.15.100 \
cython==0.29.12 \
- ipython==7.7.0 \
+ botocore \
mkl-include==2019.4 \
mkl==2019.4 \
- numpy==1.19.1 \
- parso==0.8.0 \
- scipy==1.3.0 \
- typing==3.6.4 \
&& /opt/conda/bin/conda clean -ya

-RUN /opt/conda/bin/conda install -c \
- conda-forge \
- opencv==4.0.1 \
- && /opt/conda/bin/conda install -y \
- scikit-learn==0.21.2 \
- pandas==0.25.0 \
- h5py==2.9.0 \
- requests==2.22.0 \
- && /opt/conda/bin/conda clean -ya \
- && pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
+RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
&& ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
&& pip install packaging==20.4 \
enum-compat==0.0.3 \
"cryptography>3.2"

+RUN wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-$OPEN_MPI_VERSION.tar.gz \
+ && gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \
+ && cd openmpi-$OPEN_MPI_VERSION \
+ && ./configure --prefix=/home/.openmpi \
+ && make all install \
+ && cd .. \
+ && rm openmpi-$OPEN_MPI_VERSION.tar.gz \
+ && rm -rf openmpi-$OPEN_MPI_VERSION
+
+ENV PATH="$PATH:/home/.openmpi/bin"
+ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/"
+
WORKDIR /

RUN pip install --no-cache-dir \
- "awscli<2" \
- boto3 \
multi-model-server==$MMS_VERSION \
- sagemaker-mxnet-inference
-
-# This is here to make our installed version of OpenCV work.
-# https://stackoverflow.com/questions/29274638/opencv-libdc1394-error-failed-to-initialize-libdc1394
-# TODO: Should we be installing OpenCV in our image like this? Is there another way we can fix this?
-RUN ln -s /dev/null /dev/raw1394
+ sagemaker-inference

RUN useradd -m model-server \
&& mkdir -p /home/model-server/tmp \
&& chown -R model-server /home/model-server
@@ -124,8 +102,6 @@ RUN HOME_DIR=/root \
&& ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
&& rm -rf ${HOME_DIR}/oss_compliance*

-RUN curl https://aws-dlc-licenses.s3.amazonaws.com/pytorch-1.7/license.txt -o /license.txt
-
EXPOSE 8080 8081
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["multi-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]
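
Note on patch 2: the runtime image is trimmed down to multi-model-server plus sagemaker-inference, the entrypoint now dispatches "serve" to sagemaker_huggingface_serving_container, and Open MPI 4.0.1 is built into /home/.openmpi. Since the images keep the multi-models capability label, one way to exercise multi-model serving outside SageMaker is MMS's management API on port 8081; a sketch follows, in which the model name and .mar archive URL are hypothetical.

# register_model.py - illustrative sketch of dynamic model loading through the
# MMS management API (port 8081 per config.properties).
import json
import urllib.parse
import urllib.request

MANAGEMENT = "http://localhost:8081"
MODEL_URL = "https://example.com/artifacts/my-model.mar"  # hypothetical archive

# POST /models asks MMS to fetch the archive and spin up workers for it.
query = urllib.parse.urlencode(
    {"url": MODEL_URL, "model_name": "my-model", "initial_workers": 1}
)
request = urllib.request.Request(f"{MANAGEMENT}/models?{query}", method="POST")
with urllib.request.urlopen(request) as resp:
    print("register:", resp.status, resp.read().decode())

# GET /models/<name> describes the registered model and its workers.
with urllib.request.urlopen(f"{MANAGEMENT}/models/my-model") as resp:
    print(json.dumps(json.loads(resp.read()), indent=2))

Once a model is loaded, inference requests go to POST /predictions/my-model on the 8080 inference port.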
From d4ea399ece663d24df343e2eddd0e8a207fc4046 Mon Sep 17 00:00:00 2001
From: Vamshidhar Dantu
Date: Tue, 20 Apr 2021 22:43:47 -0700
Subject: [PATCH 3/3] Add py3.6 and py3.7 Dockerfiles to readily use AWS TF and PT wheels

---
 .../docker/{ => py3.6}/Dockerfile.cpu | 0
 .../docker/{ => py3.6}/cu110/Dockerfile.gpu | 0
 .../inference/docker/py3.7/Dockerfile.cpu | 110 ++++++++++++++++++
 .../docker/py3.7/cu110/Dockerfile.gpu | 107 +++++++++++++++++
 4 files changed, 217 insertions(+)
 rename huggingface/base/inference/docker/{ => py3.6}/Dockerfile.cpu (100%)
 rename huggingface/base/inference/docker/{ => py3.6}/cu110/Dockerfile.gpu (100%)
 create mode 100644 huggingface/base/inference/docker/py3.7/Dockerfile.cpu
 create mode 100644 huggingface/base/inference/docker/py3.7/cu110/Dockerfile.gpu

diff --git a/huggingface/base/inference/docker/Dockerfile.cpu b/huggingface/base/inference/docker/py3.6/Dockerfile.cpu
similarity index 100%
rename from huggingface/base/inference/docker/Dockerfile.cpu
rename to huggingface/base/inference/docker/py3.6/Dockerfile.cpu
diff --git a/huggingface/base/inference/docker/cu110/Dockerfile.gpu b/huggingface/base/inference/docker/py3.6/cu110/Dockerfile.gpu
similarity index 100%
rename from huggingface/base/inference/docker/cu110/Dockerfile.gpu
rename to huggingface/base/inference/docker/py3.6/cu110/Dockerfile.gpu
diff --git a/huggingface/base/inference/docker/py3.7/Dockerfile.cpu b/huggingface/base/inference/docker/py3.7/Dockerfile.cpu
new file mode 100644
index 000000000000..8fb46c5dd7d4
--- /dev/null
+++ b/huggingface/base/inference/docker/py3.7/Dockerfile.cpu
@@ -0,0 +1,110 @@
+FROM ubuntu:18.04
+
+LABEL maintainer="Amazon AI"
+LABEL dlc_major_version="1"
+
+# Specify accept-bind-to-port LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT
+# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+# Specify multi-models LABEL to indicate container is capable of loading and serving multiple models concurrently
+# https://docs.aws.amazon.com/sagemaker/latest/dg/build-multi-model-build-container.html
+LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
+
+ARG MMS_VERSION=1.1.2
+ARG PYTHON=python3
+ARG PYTHON_VERSION=3.7.10
+ARG OPEN_MPI_VERSION=4.0.1
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+ PYTHONUNBUFFERED=1 \
+ LD_LIBRARY_PATH="/opt/conda/lib/:${LD_LIBRARY_PATH}:/usr/local/lib" \
+ PYTHONIOENCODING=UTF-8 \
+ LANG=C.UTF-8 \
+ LC_ALL=C.UTF-8 \
+ TEMP=/home/model-server/tmp \
+ DEBIAN_FRONTEND=noninteractive
+
+ENV PATH /opt/conda/bin:$PATH
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+ ca-certificates \
+ build-essential \
+ openssl \
+ openjdk-8-jdk-headless \
+ vim \
+ wget \
+ curl \
+ unzip \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \
+ && chmod +x ~/miniconda.sh \
+ && ~/miniconda.sh -b -p /opt/conda \
+ && rm ~/miniconda.sh \
+ && /opt/conda/bin/conda update conda \
+ && /opt/conda/bin/conda install -c conda-forge \
+ python=$PYTHON_VERSION \
+ && /opt/conda/bin/conda install -y \
+ # conda 4.10.0 requires ruamel_yaml to be installed. Currently pinned at latest.
+ ruamel_yaml==0.15.100 \
+ cython==0.29.12 \
+ mkl-include==2019.4 \
+ mkl==2019.4 \
+ botocore \
+ && /opt/conda/bin/conda clean -ya
+
+RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
+ && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
+ && pip install packaging==20.4 \
+ enum-compat==0.0.3 \
+ "cryptography>3.2"
+
+RUN wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-$OPEN_MPI_VERSION.tar.gz \
+ && gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \
+ && cd openmpi-$OPEN_MPI_VERSION \
+ && ./configure --prefix=/home/.openmpi \
+ && make all install \
+ && cd .. \
+ && rm openmpi-$OPEN_MPI_VERSION.tar.gz \
+ && rm -rf openmpi-$OPEN_MPI_VERSION
+
+# The ENV variables declared below are changed in the previous section
+# Grouping these ENV variables in the first section causes
+# ompi_info to fail.
This is only observed in CPU containers +ENV PATH="$PATH:/home/.openmpi/bin" +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/" +RUN ompi_info --parsable --all | grep mpi_built_with_cuda_support:value + +WORKDIR / + +RUN pip install --no-cache-dir \ + multi-model-server==$MMS_VERSION \ + sagemaker-inference + +RUN useradd -m model-server \ + && mkdir -p /home/model-server/tmp \ + && chown -R model-server /home/model-server + +COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py +COPY config.properties /home/model-server + +RUN chmod +x /usr/local/bin/dockerd-entrypoint.py + +ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py + +RUN chmod +x /usr/local/bin/deep_learning_container.py + +RUN HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + && rm -rf ${HOME_DIR}/oss_compliance* + +EXPOSE 8080 8081 +ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"] +CMD ["multi-model-server", "--start", "--mms-config", "/home/model-server/config.properties"] diff --git a/huggingface/base/inference/docker/py3.7/cu110/Dockerfile.gpu b/huggingface/base/inference/docker/py3.7/cu110/Dockerfile.gpu new file mode 100644 index 000000000000..1fc5b2ef3cad --- /dev/null +++ b/huggingface/base/inference/docker/py3.7/cu110/Dockerfile.gpu @@ -0,0 +1,107 @@ +FROM nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04 + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +# Specify accept-bind-to-port LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT +# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html +LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true +# Specify multi-models LABEL to indicate container is capable of loading and serving multiple models concurrently +# https://docs.aws.amazon.com/sagemaker/latest/dg/build-multi-model-build-container.html +LABEL com.amazonaws.sagemaker.capabilities.multi-models=true + +ARG MMS_VERSION=1.1.2 +ARG PYTHON=python3 +ARG PYTHON_VERSION=3.7.10 +ARG OPEN_MPI_VERSION=4.0.1 + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + LD_LIBRARY_PATH="/opt/conda/lib/:${LD_LIBRARY_PATH}:/usr/local/lib" \ + PYTHONIOENCODING=UTF-8 \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + TEMP=/home/model-server/tmp \ + DEBIAN_FRONTEND=noninteractive + +ENV PATH /opt/conda/bin:$PATH + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + build-essential \ + openssl \ + openjdk-8-jdk-headless \ + vim \ + wget \ + curl \ + unzip \ + libnuma1 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && chmod +x ~/miniconda.sh \ + && ~/miniconda.sh -b -p /opt/conda \ + && rm ~/miniconda.sh \ + && /opt/conda/bin/conda update conda \ + && /opt/conda/bin/conda install -c conda-forge \ + python=$PYTHON_VERSION \ + && /opt/conda/bin/conda install -y \ + # conda 4.10.0 requires ruamel_yaml to be installed. Currently pinned at latest. 
+ ruamel_yaml==0.15.100 \ + cython==0.29.12 \ + botocore \ + mkl-include==2019.4 \ + mkl==2019.4 \ + && /opt/conda/bin/conda clean -ya + +RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \ + && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \ + && pip install packaging==20.4 \ + enum-compat==0.0.3 \ + "cryptography>3.2" + +RUN wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-$OPEN_MPI_VERSION.tar.gz \ + && gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \ + && cd openmpi-$OPEN_MPI_VERSION \ + && ./configure --prefix=/home/.openmpi \ + && make all install \ + && cd .. \ + && rm openmpi-$OPEN_MPI_VERSION.tar.gz \ + && rm -rf openmpi-$OPEN_MPI_VERSION + +ENV PATH="$PATH:/home/.openmpi/bin" +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/" + +WORKDIR / + +RUN pip install --no-cache-dir \ + multi-model-server==$MMS_VERSION \ + sagemaker-inference + +RUN useradd -m model-server \ + && mkdir -p /home/model-server/tmp \ + && chown -R model-server /home/model-server + +COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py +COPY config.properties /home/model-server + +RUN chmod +x /usr/local/bin/dockerd-entrypoint.py + +ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py + +RUN chmod +x /usr/local/bin/deep_learning_container.py + +RUN HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + && rm -rf ${HOME_DIR}/oss_compliance* + +EXPOSE 8080 8081 +ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"] +CMD ["multi-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]
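
Note on patch 3: each supported Python/device combination now has its own Dockerfile under huggingface/base/inference/docker/ (py3.6 and py3.7 trees, each with a CPU file and a cu110 GPU file). A sketch of building all four variants from the repository root follows; the image tags are hypothetical, and the artifacts directory serves as the build context because the Dockerfiles COPY mms-entrypoint.py and config.properties from it.

# build_matrix.py - illustrative sketch; the Dockerfile paths follow this
# patch series, while the image tags are hypothetical.
import subprocess

DOCKER_DIR = "huggingface/base/inference/docker"
CONTEXT = "huggingface/base/inference/artifacts"  # holds entrypoint + config

MATRIX = [
    ("py3.6", "cpu", f"{DOCKER_DIR}/py3.6/Dockerfile.cpu"),
    ("py3.6", "gpu", f"{DOCKER_DIR}/py3.6/cu110/Dockerfile.gpu"),
    ("py3.7", "cpu", f"{DOCKER_DIR}/py3.7/Dockerfile.cpu"),
    ("py3.7", "gpu", f"{DOCKER_DIR}/py3.7/cu110/Dockerfile.gpu"),
]

for py, device, dockerfile in MATRIX:
    tag = f"huggingface-inference:{py}-{device}"  # hypothetical tag scheme
    # -f points at the variant's Dockerfile; the context only needs to hold
    # the files that the Dockerfile COPYs in.
    subprocess.check_call(["docker", "build", "-f", dockerfile, "-t", tag, CONTEXT])
    print("built", tag)

GPU variants additionally need the NVIDIA container runtime when run; otherwise the smoke test from the note after patch 1 applies unchanged.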