2 changes: 1 addition & 1 deletion lib/sagemaker-container-support
84 changes: 61 additions & 23 deletions test/conftest.py
@@ -10,6 +10,7 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import boto3
import os
import logging
import platform
@@ -18,7 +19,7 @@
import sys
import tempfile


from sagemaker import Session
from test.utils import local_mode

logger = logging.getLogger(__name__)
@@ -36,6 +37,8 @@
def pytest_addoption(parser):
parser.addoption('--build-image', '-D', action="store_true")
parser.addoption('--build-base-image', '-B', action="store_true")
parser.addoption('--aws-id')
parser.addoption('--instance-type')
parser.addoption('--install-container-support', '-C', action="store_true")
parser.addoption('--docker-base-name', default='pytorch')
parser.addoption('--region', default='us-west-2')
@@ -46,40 +49,40 @@ def pytest_addoption(parser):
parser.addoption('--tag', default=None)


@pytest.fixture(scope='session')
def docker_base_name(request):
@pytest.fixture(scope='session', name='docker_base_name')

Reviewer:
Just asking: why is this new way preferred?

Contributor Author:
To avoid pylint's "Shadows name from outer scope" warning.

See the fixture name parameter in http://blog.pytest.org/2016/whats-new-in-pytest-30/:
"This solves the problem where the function argument shadows the argument name, which annoys pylint and might cause bugs if one forgets to pull a fixture into a test function."

def fixture_docker_base_name(request):
return request.config.getoption('--docker-base-name')
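
For illustration, a minimal standalone sketch of the name= pattern described in the comment above (the fixture value and test below are made up, not part of this diff):

import pytest

@pytest.fixture(name='docker_base_name')
def fixture_docker_base_name():
    # Tests request the fixture by its declared name ('docker_base_name'),
    # while the function keeps a distinct identifier, so no outer-scope name
    # is shadowed and pylint stays quiet.
    return 'pytorch'

def test_docker_base_name(docker_base_name):
    assert docker_base_name == 'pytorch'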


@pytest.fixture(scope='session')
def region(request):
@pytest.fixture(scope='session', name='region')
def fixture_region(request):
return request.config.getoption('--region')


@pytest.fixture(scope='session')
def framework_version(request):
@pytest.fixture(scope='session', name='framework_version')
def fixture_framework_version(request):
return request.config.getoption('--framework-version')


@pytest.fixture(scope='session')
def py_version(request):
@pytest.fixture(scope='session', name='py_version')
def fixture_py_version(request):
return 'py{}'.format(int(request.config.getoption('--py-version')))


@pytest.fixture(scope='session')
def processor(request):
@pytest.fixture(scope='session', name='processor')
def fixture_processor(request):
return request.config.getoption('--processor')


@pytest.fixture(scope='session')
def tag(request, framework_version, processor, py_version):
@pytest.fixture(scope='session', name='tag')
def fixture_tag(request, framework_version, processor, py_version):
provided_tag = request.config.getoption('--tag')
default_tag = '{}-{}-{}'.format(framework_version, processor, py_version)
return provided_tag if provided_tag else default_tag


@pytest.fixture(scope='session')
def docker_image(docker_base_name, tag):
@pytest.fixture(scope='session', name='docker_image')
def fixture_docker_image(docker_base_name, tag):
return '{}:{}'.format(docker_base_name, tag)


@@ -96,20 +99,20 @@ def opt_ml():
shutil.rmtree(tmp, True)


@pytest.fixture(scope='session')
def use_gpu(processor):
@pytest.fixture(scope='session', name='use_gpu')
def fixture_use_gpu(processor):
return processor == 'gpu'


@pytest.fixture(scope='session', autouse=True)
def install_container_support(request):
@pytest.fixture(scope='session', name='install_container_support', autouse=True)
def fixture_install_container_support(request):
install = request.config.getoption('--install-container-support')
if install:
local_mode.install_container_support()


@pytest.fixture(scope='session', autouse=True)
def build_base_image(request, framework_version, py_version, processor, tag, docker_base_name):
@pytest.fixture(scope='session', name='build_base_image', autouse=True)
def fixture_build_base_image(request, framework_version, py_version, processor, tag, docker_base_name):
build_base_image = request.config.getoption('--build-base-image')
if build_base_image:
return local_mode.build_base_image(framework_name=docker_base_name,
@@ -122,8 +125,8 @@ def build_base_image(request, framework_version, py_version, processor, tag, doc
return tag


@pytest.fixture(scope='session', autouse=True)
def build_image(request, framework_version, py_version, processor, tag, docker_base_name):
@pytest.fixture(scope='session', name='build_image', autouse=True)
def fixture_build_image(request, framework_version, py_version, processor, tag, docker_base_name):
build_image = request.config.getoption('--build-image')
if build_image:
return local_mode.build_image(framework_name=docker_base_name,
@@ -134,3 +137,38 @@ def build_image(request, framework_version, py_version, processor, tag, docker_b
cwd=os.path.join(dir_path, '..'))

return tag


@pytest.fixture(scope='session', name='sagemaker_session')
def fixture_sagemaker_session(region):
return Session(boto_session=boto3.Session(region_name=region))


@pytest.fixture(name='aws_id', scope='session')
def fixture_aws_id(request):
return request.config.getoption('--aws-id')


@pytest.fixture(name='instance_type', scope='session')
def fixture_instance_type(request):
return request.config.getoption('--instance-type')


@pytest.fixture(name='docker_registry', scope='session')
def fixture_docker_registry(aws_id, region):
return '{}.dkr.ecr.{}.amazonaws.com'.format(aws_id, region)


@pytest.fixture(name='ecr_image', scope='session')
def fixture_ecr_image(docker_registry, docker_base_name, tag):

Reviewer:
This will run the SageMaker integ tests against the sagemaker-pytorch repo, right? But I think the integ tests push to preprod-pytorch, so I'm not sure this will run against the image pushed after the local tests.

Contributor Author:
That's a good point!

return '{}/{}:{}'.format(docker_registry, docker_base_name, tag)


@pytest.fixture(scope='session', name='dist_cpu_backend', params=['tcp', 'gloo'])
def fixture_dist_cpu_backend(request):
return request.param


@pytest.fixture(scope='session', name='dist_gpu_backend', params=['gloo'])
def fixture_dist_gpu_backend(request):
return request.param
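
For reference, a parametrized session-scoped fixture such as dist_cpu_backend fans every test that requests it out over its params. A minimal standalone sketch (illustrative test, independent of this diff):

import pytest

@pytest.fixture(scope='session', params=['tcp', 'gloo'])
def dist_cpu_backend(request):
    return request.param

def test_backend_is_supported(dist_cpu_backend):
    # Collected twice: once with backend 'tcp', once with 'gloo'.
    assert dist_cpu_backend in ('tcp', 'gloo')
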
14 changes: 14 additions & 0 deletions test/integration/__init__.py
@@ -10,3 +10,17 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import os

resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'resources'))
mnist_path = os.path.join(resources_path, 'mnist')
mnist_script = os.path.join(mnist_path, 'mnist.py')
data_dir = os.path.join(mnist_path, 'data')
training_dir = os.path.join(data_dir, 'training')
dist_operations_path = os.path.join(resources_path, 'distributed_operations.py')

mnist_1d_script = os.path.join(mnist_path, 'mnist_1d.py')
model_cpu_dir = os.path.join(mnist_path, 'model_cpu')
model_cpu_1d_dir = os.path.join(model_cpu_dir, '1d')
model_gpu_dir = os.path.join(mnist_path, 'model_gpu')
model_gpu_1d_dir = os.path.join(model_gpu_dir, '1d')
33 changes: 5 additions & 28 deletions test/integration/local/test_distributed_training.py
@@ -10,34 +10,14 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import os
import pytest
import torch
from test.integration import data_dir, dist_operations_path, mnist_script
from test.utils import local_mode

resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'resources'))
mnist_path = os.path.join(resources_path, 'mnist')
mnist_script = os.path.join(mnist_path, 'mnist.py')
data_dir = os.path.join(mnist_path, 'data')
training_dir = os.path.join(data_dir, 'training')
dist_operations_path = os.path.join(resources_path, 'distributed_operations.py')

ENTRYPOINT = ["python", "-m", "pytorch_container.start"]


@pytest.fixture(scope='session', name='dist_cpu_backend', params=['tcp', 'gloo'])
def fixture_dist_cpu_backend(request):
return request.param


@pytest.fixture(scope='session', name='dist_gpu_backend', params=['gloo'])
def fixture_dist_gpu_backend(request):
return request.param


def test_dist_operations_path_cpu(docker_image, opt_ml, dist_cpu_backend):
local_mode.train(dist_operations_path, data_dir, docker_image, opt_ml,
entrypoint=ENTRYPOINT, cluster_size=3,
local_mode.train(dist_operations_path, data_dir, docker_image, opt_ml, cluster_size=3,
hyperparameters={'backend': dist_cpu_backend})

assert local_mode.file_exists(opt_ml, 'model/success'), 'Script success file was not created'
@@ -47,8 +27,7 @@ def test_dist_operations_path_cpu(docker_image, opt_ml, dist_cpu_backend):

@pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda is not available")
def test_dist_operations_path_gpu(docker_image, opt_ml, dist_gpu_backend):
local_mode.train(dist_operations_path, data_dir, docker_image, opt_ml,
entrypoint=ENTRYPOINT, cluster_size=3,
local_mode.train(dist_operations_path, data_dir, docker_image, opt_ml, cluster_size=3,
use_gpu=True, hyperparameters={'backend': dist_gpu_backend})

assert local_mode.file_exists(opt_ml, 'model/success'), 'Script success file was not created'
@@ -57,8 +36,7 @@ def test_dist_operations_path_gpu(docker_image, opt_ml, dist_gpu_backend):


def test_mnist_cpu(docker_image, opt_ml, dist_cpu_backend):
local_mode.train(mnist_script, data_dir, docker_image, opt_ml,
entrypoint=ENTRYPOINT, cluster_size=2,
local_mode.train(mnist_script, data_dir, docker_image, opt_ml, cluster_size=2,
hyperparameters={'backend': dist_cpu_backend})

assert local_mode.file_exists(opt_ml, 'model/model'), 'Model file was not created'
@@ -68,8 +46,7 @@ def test_mnist_cpu(docker_image, opt_ml, dist_cpu_backend):

@pytest.mark.skipif(not torch.cuda.is_available(), reason="cuda is not available")
def test_mnist_gpu(docker_image, opt_ml, dist_gpu_backend):
local_mode.train(mnist_script, data_dir, docker_image, opt_ml,
entrypoint=ENTRYPOINT, cluster_size=2,
local_mode.train(mnist_script, data_dir, docker_image, opt_ml, cluster_size=2,
use_gpu=True, hyperparameters={'backend': dist_gpu_backend})

assert local_mode.file_exists(opt_ml, 'model/model'), 'Model file was not created'
27 changes: 5 additions & 22 deletions test/integration/local/test_serving.py
@@ -10,50 +10,34 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import os
import json
from six import StringIO, BytesIO
import pytest
import requests
from test.utils import local_mode
import torch
import logging
import torch.utils.data
import torch.utils.data.distributed
from torchvision import datasets, transforms
import numpy as np
from container_support.serving import JSON_CONTENT_TYPE, CSV_CONTENT_TYPE, NPY_CONTENT_TYPE

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

mnist_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'resources', 'mnist')
mnist_script = os.path.join(mnist_path, 'mnist.py')
mnist_1d_script = os.path.join(mnist_path, 'mnist_1d.py')
model_cpu_dir = os.path.join(mnist_path, 'model_cpu')
model_cpu_1d_dir = os.path.join(model_cpu_dir, '1d')
model_gpu_dir = os.path.join(mnist_path, 'model_gpu')
model_gpu_1d_dir = os.path.join(model_gpu_dir, '1d')

data_dir = os.path.join(mnist_path, 'data')
training_dir = os.path.join(data_dir, 'training')

ENTRYPOINT = ["python", "-m", "pytorch_container.start"]
from test.integration import training_dir, mnist_script, mnist_1d_script, model_cpu_dir, model_gpu_dir, \
model_cpu_1d_dir


@pytest.fixture(name='serve_cpu')
def fixture_serve_cpu(docker_image, opt_ml):
def serve(model_dir=model_cpu_dir, script=mnist_script):
return local_mode.serve(customer_script=script, model_dir=model_dir, image_name=docker_image,
opt_ml=opt_ml, entrypoint=ENTRYPOINT)
opt_ml=opt_ml)
return serve


@pytest.fixture(name='serve_gpu')
def fixture_serve_gpu(docker_image, opt_ml):
def serve(model_dir=model_cpu_dir, script=mnist_script):
def serve(model_dir=model_gpu_dir, script=mnist_script):
return local_mode.serve(customer_script=script, model_dir=model_dir, image_name=docker_image,
use_gpu=True, opt_ml=opt_ml, entrypoint=ENTRYPOINT)
use_gpu=True, opt_ml=opt_ml)
return serve


@@ -119,7 +103,6 @@ def _assert_prediction_csv(test_loader, accept):


def _get_test_data_loader(batch_size):
logger.info('training dir: {}'.format(os.listdir(training_dir)))
return torch.utils.data.DataLoader(
datasets.MNIST(training_dir, train=False, transform=transforms.Compose([
transforms.ToTensor(),
18 changes: 2 additions & 16 deletions test/integration/local/test_single_machine_training.py
@@ -10,26 +10,12 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import os
from test.utils import local_mode
import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

mnist_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'resources', 'mnist'))
data_dir = os.path.join(mnist_path, 'data')

training_dir = os.path.join(data_dir, 'training')

mnist_script = 'mnist.py'

ENTRYPOINT = ["python", "-m", "pytorch_container.start"]
from test.integration import data_dir, mnist_script


def test_mnist_cpu(docker_image, opt_ml, use_gpu):
local_mode.train(mnist_script, data_dir, docker_image, opt_ml,
source_dir=mnist_path, use_gpu=use_gpu, entrypoint=ENTRYPOINT)
local_mode.train(mnist_script, data_dir, docker_image, opt_ml, use_gpu=use_gpu)

assert local_mode.file_exists(opt_ml, 'model/model'), 'Model file was not created'
assert local_mode.file_exists(opt_ml, 'output/success'), 'Success file was not created'
12 changes: 12 additions & 0 deletions test/integration/sagemaker/__init__.py
@@ -0,0 +1,12 @@
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
19 changes: 19 additions & 0 deletions test/integration/sagemaker/conftest.py
@@ -0,0 +1,19 @@
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import pytest


@pytest.fixture(autouse=True)
def skip_by_device_type(request, tag):
if (request.node.get_marker('skip_gpu') and 'gpu' in tag) or (request.node.get_marker('skip_cpu') and 'cpu' in tag):
pytest.skip('Skipping because tag is: {}'.format(tag))
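
As a usage sketch for the autouse fixture above (hypothetical test, not part of this diff; skip_gpu and skip_cpu are custom marks):

import pytest

@pytest.mark.skip_gpu
def test_cpu_only_path():
    # With a GPU image tag such as '0.4.0-gpu-py3', skip_by_device_type
    # calls pytest.skip() before this test body runs.
    pass
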
27 changes: 27 additions & 0 deletions test/integration/sagemaker/estimator.py
@@ -0,0 +1,27 @@
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from sagemaker.pytorch.estimator import PyTorch


class PytorchTestEstimator(PyTorch):
def __init__(self, docker_image_uri, **kwargs):
super(PytorchTestEstimator, self).__init__(**kwargs)
self.docker_image_uri = docker_image_uri

def train_image(self):
return self.docker_image_uri

def create_model(self):
model = super(PytorchTestEstimator, self).create_model()
model.image = self.docker_image_uri
return model
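
As a usage sketch (not part of this diff), a SageMaker integ test might drive this estimator roughly as below. The IAM role name and S3 URI are placeholders, and the constructor and fit() arguments are assumed to follow the sagemaker SDK's PyTorch estimator API of this era.

from test.integration.sagemaker.estimator import PytorchTestEstimator

def run_mnist_training(ecr_image, sagemaker_session, instance_type):
    estimator = PytorchTestEstimator(docker_image_uri=ecr_image,
                                     entry_point='mnist.py',
                                     role='SageMakerRole',  # placeholder IAM role
                                     train_instance_count=1,
                                     train_instance_type=instance_type,
                                     sagemaker_session=sagemaker_session)
    # train_image() above ensures the training job pulls ecr_image.
    estimator.fit({'training': 's3://my-bucket/pytorch/mnist'})  # illustrative S3 channel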