|
| 1 | +import time |
| 2 | +import uuid |
| 3 | +import pytest |
| 4 | +import boto3 |
| 5 | +from click.testing import CliRunner |
| 6 | +from sagemaker.hyperpod.cli.commands.inference import ( |
| 7 | + custom_create, |
| 8 | + custom_invoke, |
| 9 | + custom_list, |
| 10 | + custom_describe, |
| 11 | + custom_delete, |
| 12 | + custom_get_operator_logs, |
| 13 | + custom_list_pods |
| 14 | +) |
| 15 | +from sagemaker.hyperpod.inference.hp_endpoint import HPEndpoint |
| 16 | + |
# --------- Test Configuration ---------
NAMESPACE = "integration"        # Kubernetes namespace targeted by every CLI call
VERSION = "1.0"                  # HyperPod inference API version passed to custom_create
REGION = "us-east-2"             # AWS region for the boto3 client and the S3 model source
TIMEOUT_MINUTES = 15             # Maximum time to wait for the endpoint to become ready
POLL_INTERVAL_SECONDS = 30       # Delay between successive status polls
| 23 | + |
@pytest.fixture(scope="module")
def runner():
    """Provide one shared Click CLI runner for every test in this module."""
    cli_runner = CliRunner()
    return cli_runner
| 27 | + |
@pytest.fixture(scope="module")
def custom_endpoint_name():
    """Return a unique endpoint name for this test run.

    The original returned a placeholder-free f-string (a constant), so a
    re-run could collide with a leftover endpoint from an earlier, possibly
    failed, run. A short uuid suffix (the `uuid` import at the top of the
    file was previously unused) makes each run's endpoint name unique.
    """
    return f"custom-cli-integration-{uuid.uuid4().hex[:8]}"
| 31 | + |
@pytest.fixture(scope="module")
def sagemaker_client():
    """Module-scoped SageMaker client pinned to the test region."""
    client = boto3.client("sagemaker", region_name=REGION)
    return client
| 35 | + |
| 36 | +# --------- Custom Endpoint Tests --------- |
| 37 | + |
def test_custom_create(runner, custom_endpoint_name):
    """Create a custom endpoint through the CLI and expect a clean exit."""
    dimensions_json = f'{{"EndpointName": "{custom_endpoint_name}", "VariantName": "AllTraffic"}}'
    create_args = [
        "--namespace", NAMESPACE,
        "--version", VERSION,
        "--instance-type", "ml.g5.8xlarge",
        "--model-name", "test-model-integration",
        "--model-source-type", "s3",
        "--model-location", "deepseek15b",
        "--s3-bucket-name", "test-model-s3-zhaoqi",
        "--s3-region", REGION,
        "--image-uri", "763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.4.0-tgi2.3.1-gpu-py311-cu124-ubuntu22.04-v2.0",
        "--container-port", "8080",
        "--model-volume-mount-name", "model-weights",
        "--endpoint-name", custom_endpoint_name,
        "--resources-requests", '{"cpu": "30000m", "nvidia.com/gpu": 1, "memory": "100Gi"}',
        "--resources-limits", '{"nvidia.com/gpu": 1}',
        "--tls-certificate-output-s3-uri", "s3://tls-bucket-inf1-beta2",
        "--metrics-enabled", "true",
        "--metric-collection-period", "30",
        "--metric-name", "Invocations",
        "--metric-stat", "Sum",
        "--metric-type", "Average",
        "--min-value", "0.0",
        "--cloud-watch-trigger-name", "SageMaker-Invocations-new",
        "--cloud-watch-trigger-namespace", "AWS/SageMaker",
        "--target-value", "10",
        "--use-cached-metrics", "true",
        "--dimensions", dimensions_json,
        "--env", '{ "HF_MODEL_ID": "/opt/ml/model", "SAGEMAKER_PROGRAM": "inference.py", "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code", "MODEL_CACHE_ROOT": "/opt/ml/model", "SAGEMAKER_ENV": "1" }',
    ]
    result = runner.invoke(custom_create, create_args)
    assert result.exit_code == 0, result.output
| 70 | + |
| 71 | + |
def test_custom_list(runner, custom_endpoint_name):
    """List endpoints in the namespace; the created endpoint must appear."""
    outcome = runner.invoke(custom_list, ["--namespace", NAMESPACE])
    assert outcome.exit_code == 0
    assert custom_endpoint_name in outcome.output
| 76 | + |
| 77 | + |
def test_custom_describe(runner, custom_endpoint_name):
    """Describe the endpoint with --full and verify its name is echoed back."""
    describe_args = [
        "--name", custom_endpoint_name,
        "--namespace", NAMESPACE,
        "--full",
    ]
    outcome = runner.invoke(custom_describe, describe_args)
    assert outcome.exit_code == 0
    assert custom_endpoint_name in outcome.output
| 86 | + |
| 87 | + |
def test_wait_until_inservice(custom_endpoint_name):
    """Poll the SDK until the custom endpoint reaches CreationCompleted.

    Fixes misleading messages: the original docstring and logs referred to a
    "JumpStart" endpoint and a "DeploymentComplete" state, but this module
    exercises a *custom* endpoint and the success condition actually checked
    is state == "CreationCompleted".

    Fails fast if the deployment reports DeploymentFailed, and fails with a
    timeout message if the endpoint is not ready within TIMEOUT_MINUTES.
    """
    print(f"[INFO] Waiting for custom endpoint '{custom_endpoint_name}' to reach CreationCompleted...")
    deadline = time.time() + (TIMEOUT_MINUTES * 60)
    poll_count = 0

    while time.time() < deadline:
        poll_count += 1
        print(f"[DEBUG] Poll #{poll_count}: Checking endpoint status...")

        try:
            ep = HPEndpoint.get(name=custom_endpoint_name, namespace=NAMESPACE)
            state = ep.status.endpoints.sagemaker.state
            print(f"[DEBUG] Current state: {state}")
            if state == "CreationCompleted":
                print("[INFO] Endpoint is in CreationCompleted state.")
                return

            deployment_state = ep.status.deploymentStatus.deploymentObjectOverallState
            if deployment_state == "DeploymentFailed":
                # pytest.fail raises an outcome derived from BaseException,
                # so it is NOT swallowed by the except Exception below.
                pytest.fail("Endpoint deployment failed.")

        except Exception as e:
            # Status sub-objects may be absent while the endpoint is still
            # initializing; log and keep polling rather than aborting.
            print(f"[ERROR] Exception during polling: {e}")

        time.sleep(POLL_INTERVAL_SECONDS)

    pytest.fail("[ERROR] Timed out waiting for endpoint to reach CreationCompleted")
| 116 | + |
| 117 | + |
def test_custom_invoke(runner, custom_endpoint_name):
    """Invoke the endpoint and confirm the response carries no error text."""
    payload = '{"inputs": "What is the capital of USA?"}'
    outcome = runner.invoke(
        custom_invoke,
        ["--endpoint-name", custom_endpoint_name, "--body", payload],
    )
    assert outcome.exit_code == 0
    assert "error" not in outcome.output.lower()
| 125 | + |
| 126 | + |
def test_custom_get_operator_logs(runner):
    """Fetch the last hour of inference-operator logs without error."""
    outcome = runner.invoke(custom_get_operator_logs, ["--since-hours", "1"])
    assert outcome.exit_code == 0
| 130 | + |
| 131 | + |
def test_custom_list_pods(runner):
    """List inference pods in the test namespace without error."""
    outcome = runner.invoke(custom_list_pods, ["--namespace", NAMESPACE])
    assert outcome.exit_code == 0
| 135 | + |
| 136 | + |
def test_custom_delete(runner, custom_endpoint_name):
    """Tear down the endpoint created earlier in this module."""
    delete_args = ["--name", custom_endpoint_name, "--namespace", NAMESPACE]
    outcome = runner.invoke(custom_delete, delete_args)
    assert outcome.exit_code == 0
0 commit comments