Skip to content

Commit 466baca

Browse files
authored
Adding observability notebook (#96)
1 parent 8855224 commit 466baca

File tree

2 files changed

+111
-5
lines changed

2 files changed

+111
-5
lines changed
Lines changed: 95 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,95 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "b5f69c26-af46-4380-9df0-bea0f6e4963c",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"from sagemaker.hyperpod.utils import get_monitoring_config, is_observability_addon_enabled"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": 2,
16+
"id": "6a86998a-59dc-46e5-9aa7-33d5386b16f9",
17+
"metadata": {},
18+
"outputs": [],
19+
"source": [
20+
"monitor_config = get_monitoring_config()"
21+
]
22+
},
23+
{
24+
"cell_type": "code",
25+
"execution_count": 5,
26+
"id": "15fffc00-d8d0-44e2-98b4-005e654e3cf4",
27+
"metadata": {},
28+
"outputs": [
29+
{
30+
"data": {
31+
"text/plain": [
32+
"'https://g-b4a6fed76b.grafana-workspace.us-west-2.amazonaws.com/d/aws-sm-hp-observability-cluster-v1_0'"
33+
]
34+
},
35+
"execution_count": 5,
36+
"metadata": {},
37+
"output_type": "execute_result"
38+
}
39+
],
40+
"source": [
41+
"monitor_config.grafanaURL"
42+
]
43+
},
44+
{
45+
"cell_type": "code",
46+
"execution_count": 6,
47+
"id": "14c00535-7274-48ac-8c94-9fa10bbf2324",
48+
"metadata": {},
49+
"outputs": [
50+
{
51+
"data": {
52+
"text/plain": [
53+
"'https://aps-workspaces.us-west-2.amazonaws.com/workspaces/ws-e372a1c1-d909-45f8-9c0d-d0912095f4d6'"
54+
]
55+
},
56+
"execution_count": 6,
57+
"metadata": {},
58+
"output_type": "execute_result"
59+
}
60+
],
61+
"source": [
62+
"monitor_config.prometheusURL"
63+
]
64+
},
65+
{
66+
"cell_type": "code",
67+
"execution_count": null,
68+
"id": "42d930cb-7655-4409-99c9-e0240d4346a0",
69+
"metadata": {},
70+
"outputs": [],
71+
"source": []
72+
}
73+
],
74+
"metadata": {
75+
"kernelspec": {
76+
"display_name": "Python 3 (ipykernel)",
77+
"language": "python",
78+
"name": "python3"
79+
},
80+
"language_info": {
81+
"codemirror_mode": {
82+
"name": "ipython",
83+
"version": 3
84+
},
85+
"file_extension": ".py",
86+
"mimetype": "text/x-python",
87+
"name": "python",
88+
"nbconvert_exporter": "python",
89+
"pygments_lexer": "ipython3",
90+
"version": "3.12.5"
91+
}
92+
},
93+
"nbformat": 4,
94+
"nbformat_minor": 5
95+
}

src/sagemaker/hyperpod/utils.py

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -44,10 +44,21 @@ def get_monitoring_config() -> Optional[MonitoringConfig]:
44 44
response = boto3.client("eks").describe_addon(clusterName=eks_cluster_name, addonName=AMAZON_HYPERPOD_OBSERVABILITY)
45 45
config_values = yaml.safe_load(response['addon']['configurationValues'])
46 46

47-
prometheus_url = config_values['ampWorkspace']['prometheusEndpoint']
48-
region = get_hyperpod_cluster_region()
49-
workspace_arn = config_values['amgWorkspace']['arn']
50-
grafana_url = build_grafana_url(get_grafana_ws_name_from_arn(workspace_arn), region, GRAFANA_DASHBOARD_UID)
51-
metrics_data = config_values['metricsProvider']
47+
try:
48+
prometheus_url = config_values['ampWorkspace']['prometheusEndpoint']
49+
except KeyError:
50+
prometheus_url = None
51+
try:
52+
region = get_hyperpod_cluster_region()
53+
workspace_arn = config_values['amgWorkspace']['arn']
54+
grafana_url = build_grafana_url(
55+
get_grafana_ws_name_from_arn(workspace_arn) if workspace_arn else "default-workspace", region,
56+
GRAFANA_DASHBOARD_UID)
57+
except KeyError:
58+
grafana_url = None
59+
try:
60+
metrics_data = config_values['metricsProvider']
61+
except KeyError:
62+
metrics_data = None
52 63

53 64
return MonitoringConfig(grafanaURL=grafana_url, prometheusURL=prometheus_url, availableMetrics=metrics_data)

0 commit comments

Comments
 (0)