Skip to content

Commit 816932a

Browse files
authored
Fix paths in profiler report (#131)
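* Changed the profiler report output path from /opt/ml/processing/outputs/ to /opt/ml/processing/output/rule/ in the report notebook and the rule evaluation script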
1 parent fb0e9af commit 816932a

File tree

2 files changed

59 additions & 82 deletions

2 files changed

59 additions & 82 deletions

smdebug/profiler/analysis/rules/docker/profiler_report.ipynb

Lines changed: 53 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -136,7 +136,7 @@
136136
"source": [
137137
"def load_report(rule_name):\n",
138138
" try:\n",
139-
" report = json.load(open('/opt/ml/processing/outputs/profiler-reports/'+rule_name+'.json'))\n",
139+
" report = json.load(open('/opt/ml/processing/output/rule/profiler-reports/'+rule_name+'.json'))\n",
140140
" return report\n",
141141
" except FileNotFoundError:\n",
142142
" print (rule_name + ' not triggered')"
@@ -207,68 +207,58 @@
207207
},
208208
"outputs": [],
209209
"source": [
210-
"text = \"\"\"The following table gives a summary about the training job. The table includes information about when the training job started and ended, how much time initialization, training loop and finalization took.\"\"\"\n",
211-
"if len(job_statistics) > 0:\n",
212-
" df = pd.DataFrame.from_dict(job_statistics, orient='index')\n",
213-
" start_time = us_since_epoch_to_human_readable_time(report['Details']['job_start'] * 1000000)\n",
214-
" date = datetime.datetime.strptime(start_time, '%Y-%m-%dT%H:%M:%S:%f')\n",
215-
" day = date.date().strftime(\"%m/%d/%Y\")\n",
216-
" hour = date.time().strftime(\"%H:%M:%S\")\n",
217-
" duration = job_duration_in_seconds\n",
218-
" text = f\"\"\"{text} \\n Your training job started on {day} at {hour} and ran for {duration} seconds.\"\"\"\n",
219-
" \n",
220-
" #pretty_print(df)\n",
221-
" if \"first\" in report['Details'][\"step_num\"] and \"last\" in report['Details'][\"step_num\"]:\n",
222-
" if finalization_perc < 0:\n",
223-
" job_statistics[\"Finalization%\"] = 0\n",
224-
" if training_loop_perc < 0:\n",
225-
" job_statistics[\"Training loop\"] = 0\n",
226-
" if initialization_perc < 0:\n",
227-
" job_statistics[\"Initialization\"] = 0\n",
210+
"if report:\n",
211+
" text = \"\"\"The following table gives a summary about the training job. The table includes information about when the training job started and ended, how much time initialization, training loop and finalization took.\"\"\"\n",
212+
" if len(job_statistics) > 0:\n",
213+
" df = pd.DataFrame.from_dict(job_statistics, orient='index')\n",
214+
" start_time = us_since_epoch_to_human_readable_time(report['Details']['job_start'] * 1000000)\n",
215+
" date = datetime.datetime.strptime(start_time, '%Y-%m-%dT%H:%M:%S:%f')\n",
216+
" day = date.date().strftime(\"%m/%d/%Y\")\n",
217+
" hour = date.time().strftime(\"%H:%M:%S\")\n",
218+
" duration = job_duration_in_seconds\n",
219+
" text = f\"\"\"{text} \\n Your training job started on {day} at {hour} and ran for {duration} seconds.\"\"\"\n",
220+
"\n",
221+
" #pretty_print(df)\n",
222+
" if \"first\" in report['Details'][\"step_num\"] and \"last\" in report['Details'][\"step_num\"]:\n",
223+
" if finalization_perc < 0:\n",
224+
" job_statistics[\"Finalization%\"] = 0\n",
225+
" if training_loop_perc < 0:\n",
226+
" job_statistics[\"Training loop\"] = 0\n",
227+
" if initialization_perc < 0:\n",
228+
" job_statistics[\"Initialization\"] = 0\n",
229+
" else:\n",
230+
" text = f\"\"\"{text} \\n Your training job started on {day} at {hour} and ran for {duration} seconds.\"\"\"\n",
231+
" \n",
232+
" if len(job_statistics) > 0:\n",
233+
" df2 = df.reset_index()\n",
234+
" df2.columns = [\"0\", \"1\"]\n",
235+
" source = ColumnDataSource(data=df2)\n",
236+
" columns = [TableColumn(field='0', title=\"\"),\n",
237+
" TableColumn(field='1', title=\"Job Statistics\"),]\n",
238+
" table = DataTable(source=source, columns=columns, width=450, height=380)\n",
239+
"\n",
240+
" plot = None\n",
241+
"\n",
242+
" if \"Initialization\" in job_statistics:\n",
243+
" piechart_data = {}\n",
244+
" piechart_data[\"Initialization\"] = initialization_perc \n",
245+
" piechart_data[\"Training loop\"] = training_loop_perc\n",
246+
" piechart_data[\"Finalization\"] = finalization_perc \n",
247+
"\n",
248+
" plot = create_piechart(piechart_data, \n",
249+
" height=350,\n",
250+
" width=500,\n",
251+
" x1=0.15,\n",
252+
" x2=0.15,\n",
253+
" radius=0.15, \n",
254+
" toolbar_location=None)\n",
255+
"\n",
256+
" if plot != None:\n",
257+
" paragraph = Paragraph(text=f\"\"\"{text}\"\"\", width = 800)\n",
258+
" show(column(paragraph, row(table, plot)))\n",
228259
" else:\n",
229-
" text = f\"\"\"{text} \\n Your training job started on {day} at {hour} and ran for {duration} seconds.\"\"\"\n"
230-
]
231-
},
232-
{
233-
"cell_type": "code",
234-
"execution_count": null,
235-
"metadata": {
236-
"tags": [
237-
"hide-input"
238-
]
239-
},
240-
"outputs": [],
241-
"source": [
242-
"if len(job_statistics) > 0:\n",
243-
" df2 = df.reset_index()\n",
244-
" df2.columns = [\"0\", \"1\"]\n",
245-
" source = ColumnDataSource(data=df2)\n",
246-
" columns = [TableColumn(field='0', title=\"\"),\n",
247-
" TableColumn(field='1', title=\"Job Statistics\"),]\n",
248-
" table = DataTable(source=source, columns=columns, width=450, height=380)\n",
249-
"\n",
250-
"plot = None\n",
251-
" \n",
252-
"if \"Initialization\" in job_statistics:\n",
253-
" piechart_data = {}\n",
254-
" piechart_data[\"Initialization\"] = initialization_perc \n",
255-
" piechart_data[\"Training loop\"] = training_loop_perc\n",
256-
" piechart_data[\"Finalization\"] = finalization_perc \n",
257-
"\n",
258-
" plot = create_piechart(piechart_data, \n",
259-
" height=350,\n",
260-
" width=500,\n",
261-
" x1=0.15,\n",
262-
" x2=0.15,\n",
263-
" radius=0.15, \n",
264-
" toolbar_location=None)\n",
265-
"\n",
266-
"if plot != None:\n",
267-
" paragraph = Paragraph(text=f\"\"\"{text}\"\"\", width = 800)\n",
268-
" show(column(paragraph, row(table, plot)))\n",
269-
"else:\n",
270-
" paragraph = Paragraph(text=f\"\"\"{text}. No step information was profiled from your training job. The time spent on initialization and finalization cannot be computed.\"\"\" , width = 800)\n",
271-
" show(column(paragraph, row(table)))"
260+
" paragraph = Paragraph(text=f\"\"\"{text}. No step information was profiled from your training job. The time spent on initialization and finalization cannot be computed.\"\"\" , width = 800)\n",
261+
" show(column(paragraph, row(table)))"
272262
]
273263
},
274264
{
@@ -372,22 +362,6 @@
372362
" show(column( text1, text2, row(table)))"
373363
]
374364
},
375-
{
376-
"cell_type": "code",
377-
"execution_count": null,
378-
"metadata": {
379-
"tags": [
380-
"hide-input"
381-
]
382-
},
383-
"outputs": [],
384-
"source": [
385-
"def display_image(image_name):\n",
386-
" files = glob.glob('/opt/ml/processing/outputs/profiler-reports/' + image_name)\n",
387-
" for filename in files:\n",
388-
" display(Image(filename=filename))"
389-
]
390-
},
391365
{
392366
"cell_type": "code",
393367
"execution_count": null,
@@ -696,7 +670,7 @@
696670
},
697671
"outputs": [],
698672
"source": [
699-
"files = glob.glob('/opt/ml/processing/outputs/profiler-reports/*json')\n",
673+
"files = glob.glob('/opt/ml/processing/output/rule/profiler-reports/*json')\n",
700674
"summary = {}\n",
701675
"for i in files:\n",
702676
" rule_name = i.split('/')[-1].replace('.json','')\n",

smdebug/profiler/analysis/rules/docker/rule_evaluation.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,8 @@
2222
from smdebug.rules.rule_invoker import invoke_rule
2323
from smdebug.trials import create_trial
2424

25-
os.mkdir("/opt/ml/processing/outputs/profiler-reports/")
25+
os.mkdir("/opt/ml/processing/output/")
26+
os.mkdir("/opt/ml/processing/output/rule/")
2627
os.mkdir("/opt/ml/processing/outputs/.sagemaker-ignore")
2728
os.mkdir("/opt/ml/code/")
2829
os.system("cp profiler_report.ipynb /opt/ml/code/")
@@ -42,7 +43,7 @@ def run_rule(rule_obj):
4243

4344
# path to profiler data
4445
profiler_path = os.environ["S3_PATH"]
45-
trial = create_trial(profiler_path, profiler=True)
46+
trial = create_trial(profiler_path, profiler=True, output_dir="/opt/ml/processing/output/rule")
4647

4748
if "TRIGGER_ALL" in os.environ:
4849
# create list of rules
@@ -93,6 +94,8 @@ def run_rule(rule_obj):
9394
trial,
9495
create_html=True,
9596
nb_full_path="profiler_report.ipynb",
96-
output_full_path="/opt/ml/processing/outputs/profiler-report.ipynb",
97+
output_full_path="/opt/ml/processing/output/rule/profiler-report.ipynb",
9798
)
9899
rule._plot_visualization(last_found_step=0)
100+
101+
os.system("cp -r /opt/ml/processing/output/rule/profiler-reports/ /opt/ml/processing/outputs/")

0 commit comments

Comments
 (0)