11# Standard Library
2+ import io
23import os
34import re
5+ import tempfile
46
57# Third Party
68import boto3
9+ from boto3 .s3 .transfer import TransferConfig
710
811# First Party
912from smdebug .core .access_layer .base import TSAccessBase
@@ -28,6 +31,10 @@ def __init__(
2831 self .s3 = boto3 .resource ("s3" , region_name = get_region ())
2932 self .s3_client = boto3 .client ("s3" , region_name = get_region ())
3033
34+ # Set the desired multipart threshold value (5MB)
35+ MB = 1024 ** 2
36+ self .transfer_config = TransferConfig (multipart_threshold = 5 * MB )
37+
3138 # check if the bucket exists
3239 buckets = [bucket ["Name" ] for bucket in self .s3_client .list_buckets ()["Buckets" ]]
3340 if self .bucket_name not in buckets :
@@ -39,26 +46,38 @@ def _init_data(self):
3946 else :
4047 self .data = ""
4148
42- def _init_data (self ):
43- if self .binary :
44- self .data = bytearray ()
45- else :
46- self .data = ""
47-
def open(self, bucket_name, mode):
    """Unsupported operation for this writer.

    Raises:
        NotImplementedError: always, regardless of ``bucket_name`` or ``mode``.
    """
    raise NotImplementedError()
5051
def write(self, _data):
    """Append ``_data`` to the in-memory buffer ``self.data``.

    Args:
        _data: the chunk to append; it must support ``+=`` with the current
            ``self.data`` value and ``len()``.

    Returns:
        A two-element list ``[start, length]`` where ``start`` is the length
        of ``self.data`` before the append and ``length`` is ``len(_data)``.
    """
    # Record the offset at which this chunk begins before growing the buffer.
    offset = len(self.data)
    self.data += _data
    return [offset, len(_data)]
5658
5759 def close (self ):
5860 if self .flushed :
5961 return
60- key = self .s3 .Object (self .bucket_name , self .key_name )
61- key .put (Body = self .data )
62+ if self .binary :
63+ self .logger .debug (
64+ f"Sagemaker-Debugger: Writing binary data to s3://{ os .path .join (self .bucket_name , self .key_name )} "
65+ )
66+ self .s3_client .upload_fileobj (
67+ io .BytesIO (self .data ), self .bucket_name , self .key_name , Config = self .transfer_config
68+ )
69+ else :
70+ f = tempfile .NamedTemporaryFile (mode = "w+" )
71+ self .logger .debug (
72+ f"Sagemaker-Debugger: Writing string data to s3://{ os .path .join (self .bucket_name , self .key_name )} "
73+ )
74+
75+ f .write (self .data )
76+ f .flush ()
77+ self .s3_client .upload_file (
78+ f .name , self .bucket_name , self .key_name , Config = self .transfer_config
79+ )
80+
6281 self .logger .debug (
6382 f"Sagemaker-Debugger: Wrote { len (self .data )} bytes to file "
6483 f"s3://{ os .path .join (self .bucket_name , self .key_name )} "
0 commit comments