Skip to content

Commit 89d677a

Browse files
authored
S3 upload fast (#122)
* using s3 transfer to upload files in s3
1 parent 2433316 commit 89d677a

File tree

1 file changed

+27
-8
lines changed
  • smdebug/core/access_layer

1 file changed

+27
-8
lines changed

smdebug/core/access_layer/s3.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
# Standard Library
2+
import io
23
import os
34
import re
5+
import tempfile
46

57
# Third Party
68
import boto3
9+
from boto3.s3.transfer import TransferConfig
710

811
# First Party
912
from smdebug.core.access_layer.base import TSAccessBase
@@ -28,6 +31,10 @@ def __init__(
2831
self.s3 = boto3.resource("s3", region_name=get_region())
2932
self.s3_client = boto3.client("s3", region_name=get_region())
3033

34+
# Set the desired multipart threshold value (5 MB): larger uploads are split into multiple parts
35+
MB = 1024 ** 2
36+
self.transfer_config = TransferConfig(multipart_threshold=5 * MB)
37+
3138
# check if the bucket exists
3239
buckets = [bucket["Name"] for bucket in self.s3_client.list_buckets()["Buckets"]]
3340
if self.bucket_name not in buckets:
@@ -39,26 +46,38 @@ def _init_data(self):
3946
else:
4047
self.data = ""
4148

42-
def _init_data(self):
43-
if self.binary:
44-
self.data = bytearray()
45-
else:
46-
self.data = ""
47-
4849
def open(self, bucket_name, mode):
4950
raise NotImplementedError
5051

5152
def write(self, _data):
5253
start = len(self.data)
54+
5355
self.data += _data
5456
length = len(_data)
5557
return [start, length]
5658

5759
def close(self):
5860
if self.flushed:
5961
return
60-
key = self.s3.Object(self.bucket_name, self.key_name)
61-
key.put(Body=self.data)
62+
if self.binary:
63+
self.logger.debug(
64+
f"Sagemaker-Debugger: Writing binary data to s3://{os.path.join(self.bucket_name, self.key_name)}"
65+
)
66+
self.s3_client.upload_fileobj(
67+
io.BytesIO(self.data), self.bucket_name, self.key_name, Config=self.transfer_config
68+
)
69+
else:
70+
f = tempfile.NamedTemporaryFile(mode="w+")
71+
self.logger.debug(
72+
f"Sagemaker-Debugger: Writing string data to s3://{os.path.join(self.bucket_name, self.key_name)}"
73+
)
74+
75+
f.write(self.data)
76+
f.flush()
77+
self.s3_client.upload_file(
78+
f.name, self.bucket_name, self.key_name, Config=self.transfer_config
79+
)
80+
6281
self.logger.debug(
6382
f"Sagemaker-Debugger: Wrote {len(self.data)} bytes to file "
6483
f"s3://{os.path.join(self.bucket_name, self.key_name)}"

0 commit comments

Comments
 (0)