Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 34 additions & 9 deletions minio/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2832,6 +2832,14 @@ def _complete_multipart_upload(
tag = SubElement(element, "Part")
SubElement(tag, "PartNumber", str(part.part_number))
SubElement(tag, "ETag", '"' + part.etag + '"')
if part.checksum_crc32:
SubElement(tag, "ChecksumCRC32", part.checksum_crc32)
elif part.checksum_crc32c:
SubElement(tag, "ChecksumCRC32C", part.checksum_crc32c)
elif part.checksum_sha1:
SubElement(tag, "ChecksumSHA1", part.checksum_sha1)
elif part.checksum_sha256:
SubElement(tag, "ChecksumSHA256", part.checksum_sha256)
body = getbytes(element)
headers = HTTPHeaderDict(
{
Expand Down Expand Up @@ -2921,7 +2929,7 @@ def _upload_part(
region: Optional[str] = None,
extra_headers: Optional[HTTPHeaderDict] = None,
extra_query_params: Optional[HTTPQueryDict] = None,
) -> str:
) -> ObjectWriteResult:
"""Execute UploadPart S3 API."""
query_params = HTTPQueryDict({
"partNumber": str(part_number),
Expand All @@ -2937,7 +2945,7 @@ def _upload_part(
extra_headers=extra_headers,
extra_query_params=extra_query_params,
)
return cast(str, result.etag)
return result

def _upload_part_task(self, kwargs):
"""Upload_part task for ThreadPool."""
Expand Down Expand Up @@ -3221,7 +3229,10 @@ def put_object(
)

if not upload_id:
headers.extend(checksum_headers)
headers.extend(make_headers(
hashers, add_content_sha256, add_sha256_checksum,
algorithm_only=True,
))
upload_id = self._create_multipart_upload(
bucket_name=bucket_name,
object_name=object_name,
Expand Down Expand Up @@ -3251,22 +3262,36 @@ def put_object(
self._upload_part_task, kwargs,
)
else:
etag = self._upload_part(
result = self._upload_part(
bucket_name=bucket_name,
object_name=object_name,
data=part_data,
headers=headers,
upload_id=upload_id,
part_number=part_number,
)
parts.append(Part(part_number, etag))
parts.append(Part(
part_number=part_number,
etag=result.etag,
checksum_crc32=result.checksum_crc32,
checksum_crc32c=result.checksum_crc32c,
checksum_sha1=result.checksum_sha1,
checksum_sha256=result.checksum_sha256,
))

if pool:
result = pool.result()
result_queue = pool.result()
parts = [Part(0, "")] * part_count
while not result.empty():
part_number, etag = result.get()
parts[part_number - 1] = Part(part_number, etag)
while not result_queue.empty():
part_number, upload_result = result_queue.get()
parts[part_number - 1] = Part(
part_number=part_number,
etag=upload_result.etag,
checksum_crc32=upload_result.checksum_crc32,
checksum_crc32c=upload_result.checksum_crc32c,
checksum_sha1=upload_result.checksum_sha1,
checksum_sha256=upload_result.checksum_sha256,
)

upload_result = self._complete_multipart_upload(
bucket_name=bucket_name,
Expand Down
16 changes: 13 additions & 3 deletions minio/checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,9 +402,18 @@ def reset_hashers(hashers: Optional[Dict[Algorithm, "Hasher"]]):
def make_headers(
hashers: Optional[Dict[Algorithm, "Hasher"]],
add_content_sha256: bool,
add_sha256_checksum: bool
add_sha256_checksum: bool,
algorithm_only: bool = False
) -> Dict[str, str]:
"""Makes headers for hashers."""
"""Makes headers for hashers.

Args:
hashers: Dictionary of algorithm to hasher instances
add_content_sha256: Whether to add x-amz-content-sha256 header
add_sha256_checksum: Whether to add SHA256 checksum header
algorithm_only: If True, only include algorithm declaration header,
not checksum value headers
"""
headers = {}
if hashers:
for algo, hasher in hashers.items():
Expand All @@ -415,5 +424,6 @@ def make_headers(
if not add_sha256_checksum:
continue
headers["x-amz-sdk-checksum-algorithm"] = str(algo)
headers[algo.header()] = base64_string(sum_bytes)
if not algorithm_only:
headers[algo.header()] = base64_string(sum_bytes)
return headers
4 changes: 4 additions & 0 deletions minio/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,10 @@ class Part:
etag: str
last_modified: Optional[datetime] = None
size: Optional[int] = None
checksum_crc32: Optional[str] = None
checksum_crc32c: Optional[str] = None
checksum_sha1: Optional[str] = None
checksum_sha256: Optional[str] = None

@classmethod
def fromxml(cls: Type[C], element: ET.Element) -> C:
Expand Down
110 changes: 110 additions & 0 deletions tests/functional/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from urllib3._collections import HTTPHeaderDict

from minio import Minio
from minio.checksum import Algorithm
from minio.commonconfig import ENABLED, REPLACE, CopySource, SnowballObject
from minio.datatypes import PostPolicy
from minio.deleteobjects import DeleteObject
Expand Down Expand Up @@ -908,6 +909,114 @@ def test_negative_put_object_with_path_segment( # pylint: disable=invalid-name
_client.remove_bucket(bucket_name=bucket_name)


def test_put_object_multipart_with_checksum(  # pylint: disable=invalid-name
        log_entry):
    """Test put_object() multipart upload with checksum validation.

    This test targets the AWS S3 compliant checksum flow for multipart
    uploads:
    - CreateMultipartUpload receives algorithm header only (not values)
    - UploadPart includes checksum value headers
    - CompleteMultipartUpload includes checksums in XML body

    The assertions made here are end-to-end: for both CRC32C and SHA256
    the upload must return a non-empty ETag containing a dash, and
    stat_object() must report the uploaded length.
    """

    # Get a unique bucket_name and object_name
    bucket_name = _gen_bucket_name()
    object_name = f"{uuid4()}-checksum"
    # Stays None until the SHA256 phase starts, so cleanup can tell whether
    # a second object may exist.
    object_name_sha256 = None
    # Use 6 MB to trigger multipart upload (> 5 MB threshold)
    length = 6 * MB

    log_entry["args"] = {
        "bucket_name": bucket_name,
        "object_name": object_name,
        "length": length,
        "data": "LimitedRandomReader(6 * MB)",
        "checksum": "Algorithm.CRC32C",
    }

    try:
        _client.make_bucket(bucket_name=bucket_name)

        # Upload with CRC32C checksum - triggers multipart upload
        reader = LimitedRandomReader(length)
        result = _client.put_object(
            bucket_name=bucket_name,
            object_name=object_name,
            data=reader,
            length=length,
            checksum=Algorithm.CRC32C,
        )

        # Verify upload succeeded and returned valid result
        if not result.etag:
            raise ValueError("Upload did not return valid ETag")

        # Verify ETag indicates multipart upload (contains dash and part
        # count)
        if '-' not in result.etag:
            raise ValueError(
                f"Expected multipart ETag (with dash), got: {result.etag}")

        # Stat the object to verify it exists and has correct size
        st_obj = _client.stat_object(
            bucket_name=bucket_name,
            object_name=object_name,
        )

        if st_obj.size != length:
            raise ValueError(
                f"Size mismatch: expected {length}, got {st_obj.size}")

        # Test with SHA256 checksum algorithm
        object_name_sha256 = f"{uuid4()}-checksum-sha256"
        log_entry["args"]["object_name"] = object_name_sha256
        log_entry["args"]["checksum"] = "Algorithm.SHA256"

        reader = LimitedRandomReader(length)
        result = _client.put_object(
            bucket_name=bucket_name,
            object_name=object_name_sha256,
            data=reader,
            length=length,
            checksum=Algorithm.SHA256,
        )

        if not result.etag:
            raise ValueError("Upload with SHA256 did not return valid ETag")

        if '-' not in result.etag:
            raise ValueError(
                f"Expected multipart ETag for SHA256, got: {result.etag}")

        st_obj = _client.stat_object(
            bucket_name=bucket_name,
            object_name=object_name_sha256,
        )

        if st_obj.size != length:
            raise ValueError(
                f"Size mismatch: expected {length}, got {st_obj.size}")

    finally:
        # Best-effort cleanup: a failure here must not mask the outcome of
        # the test body.  Catch Exception rather than using a bare except so
        # KeyboardInterrupt/SystemExit still abort the run.
        for name in (object_name, object_name_sha256):
            if not name:
                # The SHA256 phase never started; nothing to remove.
                continue
            try:
                _client.remove_object(
                    bucket_name=bucket_name, object_name=name)
            except Exception:  # pylint: disable=broad-except
                pass
        try:
            _client.remove_bucket(bucket_name=bucket_name)
        except Exception:  # pylint: disable=broad-except
            pass


def _test_stat_object(log_entry, sse=None, version_check=False):
"""Test stat_object()."""

Expand Down Expand Up @@ -2393,6 +2502,7 @@ def main():
test_copy_object_unmodified_since: None,
test_put_object: {"sse": ssec} if ssec else None,
test_negative_put_object_with_path_segment: None,
test_put_object_multipart_with_checksum: None,
test_stat_object: {"sse": ssec} if ssec else None,
test_stat_object_version: {"sse": ssec} if ssec else None,
test_get_object: {"sse": ssec} if ssec else None,
Expand Down