diff --git a/minio/api.py b/minio/api.py index 4c583140..fbc9d557 100644 --- a/minio/api.py +++ b/minio/api.py @@ -2832,6 +2832,14 @@ def _complete_multipart_upload( tag = SubElement(element, "Part") SubElement(tag, "PartNumber", str(part.part_number)) SubElement(tag, "ETag", '"' + part.etag + '"') + if part.checksum_crc32: + SubElement(tag, "ChecksumCRC32", part.checksum_crc32) + elif part.checksum_crc32c: + SubElement(tag, "ChecksumCRC32C", part.checksum_crc32c) + elif part.checksum_sha1: + SubElement(tag, "ChecksumSHA1", part.checksum_sha1) + elif part.checksum_sha256: + SubElement(tag, "ChecksumSHA256", part.checksum_sha256) body = getbytes(element) headers = HTTPHeaderDict( { @@ -2921,7 +2929,7 @@ def _upload_part( region: Optional[str] = None, extra_headers: Optional[HTTPHeaderDict] = None, extra_query_params: Optional[HTTPQueryDict] = None, - ) -> str: + ) -> ObjectWriteResult: """Execute UploadPart S3 API.""" query_params = HTTPQueryDict({ "partNumber": str(part_number), @@ -2937,7 +2945,7 @@ def _upload_part( extra_headers=extra_headers, extra_query_params=extra_query_params, ) - return cast(str, result.etag) + return result def _upload_part_task(self, kwargs): """Upload_part task for ThreadPool.""" @@ -3221,7 +3229,10 @@ def put_object( ) if not upload_id: - headers.extend(checksum_headers) + headers.extend(make_headers( + hashers, add_content_sha256, add_sha256_checksum, + algorithm_only=True, + )) upload_id = self._create_multipart_upload( bucket_name=bucket_name, object_name=object_name, @@ -3251,7 +3262,7 @@ def put_object( self._upload_part_task, kwargs, ) else: - etag = self._upload_part( + result = self._upload_part( bucket_name=bucket_name, object_name=object_name, data=part_data, @@ -3259,20 +3270,37 @@ def put_object( upload_id=upload_id, part_number=part_number, ) - parts.append(Part(part_number, etag)) + parts.append(Part( + part_number=part_number, + etag=result.etag, + checksum_crc32=result.checksum_crc32, + checksum_crc32c=result.checksum_crc32c, + checksum_sha1=result.checksum_sha1, + checksum_sha256=result.checksum_sha256, + )) if pool: - result = pool.result() + result_queue = pool.result() parts = [Part(0, "")] * part_count - while not result.empty(): - part_number, etag = result.get() - parts[part_number - 1] = Part(part_number, etag) + while not result_queue.empty(): + part_number, upload_result = result_queue.get() + parts[part_number - 1] = Part( + part_number=part_number, + etag=upload_result.etag, + checksum_crc32=upload_result.checksum_crc32, + checksum_crc32c=upload_result.checksum_crc32c, + checksum_sha1=upload_result.checksum_sha1, + checksum_sha256=upload_result.checksum_sha256, + ) upload_result = self._complete_multipart_upload( bucket_name=bucket_name, object_name=object_name, upload_id=cast(str, upload_id), parts=parts, + extra_headers=HTTPHeaderDict( + sse.headers() if isinstance(sse, SseCustomerKey) else None + ), ) return ObjectWriteResult.new( headers=upload_result.headers, diff --git a/minio/checksum.py b/minio/checksum.py index 94af7e1e..e86b33fb 100644 --- a/minio/checksum.py +++ b/minio/checksum.py @@ -402,9 +402,18 @@ def reset_hashers(hashers: Optional[Dict[Algorithm, "Hasher"]]): def make_headers( hashers: Optional[Dict[Algorithm, "Hasher"]], add_content_sha256: bool, - add_sha256_checksum: bool + add_sha256_checksum: bool, + algorithm_only: bool = False ) -> Dict[str, str]: - """Makes headers for hashers.""" + """Makes headers for hashers. + + Args: + hashers: Dictionary of algorithm to hasher instances + add_content_sha256: Whether to add x-amz-content-sha256 header + add_sha256_checksum: Whether to add SHA256 checksum header + algorithm_only: If True, only include algorithm declaration header, + not checksum value headers + """ headers = {} if hashers: for algo, hasher in hashers.items(): @@ -415,5 +424,6 @@ def make_headers( if not add_sha256_checksum: continue headers["x-amz-sdk-checksum-algorithm"] = str(algo) - headers[algo.header()] = base64_string(sum_bytes) + if not algorithm_only: + headers[algo.header()] = base64_string(sum_bytes) return headers diff --git a/minio/datatypes.py b/minio/datatypes.py index 2e645d37..6684df0f 100644 --- a/minio/datatypes.py +++ b/minio/datatypes.py @@ -284,6 +284,10 @@ class Part: etag: str last_modified: Optional[datetime] = None size: Optional[int] = None + checksum_crc32: Optional[str] = None + checksum_crc32c: Optional[str] = None + checksum_sha1: Optional[str] = None + checksum_sha256: Optional[str] = None @classmethod def fromxml(cls: Type[C], element: ET.Element) -> C: diff --git a/tests/functional/tests.py b/tests/functional/tests.py index 8d7f667c..8e5456df 100644 --- a/tests/functional/tests.py +++ b/tests/functional/tests.py @@ -42,6 +42,7 @@ from urllib3._collections import HTTPHeaderDict from minio import Minio +from minio.checksum import Algorithm from minio.commonconfig import ENABLED, REPLACE, CopySource, SnowballObject from minio.datatypes import PostPolicy from minio.deleteobjects import DeleteObject @@ -908,6 +909,114 @@ def test_negative_put_object_with_path_segment( # pylint: disable=invalid-name _client.remove_bucket(bucket_name=bucket_name) +def test_put_object_multipart_with_checksum( # pylint: disable=invalid-name + log_entry): + """Test put_object() multipart upload with checksum validation. + + This test validates the AWS S3 compliant checksum implementation for + multipart uploads: + - CreateMultipartUpload receives algorithm header only (not values) + - UploadPart includes checksum value headers + - CompleteMultipartUpload includes checksums in XML body + """ + + # Get a unique bucket_name and object_name + bucket_name = _gen_bucket_name() + object_name = f"{uuid4()}-checksum" + object_name_sha256 = None # Initialize for cleanup + # Use 6 MB to trigger multipart upload (> 5 MB threshold) + length = 6 * MB + + log_entry["args"] = { + "bucket_name": bucket_name, + "object_name": object_name, + "length": length, + "data": "LimitedRandomReader(6 * MB)", + "checksum": "Algorithm.CRC32C", + } + + try: + _client.make_bucket(bucket_name=bucket_name) + + # Upload with CRC32C checksum - triggers multipart upload + reader = LimitedRandomReader(length) + result = _client.put_object( + bucket_name=bucket_name, + object_name=object_name, + data=reader, + length=length, + checksum=Algorithm.CRC32C, + ) + + # Verify upload succeeded and returned valid result + if not result.etag: + raise ValueError("Upload did not return valid ETag") + + # Verify ETag indicates multipart upload (contains dash and part count) + if '-' not in result.etag: + raise ValueError( + f"Expected multipart ETag (with dash), got: {result.etag}") + + # Stat the object to verify it exists and has correct size + st_obj = _client.stat_object( + bucket_name=bucket_name, + object_name=object_name, + ) + + if st_obj.size != length: + raise ValueError( + f"Size mismatch: expected {length}, got {st_obj.size}") + + # Test with SHA256 checksum algorithm + object_name_sha256 = f"{uuid4()}-checksum-sha256" + log_entry["args"]["object_name"] = object_name_sha256 + log_entry["args"]["checksum"] = "Algorithm.SHA256" + + reader = LimitedRandomReader(length) + result = _client.put_object( + bucket_name=bucket_name, + object_name=object_name_sha256, + data=reader, + length=length, + checksum=Algorithm.SHA256, + ) + + if not result.etag: + raise ValueError("Upload with SHA256 did not return valid ETag") + + if '-' not in result.etag: + raise ValueError( + f"Expected multipart ETag for SHA256, got: {result.etag}") + + st_obj = _client.stat_object( + bucket_name=bucket_name, + object_name=object_name_sha256, + ) + + if st_obj.size != length: + raise ValueError( + f"Size mismatch: expected {length}, got {st_obj.size}") + + finally: + try: + _client.remove_object( + bucket_name=bucket_name, object_name=object_name) + except: # pylint: disable=bare-except + pass + if object_name_sha256: + try: + _client.remove_object( + bucket_name=bucket_name, + object_name=object_name_sha256, + ) + except: # pylint: disable=bare-except + pass + try: + _client.remove_bucket(bucket_name=bucket_name) + except: # pylint: disable=bare-except + pass + + def _test_stat_object(log_entry, sse=None, version_check=False): """Test stat_object().""" @@ -2393,6 +2502,7 @@ def main(): test_copy_object_unmodified_since: None, test_put_object: {"sse": ssec} if ssec else None, test_negative_put_object_with_path_segment: None, + test_put_object_multipart_with_checksum: None, test_stat_object: {"sse": ssec} if ssec else None, test_stat_object_version: {"sse": ssec} if ssec else None, test_get_object: {"sse": ssec} if ssec else None,