-
Notifications
You must be signed in to change notification settings - Fork 378
Closed
Description
Apache Iceberg version
0.6.0
Please describe the bug 🐞
When reading a metadata.json file whose list of snapshots includes snapshot summaries that lack the operation
field, PyIceberg fails with the following error:
TypeError: Summary.__init__() missing 1 required positional argument: 'operation'.
Interestingly, the Iceberg Java library parses the same metadata file without any issues.
Full stack trace:
File "reader.py", line 91, in _get_iceberg_table
return StaticTable.from_metadata(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/pyiceberg/table/__init__.py", line 1101, in from_metadata
metadata = FromInputFile.table_metadata(file)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/pyiceberg/serializers.py", line 113, in table_metadata
return FromByteStream.table_metadata(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/pyiceberg/serializers.py", line 94, in table_metadata
return TableMetadataUtil.parse_raw(metadata)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/pyiceberg/table/metadata.py", line 461, in parse_raw
return TableMetadataWrapper.model_validate_json(data).root
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".venv/lib/python3.12/site-packages/pydantic/main.py", line 580, in model_validate_json
return cls.__pydantic_validator__.validate_json(json_data, strict=strict, context=context)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: Summary.__init__() missing 1 required positional argument: 'operation'
Metadata.json example:
{
"format-version" : 2,
"table-uuid" : "9996bcdf-3277-48f4-9e76-9e81766c9e0e",
"location" : "file://t/some_table/",
"last-sequence-number" : 45,
"last-updated-ms" : 1724611070351,
"last-column-id" : 79,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "DATA",
"required" : false,
"type" : "string"
}, {
"id" : 2,
"name" : "COLUMN_NAME",
"required" : false,
"type" : "string"
}]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ ]
} ],
"last-partition-id" : 999,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"format-version" : "2"
},
"current-snapshot-id" : 1724611070351000000,
"snapshots" : [ {
"sequence-number" : 44,
"snapshot-id" : 1724610129117000000,
"timestamp-ms" : 1724610129117,
"manifest-list" : "file://t/some_table/metadata/snap-1724610129117000000-d9b50309-0dff-472d-8711-86ca70021ffb.avro",
"schema-id" : 0,
"summary" : {
"manifests-created" : "8",
"total-records" : "26508666891",
"added-files-size" : "3927895626752",
"manifests-kept" : "0",
"total-files-size" : "3927895626752",
"added-records" : "26508666891",
"added-data-files" : "231513",
"manifests-replaced" : "0",
"total-data-files" : "231513"
}
}, {
"sequence-number" : 43,
"snapshot-id" : 1724006578422000000,
"timestamp-ms" : 1724006578422,
"manifest-list" : "file://t/some_table/metadata/snap-1724006578422000000-289566b5-78fe-4b60-9ffa-ab25dee1edde.avro",
"schema-id" : 0,
"summary" : {
"total-files-size" : "3888310341632",
"added-records" : "26224534820",
"added-data-files" : "225313",
"manifests-replaced" : "0",
"total-data-files" : "225313",
"manifests-created" : "56",
"total-records" : "26224534820",
"added-files-size" : "3888310341632",
"manifests-kept" : "0"
}
}, {
"sequence-number" : 45,
"snapshot-id" : 1724611070351000000,
"timestamp-ms" : 1724611070351,
"manifest-list" : "file://t/some_table/metadata/snap-1724611070351000000-6a307203-7148-467f-88eb-f932b32dd7f4.avro",
"schema-id" : 0,
"summary" : {
"added-files-size" : "3929709293568",
"total-records" : "26508666891",
"manifests-created" : "8",
"total-data-files" : "227581",
"manifests-replaced" : "0",
"added-data-files" : "227581",
"added-records" : "26508666891",
"total-files-size" : "3929709293568",
"operation" : "append",
"manifests-kept" : "0"
}
} ],
"snapshot-log" : [ {
"snapshot-id" : 1724006578422000000,
"timestamp-ms" : 1724006578422
}, {
"snapshot-id" : 1724610129117000000,
"timestamp-ms" : 1724610129117
}, {
"snapshot-id" : 1724611070351000000,
"timestamp-ms" : 1724611070351
} ]
}
Metadata
Metadata
Assignees
Labels
No labels