Skip to content

TypeError when operation field is missing in summary. #1106

@questsul

Description

@questsul

Apache Iceberg version

0.6.0

Please describe the bug 🐞

When reading a metadata.json file whose snapshot list includes snapshots with a summary that lacks the operation field, PyIceberg fails with the following error:

TypeError: Summary.__init__() missing 1 required positional argument: 'operation'.

Interestingly, the Iceberg Java library parses the same metadata file without any issues.

Full stack trace:

 File "reader.py", line 91, in _get_iceberg_table
    return StaticTable.from_metadata(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/pyiceberg/table/__init__.py", line 1101, in from_metadata
    metadata = FromInputFile.table_metadata(file)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/pyiceberg/serializers.py", line 113, in table_metadata
    return FromByteStream.table_metadata(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/pyiceberg/serializers.py", line 94, in table_metadata
    return TableMetadataUtil.parse_raw(metadata)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/pyiceberg/table/metadata.py", line 461, in parse_raw
    return TableMetadataWrapper.model_validate_json(data).root
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".venv/lib/python3.12/site-packages/pydantic/main.py", line 580, in model_validate_json
    return cls.__pydantic_validator__.validate_json(json_data, strict=strict, context=context)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: Summary.__init__() missing 1 required positional argument: 'operation'

Metadata.json example:

{
  "format-version" : 2,
  "table-uuid" : "9996bcdf-3277-48f4-9e76-9e81766c9e0e",
  "location" : "file://t/some_table/",
  "last-sequence-number" : 45,
  "last-updated-ms" : 1724611070351,
  "last-column-id" : 79,
  "current-schema-id" : 0,
  "schemas" : [ {
    "type" : "struct",
    "schema-id" : 0,
    "fields" : [ {
      "id" : 1,
      "name" : "DATA",
      "required" : false,
      "type" : "string"
    }, {
      "id" : 2,
      "name" : "COLUMN_NAME",
      "required" : false,
      "type" : "string"
    }]
  } ],
  "default-spec-id" : 0,
  "partition-specs" : [ {
    "spec-id" : 0,
    "fields" : [ ]
  } ],
  "last-partition-id" : 999,
  "default-sort-order-id" : 0,
  "sort-orders" : [ {
    "order-id" : 0,
    "fields" : [ ]
  } ],
  "properties" : {
    "format-version" : "2"
  },
  "current-snapshot-id" : 1724611070351000000,
  "snapshots" : [ {
    "sequence-number" : 44,
    "snapshot-id" : 1724610129117000000,
    "timestamp-ms" : 1724610129117,
    "manifest-list" : "file://t/some_table/metadata/snap-1724610129117000000-d9b50309-0dff-472d-8711-86ca70021ffb.avro",
    "schema-id" : 0,
    "summary" : {
      "manifests-created" : "8",
      "total-records" : "26508666891",
      "added-files-size" : "3927895626752",
      "manifests-kept" : "0",
      "total-files-size" : "3927895626752",
      "added-records" : "26508666891",
      "added-data-files" : "231513",
      "manifests-replaced" : "0",
      "total-data-files" : "231513"
    }
  }, {
    "sequence-number" : 43,
    "snapshot-id" : 1724006578422000000,
    "timestamp-ms" : 1724006578422,
    "manifest-list" : "file://t/some_table/metadata/snap-1724006578422000000-289566b5-78fe-4b60-9ffa-ab25dee1edde.avro",
    "schema-id" : 0,
    "summary" : {
      "total-files-size" : "3888310341632",
      "added-records" : "26224534820",
      "added-data-files" : "225313",
      "manifests-replaced" : "0",
      "total-data-files" : "225313",
      "manifests-created" : "56",
      "total-records" : "26224534820",
      "added-files-size" : "3888310341632",
      "manifests-kept" : "0"
    }
  }, {
    "sequence-number" : 45,
    "snapshot-id" : 1724611070351000000,
    "timestamp-ms" : 1724611070351,
    "manifest-list" : "file://t/some_table/metadata/snap-1724611070351000000-6a307203-7148-467f-88eb-f932b32dd7f4.avro",
    "schema-id" : 0,
    "summary" : {
      "added-files-size" : "3929709293568",
      "total-records" : "26508666891",
      "manifests-created" : "8",
      "total-data-files" : "227581",
      "manifests-replaced" : "0",
      "added-data-files" : "227581",
      "added-records" : "26508666891",
      "total-files-size" : "3929709293568",
      "operation" : "append",
      "manifests-kept" : "0"
    }
  } ],
  "snapshot-log" : [ {
    "snapshot-id" : 1724006578422000000,
    "timestamp-ms" : 1724006578422
  }, {
    "snapshot-id" : 1724610129117000000,
    "timestamp-ms" : 1724610129117
  }, {
    "snapshot-id" : 1724611070351000000,
    "timestamp-ms" : 1724611070351
  } ]
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions