-
Notifications
You must be signed in to change notification settings - Fork 390
Closed
Milestone
Description
Apache Iceberg version
0.7.0
Please describe the bug 🐞
Hi I added this test which breaks:
def test_delete_overwrite_with_null(session_catalog: RestCatalog) -> None:
arrow_schema = pa.schema([pa.field("ints", pa.int32())])
arrow_tbl = pa.Table.from_pylist(
[
{"ints": 1},
{"ints": 2},
{"ints": None}
],
schema=arrow_schema,
)
iceberg_schema = Schema(NestedField(1, "ints", IntegerType()))
tbl_identifier = "default.test_delete_overwrite_with_null"
try:
session_catalog.drop_table(tbl_identifier)
except NoSuchTableError:
pass
tbl = session_catalog.create_table(tbl_identifier, iceberg_schema)
tbl.append(arrow_tbl)
assert [snapshot.summary.operation for snapshot in tbl.snapshots()] == [Operation.APPEND]
arrow_tbl_overwrite = pa.Table.from_pylist(
[
{"ints": 3},
{"ints": 4},
],
schema=arrow_schema,
)
tbl.overwrite(arrow_tbl_overwrite, "ints == 2") # Should rewrite one file
assert [snapshot.summary.operation for snapshot in tbl.snapshots()] == [
Operation.APPEND,
Operation.OVERWRITE,
Operation.APPEND,
]
assert tbl.scan().to_arrow()["ints"].to_pylist() == [3, 4, 1, None]
FAILED tests/integration/test_deletes.py::test_delete_overwrite_with_null - assert [3, 4, 1] == [3, 4, 1, None]
The bug is from this line:
https://github.com/apache/iceberg-python/blob/main/pyiceberg/table/__init__.py#L596
which does not build a predicate including the null because the revert operator of pyarrow.compute underneath only handles null correctly when the raw operator mentions the field is with pc.is_null or pc.is_valid
Working on a fix now.
Fokko
Metadata
Metadata
Assignees
Labels
No labels