Skip to content

Commit da403d2

Browse files
authored
Add support for Transaction.update_statistics() (#1831)
# Rationale for this change Add a new API, `Transaction.update_statistics()`, that updates statistics using the existing transaction instead of creating a brand new one. # Are these changes tested? I extended `tests/integration/test_statistics_operations.py::test_manage_statistics` to test the new API. # Are there any user-facing changes? Users can now update statistics files within an ongoing transaction.
1 parent 5c4e59f commit da403d2

File tree

2 files changed

+16
-0
lines changed

2 files changed

+16
-0
lines changed

pyiceberg/table/__init__.py

+9
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,15 @@ def update_snapshot(self, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> U
438438
"""
439439
return UpdateSnapshot(self, io=self._table.io, snapshot_properties=snapshot_properties)
440440

441+
def update_statistics(self) -> UpdateStatistics:
442+
"""
443+
Create a new UpdateStatistics to update the statistics of the table.
444+
445+
Returns:
446+
A new UpdateStatistics
447+
"""
448+
return UpdateStatistics(transaction=self)
449+
441450
def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None:
442451
"""
443452
Shorthand API for appending a PyArrow table to a table transaction.

tests/integration/test_statistics_operations.py

+7
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,10 @@ def create_statistics_file(snapshot_id: int, type_name: str) -> StatisticsFile:
8282
update.remove_statistics(add_snapshot_id_1)
8383

8484
assert len(tbl.metadata.statistics) == 1
85+
86+
with tbl.transaction() as txn:
87+
with txn.update_statistics() as update:
88+
update.set_statistics(statistics_file_snap_1)
89+
update.set_statistics(statistics_file_snap_2)
90+
91+
assert len(tbl.metadata.statistics) == 2

0 commit comments

Comments
 (0)