|
50 | 50 | from pyiceberg.io.pyarrow import _dataframe_to_data_files, schema_to_pyarrow
|
51 | 51 | from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC
|
52 | 52 | from pyiceberg.schema import Schema
|
| 53 | +from pyiceberg.table import TableProperties |
53 | 54 | from pyiceberg.table.snapshots import Operation
|
54 | 55 | from pyiceberg.table.sorting import (
|
55 | 56 | NullOrder,
|
@@ -1613,3 +1614,50 @@ def test_merge_manifests_local_file_system(catalog: SqlCatalog, arrow_table_with
|
1613 | 1614 | tbl.append(arrow_table_with_null)
|
1614 | 1615 |
|
1615 | 1616 | assert len(tbl.scan().to_arrow()) == 5 * len(arrow_table_with_null)
|
| 1617 | + |
| 1618 | + |
@pytest.mark.parametrize(
    "catalog",
    [
        lazy_fixture("catalog_memory"),
        lazy_fixture("catalog_sqlite"),
        lazy_fixture("catalog_sqlite_without_rowcount"),
    ],
)
def test_delete_metadata_multiple(catalog: SqlCatalog, table_schema_nested: Schema, random_table_identifier: str) -> None:
    """Check metadata-file cleanup once a previous-versions cap and delete-after-commit are set.

    The test makes five schema-changing commits, then sets
    ``METADATA_PREVIOUS_VERSIONS_MAX`` to ``"2"`` together with
    ``METADATA_DELETE_AFTER_COMMIT_ENABLED`` set to ``"true"``, and asserts
    that the metadata log is trimmed to two entries. After one more commit it
    asserts that the original metadata file and the first of the two retained
    entries no longer exist on disk, while the second retained entry does.
    """

    def _local_path(location: str) -> str:
        # Drops the first len("file://") characters so os.path can be used on the rest.
        return location[len("file://") :]

    catalog.create_namespace(Catalog.namespace_from(random_table_identifier))
    table = catalog.create_table(random_table_identifier, table_schema_nested)

    original_metadata_location = table.metadata_location

    # Five separate commits, each adding one integer column.
    for index in range(5):
        with table.transaction() as txn, txn.update_schema() as schema_update:
            schema_update.add_column(path=f"new_column_{index}", field_type=IntegerType())

    assert len(table.metadata.metadata_log) == 5
    assert os.path.exists(_local_path(original_metadata_location))

    # Set the max versions property to 2, and delete after commit
    retention_properties = {
        TableProperties.METADATA_PREVIOUS_VERSIONS_MAX: "2",
        TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED: "true",
    }
    with table.transaction() as txn:
        txn.set_properties(properties=retention_properties)

    # Verify that only the most recent metadata files are kept
    assert len(table.metadata.metadata_log) == 2
    first_entry, second_entry = table.metadata.metadata_log

    # One more commit appends a new log entry, so earlier entries are removed.
    with table.transaction() as txn, txn.update_schema() as schema_update:
        schema_update.add_column(path="new_column_x", field_type=IntegerType())

    assert len(table.metadata.metadata_log) == 2
    assert not os.path.exists(_local_path(original_metadata_location))
    assert not os.path.exists(_local_path(first_entry.metadata_file))
    assert os.path.exists(_local_path(second_entry.metadata_file))
0 commit comments