Skip to content

Commit cdc3e54

Browse files
authored
Disallow writing empty Manifest files (apache#876)
* Disallow writing empty Avro files/blocks. Raising an exception when doing this might look extreme, but there is no really good reason to allow this. * Relax the constraints a bit
1 parent b68e109 commit cdc3e54

File tree

2 files changed

+24
-1
lines changed

2 files changed

+24
-1
lines changed

pyiceberg/manifest.py

+6
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,10 @@ def __exit__(
685685
traceback: Optional[TracebackType],
686686
) -> None:
687687
"""Close the writer."""
688+
if (self._added_files + self._existing_files + self._deleted_files) == 0:
689+
# This is just a guard to ensure that we don't write empty manifest files
690+
raise ValueError("An empty manifest file has been written")
691+
688692
self.closed = True
689693
self._writer.__exit__(exc_type, exc_value, traceback)
690694

@@ -757,6 +761,8 @@ def add_entry(self, entry: ManifestEntry) -> ManifestWriter:
757761
elif entry.status == ManifestEntryStatus.DELETED:
758762
self._deleted_files += 1
759763
self._deleted_rows += entry.data_file.record_count
764+
else:
765+
raise ValueError(f"Unknown entry: {entry.status}")
760766

761767
self._partitions.append(entry.data_file.partition)
762768

tests/utils/test_manifest.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
write_manifest,
3636
write_manifest_list,
3737
)
38-
from pyiceberg.partitioning import PartitionField, PartitionSpec
38+
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionField, PartitionSpec
3939
from pyiceberg.schema import Schema
4040
from pyiceberg.table.snapshots import Operation, Snapshot, Summary
4141
from pyiceberg.transforms import IdentityTransform
@@ -306,6 +306,23 @@ def test_read_manifest_v2(generated_manifest_file_file_v2: str) -> None:
306306
assert entry.status == ManifestEntryStatus.ADDED
307307

308308

309+
def test_write_empty_manifest() -> None:
310+
io = load_file_io()
311+
test_schema = Schema(NestedField(1, "foo", IntegerType(), False))
312+
with TemporaryDirectory() as tmpdir:
313+
tmp_avro_file = tmpdir + "/test_write_manifest.avro"
314+
315+
with pytest.raises(ValueError, match="An empty manifest file has been written"):
316+
with write_manifest(
317+
format_version=1,
318+
spec=UNPARTITIONED_PARTITION_SPEC,
319+
schema=test_schema,
320+
output_file=io.new_output(tmp_avro_file),
321+
snapshot_id=8744736658442914487,
322+
) as _:
323+
pass
324+
325+
309326
@pytest.mark.parametrize("format_version", [1, 2])
310327
def test_write_manifest(
311328
generated_manifest_file_file_v1: str, generated_manifest_file_file_v2: str, format_version: TableVersion

0 commit comments

Comments
 (0)