Skip to content

Commit a7da318

Browse files
committed
address review comments
1 parent 1ec5edd commit a7da318

File tree

1 file changed

+12
-20
lines changed

1 file changed

+12
-20
lines changed

pyiceberg/table/__init__.py

+12-20
Original file line numberDiff line numberDiff line change
@@ -3922,29 +3922,24 @@ def __init__(
39223922
self._merge_enabled = merge_enabled
39233923
self._snapshot_producer = snapshot_producer
39243924

3925-
def _group_by_spec(
3926-
self, first_manifest: ManifestFile, remaining_manifests: List[ManifestFile]
3927-
) -> Dict[int, List[ManifestFile]]:
3925+
def _group_by_spec(self, manifests: List[ManifestFile]) -> Dict[int, List[ManifestFile]]:
39283926
groups = defaultdict(list)
3929-
groups[first_manifest.partition_spec_id].append(first_manifest)
3930-
for manifest in remaining_manifests:
3927+
for manifest in manifests:
39313928
groups[manifest.partition_spec_id].append(manifest)
39323929
return groups
39333930

39343931
def _create_manifest(self, spec_id: int, manifest_bin: List[ManifestFile]) -> ManifestFile:
39353932
with self._snapshot_producer.new_manifest_writer(spec=self._snapshot_producer.spec(spec_id)) as writer:
39363933
for manifest in manifest_bin:
39373934
for entry in self._snapshot_producer.fetch_manifest_entry(manifest=manifest, discard_deleted=False):
3938-
if entry.status == ManifestEntryStatus.DELETED:
3939-
# suppress deletes from previous snapshots. only files deleted by this snapshot
3940-
# should be added to the new manifest
3941-
if entry.snapshot_id == self._snapshot_producer.snapshot_id:
3942-
writer.delete(entry)
3935+
if entry.status == ManifestEntryStatus.DELETED and entry.snapshot_id == self._snapshot_producer.snapshot_id:
3936+
# only files deleted by this snapshot should be added to the new manifest
3937+
writer.delete(entry)
39433938
elif entry.status == ManifestEntryStatus.ADDED and entry.snapshot_id == self._snapshot_producer.snapshot_id:
3944-
# adds from this snapshot are still adds, otherwise they should be existing
3939+
# added entries from this snapshot are still added, otherwise they should be existing
39453940
writer.add(entry)
3946-
else:
3947-
# add all files from the old manifest as existing files
3941+
elif entry.status != ManifestEntryStatus.DELETED:
3942+
# add all non-deleted files from the old manifest as existing files
39483943
writer.existing(entry)
39493944

39503945
return writer.to_manifest_file()
@@ -3958,11 +3953,9 @@ def merge_bin(manifest_bin: List[ManifestFile]) -> List[ManifestFile]:
39583953
if len(manifest_bin) == 1:
39593954
output_manifests.append(manifest_bin[0])
39603955
elif first_manifest in manifest_bin and len(manifest_bin) < self._min_count_to_merge:
3961-
# if the bin has the first manifest (the new data files or an appended manifest file)
3962-
# then only merge it
3963-
# if the number of manifests is above the minimum count. this is applied only to bins
3964-
# with an in-memory
3965-
# manifest so that large manifests don't prevent merging older groups.
3956+
# if the bin has the first manifest (the new data files or an appended manifest file) then only
3957+
# merge it if the number of manifests is above the minimum count. this is applied only to bins
3958+
# with an in-memory manifest so that large manifests don't prevent merging older groups.
39663959
output_manifests.extend(manifest_bin)
39673960
else:
39683961
output_manifests.append(self._create_manifest(spec_id, manifest_bin))
@@ -3987,8 +3980,7 @@ def merge_manifests(self, manifests: List[ManifestFile]) -> List[ManifestFile]:
39873980
return manifests
39883981

39893982
first_manifest = manifests[0]
3990-
remaining_manifests = manifests[1:]
3991-
groups = self._group_by_spec(first_manifest, remaining_manifests)
3983+
groups = self._group_by_spec(manifests)
39923984

39933985
merged_manifests = []
39943986
for spec_id in reversed(groups.keys()):

0 commit comments

Comments
 (0)