@@ -377,7 +377,7 @@ def __eq__(self, other: Any) -> bool:
377
377
2 : Schema (
378
378
NestedField (0 , "status" , IntegerType (), required = True ),
379
379
NestedField (1 , "snapshot_id" , LongType (), required = False ),
380
- NestedField (3 , "data_sequence_number " , LongType (), required = False ),
380
+ NestedField (3 , "sequence_number " , LongType (), required = False ),
381
381
NestedField (4 , "file_sequence_number" , LongType (), required = False ),
382
382
NestedField (2 , "data_file" , DATA_FILE_TYPE [2 ], required = True ),
383
383
),
@@ -394,10 +394,10 @@ def manifest_entry_schema_with_data_file(format_version: TableVersion, data_file
394
394
395
395
396
396
class ManifestEntry (Record ):
397
- __slots__ = ("status" , "snapshot_id" , "data_sequence_number " , "file_sequence_number" , "data_file" )
397
+ __slots__ = ("status" , "snapshot_id" , "sequence_number " , "file_sequence_number" , "data_file" )
398
398
status : ManifestEntryStatus
399
399
snapshot_id : Optional [int ]
400
- data_sequence_number : Optional [int ]
400
+ sequence_number : Optional [int ]
401
401
file_sequence_number : Optional [int ]
402
402
data_file : DataFile
403
403
@@ -408,43 +408,39 @@ def _wrap(
408
408
self ,
409
409
new_status : ManifestEntryStatus ,
410
410
new_snapshot_id : Optional [int ],
411
- new_data_sequence_number : Optional [int ],
411
+ new_sequence_number : Optional [int ],
412
412
new_file_sequence_number : Optional [int ],
413
413
new_file : DataFile ,
414
414
) -> ManifestEntry :
415
415
self .status = new_status
416
416
self .snapshot_id = new_snapshot_id
417
- self .data_sequence_number = new_data_sequence_number
417
+ self .sequence_number = new_sequence_number
418
418
self .file_sequence_number = new_file_sequence_number
419
419
self .data_file = new_file
420
420
return self
421
421
422
422
def _wrap_append(
    self, new_snapshot_id: Optional[int], new_sequence_number: Optional[int], new_file: DataFile
) -> ManifestEntry:
    """Overwrite this entry's fields so it represents an ADDED file and return it.

    Delegates to ``_wrap`` with ``ManifestEntryStatus.ADDED``. The file
    sequence number is always passed as ``None`` for appended entries.
    """
    return self._wrap(
        new_status=ManifestEntryStatus.ADDED,
        new_snapshot_id=new_snapshot_id,
        new_sequence_number=new_sequence_number,
        new_file_sequence_number=None,
        new_file=new_file,
    )
426
426
427
427
def _wrap_delete(
    self,
    new_snapshot_id: Optional[int],
    new_sequence_number: Optional[int],
    new_file_sequence_number: Optional[int],
    new_file: DataFile,
) -> ManifestEntry:
    """Overwrite this entry's fields so it represents a DELETED file and return it.

    Delegates to ``_wrap`` with ``ManifestEntryStatus.DELETED``; all other
    arguments are forwarded unchanged.
    """
    return self._wrap(
        new_status=ManifestEntryStatus.DELETED,
        new_snapshot_id=new_snapshot_id,
        new_sequence_number=new_sequence_number,
        new_file_sequence_number=new_file_sequence_number,
        new_file=new_file,
    )
437
435
438
436
def _wrap_existing(
    self,
    new_snapshot_id: Optional[int],
    new_sequence_number: Optional[int],
    new_file_sequence_number: Optional[int],
    new_file: DataFile,
) -> ManifestEntry:
    """Overwrite this entry's fields so it represents an EXISTING file and return it.

    Delegates to ``_wrap`` with ``ManifestEntryStatus.EXISTING``; all other
    arguments are forwarded unchanged.
    """
    return self._wrap(
        new_status=ManifestEntryStatus.EXISTING,
        new_snapshot_id=new_snapshot_id,
        new_sequence_number=new_sequence_number,
        new_file_sequence_number=new_file_sequence_number,
        new_file=new_file,
    )
448
444
449
445
450
446
PARTITION_FIELD_SUMMARY_TYPE = StructType (
@@ -665,10 +661,10 @@ def _inherit_from_manifest(entry: ManifestEntry, manifest: ManifestFile) -> Mani
665
661
if entry .snapshot_id is None :
666
662
entry .snapshot_id = manifest .added_snapshot_id
667
663
668
- # in v1 tables, the data sequence number is not persisted and can be safely defaulted to 0
669
- # in v2 tables, the data sequence number should be inherited iff the entry status is ADDED
670
- if entry .data_sequence_number is None and (manifest .sequence_number == 0 or entry .status == ManifestEntryStatus .ADDED ):
671
- entry .data_sequence_number = manifest .sequence_number
664
+ # in v1 tables, the sequence number is not persisted and can be safely defaulted to 0
665
+ # in v2 tables, the sequence number should be inherited iff the entry status is ADDED
666
+ if entry .sequence_number is None and (manifest .sequence_number == 0 or entry .status == ManifestEntryStatus .ADDED ):
667
+ entry .sequence_number = manifest .sequence_number
672
668
673
669
# in v1 tables, the file sequence number is not persisted and can be safely defaulted to 0
674
670
# in v2 tables, the file sequence number should be inherited iff the entry status is ADDED
@@ -695,7 +691,7 @@ class ManifestWriter(ABC):
695
691
_existing_rows : int
696
692
_deleted_files : int
697
693
_deleted_rows : int
698
- _min_data_sequence_number : Optional [int ]
694
+ _min_sequence_number : Optional [int ]
699
695
_partitions : List [Record ]
700
696
_reused_entry_wrapper : ManifestEntry
701
697
@@ -712,7 +708,7 @@ def __init__(self, spec: PartitionSpec, schema: Schema, output_file: OutputFile,
712
708
self ._existing_rows = 0
713
709
self ._deleted_files = 0
714
710
self ._deleted_rows = 0
715
- self ._min_data_sequence_number = None
711
+ self ._min_sequence_number = None
716
712
self ._partitions = []
717
713
self ._reused_entry_wrapper = ManifestEntry ()
718
714
@@ -774,7 +770,7 @@ def to_manifest_file(self) -> ManifestFile:
774
770
"""Return the manifest file."""
775
771
# once the manifest file is generated, no more entries can be added
776
772
self .closed = True
777
- min_sequence_number = self ._min_data_sequence_number or UNASSIGNED_SEQ
773
+ min_sequence_number = self ._min_sequence_number or UNASSIGNED_SEQ
778
774
return ManifestFile (
779
775
manifest_path = self ._output_file .location ,
780
776
manifest_length = len (self ._writer .output_file ),
@@ -812,35 +808,33 @@ def add_entry(self, entry: ManifestEntry) -> ManifestWriter:
812
808
813
809
if (
814
810
(entry .status == ManifestEntryStatus .ADDED or entry .status == ManifestEntryStatus .EXISTING )
815
- and entry .data_sequence_number is not None
816
- and (self ._min_data_sequence_number is None or entry .data_sequence_number < self ._min_data_sequence_number )
811
+ and entry .sequence_number is not None
812
+ and (self ._min_sequence_number is None or entry .sequence_number < self ._min_sequence_number )
817
813
):
818
- self ._min_data_sequence_number = entry .data_sequence_number
814
+ self ._min_sequence_number = entry .sequence_number
819
815
820
816
self ._writer .write_block ([self .prepare_entry (entry )])
821
817
return self
822
818
823
819
def add(self, entry: ManifestEntry) -> ManifestWriter:
    """Write ``entry`` as an ADDED entry stamped with this writer's snapshot id.

    The entry's sequence number is carried over only when it is set and
    non-negative; otherwise ``None`` is passed so the value is left
    unassigned. Returns ``self`` so calls can be chained.
    """
    seq = entry.sequence_number
    has_assigned_seq = seq is not None and seq >= 0
    appended = self._reused_entry_wrapper._wrap_append(
        self._snapshot_id,
        seq if has_assigned_seq else None,
        entry.data_file,
    )
    self.add_entry(appended)
    return self
831
825
832
826
def delete(self, entry: ManifestEntry) -> ManifestWriter:
    """Write ``entry`` as a DELETED entry stamped with this writer's snapshot id.

    The entry's sequence number, file sequence number and data file are
    forwarded as-is. Returns ``self`` so calls can be chained.
    """
    wrapper = self._reused_entry_wrapper
    deleted = wrapper._wrap_delete(
        self._snapshot_id,
        entry.sequence_number,
        entry.file_sequence_number,
        entry.data_file,
    )
    self.add_entry(deleted)
    return self
839
833
840
834
def existing(self, entry: ManifestEntry) -> ManifestWriter:
    """Write ``entry`` as an EXISTING entry, keeping the entry's own snapshot id.

    Unlike ``add`` and ``delete``, the snapshot id comes from ``entry``
    rather than from this writer. Sequence numbers and the data file are
    forwarded as-is. Returns ``self`` so calls can be chained.
    """
    wrapper = self._reused_entry_wrapper
    carried_over = wrapper._wrap_existing(
        entry.snapshot_id,
        entry.sequence_number,
        entry.file_sequence_number,
        entry.data_file,
    )
    self.add_entry(carried_over)
    return self
@@ -885,7 +879,7 @@ def _meta(self) -> Dict[str, str]:
885
879
}
886
880
887
881
def prepare_entry (self , entry : ManifestEntry ) -> ManifestEntry :
888
- if entry .data_sequence_number is None :
882
+ if entry .sequence_number is None :
889
883
if entry .snapshot_id is not None and entry .snapshot_id != self ._snapshot_id :
890
884
raise ValueError (f"Found unassigned sequence number for an entry from snapshot: { entry .snapshot_id } " )
891
885
if entry .status != ManifestEntryStatus .ADDED :
0 commit comments