     visit_with_partner,
 )
 from pyiceberg.table import PropertyUtil, TableProperties, WriteTask
+from pyiceberg.table.metadata import TableMetadata
 from pyiceberg.table.name_mapping import NameMapping
 from pyiceberg.transforms import TruncateTransform
 from pyiceberg.typedef import EMPTY_DICT, Properties, Record
@@ -1720,7 +1721,7 @@ def fill_parquet_file_metadata(
     data_file.split_offsets = split_offsets


-def write_file(table: Table, tasks: Iterator[WriteTask], file_schema: Optional[Schema] = None) -> Iterator[DataFile]:
+def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:
     task = next(tasks)

     try:
@@ -1730,15 +1731,15 @@ def write_file(table: Table, tasks: Iterator[WriteTask], file_schema: Optional[S
     except StopIteration:
         pass

-    parquet_writer_kwargs = _get_parquet_writer_kwargs(table.properties)
+    parquet_writer_kwargs = _get_parquet_writer_kwargs(table_metadata.properties)

-    file_path = f'{table.location()}/data/{task.generate_data_file_filename("parquet")}'
-    file_schema = file_schema or table.schema()
-    arrow_file_schema = schema_to_pyarrow(file_schema)
+    file_path = f'{table_metadata.location}/data/{task.generate_data_file_filename("parquet")}'
+    schema = table_metadata.schema()
+    arrow_file_schema = schema_to_pyarrow(schema)

-    fo = table.io.new_output(file_path)
+    fo = io.new_output(file_path)
     row_group_size = PropertyUtil.property_as_int(
-        properties=table.properties,
+        properties=table_metadata.properties,
         property_name=TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES,
         default=TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT,
     )
@@ -1757,16 +1758,16 @@ def write_file(table: Table, tasks: Iterator[WriteTask], file_schema: Optional[S
         # sort_order_id=task.sort_order_id,
         sort_order_id=None,
         # Just copy these from the table for now
-        spec_id=table.spec().spec_id,
+        spec_id=table_metadata.default_spec_id,
         equality_ids=None,
         key_metadata=None,
     )

     fill_parquet_file_metadata(
         data_file=data_file,
         parquet_metadata=writer.writer.metadata,
-        stats_columns=compute_statistics_plan(file_schema, table.properties),
-        parquet_column_mapping=parquet_path_to_id_mapping(file_schema),
+        stats_columns=compute_statistics_plan(schema, table_metadata.properties),
+        parquet_column_mapping=parquet_path_to_id_mapping(schema),
     )
     return iter([data_file])
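
For context, a minimal sketch of how a caller could drive the new signature, passing the table's FileIO and TableMetadata rather than the Table object itself. The catalog and table names, the example Arrow data, and the WriteTask fields (write_uuid, task_id, df) are assumptions for illustration and not part of this change:

import uuid

import pyarrow as pa

from pyiceberg.catalog import load_catalog
from pyiceberg.io.pyarrow import write_file
from pyiceberg.table import WriteTask

catalog = load_catalog("default")      # hypothetical catalog name
tbl = catalog.load_table("db.events")  # hypothetical table

# Example Arrow data; in practice it must match the table schema.
arrow_table = pa.table({"id": pa.array([1, 2, 3], type=pa.int32())})

# write_file now needs only a FileIO and the TableMetadata,
# not a live Table handle, to perform the write.
data_files = write_file(
    io=tbl.io,
    table_metadata=tbl.metadata,
    tasks=iter([WriteTask(write_uuid=uuid.uuid4(), task_id=0, df=arrow_table)]),
)

The effect of the change is that the write path depends only on a FileIO and the (serializable) table metadata, rather than on the full Table object.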