@@ -247,40 +247,33 @@ def _read_parquet_chunked(
247
247
if pq_file is None :
248
248
continue
249
249
250
- schema = pq_file .schema .to_arrow_schema ()
250
+ metadata = pq_file .metadata
251
+ schema = metadata .schema .to_arrow_schema ()
251
252
if columns :
252
253
schema = pa .schema ([schema .field (column ) for column in columns ], schema .metadata )
253
254
254
255
use_threads_flag : bool = use_threads if isinstance (use_threads , bool ) else bool (use_threads > 1 )
255
- iterate_at_least_once = False
256
- for chunk in pq_file .iter_batches (
257
- batch_size = batch_size , columns = columns , use_threads = use_threads_flag , use_pandas_metadata = False
258
- ):
259
- iterate_at_least_once = True
260
- table = _add_table_partitions (
261
- table = pa .Table .from_batches ([chunk ], schema = schema ),
262
- path = path ,
263
- path_root = path_root ,
264
- )
265
- df = _table_to_df (table = table , kwargs = arrow_kwargs )
266
- if chunked is True :
267
- yield df
268
- else :
269
- if next_slice is not None :
270
- df = pd .concat (objs = [next_slice , df ], sort = False , copy = False )
271
- while len (df .index ) >= chunked :
272
- yield df .iloc [:chunked , :].copy ()
273
- df = df .iloc [chunked :, :]
274
- if df .empty :
275
- next_slice = None
256
+ table_kwargs = {"path" : path , "path_root" : path_root }
257
+ if metadata .num_rows > 0 :
258
+ for chunk in pq_file .iter_batches (
259
+ batch_size = batch_size , columns = columns , use_threads = use_threads_flag , use_pandas_metadata = False
260
+ ):
261
+ table = _add_table_partitions (table = pa .Table .from_batches ([chunk ], schema = schema ), ** table_kwargs )
262
+ df = _table_to_df (table = table , kwargs = arrow_kwargs )
263
+ if chunked is True :
264
+ yield df
276
265
else :
277
- next_slice = df
278
- if not iterate_at_least_once :
279
- table = _add_table_partitions (
280
- table = pa .Table .from_batches ([], schema = schema ),
281
- path = path ,
282
- path_root = path_root ,
283
- )
266
+ if next_slice is not None :
267
+ df = pd .concat (objs = [next_slice , df ], sort = False , copy = False )
268
+ while len (df .index ) >= chunked :
269
+ yield df .iloc [:chunked , :].copy ()
270
+ df = df .iloc [chunked :, :]
271
+ if df .empty :
272
+ next_slice = None
273
+ else :
274
+ next_slice = df
275
+ else :
276
+ table = _add_table_partitions (table = pa .Table .from_batches ([], schema = schema ), ** table_kwargs )
284
277
df = _table_to_df (table = table , kwargs = arrow_kwargs )
285
278
yield df
286
279
0 commit comments