@@ -1291,31 +1291,11 @@ def to_panel(self):
1291
1291
1292
1292
def _helper_csv (self , writer , na_rep = None , cols = None ,
1293
1293
header = True , index = True ,
1294
- index_label = None , float_format = None ):
1294
+ index_label = None , float_format = None ,
1295
+ chunksize = None ):
1295
1296
if cols is None :
1296
1297
cols = self .columns
1297
1298
1298
- series = {}
1299
- for k , v in self ._series .iteritems ():
1300
- mask = isnull (v )
1301
- imask = - mask
1302
- if v .dtype == 'datetime64[ns]' or v .dtype == 'timedelta64[ns]' :
1303
- values = np .empty (len (v ),dtype = object )
1304
- values [mask ] = 'NaT'
1305
-
1306
- if v .dtype == 'datetime64[ns]' :
1307
- values [imask ] = np .array ([ val ._repr_base for val in v [imask ] ],dtype = object )
1308
- elif v .dtype == 'timedelta64[ns]' :
1309
- values [imask ] = np .array ([ lib .repr_timedelta64 (val ) for val in v [imask ] ],dtype = object )
1310
- else :
1311
- values = np .array (v .values ,dtype = object )
1312
- values [mask ] = na_rep
1313
- if issubclass (v .dtype .type ,np .floating ):
1314
- if float_format :
1315
- values [imask ] = np .array ([ float_format % val for val in v [imask ] ])
1316
-
1317
- series [k ] = values .tolist ()
1318
-
1319
1299
has_aliases = isinstance (header , (tuple , list , np .ndarray ))
1320
1300
if has_aliases or header :
1321
1301
if index :
@@ -1365,12 +1345,50 @@ def _helper_csv(self, writer, na_rep=None, cols=None,
1365
1345
if not index :
1366
1346
nlevels = 0
1367
1347
1368
- lib .write_csv_rows (series , list (data_index ), nlevels , list (cols ), writer )
1348
+ rows = len (data_index )
1349
+
1350
+ # write the rows out in batches of at most `chunksize` rows
1351
+ if chunksize is None :
1352
+ chunksize = 100000
1353
+ chunks = int (rows / chunksize )+ 1
1354
+
1355
+ for i in xrange (chunks ):
1356
+ start_i = i * chunksize
1357
+ end_i = min ((i + 1 ) * chunksize , rows )
1358
+ if start_i == end_i :
1359
+ continue
1360
+
1361
+ # create the data for a chunk
1362
+ chunk = self .iloc [start_i :end_i ]
1363
+
1364
+ series = {}
1365
+ for k , v in chunk .iteritems ():
1366
+ mask = isnull (v )
1367
+ imask = - mask
1368
+
1369
+ if v .dtype == 'datetime64[ns]' or v .dtype == 'timedelta64[ns]' :
1370
+ values = np .empty (len (v ),dtype = object )
1371
+ values [mask ] = 'NaT'
1372
+
1373
+ if v .dtype == 'datetime64[ns]' :
1374
+ values [imask ] = np .array ([ val ._repr_base for val in v [imask ] ],dtype = object )
1375
+ elif v .dtype == 'timedelta64[ns]' :
1376
+ values [imask ] = np .array ([ lib .repr_timedelta64 (val ) for val in v [imask ] ],dtype = object )
1377
+ else :
1378
+ values = np .array (v .values ,dtype = object )
1379
+ values [mask ] = na_rep
1380
+ if issubclass (v .dtype .type ,np .floating ):
1381
+ if float_format :
1382
+ values [imask ] = np .array ([ float_format % val for val in v [imask ] ])
1383
+
1384
+ series [k ] = values .tolist ()
1385
+
1386
+ lib .write_csv_rows (series , list (data_index [start_i :end_i ]), nlevels , list (cols ), writer )
1369
1387
1370
1388
def to_csv (self , path_or_buf , sep = "," , na_rep = '' , float_format = None ,
1371
1389
cols = None , header = True , index = True , index_label = None ,
1372
1390
mode = 'w' , nanRep = None , encoding = None , quoting = None ,
1373
- line_terminator = '\n ' ):
1391
+ line_terminator = '\n ' , chunksize = None ):
1374
1392
"""
1375
1393
Write DataFrame to a comma-separated values (csv) file
1376
1394
@@ -1407,6 +1425,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
1407
1425
file
1408
1426
quoting : optional constant from csv module
1409
1427
defaults to csv.QUOTE_MINIMAL
1428
+ chunksize : int or None, number of rows to write at a time (None falls back to 100000 rows in _helper_csv)
1410
1429
"""
1411
1430
if nanRep is not None : # pragma: no cover
1412
1431
import warnings
@@ -1435,7 +1454,8 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
1435
1454
self ._helper_csv (csvout , na_rep = na_rep ,
1436
1455
float_format = float_format , cols = cols ,
1437
1456
header = header , index = index ,
1438
- index_label = index_label )
1457
+ index_label = index_label ,
1458
+ chunksize = chunksize )
1439
1459
1440
1460
finally :
1441
1461
if close :
0 commit comments