@@ -65,8 +65,8 @@ class ParserWarning(Warning):
65
65
a list of integers that specify row locations for a multi-index on the
66
66
columns, e.g. [0,1,3]. Intervening rows that are not specified will be
67
67
skipped (e.g. 2 in this example are skipped). Note that this parameter
68
- ignores commented lines, so header=0 denotes the first line of
69
- data rather than the first line of the file.
68
+ ignores commented lines and empty lines if ``skip_blank_lines=True``, so header=0
69
+ denotes the first line of data rather than the first line of the file.
70
70
skiprows : list-like or integer
71
71
Line numbers to skip (0-indexed) or number of lines to skip (int)
72
72
at the start of the file
@@ -110,10 +110,11 @@ class ParserWarning(Warning):
110
110
comment : str, default None
111
111
Indicates remainder of line should not be parsed. If found at the
112
112
beginning of a line, the line will be ignored altogether. This parameter
113
- must be a single character. Also, fully commented lines
114
- are ignored by the parameter `header` but not by `skiprows`. For example,
115
- if comment='#', parsing '#empty\n 1,2,3\n a,b,c' with `header=0` will
116
- result in '1,2,3' being treated as the header.
113
+ must be a single character. Like empty lines (as long as ``skip_blank_lines=True``),
114
+ fully commented lines are ignored by the parameter `header`
115
+ but not by `skiprows`. For example, if comment='#', parsing
116
+ '#empty\n 1,2,3\n a,b,c' with `header=0` will result in '1,2,3' being
117
+ treated as the header.
117
118
decimal : str, default '.'
118
119
Character to recognize as decimal point. E.g. use ',' for European data
119
120
nrows : int, default None
@@ -160,6 +161,8 @@ class ParserWarning(Warning):
160
161
infer_datetime_format : boolean, default False
161
162
If True and parse_dates is enabled for a column, attempt to infer
162
163
the datetime format to speed up the processing
164
+ skip_blank_lines : boolean, default True
165
+ If True, skip over blank lines rather than interpreting them as NaN values
163
166
164
167
Returns
165
168
-------
@@ -288,6 +291,7 @@ def _read(filepath_or_buffer, kwds):
288
291
'mangle_dupe_cols' : True ,
289
292
'tupleize_cols' : False ,
290
293
'infer_datetime_format' : False ,
294
+ 'skip_blank_lines' : True
291
295
}
292
296
293
297
@@ -380,7 +384,8 @@ def parser_f(filepath_or_buffer,
380
384
squeeze = False ,
381
385
mangle_dupe_cols = True ,
382
386
tupleize_cols = False ,
383
- infer_datetime_format = False ):
387
+ infer_datetime_format = False ,
388
+ skip_blank_lines = True ):
384
389
385
390
# Alias sep -> delimiter.
386
391
if delimiter is None :
@@ -452,7 +457,8 @@ def parser_f(filepath_or_buffer,
452
457
buffer_lines = buffer_lines ,
453
458
mangle_dupe_cols = mangle_dupe_cols ,
454
459
tupleize_cols = tupleize_cols ,
455
- infer_datetime_format = infer_datetime_format )
460
+ infer_datetime_format = infer_datetime_format ,
461
+ skip_blank_lines = skip_blank_lines )
456
462
457
463
return _read (filepath_or_buffer , kwds )
458
464
@@ -1346,6 +1352,7 @@ def __init__(self, f, **kwds):
1346
1352
self .quoting = kwds ['quoting' ]
1347
1353
self .mangle_dupe_cols = kwds .get ('mangle_dupe_cols' , True )
1348
1354
self .usecols = kwds ['usecols' ]
1355
+ self .skip_blank_lines = kwds ['skip_blank_lines' ]
1349
1356
1350
1357
self .names_passed = kwds ['names' ] or None
1351
1358
@@ -1401,6 +1408,7 @@ def __init__(self, f, **kwds):
1401
1408
1402
1409
# needs to be cleaned/refactored
1403
1410
# multiple date column thing turning into a real spaghetti factory
1411
+
1404
1412
if not self ._has_complex_date_col :
1405
1413
(index_names ,
1406
1414
self .orig_names , self .columns ) = self ._get_index_name (self .columns )
@@ -1598,6 +1606,7 @@ def _infer_columns(self):
1598
1606
1599
1607
while self .line_pos <= hr :
1600
1608
line = self ._next_line ()
1609
+
1601
1610
unnamed_count = 0
1602
1611
this_columns = []
1603
1612
for i , c in enumerate (line ):
@@ -1735,25 +1744,35 @@ def _next_line(self):
1735
1744
line = self ._check_comments ([self .data [self .pos ]])[0 ]
1736
1745
self .pos += 1
1737
1746
# either uncommented or blank to begin with
1738
- if self ._empty (self .data [self .pos - 1 ]) or line :
1747
+ if not self .skip_blank_lines and (self ._empty (self .data [
1748
+ self .pos - 1 ]) or line ):
1739
1749
break
1750
+ elif self .skip_blank_lines :
1751
+ ret = self ._check_empty ([line ])
1752
+ if ret :
1753
+ line = ret [0 ]
1754
+ break
1740
1755
except IndexError :
1741
1756
raise StopIteration
1742
1757
else :
1743
1758
while self .pos in self .skiprows :
1744
- next (self .data )
1745
1759
self .pos += 1
1760
+ next (self .data )
1746
1761
1747
1762
while True :
1748
1763
orig_line = next (self .data )
1749
1764
line = self ._check_comments ([orig_line ])[0 ]
1750
1765
self .pos += 1
1751
- if self ._empty (orig_line ) or line :
1766
+ if not self .skip_blank_lines and ( self . _empty (orig_line ) or line ) :
1752
1767
break
1768
+ elif self .skip_blank_lines :
1769
+ ret = self ._check_empty ([line ])
1770
+ if ret :
1771
+ line = ret [0 ]
1772
+ break
1753
1773
1754
1774
self .line_pos += 1
1755
1775
self .buf .append (line )
1756
-
1757
1776
return line
1758
1777
1759
1778
def _check_comments (self , lines ):
@@ -1774,6 +1793,15 @@ def _check_comments(self, lines):
1774
1793
ret .append (rl )
1775
1794
return ret
1776
1795
1796
+ def _check_empty (self , lines ):
1797
+ ret = []
1798
+ for l in lines :
1799
+ # Remove empty lines and lines with only one whitespace value
1800
+ if len (l ) > 1 or len (l ) == 1 and (not isinstance (l [0 ],
1801
+ compat .string_types ) or l [0 ].strip ()):
1802
+ ret .append (l )
1803
+ return ret
1804
+
1777
1805
def _check_thousands (self , lines ):
1778
1806
if self .thousands is None :
1779
1807
return lines
@@ -1909,7 +1937,6 @@ def _get_lines(self, rows=None):
1909
1937
1910
1938
# already fetched some number
1911
1939
if rows is not None :
1912
-
1913
1940
# we already have the lines in the buffer
1914
1941
if len (self .buf ) >= rows :
1915
1942
new_rows , self .buf = self .buf [:rows ], self .buf [rows :]
@@ -1974,6 +2001,8 @@ def _get_lines(self, rows=None):
1974
2001
lines = lines [:- self .skip_footer ]
1975
2002
1976
2003
lines = self ._check_comments (lines )
2004
+ if self .skip_blank_lines :
2005
+ lines = self ._check_empty (lines )
1977
2006
return self ._check_thousands (lines )
1978
2007
1979
2008
0 commit comments