Skip to content

CLN: replace %s syntax with .format in pandas.io.parsers #24721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 19, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 37 additions & 28 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
@@ -1485,9 +1485,9 @@ def extract(r):
for n in range(len(columns[0])):
if all(compat.to_str(c[n]) in self.unnamed_cols for c in columns):
raise ParserError(
"Passed header=[%s] are too many rows for this "
"Passed header=[{header}] are too many rows for this "
"multi_index of columns"
% ','.join(str(x) for x in self.header)
.format(header=','.join(str(x) for x in self.header))
)

# Clean the column names (if we have an index_col).
@@ -1520,9 +1520,11 @@ def _maybe_dedup_names(self, names):
counts[col] = cur_count + 1

if is_potential_mi:
col = col[:-1] + ('%s.%d' % (col[-1], cur_count),)
col = col[:-1] + ('{column}.{count}'.format(
column=col[-1], count=cur_count),)
else:
col = '%s.%d' % (col, cur_count)
col = '{column}.{count}'.format(
column=col, count=cur_count)
cur_count = counts[col]

names[i] = col
@@ -1569,7 +1571,7 @@ def _get_simple_index(self, data, columns):
def ix(col):
if not isinstance(col, compat.string_types):
return col
raise ValueError('Index %s invalid' % col)
raise ValueError('Index {col} invalid'.format(col=col))

to_remove = []
index = []
@@ -1593,8 +1595,8 @@ def _get_name(icol):
return icol

if col_names is None:
raise ValueError(('Must supply column order to use %s as '
'index') % str(icol))
raise ValueError(('Must supply column order to use {icol!s} '
'as index').format(icol=icol))

for i, c in enumerate(col_names):
if i == icol:
@@ -1709,7 +1711,8 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,

result[c] = cvals
if verbose and na_count:
print('Filled %d NA values in column %s' % (na_count, str(c)))
print('Filled {count} NA values in column {c!s}'.format(
count=na_count, c=c))
return result

def _infer_types(self, values, na_values, try_num_bool=True):
@@ -1810,8 +1813,10 @@ def _cast_types(self, values, cast_type, column):
values = astype_nansafe(values, cast_type,
copy=True, skipna=True)
except ValueError:
raise ValueError("Unable to convert column %s to "
"type %s" % (column, cast_type))
raise ValueError(
"Unable to convert column {column} to type "
"{cast_type}".format(
column=column, cast_type=cast_type))
return values

def _do_date_conversions(self, names, data):
@@ -1874,7 +1879,7 @@ def __init__(self, src, **kwds):

if self.names is None:
if self.prefix:
self.names = ['%s%d' % (self.prefix, i)
self.names = ['{prefix}{i}'.format(prefix=self.prefix, i=i)
for i in range(self._reader.table_width)]
else:
self.names = lrange(self._reader.table_width)
@@ -2276,10 +2281,11 @@ def __init__(self, f, **kwds):
raise ValueError('Only length-1 decimal markers supported')

if self.thousands is None:
self.nonnum = re.compile('[^-^0-9^%s]+' % self.decimal)
self.nonnum = re.compile(
r'[^-^0-9^{decimal}]+'.format(decimal=self.decimal))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, does converting this to a raw string, when it wasn't one previously, affect the behavior at all? It might also be the root cause of your issue, though we technically drop Py2 support with v0.25 anyway.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, I want to put the original code back to clear the failure in CI.
Then I'll work out how to solve the problem.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change causes no problems :)

else:
self.nonnum = re.compile('[^-^0-9^%s^%s]+' % (self.thousands,
self.decimal))
self.nonnum = re.compile(r'[^-^0-9^{thousands}^{decimal}]+'.format(
thousands=self.thousands, decimal=self.decimal))

def _set_no_thousands_columns(self):
# Create a set of column ids that are not to be stripped of thousands
@@ -2518,8 +2524,8 @@ def _infer_columns(self):
except StopIteration:
if self.line_pos < hr:
raise ValueError(
'Passed header=%s but only %d lines in file'
% (hr, self.line_pos + 1))
'Passed header={hr} but only {pos} lines in '
'file'.format(hr=hr, pos=(self.line_pos + 1)))

# We have an empty file, so check
# if columns are provided. That will
@@ -2560,7 +2566,8 @@ def _infer_columns(self):

while cur_count > 0:
counts[col] = cur_count + 1
col = "%s.%d" % (col, cur_count)
col = '{column}.{count}'.format(
column=col, count=cur_count)
cur_count = counts[col]

this_columns[i] = col
@@ -2628,8 +2635,8 @@ def _infer_columns(self):

if not names:
if self.prefix:
columns = [['%s%d' % (self.prefix, i)
for i in range(ncols)]]
columns = [['{prefix}{idx}'.format(
prefix=self.prefix, idx=i) for i in range(ncols)]]
else:
columns = [lrange(ncols)]
columns = self._handle_usecols(columns, columns[0])
@@ -3056,8 +3063,9 @@ def _rows_to_cols(self, content):
content.append(l)

for row_num, actual_len in bad_lines:
msg = ('Expected %d fields in line %d, saw %d' %
(col_len, row_num + 1, actual_len))
msg = ('Expected {col_len} fields in line {line}, saw '
'{length}'.format(col_len=col_len, line=(row_num + 1),
length=actual_len))
if (self.delimiter and
len(self.delimiter) > 1 and
self.quoting != csv.QUOTE_NONE):
@@ -3228,8 +3236,9 @@ def _isindex(colspec):
new_name, col, old_names = _try_convert_dates(
converter, colspec, data_dict, orig_names)
if new_name in data_dict:
raise ValueError('New date column already in dict %s' %
new_name)
raise ValueError(
'New date column already in dict {name}'.format(
name=new_name))
new_data[new_name] = col
new_cols.append(new_name)
date_cols.update(old_names)
@@ -3238,8 +3247,8 @@ def _isindex(colspec):
# dict of new name to column list
for new_name, colspec in compat.iteritems(parse_spec):
if new_name in data_dict:
raise ValueError('Date column %s already in dict' %
new_name)
raise ValueError(
'Date column {name} already in dict'.format(name=new_name))

_, col, old_names = _try_convert_dates(converter, colspec,
data_dict, orig_names)
@@ -3418,7 +3427,7 @@ def _stringify_na_values(na_values):
# we are like 999 here
if v == int(v):
v = int(v)
result.append("%s.0" % v)
result.append("{value}.0".format(value=v))
result.append(str(v))

result.append(v)
@@ -3563,8 +3572,8 @@ def get_rows(self, infer_nrows, skiprows=None):

def detect_colspecs(self, infer_nrows=100, skiprows=None):
# Regex escape the delimiters
delimiters = ''.join(r'\%s' % x for x in self.delimiter)
pattern = re.compile('([^%s]+)' % delimiters)
delimiters = ''.join(r'\{}'.format(x) for x in self.delimiter)
pattern = re.compile('([^{}]+)'.format(delimiters))
rows = self.get_rows(infer_nrows, skiprows)
if not rows:
raise EmptyDataError("No rows from which to infer column width")