Skip to content

CLN: default for tupleize_cols is now False for both to_csv and read_csv. Fair warning in 0.12 (GH3604) #4797

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 10, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ They can take a number of arguments:
time and lower memory usage.
- ``mangle_dupe_cols``: boolean, default True, then duplicate columns will be specified
as 'X.0'...'X.N', rather than 'X'...'X'
- ``tupleize_cols``: boolean, default True, if False, convert a list of tuples
- ``tupleize_cols``: boolean, default False, if False, convert a list of tuples
to a multi-index of columns, otherwise, leave the column index as a list of tuples

.. ipython:: python
Expand Down Expand Up @@ -860,19 +860,16 @@ Reading columns with a ``MultiIndex``

By specifying list of row locations for the ``header`` argument, you
can read in a ``MultiIndex`` for the columns. Specifying non-consecutive
rows will skip the interveaning rows.
rows will skip the intervening rows. In order to have the pre-0.13 behavior
of tupleizing columns, specify ``tupleize_cols=True``.

.. ipython:: python

from pandas.util.testing import makeCustomDataframe as mkdf
df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
df.to_csv('mi.csv',tupleize_cols=False)
df.to_csv('mi.csv')
print open('mi.csv').read()
pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1],tupleize_cols=False)

Note: The default behavior in 0.12 remains unchanged (``tupleize_cols=True``) from prior versions,
but starting with 0.13, the default *to* write and read multi-index columns will be in the new
format (``tupleize_cols=False``)
pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1])

Note: If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it
with ``df.to_csv(..., index=False)``), then any ``names`` on the columns index will be *lost*.
Expand Down Expand Up @@ -966,7 +963,7 @@ function takes a number of arguments. Only the first is required.
- ``sep`` : Field delimiter for the output file (default ",")
- ``encoding``: a string representing the encoding to use if the contents are
non-ascii, for python versions prior to 3
- ``tupleize_cols``: boolean, default True, if False, write as a list of tuples,
- ``tupleize_cols``: boolean, default False, if True, write as a list of tuples,
otherwise write in an expanded line format suitable for ``read_csv``

Writing a formatted string
Expand Down
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ API Changes
a list can be passed to ``to_replace`` (:issue:`4743`).
- provide automatic dtype conversions on _reduce operations (:issue:`3371`)
- exclude non-numerics if mixed types with datelike in _reduce operations (:issue:`3371`)
- default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. Fair warning in 0.12 (:issue:`3604`)

Internal Refactoring
~~~~~~~~~~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
cols=None, header=True, index=True, index_label=None,
mode='w', nanRep=None, encoding=None, quoting=None,
line_terminator='\n', chunksize=None, engine=None,
tupleize_cols=True, quotechar='"'):
tupleize_cols=False, quotechar='"'):

self.engine = engine # remove for 0.13
self.obj = obj
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1191,7 +1191,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0,
is used. Different default from read_table
parse_dates : boolean, default True
Parse dates. Different default from read_table
tupleize_cols : boolean, default True
tupleize_cols : boolean, default False
write multi_index columns as a list of tuples (if True)
or in the new (expanded) format (if False)

Expand All @@ -1208,7 +1208,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0,
from pandas.io.parsers import read_table
return read_table(path, header=header, sep=sep,
parse_dates=parse_dates, index_col=index_col,
encoding=encoding, tupleize_cols=False)
encoding=encoding, tupleize_cols=tupleize_cols)

def to_sparse(self, fill_value=None, kind='block'):
"""
Expand Down Expand Up @@ -1291,7 +1291,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
cols=None, header=True, index=True, index_label=None,
mode='w', nanRep=None, encoding=None, quoting=None,
line_terminator='\n', chunksize=None,
tupleize_cols=True, **kwds):
tupleize_cols=False, **kwds):
r"""Write DataFrame to a comma-separated values (csv) file

Parameters
Expand Down Expand Up @@ -1331,7 +1331,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
defaults to csv.QUOTE_MINIMAL
chunksize : int or None
rows to write at a time
tupleize_cols : boolean, default True
tupleize_cols : boolean, default False
write multi_index columns as a list of tuples (if True)
or in the new (expanded) format (if False)
"""
Expand Down
6 changes: 3 additions & 3 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def _read(filepath_or_buffer, kwds):
'squeeze': False,
'compression': None,
'mangle_dupe_cols': True,
'tupleize_cols':True,
'tupleize_cols':False,
}


Expand Down Expand Up @@ -336,7 +336,7 @@ def parser_f(filepath_or_buffer,
encoding=None,
squeeze=False,
mangle_dupe_cols=True,
tupleize_cols=True,
tupleize_cols=False,
):

# Alias sep -> delimiter.
Expand Down Expand Up @@ -656,7 +656,7 @@ def __init__(self, kwds):
self.na_fvalues = kwds.get('na_fvalues')
self.true_values = kwds.get('true_values')
self.false_values = kwds.get('false_values')
self.tupleize_cols = kwds.get('tupleize_cols',True)
self.tupleize_cols = kwds.get('tupleize_cols',False)

self._date_conv = _make_date_converter(date_parser=self.date_parser,
dayfirst=self.dayfirst)
Expand Down
2 changes: 1 addition & 1 deletion pandas/parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ cdef class TextReader:
skip_footer=0,
verbose=False,
mangle_dupe_cols=True,
tupleize_cols=True):
tupleize_cols=False):

self.parser = parser_new()
self.parser.chunksize = tokenize_chunksize
Expand Down