Skip to content

Commit d4393b7

Browse files
bourbaki authored and Pingviinituutti committed
Proper boxing of scalars in DataFrame.to_dict (pandas-dev/pandas#23921)
Closes pandas-dev/pandas#23753
1 parent 8d66f2b commit d4393b7

File tree

3 files changed

+31
-21
lines changed

3 files changed

+31
-21
lines changed

doc/source/whatsnew/v0.24.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -1417,6 +1417,7 @@ MultiIndex
14171417
I/O
14181418
^^^
14191419

1420+
14201421
.. _whatsnew_0240.bug_fixes.nan_with_str_dtype:
14211422

14221423
Proper handling of `np.NaN` in a string data-typed column with the Python engine
@@ -1480,6 +1481,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
14801481
- Bug in :meth:`read_excel()` in which column names were not being properly converted to string sometimes in Python 2.x (:issue:`23874`)
14811482
- Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`18792`, :issue:`20480`)
14821483
- Bug in :meth:`read_excel()` in which ``usecols`` was not being validated for proper column names when passed in as a string (:issue:`20480`)
1484+
- Bug in :meth:`DataFrame.to_dict` in which the resulting dict contained non-Python scalars when the data was numeric (:issue:`23753`)
14831485
- :func:`DataFrame.to_string()`, :func:`DataFrame.to_html()`, :func:`DataFrame.to_latex()` will correctly format output when a string is passed as the ``float_format`` argument (:issue:`21625`, :issue:`22270`)
14841486

14851487
Plotting

pandas/core/frame.py

+12-10
Original file line numberDiff line numberDiff line change
@@ -1298,10 +1298,10 @@ def to_dict(self, orient='dict', into=dict):
12981298
12991299
>>> df.to_dict('split')
13001300
{'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
1301-
'data': [[1.0, 0.5], [2.0, 0.75]]}
1301+
'data': [[1, 0.5], [2, 0.75]]}
13021302
13031303
>>> df.to_dict('records')
1304-
[{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}]
1304+
[{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}]
13051305
13061306
>>> df.to_dict('index')
13071307
{'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}}
@@ -1317,8 +1317,8 @@ def to_dict(self, orient='dict', into=dict):
13171317
13181318
>>> dd = defaultdict(list)
13191319
>>> df.to_dict('records', into=dd)
1320-
[defaultdict(<class 'list'>, {'col1': 1.0, 'col2': 0.5}),
1321-
defaultdict(<class 'list'>, {'col1': 2.0, 'col2': 0.75})]
1320+
[defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}),
1321+
defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})]
13221322
"""
13231323
if not self.columns.is_unique:
13241324
warnings.warn("DataFrame columns are not unique, some "
@@ -1334,16 +1334,18 @@ def to_dict(self, orient='dict', into=dict):
13341334
elif orient.lower().startswith('sp'):
13351335
return into_c((('index', self.index.tolist()),
13361336
('columns', self.columns.tolist()),
1337-
('data', lib.map_infer(self.values.ravel(),
1338-
com.maybe_box_datetimelike)
1339-
.reshape(self.values.shape).tolist())))
1337+
('data', [
1338+
list(map(com.maybe_box_datetimelike, t))
1339+
for t in self.itertuples(index=False)]
1340+
)))
13401341
elif orient.lower().startswith('s'):
13411342
return into_c((k, com.maybe_box_datetimelike(v))
13421343
for k, v in compat.iteritems(self))
13431344
elif orient.lower().startswith('r'):
1344-
return [into_c((k, com.maybe_box_datetimelike(v))
1345-
for k, v in zip(self.columns, np.atleast_1d(row)))
1346-
for row in self.values]
1345+
return [
1346+
into_c((k, com.maybe_box_datetimelike(v))
1347+
for k, v in compat.iteritems(row._asdict()))
1348+
for row in self.itertuples(index=False)]
13471349
elif orient.lower().startswith('i'):
13481350
if not self.index.is_unique:
13491351
raise ValueError(

pandas/tests/frame/test_convert_to.py

+17-11
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def test_to_records_index_name(self):
150150
def test_to_records_with_unicode_index(self):
151151
# GH13172
152152
# unicode_literals conflict with to_records
153-
result = DataFrame([{u'a': u'x', u'b': 'y'}]).set_index(u'a')\
153+
result = DataFrame([{u'a': u'x', u'b': 'y'}]).set_index(u'a') \
154154
.to_records()
155155
expected = np.rec.array([('x', 'y')], dtype=[('a', 'O'), ('b', 'O')])
156156
tm.assert_almost_equal(result, expected)
@@ -281,17 +281,23 @@ def test_to_records_datetimeindex_with_tz(self, tz):
281281
# both converted to UTC, so they are equal
282282
tm.assert_numpy_array_equal(result, expected)
283283

284-
def test_to_dict_box_scalars(self):
285-
# 14216
284+
# orient - orient argument to to_dict function
285+
# item_getter - function for extracting value from
286+
# the resulting dict using column name and index
287+
@pytest.mark.parametrize('orient,item_getter', [
288+
('dict', lambda d, col, idx: d[col][idx]),
289+
('records', lambda d, col, idx: d[idx][col]),
290+
('list', lambda d, col, idx: d[col][idx]),
291+
('split', lambda d, col, idx: d['data'][idx][d['columns'].index(col)]),
292+
('index', lambda d, col, idx: d[idx][col])
293+
])
294+
def test_to_dict_box_scalars(self, orient, item_getter):
295+
# 14216, 23753
286296
# make sure that we are boxing properly
287-
d = {'a': [1], 'b': ['b']}
288-
289-
result = DataFrame(d).to_dict()
290-
assert isinstance(list(result['a'])[0], (int, long))
291-
assert isinstance(list(result['b'])[0], (int, long))
292-
293-
result = DataFrame(d).to_dict(orient='records')
294-
assert isinstance(result[0]['a'], (int, long))
297+
df = DataFrame({'a': [1, 2], 'b': [.1, .2]})
298+
result = df.to_dict(orient=orient)
299+
assert isinstance(item_getter(result, 'a', 0), (int, long))
300+
assert isinstance(item_getter(result, 'b', 0), float)
295301

296302
def test_frame_to_dict_tz(self):
297303
# GH18372 When converting to dict with orient='records' columns of

0 commit comments

Comments
 (0)