Skip to content

Commit e9de5f3

Browse files
albertvillanova and jreback
authored and committed
BUG: Fix index type casting in read_json with orient='table' and float index (#25433) (#25434)
1 parent f04342a commit e9de5f3

File tree

4 files changed

+45
-19
lines changed

4 files changed

+45
-19
lines changed

Diff for: doc/source/whatsnew/v0.25.0.rst

+2
Original file line number | Diff line number | Diff line change
@@ -210,6 +210,8 @@ I/O
210210

211211
- Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`)
212212
- Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`)
213+
- Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`)
214+
- Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`)
213215
-
214216
-
215217
-

Diff for: pandas/io/json/json.py

+20 -8
Original file line number | Diff line number | Diff line change
@@ -227,7 +227,7 @@ def _write(self, obj, orient, double_precision, ensure_ascii,
227227

228228

229229
def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None,
230-
convert_axes=True, convert_dates=True, keep_default_dates=True,
230+
convert_axes=None, convert_dates=True, keep_default_dates=True,
231231
numpy=False, precise_float=False, date_unit=None, encoding=None,
232232
lines=False, chunksize=None, compression='infer'):
233233
"""
@@ -277,18 +277,25 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None,
277277
'table' as an allowed value for the ``orient`` argument
278278
279279
typ : type of object to recover (series or frame), default 'frame'
280-
dtype : boolean or dict, default True
280+
dtype : boolean or dict, default None
281281
If True, infer dtypes; if a dict of column to dtype, then use those;
282282
if False, then don't infer dtypes at all, applies only to the data.
283283
284-
Not applicable with ``orient='table'``.
284+
For all ``orient`` values except ``'table'``, default is True.
285285
286-
.. versionchanged:: 0.25
286+
.. versionchanged:: 0.25.0
287287
288-
Not applicable with ``orient='table'``.
288+
Not applicable for ``orient='table'``.
289289
290-
convert_axes : boolean, default True
290+
convert_axes : boolean, default None
291291
Try to convert the axes to the proper dtypes.
292+
293+
For all ``orient`` values except ``'table'``, default is True.
294+
295+
.. versionchanged:: 0.25.0
296+
297+
Not applicable for ``orient='table'``.
298+
292299
convert_dates : boolean, default True
293300
List of columns to parse for dates; If True, then try to parse
294301
datelike columns default is True; a column label is datelike if
@@ -417,8 +424,13 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None,
417424

418425
if orient == 'table' and dtype:
419426
raise ValueError("cannot pass both dtype and orient='table'")
427+
if orient == 'table' and convert_axes:
428+
raise ValueError("cannot pass both convert_axes and orient='table'")
420429

421-
dtype = orient != 'table' if dtype is None else dtype
430+
if dtype is None and orient != 'table':
431+
dtype = True
432+
if convert_axes is None and orient != 'table':
433+
convert_axes = True
422434

423435
compression = _infer_compression(path_or_buf, compression)
424436
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
@@ -692,7 +704,7 @@ def _try_convert_data(self, name, data, use_dtypes=True,
692704

693705
# don't try to coerce, unless a force conversion
694706
if use_dtypes:
695-
if self.dtype is False:
707+
if not self.dtype:
696708
return data, False
697709
elif self.dtype is True:
698710
pass

Diff for: pandas/tests/io/json/test_json_table_schema.py

+2 -9
Original file line number | Diff line number | Diff line change
@@ -564,17 +564,10 @@ def test_multiindex(self, index_names):
564564
result = pd.read_json(out, orient="table")
565565
tm.assert_frame_equal(df, result)
566566

567-
@pytest.mark.parametrize("strict_check", [
568-
pytest.param(True, marks=pytest.mark.xfail),
569-
False
570-
])
571-
def test_empty_frame_roundtrip(self, strict_check):
567+
def test_empty_frame_roundtrip(self):
572568
# GH 21287
573569
df = pd.DataFrame([], columns=['a', 'b', 'c'])
574570
expected = df.copy()
575571
out = df.to_json(orient='table')
576572
result = pd.read_json(out, orient='table')
577-
# TODO: When DF coercion issue (#21345) is resolved tighten type checks
578-
tm.assert_frame_equal(expected, result,
579-
check_dtype=strict_check,
580-
check_index_type=strict_check)
573+
tm.assert_frame_equal(expected, result)

Diff for: pandas/tests/io/json/test_pandas.py

+21 -2
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ def _check_orient(df, orient, dtype=None, numpy=False,
194194
else:
195195
unser = unser.sort_index()
196196

197-
if dtype is False:
197+
if not dtype:
198198
check_dtype = False
199199

200200
if not convert_axes and df.index.dtype.type == np.datetime64:
@@ -1202,6 +1202,16 @@ def test_data_frame_size_after_to_json(self):
12021202

12031203
assert size_before == size_after
12041204

1205+
@pytest.mark.parametrize('index', [None, [1, 2], [1., 2.], ['a', 'b'],
1206+
['1', '2'], ['1.', '2.']])
1207+
@pytest.mark.parametrize('columns', [['a', 'b'], ['1', '2'], ['1.', '2.']])
1208+
def test_from_json_to_json_table_index_and_columns(self, index, columns):
1209+
# GH25433 GH25435
1210+
expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns)
1211+
dfjson = expected.to_json(orient='table')
1212+
result = pd.read_json(dfjson, orient='table')
1213+
assert_frame_equal(result, expected)
1214+
12051215
def test_from_json_to_json_table_dtypes(self):
12061216
# GH21345
12071217
expected = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']})
@@ -1214,9 +1224,18 @@ def test_read_json_table_dtype_raises(self, dtype):
12141224
# GH21345
12151225
df = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']})
12161226
dfjson = df.to_json(orient='table')
1217-
with pytest.raises(ValueError):
1227+
msg = "cannot pass both dtype and orient='table'"
1228+
with pytest.raises(ValueError, match=msg):
12181229
pd.read_json(dfjson, orient='table', dtype=dtype)
12191230

1231+
def test_read_json_table_convert_axes_raises(self):
1232+
# GH25433 GH25435
1233+
df = DataFrame([[1, 2], [3, 4]], index=[1., 2.], columns=['1.', '2.'])
1234+
dfjson = df.to_json(orient='table')
1235+
msg = "cannot pass both convert_axes and orient='table'"
1236+
with pytest.raises(ValueError, match=msg):
1237+
pd.read_json(dfjson, orient='table', convert_axes=True)
1238+
12201239
@pytest.mark.parametrize('data, expected', [
12211240
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
12221241
{'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),

0 commit comments

Comments
 (0)