Skip to content

Commit 326107c

Browse files
committed
BUG: fixed json_normalize for subrecords with NoneTypes (#20030)
TST: additional coverage for the test cases from (#20030) DOC: added changes to whatsnew/v0.23.0.txt (#20030)
1 parent 670c2e4 commit 326107c

File tree

3 files changed

+55
-1
lines changed

3 files changed

+55
-1
lines changed

Diff for: doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -979,6 +979,7 @@ I/O
979979
- :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`, :issue:`9155`, :issue:`19900`)
980980
- Bug in :meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`)
981981
- Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`)
982+
- Bug in :meth:`pandas.io.json.json_normalize` where subrecords are not properly normalized if any subrecord values are ``None`` (:issue:`20030`)
982983

983984
Plotting
984985
^^^^^^^^

Diff for: pandas/io/json/normalize.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ def nested_to_record(ds, prefix="", sep=".", level=0):
8080
if level != 0: # so we skip copying for top level, common case
8181
v = new_d.pop(k)
8282
new_d[newkey] = v
83+
if v is None: # pop the key if the value is None
84+
new_d.pop(k)
8385
continue
8486
else:
8587
v = new_d.pop(k)
@@ -189,7 +191,8 @@ def _pull_field(js, spec):
189191
data = [data]
190192

191193
if record_path is None:
192-
if any(isinstance(x, dict) for x in compat.itervalues(data[0])):
194+
if any([[isinstance(x, dict)
195+
for x in compat.itervalues(y)] for y in data]):
193196
# naive normalization, this is idempotent for flat records
194197
# and potentially will inflate the data considerably for
195198
# deeply nested structures:

Diff for: pandas/tests/io/json/test_normalize.py

+50
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import pytest
22
import numpy as np
33
import json
4+
import math
45

56
import pandas.util.testing as tm
67
from pandas import compat, Index, DataFrame
@@ -54,6 +55,17 @@ def state_data():
5455
'state': 'Ohio'}]
5556

5657

58+
@pytest.fixture
59+
def author_missing_data():
60+
return [
61+
{'info': None},
62+
{'info':
63+
{'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
64+
'author_name':
65+
{'first': 'Jane', 'last_name': 'Doe'}
66+
}]
67+
68+
5769
class TestJSONNormalize(object):
5870

5971
def test_simple_records(self):
@@ -226,6 +238,21 @@ def test_non_ascii_key(self):
226238
result = json_normalize(json.loads(testjson))
227239
tm.assert_frame_equal(result, expected)
228240

241+
def test_missing_field(self, author_missing_data):
242+
result = json_normalize(author_missing_data)
243+
ex_data = [
244+
{'author_name.first': math.nan,
245+
'author_name.last_name': math.nan,
246+
'info.created_at': math.nan,
247+
'info.last_updated': math.nan},
248+
{'author_name.first': 'Jane',
249+
'author_name.last_name': 'Doe',
250+
'info.created_at': '11/08/1993',
251+
'info.last_updated': '26/05/2012'}
252+
]
253+
expected = DataFrame(ex_data)
254+
tm.assert_frame_equal(result, expected)
255+
229256

230257
class TestNestedToRecord(object):
231258

@@ -322,3 +349,26 @@ def test_json_normalize_errors(self):
322349
['general', 'trade_version']],
323350
errors='raise'
324351
)
352+
353+
def test_nonetype_dropping(self):
354+
data = [
355+
{'info': None,
356+
'author_name':
357+
{'first': 'Smith', 'last_name': 'Appleseed'}
358+
},
359+
{'info':
360+
{'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
361+
'author_name':
362+
{'first': 'Jane', 'last_name': 'Doe'}
363+
}
364+
]
365+
result = nested_to_record(data)
366+
expected = [
367+
{'author_name.first': 'Smith',
368+
'author_name.last_name': 'Appleseed'},
369+
{'author_name.first': 'Jane',
370+
'author_name.last_name': 'Doe',
371+
'info.created_at': '11/08/1993',
372+
'info.last_updated': '26/05/2012'}]
373+
374+
assert result == expected

0 commit comments

Comments
 (0)