Skip to content

Commit cd58f3b

Browse files
authored
Json fix normalize (#49920)
* added failing test
* fix + whatsnew
* Refactor for readability
* Better compat
1 parent 0d2c579 commit cd58f3b

File tree

3 files changed

+20
-5
lines changed

3 files changed

+20
-5
lines changed

Diff for: doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,7 @@ I/O
717717
- Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`)
718718
- Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`)
719719
- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
720+
- Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
720721
-
721722

722723
Period

Diff for: pandas/io/json/_normalize.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
defaultdict,
88
)
99
import copy
10+
import sys
1011
from typing import (
1112
Any,
1213
DefaultDict,
@@ -148,13 +149,18 @@ def _normalise_json(
148149
if isinstance(data, dict):
149150
for key, value in data.items():
150151
new_key = f"{key_string}{separator}{key}"
152+
153+
if not key_string:
154+
if sys.version_info < (3, 9):
155+
from pandas.util._str_methods import removeprefix
156+
157+
new_key = removeprefix(new_key, separator)
158+
else:
159+
new_key = new_key.removeprefix(separator)
160+
151161
_normalise_json(
152162
data=value,
153-
# to avoid adding the separator to the start of every key
154-
# GH#43831 avoid adding key if key_string blank
155-
key_string=new_key
156-
if new_key[: len(separator)] != separator
157-
else new_key[len(separator) :],
163+
key_string=new_key,
158164
normalized_dict=normalized_dict,
159165
separator=separator,
160166
)

Diff for: pandas/tests/io/json/test_normalize.py

+8
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,14 @@ def generator_data():
561561

562562
tm.assert_frame_equal(result, expected)
563563

564+
def test_top_column_with_leading_underscore(self):
565+
# 49861
566+
data = {"_id": {"a1": 10, "l2": {"l3": 0}}, "gg": 4}
567+
result = json_normalize(data, sep="_")
568+
expected = DataFrame([[4, 10, 0]], columns=["gg", "_id_a1", "_id_l2_l3"])
569+
570+
tm.assert_frame_equal(result, expected)
571+
564572

565573
class TestNestedToRecord:
566574
def test_flat_stays_flat(self):

0 commit comments

Comments
 (0)