pandas-dev · arthurlw · Mar 4, 2025 · Mar 4, 2025 · Mar 4, 2025 · Mar 5, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -35,6 +35,7 @@ Other enhancements
 - Added missing :meth:`pandas.Series.info` to API reference (:issue:`60926`)
 - :class:`pandas.api.typing.NoDefault` is available for typing ``no_default``
 - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
+- :func:`json_normalize` now supports parsing JSON strings and bytes directly, eliminating the need for an intermediate ``apply(json.loads)`` step (:issue:`61006`)
 - :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`)
 - :func:`pandas.merge`, :meth:`DataFrame.merge` and :meth:`DataFrame.join` now support anti joins (``left_anti`` and ``right_anti``) in the ``how`` parameter (:issue:`42916`)
 - :func:`read_spss` now supports kwargs to be passed to pyreadstat (:issue:`56356`)

diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
@@ -11,6 +11,7 @@
     TYPE_CHECKING,
     Any,
     DefaultDict,
+    Union,
     overload,
 )
 
@@ -267,7 +268,7 @@ def _simple_json_normalize(
 
 
 def json_normalize(
-    data: dict | list[dict] | Series,
+    data: Union[dict[Any, Any], list[dict[Any, Any]], Series, str, bytes],
     record_path: str | list | None = None,
     meta: str | list[str | list[str]] | None = None,
     meta_prefix: str | None = None,
@@ -285,8 +286,8 @@ def json_normalize(
 
     Parameters
     ----------
-    data : dict, list of dicts, or Series of dicts
-        Unserialized JSON objects.
+    data : dict, list of dicts, Series of dicts/JSON str/bytes, or JSON str/bytes
+        Unserialized JSON objects, or JSON str/bytes to deserialize first.
     record_path : str or list of str, default None
         Path in each object to list of records. If not passed, data will be
         assumed to be an array of records.
@@ -434,7 +435,30 @@ def json_normalize(
     1          2
 
     Returns normalized data with columns prefixed with the given string.
+
+    >>> # JSON string input
+    >>> json_str = '{"id": 1, "name": {"first": "John", "last": "Doe"}}'
+    >>> pd.json_normalize(json_str)
+       id name.first name.last
+    0   1       John      Doe
     """
+    if isinstance(data, (str, bytes)):
+        import json
+
+        data = json.loads(data)
+
+    if isinstance(data, Series):
+        if data.empty:
+            return DataFrame()
+
+        sample = data.iloc[0]
+        if isinstance(sample, (str, bytes)):
+            import json
+
+            data = data.apply(json.loads)
+        index = data.index
+    else:
+        index = None
 
     def _pull_field(
         js: dict[str, Any], spec: list | str, extract_record: bool = False
@@ -485,11 +509,6 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list:
                 )
         return result
 
-    if isinstance(data, Series):
-        index = data.index
-    else:
-        index = None
-
     if isinstance(data, list) and not data:
         return DataFrame()
     elif isinstance(data, dict):

diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
@@ -162,21 +162,20 @@ def test_empty_array(self):
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize(
-        "data, record_path, exception_type",
+        "data, record_path, exception_type, expected",
         [
-            ([{"a": 0}, {"a": 1}], None, None),
-            ({"a": [{"a": 0}, {"a": 1}]}, "a", None),
-            ('{"a": [{"a": 0}, {"a": 1}]}', None, NotImplementedError),
-            (None, None, NotImplementedError),
+            ([{"a": 0}, {"a": 1}], None, None, DataFrame([0, 1], columns=["a"])),
+            ({"a": [{"a": 0}, {"a": 1}]}, "a", None, DataFrame([0, 1], columns=["a"])),
+            ('[{"a": 0}, {"a": 1}]', None, None, DataFrame([0, 1], columns=["a"])),
+            (None, None, NotImplementedError, None),
         ],
     )
-    def test_accepted_input(self, data, record_path, exception_type):
+    def test_accepted_input(self, data, record_path, exception_type, expected):
         if exception_type is not None:
             with pytest.raises(exception_type, match=""):
                 json_normalize(data, record_path=record_path)
         else:
             result = json_normalize(data, record_path=record_path)
-            expected = DataFrame([0, 1], columns=["a"])
             tm.assert_frame_equal(result, expected)
 
     def test_simple_normalize_with_separator(self, deep_nested):
@@ -569,6 +568,61 @@ def test_series_index(self, state_data):
         result = json_normalize(series, "counties")
         tm.assert_index_equal(result.index, idx.repeat([3, 2]))
 
+    def test_json_string_input(self):
+        # GH61006: Accept JSON as str input
+        json_str = '{"id": 1, "name": {"first": "John", "last": "Doe"}}'
+        result = json_normalize(json_str)
+        expected = DataFrame({"id": [1], "name.first": ["John"], "name.last": ["Doe"]})
+        tm.assert_frame_equal(result, expected)
+
+        json_array_str = """[
+            {"id": 1, "name": {"first": "John", "last": "Doe"}},
+            {"id": 2, "name": {"first": "Jane", "last": "Smith"}}
+        ]"""
+        result = json_normalize(json_array_str)
+        expected = DataFrame(
+            {
+                "id": [1, 2],
+                "name.first": ["John", "Jane"],
+                "name.last": ["Doe", "Smith"],
+            }
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_json_bytes_input(self):
+        # GH61006: Accept JSON as bytes input
+        json_bytes = b'{"id": 1, "name": {"first": "John", "last": "Doe"}}'
+        result = json_normalize(json_bytes)
+        expected = DataFrame({"id": [1], "name.first": ["John"], "name.last": ["Doe"]})
+        tm.assert_frame_equal(result, expected)
+
+    def test_series_json_string(self):
+        # GH61006: Accept Series of JSON str input
+        s = Series(['{"value": 0.0}', '{"value": 0.5}', '{"value": 1.0}'])
+        result = json_normalize(s)
+        expected = DataFrame({"value": [0.0, 0.5, 1.0]})
+        tm.assert_frame_equal(result, expected)
+
+    def test_series_json_string_with_index(self):
+        # GH61006: Accept Series of JSON str input and keep its index
+        s = Series(['{"value": 0.0}', '{"value": 0.5}'], index=["a", "b"])
+        result = json_normalize(s)
+        expected = DataFrame({"value": [0.0, 0.5]}, index=["a", "b"])
+        tm.assert_frame_equal(result, expected)
+
+    def test_invalid_json_string(self):
+        incomplete_json = '{"id": 1, "name": {"first": "John", "last": "Doe"'
+        with pytest.raises(json.JSONDecodeError):
+            json_normalize(incomplete_json)
+
+        non_json = "Hello World"
+        with pytest.raises(json.JSONDecodeError):
+            json_normalize(non_json)
+
+        malformed_json = '{"a": 1,}'
+        with pytest.raises(json.JSONDecodeError):
+            json_normalize(malformed_json)
+
 
 class TestNestedToRecord:
     def test_flat_stays_flat(self):