pandas-dev · WillAyd · Sep 18, 2019 · May 19, 2019 · May 19, 2019 · May 19, 2019
diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h
@@ -244,6 +244,10 @@ typedef struct __JSONObjectEncoder {
   If true, '<', '>', and '&' characters will be encoded as \u003c, \u003e, and \u0026, respectively. If false, no special encoding will be used. */
   int encodeHTMLChars;
 
+  /*
+  Number of spaces emitted per nesting level when pretty-printing. Values <= 0 disable indentation and the associated newlines. */
+  int indent;
+
   /*
   Set to an error message if error occurred */
   const char *errorMsg;

diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c
@@ -728,6 +728,20 @@ FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin,
     while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux;
 }
 
+/*
+Emits the line break that precedes an indented line. Nothing is written
+unless indentation is enabled (enc->indent > 0). No buffer space is
+reserved here. */
+void Buffer_AppendIndentNewlineUnchecked(JSONObjectEncoder *enc)
+{
+  if (enc->indent <= 0) {
+    return;
+  }
+
+  Buffer_AppendCharUnchecked(enc, '\n');
+}
+
+/*
+Appends `value` levels of indentation, each `enc->indent` spaces wide, to
+the encoder's output. Nothing is written when `enc->indent` or `value`
+is <= 0.
+
+The number of bytes written grows with both the configured indent width and
+the nesting depth, so a fixed-size reservation made elsewhere cannot be
+relied on to cover it. Space for each level is therefore reserved here,
+immediately before the unchecked writes for that level. Callers remain
+responsible for reserving room for any characters they append themselves. */
+void Buffer_AppendIndentUnchecked(JSONObjectEncoder *enc, JSINT32 value)
+{
+  int i;
+
+  if (enc->indent <= 0) {
+    return;
+  }
+
+  while (value-- > 0) {
+    /* One level at a time keeps the request bounded by enc->indent and
+       avoids computing indent * depth. */
+    Buffer_Reserve(enc, enc->indent);
+    for (i = 0; i < enc->indent; i++) {
+      Buffer_AppendCharUnchecked(enc, ' ');
+    }
+  }
+}
+
 void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) {
     char *wstr;
     JSUINT32 uvalue = (value < 0) ? -value : value;
@@ -972,6 +986,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
                 iterObj = enc->iterGetValue(obj, &tc);
 
                 enc->level++;
+		Buffer_AppendIndentUnchecked (enc, enc->level);
                 encode(iterObj, enc, NULL, 0);
                 count++;
             }
@@ -987,13 +1002,17 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
             enc->iterBegin(obj, &tc);
 
             Buffer_AppendCharUnchecked(enc, '{');
+	    Buffer_AppendIndentNewlineUnchecked (enc);
+	    Buffer_AppendIndentUnchecked (enc, enc->level);
 
             while (enc->iterNext(obj, &tc)) {
                 if (count > 0) {
                     Buffer_AppendCharUnchecked(enc, ',');
 #ifndef JSON_NO_EXTRA_WHITESPACE
                     Buffer_AppendCharUnchecked(enc, ' ');
 #endif
+		    Buffer_AppendIndentNewlineUnchecked (enc);
+		    Buffer_AppendIndentUnchecked (enc, enc->level);
                 }
 
                 iterObj = enc->iterGetValue(obj, &tc);
@@ -1005,7 +1024,8 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
             }
 
             enc->iterEnd(obj, &tc);
-            Buffer_Reserve(enc, 2);
+	    Buffer_AppendIndentNewlineUnchecked (enc);
+	    Buffer_AppendIndentUnchecked (enc, enc->level);
             Buffer_AppendCharUnchecked(enc, '}');
             break;
         }

diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -2381,7 +2381,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
     static char *kwlist[] = {
         "obj",    "ensure_ascii", "double_precision", "encode_html_chars",
         "orient", "date_unit",    "iso_dates",        "default_handler",
-        NULL};
+	"indent", NULL};
 
     char buffer[65536];
     char *ret;
@@ -2394,6 +2394,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
     char *sdateFormat = NULL;
     PyObject *oisoDates = 0;
     PyObject *odefHandler = 0;
+    int indent = 0;
 
     PyObjectEncoder pyEncoder = {{
         Object_beginTypeContext,
@@ -2415,6 +2416,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
         idoublePrecision,
         1,  // forceAscii
         0,  // encodeHTMLChars
+	0,  // indent
     }};
     JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder;
 
@@ -2439,10 +2441,10 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
 
     PRINTMARK();
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOO", kwlist, &oinput,
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOOi", kwlist, &oinput,
                                      &oensureAscii, &idoublePrecision,
                                      &oencodeHTMLChars, &sOrient, &sdateFormat,
-                                     &oisoDates, &odefHandler)) {
+                                     &oisoDates, &odefHandler, &indent)) {
         return NULL;
     }
 
@@ -2508,6 +2510,8 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) {
         pyEncoder.defaultHandler = odefHandler;
     }
 
+    encoder->indent = indent;
+
     pyEncoder.originalOutputFormat = pyEncoder.outputFormat;
     PRINTMARK();
     ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer));

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2249,6 +2249,7 @@ def to_json(
         lines=False,
         compression="infer",
         index=True,
+        indent=0,
     ):
         """
         Convert the object to a JSON string.
@@ -2329,6 +2330,11 @@ def to_json(
 
             .. versionadded:: 0.23.0
 
+        indent : int, default 0
+            Length of whitespace used to indent each record.
+
+            .. versionadded:: 1.0.0
+
         Returns
         -------
         None or str
@@ -2401,6 +2407,7 @@ def to_json(
             lines=lines,
             compression=compression,
             index=index,
+            indent=indent,
         )
 
     def to_hdf(self, path_or_buf, key, **kwargs):

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
@@ -45,6 +45,7 @@ def to_json(
     lines=False,
     compression="infer",
     index=True,
+    indent=0,
 ):
 
     if not index and orient not in ["split", "table"]:
@@ -76,6 +77,7 @@ def to_json(
         date_unit=date_unit,
         default_handler=default_handler,
         index=index,
+        indent=indent,
     ).write()
 
     if lines:
@@ -94,6 +96,7 @@ def to_json(
 
 
 class Writer:
+
     def __init__(
         self,
         obj,
@@ -104,6 +107,7 @@ def __init__(
         date_unit,
         index,
         default_handler=None,
+        indent=0,
     ):
         self.obj = obj
 
@@ -117,6 +121,7 @@ def __init__(
         self.date_unit = date_unit
         self.default_handler = default_handler
         self.index = index
+        self.indent = indent
 
         self.is_copy = None
         self._format_axes()
@@ -133,6 +138,7 @@ def write(self):
             self.date_unit,
             self.date_format == "iso",
             self.default_handler,
+            self.indent,
         )
 
     def _write(
@@ -144,6 +150,7 @@ def _write(
         date_unit,
         iso_dates,
         default_handler,
+        indent,
     ):
         return dumps(
             obj,
@@ -153,6 +160,7 @@ def _write(
             date_unit=date_unit,
             iso_dates=iso_dates,
             default_handler=default_handler,
+            indent=indent,
         )
 
 
@@ -175,6 +183,7 @@ def _write(
         date_unit,
         iso_dates,
         default_handler,
+        indent,
     ):
         if not self.index and orient == "split":
             obj = {"name": obj.name, "data": obj.values}
@@ -186,6 +195,7 @@ def _write(
             date_unit,
             iso_dates,
             default_handler,
+            indent,
         )
 
 
@@ -220,6 +230,7 @@ def _write(
         date_unit,
         iso_dates,
         default_handler,
+        indent,
     ):
         if not self.index and orient == "split":
             obj = obj.to_dict(orient="split")
@@ -232,6 +243,7 @@ def _write(
             date_unit,
             iso_dates,
             default_handler,
+            indent,
         )
 
 
@@ -248,13 +260,15 @@ def __init__(
         date_unit,
         index,
         default_handler=None,
+        indent=0,
     ):
         """
         Adds a `schema` attribute with the Table Schema, resets
         the index (can't do in caller, because the schema inference needs
         to know what the index is, forces orient to records, and forces
         date_format to 'iso'.
         """
+
         super().__init__(
             obj,
             orient,
@@ -264,6 +278,7 @@ def __init__(
             date_unit,
             index,
             default_handler=default_handler,
+            indent=indent,
         )
 
         if date_format != "iso":
@@ -315,6 +330,7 @@ def _write(
         date_unit,
         iso_dates,
         default_handler,
+        indent,
     ):
         data = super()._write(
             obj,
@@ -324,6 +340,7 @@ def _write(
             date_unit,
             iso_dates,
             default_handler,
+            indent
         )
         serialized = '{{"schema": {schema}, "data": {data}}}'.format(
             schema=dumps(self.schema), data=data

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -1647,3 +1647,25 @@ def test_tuple_labels(self, orient, expected):
         df = pd.DataFrame([[1]], index=[("a", "b")], columns=[("c", "d")])
         result = df.to_json(orient=orient)
         assert result == expected
+
+    @pytest.mark.parametrize("indent", [1, 2, 4])
+    def test_to_json_indent(self, indent):
+        # GH 12004
+        df = pd.DataFrame([
+            ['foo', 'bar'], ['baz', 'qux']
+        ], columns=['a', 'b'])
+
+        result = df.to_json(indent=indent)
+        spaces = " " * indent
+        expected = """{{
+"a":{{
+{spaces}"0":"foo",
+{spaces}"1":"baz"
+{spaces}}},
+"b":{{
+{spaces}"0":"bar",
+{spaces}"1":"qux"
+{spaces}}}
+}}""".format(spaces=spaces)
+
+        assert result == expected