pandas-dev · TomAugspurger · Jan 9, 2020 · Dec 31, 2019 · Jan 2, 2020 · Jan 2, 2020
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -614,6 +614,7 @@ Deprecations
 - The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`).
 - ``pandas.SparseArray`` has been deprecated.  Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`)
 - The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
+- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on an :class:`Index` is deprecated and will be removed in a future version. Convert to a NumPy array before indexing instead (:issue:`30588`)
 
 **Selecting Columns from a Grouped DataFrame**
 
@@ -1075,6 +1076,7 @@ Other
 - Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`)
 - Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`)
 - Handle nested NumPy ``object`` arrays in :func:`testing.assert_series_equal` for ExtensionArray implementations (:issue:`30841`)
+- Bug in :class:`Index` constructor incorrectly allowing multi-dimensional input arrays (:issue:`13601`, :issue:`27125`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -2007,9 +2007,10 @@ def __getitem__(self, key):
         if com.is_bool_indexer(key):
             key = check_bool_array_indexer(self, key)
 
-        return self._constructor(
-            values=self._codes[key], dtype=self.dtype, fastpath=True
-        )
+        result = self._codes[key]
+        if result.ndim > 1:
+            return result
+        return self._constructor(result, dtype=self.dtype, fastpath=True)
 
     def __setitem__(self, key, value):
         """

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -543,8 +543,6 @@ def __getitem__(self, key):
         if result.ndim > 1:
             # To support MPL which performs slicing with 2 dim
             # even though it only has 1 dim by definition
-            if is_period:
-                return self._simple_new(result, dtype=self.dtype, freq=freq)
             return result
 
         return self._simple_new(result, dtype=self.dtype, freq=freq)

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -500,8 +500,11 @@ def __getitem__(self, value):
 
         # scalar
         if not isinstance(left, ABCIndexClass):
-            if isna(left):
+            if is_scalar(left) and isna(left):
                 return self._fill_value
+            if np.ndim(left) > 1:
+                # GH#30588 multi-dimensional indexer disallowed
+                raise ValueError("multi-dimensional indexing not allowed")
             return Interval(left, right, self.closed)
 
         return self._shallow_copy(left, right)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -393,6 +393,9 @@ def __new__(
 
             if kwargs:
                 raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
+            if subarr.ndim > 1:
+                # GH#13601, GH#20285, GH#27125
+                raise ValueError("Index data must be 1-dimensional")
             return cls._simple_new(subarr, name, **kwargs)
 
         elif hasattr(data, "__array__"):
@@ -608,7 +611,7 @@ def __array_wrap__(self, result, context=None):
         Gets called after a ufunc.
         """
         result = lib.item_from_zerodim(result)
-        if is_bool_dtype(result) or lib.is_scalar(result):
+        if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
             return result
 
         attrs = self._get_attributes_dict()
@@ -687,11 +690,10 @@ def astype(self, dtype, copy=True):
             return Index(np.asarray(self), dtype=dtype, copy=copy)
 
         try:
-            return Index(
-                self.values.astype(dtype, copy=copy), name=self.name, dtype=dtype
-            )
+            casted = self.values.astype(dtype, copy=copy)
         except (TypeError, ValueError):
             raise TypeError(f"Cannot cast {type(self).__name__} to dtype {dtype}")
+        return Index(casted, name=self.name, dtype=dtype)
 
     _index_shared_docs[
         "take"
@@ -3902,6 +3904,9 @@ def __getitem__(self, key):
         key = com.values_from_object(key)
         result = getitem(key)
         if not is_scalar(result):
+            if np.ndim(result) > 1:
+                deprecate_ndim_indexing(result)
+                return result
             return promote(result)
         else:
             return result
@@ -5533,3 +5538,17 @@ def _try_convert_to_int_array(
         pass
 
     raise ValueError
+
+
+def deprecate_ndim_indexing(result):
+    if np.ndim(result) > 1:
+        # GH#27125 an indexer like idx[:, None] adds a dimension, but an
+        #  Index cannot hold >1-dim data, so __getitem__ returns an ndarray.
+        # GH#30588 that ndarray fallback is deprecated; warn about it here.
+        warnings.warn(
+            "Support for multi-dimensional indexing (e.g. `index[:, None]`) "
+            "on an Index is deprecated and will be removed in a future "
+            "version.  Convert to a numpy array before indexing instead.",
+            DeprecationWarning,
+            stacklevel=3,  # intended to point at the caller of __getitem__
+        )
diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py
@@ -12,7 +12,7 @@
 from pandas.core.dtypes.generic import ABCSeries
 
 from pandas.core.arrays import ExtensionArray
-from pandas.core.indexes.base import Index
+from pandas.core.indexes.base import Index, deprecate_ndim_indexing
 from pandas.core.ops import get_op_result_name
 
 
@@ -178,6 +178,7 @@ def __getitem__(self, key):
             return type(self)(result, name=self.name)
 
         # Includes cases where we get a 2D ndarray back for MPL compat
+        deprecate_ndim_indexing(result)
         return result
 
     def __iter__(self):

diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
@@ -73,6 +73,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None):
         else:
             subarr = data
 
+        if subarr.ndim > 1:
+            # GH#13601, GH#20285, GH#27125
+            raise ValueError("Index data must be 1-dimensional")
+
         name = maybe_extract_name(name, data, cls)
         return cls._simple_new(subarr, name=name)
 

diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py
@@ -975,3 +975,9 @@ def test_engine_type(self, dtype, engine_type):
             ci.values._codes = ci.values._codes.astype("int64")
         assert np.issubdtype(ci.codes.dtype, dtype)
         assert isinstance(ci._engine, engine_type)
+
+    def test_getitem_2d_deprecated(self):
+        # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
+        idx = self.create_index()
+        with pytest.raises(ValueError, match="cannot mask with array containing NA"):
+            idx[:, None]
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
@@ -875,3 +875,11 @@ def test_engine_reference_cycle(self):
         nrefs_pre = len(gc.get_referrers(index))
         index._engine
         assert len(gc.get_referrers(index)) == nrefs_pre
+
+    def test_getitem_2d_deprecated(self):
+        # GH#30588
+        idx = self.create_index()
+        with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
+            res = idx[:, None]
+
+        assert isinstance(res, np.ndarray), type(res)
diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py
@@ -86,7 +86,9 @@ def test_dti_business_getitem(self):
 
     def test_dti_business_getitem_matplotlib_hackaround(self):
         rng = pd.bdate_range(START, END)
-        values = rng[:, None]
+        with tm.assert_produces_warning(DeprecationWarning):
+            # GH#30588 multi-dimensional indexing deprecated
+            values = rng[:, None]
         expected = rng.values[:, None]
         tm.assert_numpy_array_equal(values, expected)
 
@@ -110,7 +112,9 @@ def test_dti_custom_getitem(self):
 
     def test_dti_custom_getitem_matplotlib_hackaround(self):
         rng = pd.bdate_range(START, END, freq="C")
-        values = rng[:, None]
+        with tm.assert_produces_warning(DeprecationWarning):
+            # GH#30588 multi-dimensional indexing deprecated
+            values = rng[:, None]
         expected = rng.values[:, None]
         tm.assert_numpy_array_equal(values, expected)
 

diff --git a/pandas/tests/indexes/interval/test_base.py b/pandas/tests/indexes/interval/test_base.py
@@ -79,3 +79,10 @@ def test_where(self, closed, klass):
         expected = IntervalIndex([np.nan] + idx[1:].tolist())
         result = idx.where(klass(cond))
         tm.assert_index_equal(result, expected)
+
+    def test_getitem_2d_deprecated(self):
+        # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
+        idx = self.create_index()
+        with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
+            with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
+                idx[:, None]
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -71,7 +71,9 @@ def test_can_hold_identifiers(self):
 
     @pytest.mark.parametrize("index", ["datetime"], indirect=True)
     def test_new_axis(self, index):
-        new_index = index[None, :]
+        with tm.assert_produces_warning(DeprecationWarning):
+            # GH#30588 multi-dimensional indexing deprecated
+            new_index = index[None, :]
         assert new_index.ndim == 2
         assert isinstance(new_index, np.ndarray)
 
@@ -2784,9 +2786,35 @@ def test_shape_of_invalid_index():
     # about this). However, as long as this is not solved in general,this test ensures
     # that the returned shape is consistent with this underlying array for
     # compat with matplotlib (see https://github.com/pandas-dev/pandas/issues/27775)
-    a = np.arange(8).reshape(2, 2, 2)
-    idx = pd.Index(a)
-    assert idx.shape == a.shape
-
     idx = pd.Index([0, 1, 2, 3])
-    assert idx[:, None].shape == (4, 1)
+    with tm.assert_produces_warning(DeprecationWarning):
+        # GH#30588 multi-dimensional indexing deprecated
+        assert idx[:, None].shape == (4, 1)
+
+
+def test_validate_1d_input():
+    # GH#27125 check that we do not have >1-dimensional input
+    msg = "Index data must be 1-dimensional"
+
+    arr = np.arange(8).reshape(2, 2, 2)
+    with pytest.raises(ValueError, match=msg):
+        pd.Index(arr)
+
+    with pytest.raises(ValueError, match=msg):
+        pd.Float64Index(arr.astype(np.float64))
+
+    with pytest.raises(ValueError, match=msg):
+        pd.Int64Index(arr.astype(np.int64))
+
+    with pytest.raises(ValueError, match=msg):
+        pd.UInt64Index(arr.astype(np.uint64))
+
+    df = pd.DataFrame(arr.reshape(4, 2))
+    with pytest.raises(ValueError, match=msg):
+        pd.Index(df)
+
+    # GH#13601 trying to assign a multi-dimensional array to an index is not
+    #  allowed
+    ser = pd.Series(0, range(4))
+    with pytest.raises(ValueError, match=msg):
+        ser.index = np.array([[2, 3]] * 4)
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -83,12 +83,9 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id):
         msg = (
             r"Buffer has wrong number of dimensions \(expected 1,"
             r" got 3\)|"
-            "The truth value of an array with more than one element is "
-            "ambiguous|"
             "Cannot index with multidimensional key|"
             r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]|"
-            "No matching signature found|"  # TypeError
-            "unhashable type: 'numpy.ndarray'"  # TypeError
+            "Index data must be 1-dimensional"
         )
 
         if (
@@ -104,21 +101,12 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id):
                 "categorical",
             ]
         ):
-            idxr[nd3]
-        else:
-            if (
-                isinstance(obj, DataFrame)
-                and idxr_id == "getitem"
-                and index.inferred_type == "boolean"
-            ):
-                error = TypeError
-            elif idxr_id == "getitem" and index.inferred_type == "interval":
-                error = TypeError
-            else:
-                error = ValueError
-
-            with pytest.raises(error, match=msg):
+            with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
                 idxr[nd3]
+        else:
+            with pytest.raises(ValueError, match=msg):
+                with tm.assert_produces_warning(DeprecationWarning):
+                    idxr[nd3]
 
     @pytest.mark.parametrize(
         "index", tm.all_index_generator(5), ids=lambda x: type(x).__name__
@@ -146,16 +134,14 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id):
         nd3 = np.random.randint(5, size=(2, 2, 2))
 
         msg = (
-            r"Buffer has wrong number of dimensions \(expected 1, "
-            r"got 3\)|"
-            "The truth value of an array with more than one element is "
-            "ambiguous|"
-            "Only 1-dimensional input arrays are supported|"
+            r"Buffer has wrong number of dimensions \(expected 1,"
+            r" got 3\)|"
             "'pandas._libs.interval.IntervalTree' object has no attribute "
             "'set_value'|"  # AttributeError
             "unhashable type: 'numpy.ndarray'|"  # TypeError
             "No matching signature found|"  # TypeError
-            r"^\[\[\["  # pandas.core.indexing.IndexingError
+            r"^\[\[\[|"  # pandas.core.indexing.IndexingError
+            "Index data must be 1-dimensional"
         )
 
         if (idxr_id == "iloc") or (
@@ -176,10 +162,8 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id):
         ):
             idxr[nd3] = 0
         else:
-            with pytest.raises(
-                (ValueError, AttributeError, TypeError, pd.core.indexing.IndexingError),
-                match=msg,
-            ):
+            err = (ValueError, AttributeError)
+            with pytest.raises(err, match=msg):
                 idxr[nd3] = 0
 
     def test_inf_upcast(self):

diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py
@@ -136,7 +136,7 @@ def test_write_with_index(self):
 
         # column multi-index
         df.index = [0, 1, 2]
-        df.columns = (pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]),)
+        df.columns = pd.MultiIndex.from_tuples([("a", 1)])
         self.check_error_on_write(df, ValueError)
 
     def test_path_pathlib(self):

diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py
@@ -66,11 +66,10 @@ def test_registering_no_warning(self):
 
         # Set to the "warn" state, in case this isn't the first test run
         register_matplotlib_converters()
-        with tm.assert_produces_warning(None) as w:
+        with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
+            # GH#30588 DeprecationWarning from 2D indexing
             ax.plot(s.index, s.values)
 
-        assert len(w) == 0
-
     def test_pandas_plots_register(self):
         pytest.importorskip("matplotlib.pyplot")
         s = Series(range(12), index=date_range("2017", periods=12))
@@ -101,19 +100,16 @@ def test_option_no_warning(self):
 
         # Test without registering first, no warning
         with ctx:
-            with tm.assert_produces_warning(None) as w:
+            # GH#30588 DeprecationWarning from 2D indexing on Index
+            with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
                 ax.plot(s.index, s.values)
 
-        assert len(w) == 0
-
         # Now test with registering
         register_matplotlib_converters()
         with ctx:
-            with tm.assert_produces_warning(None) as w:
+            with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
                 ax.plot(s.index, s.values)
 
-        assert len(w) == 0
-
     def test_registry_resets(self):
         units = pytest.importorskip("matplotlib.units")
         dates = pytest.importorskip("matplotlib.dates")

diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
@@ -137,7 +137,9 @@ def test_first_last_valid(self, datetime_series):
         assert ts.last_valid_index().freq == ts.index.freq
 
     def test_mpl_compat_hack(self, datetime_series):
-        result = datetime_series[:, np.newaxis]
+        with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
+            # GH#30588 multi-dimensional indexing deprecated
+            result = datetime_series[:, np.newaxis]
         expected = datetime_series.values[:, np.newaxis]
         tm.assert_almost_equal(result, expected)