Skip to content

Commit 6167eaa

Browse files
authored
Vendor pandas to xarray conversion tests (#10187)
xref #9661
1 parent 61d4e26 commit 6167eaa

File tree

3 files changed

+261 additions, -29 deletions

Diff for: xarray/compat/pdcompat.py

+27-28
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,37 @@
11
# For reference, here is a copy of the pandas copyright notice:
22

3-
# (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team
4-
# All rights reserved.
3+
# BSD 3-Clause License
54

6-
# Copyright (c) 2008-2011 AQR Capital Management, LLC
5+
# Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
76
# All rights reserved.
87

8+
# Copyright (c) 2011-2025, Open source contributors.
9+
910
# Redistribution and use in source and binary forms, with or without
10-
# modification, are permitted provided that the following conditions are
11-
# met:
12-
13-
# * Redistributions of source code must retain the above copyright
14-
# notice, this list of conditions and the following disclaimer.
15-
16-
# * Redistributions in binary form must reproduce the above
17-
# copyright notice, this list of conditions and the following
18-
# disclaimer in the documentation and/or other materials provided
19-
# with the distribution.
20-
21-
# * Neither the name of the copyright holder nor the names of any
22-
# contributors may be used to endorse or promote products derived
23-
# from this software without specific prior written permission.
24-
25-
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
26-
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27-
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28-
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29-
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30-
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31-
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32-
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33-
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34-
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
11+
# modification, are permitted provided that the following conditions are met:
12+
13+
# * Redistributions of source code must retain the above copyright notice, this
14+
# list of conditions and the following disclaimer.
15+
16+
# * Redistributions in binary form must reproduce the above copyright notice,
17+
# this list of conditions and the following disclaimer in the documentation
18+
# and/or other materials provided with the distribution.
19+
20+
# * Neither the name of the copyright holder nor the names of its
21+
# contributors may be used to endorse or promote products derived from
22+
# this software without specific prior written permission.
23+
24+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
28+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
3533
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34+
3635
from __future__ import annotations
3736

3837
from enum import Enum

Diff for: xarray/core/extension_array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def __array_ufunc__(ufunc, method, *inputs, **kwargs):
106106
return ufunc(*inputs, **kwargs)
107107

108108
def __repr__(self):
109-
return f"{type(self)}(array={self.array!r})"
109+
return f"PandasExtensionArray(array={self.array!r})"
110110

111111
def __getattr__(self, attr: str) -> object:
112112
return getattr(self.array, attr)

Diff for: xarray/tests/test_pandas_to_xarray.py

+233
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
# This file contains code vendored from pandas
2+
# For reference, here is a copy of the pandas copyright notice:
3+
4+
# BSD 3-Clause License
5+
6+
# Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
7+
# All rights reserved.
8+
9+
# Copyright (c) 2011-2025, Open source contributors.
10+
11+
# Redistribution and use in source and binary forms, with or without
12+
# modification, are permitted provided that the following conditions are met:
13+
14+
# * Redistributions of source code must retain the above copyright notice, this
15+
# list of conditions and the following disclaimer.
16+
17+
# * Redistributions in binary form must reproduce the above copyright notice,
18+
# this list of conditions and the following disclaimer in the documentation
19+
# and/or other materials provided with the distribution.
20+
21+
# * Neither the name of the copyright holder nor the names of its
22+
# contributors may be used to endorse or promote products derived from
23+
# this software without specific prior written permission.
24+
25+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
29+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35+
36+
import numpy as np
37+
import pandas as pd
38+
import pandas._testing as tm
39+
import pytest
40+
from pandas import (
41+
Categorical,
42+
CategoricalIndex,
43+
DataFrame,
44+
Index,
45+
IntervalIndex,
46+
MultiIndex,
47+
RangeIndex,
48+
Series,
49+
date_range,
50+
period_range,
51+
timedelta_range,
52+
)
53+
54+
# Sample pandas indexes keyed by a short label.  The ``index_flat`` fixture
# is parametrized over these keys, so insertion order is significant.
indices_dict = {
    "object": Index([f"pandas_{i}" for i in range(10)], dtype=object),
    "string": Index([f"pandas_{i}" for i in range(10)], dtype="str"),
    "datetime": date_range("2020-01-01", periods=10),
    "datetime-tz": date_range("2020-01-01", periods=10, tz="US/Pacific"),
    "period": period_range("2020-01-01", periods=10, freq="D"),
    "timedelta": timedelta_range(start="1 day", periods=10, freq="D"),
    "range": RangeIndex(10),
    # Plain NumPy integer/float indexes: the key doubles as the dtype name.
    **{
        dtype_name: Index(np.arange(10), dtype=dtype_name)
        for dtype_name in (
            "int8",
            "int16",
            "int32",
            "int64",
            "uint8",
            "uint16",
            "uint32",
            "uint64",
            "float32",
            "float64",
        )
    },
    "bool-object": Index([True, False] * 5, dtype=object),
    "bool-dtype": Index([True, False] * 5, dtype=bool),
    # Complex indexes whose real and imaginary parts both run 0..9.
    **{
        dtype_name: Index(
            np.arange(10, dtype=dtype_name) + 1.0j * np.arange(10, dtype=dtype_name)
        )
        for dtype_name in ("complex64", "complex128")
    },
    "categorical": CategoricalIndex(list("abcd") * 2),
    "interval": IntervalIndex.from_breaks(np.linspace(0, 100, num=11)),
    "empty": Index([]),
    # The MultiIndex-based entries below are disabled in this vendored copy;
    # the helper constructors they reference are not defined in this file.
    # "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
    # "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(),
    # "multi": _create_multiindex(),
    "repeats": Index([0, 0, 1, 1, 2, 2]),
    "nullable_int": Index(np.arange(10), dtype="Int64"),
    "nullable_uint": Index(np.arange(10), dtype="UInt16"),
    "nullable_float": Index(np.arange(10), dtype="Float32"),
    "nullable_bool": Index(np.arange(10).astype(bool), dtype="boolean"),
    "string-python": Index(
        pd.array([f"pandas_{i}" for i in range(10)], dtype="string[python]")
    ),
}
95+
96+
97+
@pytest.fixture(
    params=[
        label
        for label in indices_dict
        if not isinstance(indices_dict[label], MultiIndex)
    ]
)
def index_flat(request):
    """
    Parametrized index fixture covering every non-MultiIndex entry of
    ``indices_dict``; each test receives its own copy of the selected index.
    """
    selected = indices_dict[request.param]
    return selected.copy()
108+
109+
110+
@pytest.fixture
def using_infer_string() -> bool:
    """
    Report whether the pandas ``future.infer_string`` option is exactly ``True``.
    """
    infer_string = pd.options.future.infer_string  # type: ignore[union-attr]
    return infer_string is True
116+
117+
118+
class TestDataFrameToXArray:
    """Checks for ``DataFrame.to_xarray`` and the trip back via ``to_dataframe``."""

    @pytest.fixture
    def df(self):
        """Four-row frame with eight columns, each holding a different kind of data."""
        columns = {
            "a": list("abcd"),
            "b": list(range(1, 5)),
            "c": np.arange(3, 7).astype("u1"),
            "d": np.arange(4.0, 8.0, dtype="float64"),
            "e": [True, False, True, False],
            "f": Categorical(list("abcd")),
            "g": date_range("20130101", periods=4),
            "h": date_range("20130101", periods=4, tz="US/Eastern"),
        }
        return DataFrame(columns)

    @pytest.mark.xfail(reason="needs some work")
    def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
        """Convert with each flat index type and compare the round-tripped frame."""
        # MultiIndex is covered by test_to_xarray_with_multiindex
        if len(index_flat) == 0:
            pytest.skip("Test doesn't make sense for empty index")

        from xarray import Dataset

        df.index = index_flat[:4]
        df.index.name = "foo"
        df.columns.name = "bar"
        ds = df.to_xarray()
        assert ds.sizes["foo"] == 4
        assert len(ds.coords) == 1
        assert len(ds.data_vars) == 8
        tm.assert_almost_equal(list(ds.coords.keys()), ["foo"])
        assert isinstance(ds, Dataset)

        # Round trip: the expected frame equals the input except that the
        # categorical column "f" is cast to object (or "str" when string
        # inference is on) and the columns' name is reset to None.
        roundtrip_dtype = "str" if using_infer_string else object
        expected = df.copy()
        expected["f"] = expected["f"].astype(roundtrip_dtype)
        expected.columns.name = None
        tm.assert_frame_equal(ds.to_dataframe(), expected)

    def test_to_xarray_empty(self, df):
        """A zero-row slice still converts to a Dataset with an empty "foo" dimension."""
        from xarray import Dataset

        df.index.name = "foo"
        empty_frame = df[0:0]
        ds = empty_frame.to_xarray()
        assert ds.sizes["foo"] == 0
        assert isinstance(ds, Dataset)

    def test_to_xarray_with_multiindex(self, df, using_infer_string):
        """A two-level MultiIndex yields one coordinate per level and round-trips."""
        from xarray import Dataset

        # MultiIndex
        levels = [["a"], range(4)]
        df.index = MultiIndex.from_product(levels, names=["one", "two"])
        ds = df.to_xarray()
        assert ds.sizes["one"] == 1
        assert ds.sizes["two"] == 4
        assert len(ds.coords) == 2
        assert len(ds.data_vars) == 8
        tm.assert_almost_equal(list(ds.coords.keys()), ["one", "two"])
        assert isinstance(ds, Dataset)

        roundtripped = ds.to_dataframe()
        roundtrip_dtype = "str" if using_infer_string else object
        expected = df.copy()
        expected["f"] = expected["f"].astype(roundtrip_dtype)
        expected.columns.name = None
        tm.assert_frame_equal(roundtripped, expected)
191+
192+
193+
class TestSeriesToXArray:
    """Checks for ``Series.to_xarray`` and the trip back via ``to_series``."""

    def test_to_xarray_index_types(self, index_flat):
        """Convert a Series built on each flat index type and round-trip it."""
        # MultiIndex is covered by test_to_xarray_with_multiindex

        from xarray import DataArray

        n_items = len(index_flat)
        ser = Series(range(n_items), index=index_flat, dtype="int64")
        ser.index.name = "foo"
        da = ser.to_xarray()
        # The value is discarded; building the repr just has to not raise.
        repr(da)
        assert len(da) == n_items
        assert len(da.coords) == 1
        tm.assert_almost_equal(list(da.coords.keys()), ["foo"])
        assert isinstance(da, DataArray)

        # Converting back must reproduce the original Series.
        tm.assert_series_equal(da.to_series(), ser)

    def test_to_xarray_empty(self):
        """An empty Series converts to a zero-length DataArray with one coordinate."""
        from xarray import DataArray

        ser = Series([], dtype=object)
        ser.index.name = "foo"
        da = ser.to_xarray()
        assert len(da) == 0
        assert len(da.coords) == 1
        tm.assert_almost_equal(list(da.coords.keys()), ["foo"])
        assert isinstance(da, DataArray)

    def test_to_xarray_with_multiindex(self):
        """A two-level MultiIndex yields one coordinate per level and round-trips."""
        from xarray import DataArray

        levels = [["a", "b"], range(3)]
        mi = MultiIndex.from_product(levels, names=["one", "two"])
        ser = Series(range(6), dtype="int64", index=mi)
        da = ser.to_xarray()
        assert len(da) == 2
        tm.assert_almost_equal(list(da.coords.keys()), ["one", "two"])
        assert isinstance(da, DataArray)
        roundtripped = da.to_series()
        tm.assert_series_equal(roundtripped, ser)

0 commit comments

Comments
 (0)