Skip to content

Commit 8347ff8

Browse files
rockg authored and jreback committed on Jan 12, 2018
BUG: Stack/unstack do not return subclassed objects (GH15563) (#18929)
1 parent 5853b79 commit 8347ff8

File tree

5 files changed

+318
-21
lines changed

5 files changed

+318
-21
lines changed
 

‎doc/source/whatsnew/v0.23.0.txt

+2
Original file line number | Diff line number | Diff line change
@@ -452,6 +452,8 @@ Reshaping
452452
- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`)
453453
- Bug in :func:`DataFrame.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`)
454454
- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`)
455+
- Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`)
456+
-
455457

456458
Numeric
457459
^^^^^^^

‎pandas/core/reshape/melt.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -80,8 +80,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
8080
mdata[col] = np.asanyarray(frame.columns
8181
._get_level_values(i)).repeat(N)
8282

83-
from pandas import DataFrame
84-
return DataFrame(mdata, columns=mcolumns)
83+
return frame._constructor(mdata, columns=mcolumns)
8584

8685

8786
def lreshape(data, groups, dropna=True, label=None):
@@ -152,8 +151,7 @@ def lreshape(data, groups, dropna=True, label=None):
152151
if not mask.all():
153152
mdata = {k: v[mask] for k, v in compat.iteritems(mdata)}
154153

155-
from pandas import DataFrame
156-
return DataFrame(mdata, columns=id_cols + pivot_cols)
154+
return data._constructor(mdata, columns=id_cols + pivot_cols)
157155

158156

159157
def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'):

‎pandas/core/reshape/reshape.py

+35-12
Original file line number | Diff line number | Diff line change
@@ -37,8 +37,23 @@ class _Unstacker(object):
3737
3838
Parameters
3939
----------
40+
values : ndarray
41+
Values of DataFrame to "Unstack"
42+
index : object
43+
Pandas ``Index``
4044
level : int or str, default last level
4145
Level to "unstack". Accepts a name for the level.
46+
value_columns : Index, optional
47+
Pandas ``Index`` or ``MultiIndex`` object if unstacking a DataFrame
48+
fill_value : scalar, optional
49+
Default value to fill in missing values if subgroups do not have the
50+
same set of labels. By default, missing values will be replaced with
51+
the default fill value for that data type, NaN for float, NaT for
52+
datetimelike, etc. For integer types, by default data will be converted to
53+
float and missing values will be set to NaN.
54+
constructor : object
55+
Pandas ``DataFrame`` or subclass used to create unstacked
56+
response. If None, DataFrame or SparseDataFrame will be used.
4257
4358
Examples
4459
--------
@@ -69,7 +84,7 @@ class _Unstacker(object):
6984
"""
7085

7186
def __init__(self, values, index, level=-1, value_columns=None,
72-
fill_value=None):
87+
fill_value=None, constructor=None):
7388

7489
self.is_categorical = None
7590
self.is_sparse = is_sparse(values)
@@ -86,6 +101,14 @@ def __init__(self, values, index, level=-1, value_columns=None,
86101
self.value_columns = value_columns
87102
self.fill_value = fill_value
88103

104+
if constructor is None:
105+
if self.is_sparse:
106+
self.constructor = SparseDataFrame
107+
else:
108+
self.constructor = DataFrame
109+
else:
110+
self.constructor = constructor
111+
89112
if value_columns is None and values.shape[1] != 1: # pragma: no cover
90113
raise ValueError('must pass column labels for multi-column data')
91114

@@ -173,8 +196,7 @@ def get_result(self):
173196
ordered=ordered)
174197
for i in range(values.shape[-1])]
175198

176-
klass = SparseDataFrame if self.is_sparse else DataFrame
177-
return klass(values, index=index, columns=columns)
199+
return self.constructor(values, index=index, columns=columns)
178200

179201
def get_new_values(self):
180202
values = self.values
@@ -374,8 +396,9 @@ def pivot(self, index=None, columns=None, values=None):
374396
index = self.index
375397
else:
376398
index = self[index]
377-
indexed = Series(self[values].values,
378-
index=MultiIndex.from_arrays([index, self[columns]]))
399+
indexed = self._constructor_sliced(
400+
self[values].values,
401+
index=MultiIndex.from_arrays([index, self[columns]]))
379402
return indexed.unstack(columns)
380403

381404

@@ -461,7 +484,8 @@ def unstack(obj, level, fill_value=None):
461484
return obj.T.stack(dropna=False)
462485
else:
463486
unstacker = _Unstacker(obj.values, obj.index, level=level,
464-
fill_value=fill_value)
487+
fill_value=fill_value,
488+
constructor=obj._constructor_expanddim)
465489
return unstacker.get_result()
466490

467491

@@ -470,12 +494,12 @@ def _unstack_frame(obj, level, fill_value=None):
470494
unstacker = partial(_Unstacker, index=obj.index,
471495
level=level, fill_value=fill_value)
472496
blocks = obj._data.unstack(unstacker)
473-
klass = type(obj)
474-
return klass(blocks)
497+
return obj._constructor(blocks)
475498
else:
476499
unstacker = _Unstacker(obj.values, obj.index, level=level,
477500
value_columns=obj.columns,
478-
fill_value=fill_value)
501+
fill_value=fill_value,
502+
constructor=obj._constructor)
479503
return unstacker.get_result()
480504

481505

@@ -528,8 +552,7 @@ def factorize(index):
528552
new_values = new_values[mask]
529553
new_index = new_index[mask]
530554

531-
klass = type(frame)._constructor_sliced
532-
return klass(new_values, index=new_index)
555+
return frame._constructor_sliced(new_values, index=new_index)
533556

534557

535558
def stack_multiple(frame, level, dropna=True):
@@ -676,7 +699,7 @@ def _convert_level_number(level_num, columns):
676699
new_index = MultiIndex(levels=new_levels, labels=new_labels,
677700
names=new_names, verify_integrity=False)
678701

679-
result = DataFrame(new_data, index=new_index, columns=new_columns)
702+
result = frame._constructor(new_data, index=new_index, columns=new_columns)
680703

681704
# more efficient way to go about this? can do the whole masking biz but
682705
# will only save a small amount of time...

‎pandas/tests/frame/test_subclass.py

+268-1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from warnings import catch_warnings
66
import numpy as np
77

8-
from pandas import DataFrame, Series, MultiIndex, Panel
8+
from pandas import DataFrame, Series, MultiIndex, Panel, Index
99
import pandas as pd
1010
import pandas.util.testing as tm
1111

@@ -247,3 +247,270 @@ def test_subclass_sparse_transpose(self):
247247
[2, 5],
248248
[3, 6]])
249249
tm.assert_sp_frame_equal(ossdf.T, essdf)
250+
251+
def test_subclass_stack(self):
252+
# GH 15564
253+
df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
254+
index=['a', 'b', 'c'],
255+
columns=['X', 'Y', 'Z'])
256+
257+
res = df.stack()
258+
exp = tm.SubclassedSeries(
259+
[1, 2, 3, 4, 5, 6, 7, 8, 9],
260+
index=[list('aaabbbccc'), list('XYZXYZXYZ')])
261+
262+
tm.assert_series_equal(res, exp)
263+
264+
def test_subclass_stack_multi(self):
265+
# GH 15564
266+
df = tm.SubclassedDataFrame([
267+
[10, 11, 12, 13],
268+
[20, 21, 22, 23],
269+
[30, 31, 32, 33],
270+
[40, 41, 42, 43]],
271+
index=MultiIndex.from_tuples(
272+
list(zip(list('AABB'), list('cdcd'))),
273+
names=['aaa', 'ccc']),
274+
columns=MultiIndex.from_tuples(
275+
list(zip(list('WWXX'), list('yzyz'))),
276+
names=['www', 'yyy']))
277+
278+
exp = tm.SubclassedDataFrame([
279+
[10, 12],
280+
[11, 13],
281+
[20, 22],
282+
[21, 23],
283+
[30, 32],
284+
[31, 33],
285+
[40, 42],
286+
[41, 43]],
287+
index=MultiIndex.from_tuples(list(zip(
288+
list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz'))),
289+
names=['aaa', 'ccc', 'yyy']),
290+
columns=Index(['W', 'X'], name='www'))
291+
292+
res = df.stack()
293+
tm.assert_frame_equal(res, exp)
294+
295+
res = df.stack('yyy')
296+
tm.assert_frame_equal(res, exp)
297+
298+
exp = tm.SubclassedDataFrame([
299+
[10, 11],
300+
[12, 13],
301+
[20, 21],
302+
[22, 23],
303+
[30, 31],
304+
[32, 33],
305+
[40, 41],
306+
[42, 43]],
307+
index=MultiIndex.from_tuples(list(zip(
308+
list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX'))),
309+
names=['aaa', 'ccc', 'www']),
310+
columns=Index(['y', 'z'], name='yyy'))
311+
312+
res = df.stack('www')
313+
tm.assert_frame_equal(res, exp)
314+
315+
def test_subclass_stack_multi_mixed(self):
316+
# GH 15564
317+
df = tm.SubclassedDataFrame([
318+
[10, 11, 12.0, 13.0],
319+
[20, 21, 22.0, 23.0],
320+
[30, 31, 32.0, 33.0],
321+
[40, 41, 42.0, 43.0]],
322+
index=MultiIndex.from_tuples(
323+
list(zip(list('AABB'), list('cdcd'))),
324+
names=['aaa', 'ccc']),
325+
columns=MultiIndex.from_tuples(
326+
list(zip(list('WWXX'), list('yzyz'))),
327+
names=['www', 'yyy']))
328+
329+
exp = tm.SubclassedDataFrame([
330+
[10, 12.0],
331+
[11, 13.0],
332+
[20, 22.0],
333+
[21, 23.0],
334+
[30, 32.0],
335+
[31, 33.0],
336+
[40, 42.0],
337+
[41, 43.0]],
338+
index=MultiIndex.from_tuples(list(zip(
339+
list('AAAABBBB'), list('ccddccdd'), list('yzyzyzyz'))),
340+
names=['aaa', 'ccc', 'yyy']),
341+
columns=Index(['W', 'X'], name='www'))
342+
343+
res = df.stack()
344+
tm.assert_frame_equal(res, exp)
345+
346+
res = df.stack('yyy')
347+
tm.assert_frame_equal(res, exp)
348+
349+
exp = tm.SubclassedDataFrame([
350+
[10.0, 11.0],
351+
[12.0, 13.0],
352+
[20.0, 21.0],
353+
[22.0, 23.0],
354+
[30.0, 31.0],
355+
[32.0, 33.0],
356+
[40.0, 41.0],
357+
[42.0, 43.0]],
358+
index=MultiIndex.from_tuples(list(zip(
359+
list('AAAABBBB'), list('ccddccdd'), list('WXWXWXWX'))),
360+
names=['aaa', 'ccc', 'www']),
361+
columns=Index(['y', 'z'], name='yyy'))
362+
363+
res = df.stack('www')
364+
tm.assert_frame_equal(res, exp)
365+
366+
def test_subclass_unstack(self):
367+
# GH 15564
368+
df = tm.SubclassedDataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
369+
index=['a', 'b', 'c'],
370+
columns=['X', 'Y', 'Z'])
371+
372+
res = df.unstack()
373+
exp = tm.SubclassedSeries(
374+
[1, 4, 7, 2, 5, 8, 3, 6, 9],
375+
index=[list('XXXYYYZZZ'), list('abcabcabc')])
376+
377+
tm.assert_series_equal(res, exp)
378+
379+
def test_subclass_unstack_multi(self):
380+
# GH 15564
381+
df = tm.SubclassedDataFrame([
382+
[10, 11, 12, 13],
383+
[20, 21, 22, 23],
384+
[30, 31, 32, 33],
385+
[40, 41, 42, 43]],
386+
index=MultiIndex.from_tuples(
387+
list(zip(list('AABB'), list('cdcd'))),
388+
names=['aaa', 'ccc']),
389+
columns=MultiIndex.from_tuples(
390+
list(zip(list('WWXX'), list('yzyz'))),
391+
names=['www', 'yyy']))
392+
393+
exp = tm.SubclassedDataFrame([
394+
[10, 20, 11, 21, 12, 22, 13, 23],
395+
[30, 40, 31, 41, 32, 42, 33, 43]],
396+
index=Index(['A', 'B'], name='aaa'),
397+
columns=MultiIndex.from_tuples(list(zip(
398+
list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd'))),
399+
names=['www', 'yyy', 'ccc']))
400+
401+
res = df.unstack()
402+
tm.assert_frame_equal(res, exp)
403+
404+
res = df.unstack('ccc')
405+
tm.assert_frame_equal(res, exp)
406+
407+
exp = tm.SubclassedDataFrame([
408+
[10, 30, 11, 31, 12, 32, 13, 33],
409+
[20, 40, 21, 41, 22, 42, 23, 43]],
410+
index=Index(['c', 'd'], name='ccc'),
411+
columns=MultiIndex.from_tuples(list(zip(
412+
list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB'))),
413+
names=['www', 'yyy', 'aaa']))
414+
415+
res = df.unstack('aaa')
416+
tm.assert_frame_equal(res, exp)
417+
418+
def test_subclass_unstack_multi_mixed(self):
419+
# GH 15564
420+
df = tm.SubclassedDataFrame([
421+
[10, 11, 12.0, 13.0],
422+
[20, 21, 22.0, 23.0],
423+
[30, 31, 32.0, 33.0],
424+
[40, 41, 42.0, 43.0]],
425+
index=MultiIndex.from_tuples(
426+
list(zip(list('AABB'), list('cdcd'))),
427+
names=['aaa', 'ccc']),
428+
columns=MultiIndex.from_tuples(
429+
list(zip(list('WWXX'), list('yzyz'))),
430+
names=['www', 'yyy']))
431+
432+
exp = tm.SubclassedDataFrame([
433+
[10, 20, 11, 21, 12.0, 22.0, 13.0, 23.0],
434+
[30, 40, 31, 41, 32.0, 42.0, 33.0, 43.0]],
435+
index=Index(['A', 'B'], name='aaa'),
436+
columns=MultiIndex.from_tuples(list(zip(
437+
list('WWWWXXXX'), list('yyzzyyzz'), list('cdcdcdcd'))),
438+
names=['www', 'yyy', 'ccc']))
439+
440+
res = df.unstack()
441+
tm.assert_frame_equal(res, exp)
442+
443+
res = df.unstack('ccc')
444+
tm.assert_frame_equal(res, exp)
445+
446+
exp = tm.SubclassedDataFrame([
447+
[10, 30, 11, 31, 12.0, 32.0, 13.0, 33.0],
448+
[20, 40, 21, 41, 22.0, 42.0, 23.0, 43.0]],
449+
index=Index(['c', 'd'], name='ccc'),
450+
columns=MultiIndex.from_tuples(list(zip(
451+
list('WWWWXXXX'), list('yyzzyyzz'), list('ABABABAB'))),
452+
names=['www', 'yyy', 'aaa']))
453+
454+
res = df.unstack('aaa')
455+
tm.assert_frame_equal(res, exp)
456+
457+
def test_subclass_pivot(self):
458+
# GH 15564
459+
df = tm.SubclassedDataFrame({
460+
'index': ['A', 'B', 'C', 'C', 'B', 'A'],
461+
'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
462+
'values': [1., 2., 3., 3., 2., 1.]})
463+
464+
pivoted = df.pivot(
465+
index='index', columns='columns', values='values')
466+
467+
expected = tm.SubclassedDataFrame({
468+
'One': {'A': 1., 'B': 2., 'C': 3.},
469+
'Two': {'A': 1., 'B': 2., 'C': 3.}})
470+
471+
expected.index.name, expected.columns.name = 'index', 'columns'
472+
473+
tm.assert_frame_equal(pivoted, expected)
474+
475+
def test_subclassed_melt(self):
476+
# GH 15564
477+
cheese = tm.SubclassedDataFrame({
478+
'first': ['John', 'Mary'],
479+
'last': ['Doe', 'Bo'],
480+
'height': [5.5, 6.0],
481+
'weight': [130, 150]})
482+
483+
melted = pd.melt(cheese, id_vars=['first', 'last'])
484+
485+
expected = tm.SubclassedDataFrame([
486+
['John', 'Doe', 'height', 5.5],
487+
['Mary', 'Bo', 'height', 6.0],
488+
['John', 'Doe', 'weight', 130],
489+
['Mary', 'Bo', 'weight', 150]],
490+
columns=['first', 'last', 'variable', 'value'])
491+
492+
tm.assert_frame_equal(melted, expected)
493+
494+
def test_subclassed_wide_to_long(self):
495+
# GH 9762
496+
497+
np.random.seed(123)
498+
x = np.random.randn(3)
499+
df = tm.SubclassedDataFrame({
500+
"A1970": {0: "a", 1: "b", 2: "c"},
501+
"A1980": {0: "d", 1: "e", 2: "f"},
502+
"B1970": {0: 2.5, 1: 1.2, 2: .7},
503+
"B1980": {0: 3.2, 1: 1.3, 2: .1},
504+
"X": dict(zip(range(3), x))})
505+
506+
df["id"] = df.index
507+
exp_data = {"X": x.tolist() + x.tolist(),
508+
"A": ['a', 'b', 'c', 'd', 'e', 'f'],
509+
"B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1],
510+
"year": [1970, 1970, 1970, 1980, 1980, 1980],
511+
"id": [0, 1, 2, 0, 1, 2]}
512+
expected = tm.SubclassedDataFrame(exp_data)
513+
expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
514+
long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year")
515+
516+
tm.assert_frame_equal(long_frame, expected)

‎pandas/tests/series/test_subclass.py

+11-4
Original file line number | Diff line number | Diff line change
@@ -13,24 +13,31 @@ def test_indexing_sliced(self):
1313
res = s.loc[['a', 'b']]
1414
exp = tm.SubclassedSeries([1, 2], index=list('ab'))
1515
tm.assert_series_equal(res, exp)
16-
assert isinstance(res, tm.SubclassedSeries)
1716

1817
res = s.iloc[[2, 3]]
1918
exp = tm.SubclassedSeries([3, 4], index=list('cd'))
2019
tm.assert_series_equal(res, exp)
21-
assert isinstance(res, tm.SubclassedSeries)
2220

2321
res = s.loc[['a', 'b']]
2422
exp = tm.SubclassedSeries([1, 2], index=list('ab'))
2523
tm.assert_series_equal(res, exp)
26-
assert isinstance(res, tm.SubclassedSeries)
2724

2825
def test_to_frame(self):
2926
s = tm.SubclassedSeries([1, 2, 3, 4], index=list('abcd'), name='xxx')
3027
res = s.to_frame()
3128
exp = tm.SubclassedDataFrame({'xxx': [1, 2, 3, 4]}, index=list('abcd'))
3229
tm.assert_frame_equal(res, exp)
33-
assert isinstance(res, tm.SubclassedDataFrame)
30+
31+
def test_subclass_unstack(self):
32+
# GH 15564
33+
s = tm.SubclassedSeries(
34+
[1, 2, 3, 4], index=[list('aabb'), list('xyxy')])
35+
36+
res = s.unstack()
37+
exp = tm.SubclassedDataFrame(
38+
{'x': [1, 3], 'y': [2, 4]}, index=['a', 'b'])
39+
40+
tm.assert_frame_equal(res, exp)
3441

3542

3643
class TestSparseSeriesSubclassing(object):

0 commit comments

Comments
 (0)
Please sign in to comment.