Skip to content

Commit f8e7e72

Browse files
author
Christoph Möhl
committed
added whatsnew and reformatted tests to be more readable
1 parent bbb979c commit f8e7e72

File tree

3 files changed

+77
-65
lines changed

3 files changed

+77
-65
lines changed

doc/source/whatsnew/v0.21.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ Sparse
123123

124124
Reshaping
125125
^^^^^^^^^
126-
126+
- Bug in ``pd.crosstab(normalize=True, margins=True)`` when at least one axis has a ``MultiIndex`` (:issue:`15150`).
127127

128128

129129
Numeric

pandas/core/reshape/pivot.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -511,16 +511,14 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
511511
df = DataFrame(data)
512512
df['__dummy__'] = 0
513513
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
514-
aggfunc=len, margins=False,
515-
margins_name=margins_name, dropna=dropna)
514+
aggfunc=len, margins=False, dropna=dropna)
516515
table = table.fillna(0).astype(np.int64)
517516

518517
else:
519518
data['__dummy__'] = values
520519
df = DataFrame(data)
521520
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
522-
aggfunc=aggfunc, margins=False,
523-
margins_name=margins_name, dropna=dropna)
521+
aggfunc=aggfunc, margins=False, dropna=dropna)
524522

525523
if margins:
526524
_check_margins_name(margins_name, table)

pandas/tests/reshape/test_pivot.py

Lines changed: 74 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1328,66 +1328,80 @@ def test_crosstab_norm_margins_with_multiindex(self):
13281328
b = np.array(['one', 'one', 'two', 'one', 'two', 'two'])
13291329
c = np.array(['dull', 'shiny', 'dull', 'dull', 'dull', 'shiny'])
13301330
d = np.array(['a', 'a', 'b', 'a', 'b', 'b'])
1331-
expected_col_colnorm = MultiIndex(levels=[['All', 'dull', 'shiny'],
1332-
['', 'a', 'b']],
1333-
labels=[[1, 1, 2, 2, 0],
1334-
[1, 2, 1, 2, 0]],
1335-
names=['col_0', 'col_1'])
1336-
expected_index_colnorm = MultiIndex(levels=[['All', 'bar', 'foo'],
1337-
['', 'one', 'two']],
1338-
labels=[[1, 1, 2, 2],
1339-
[1, 2, 1, 2]],
1340-
names=['row_0', 'row_1'])
1341-
expected_data_colnorm = np.array([[.5, 0., 1., 0., .333333],
1342-
[0., .5, 0., 0., .166667],
1343-
[.5, 0., 0., 0., .166667],
1344-
[0., .5, 0., 1., .333333]])
1345-
expected_colnorm = pd.DataFrame(expected_data_colnorm,
1346-
index=expected_index_colnorm,
1347-
columns=expected_col_colnorm)
1348-
expected_col_indexnorm = MultiIndex(levels=[['All', 'dull', 'shiny'],
1349-
['', 'a', 'b']],
1350-
labels=[[1, 1, 2, 2],
1351-
[1, 2, 1, 2]],
1352-
names=['col_0', 'col_1'])
1353-
expected_index_indexnorm = MultiIndex(levels=[['All', 'bar', 'foo'],
1354-
['', 'one', 'two']],
1355-
labels=[[1, 1, 2, 2, 0],
1356-
[1, 2, 1, 2, 0]],
1357-
names=['row_0', 'row_1'])
1358-
expected_data_indexnorm = np.array([[.5, 0., .5, 0.],
1359-
[0., 1., 0., 0.],
1360-
[1., 0., 0., 0.],
1361-
[0., .5, 0., .5],
1362-
[.33333333, .33333333,
1363-
.16666667, .16666667]])
1364-
expected_indexnorm = pd.DataFrame(expected_data_indexnorm,
1365-
index=expected_index_indexnorm,
1366-
columns=expected_col_indexnorm)
1367-
expected_data_allnorm = np.array([[0.16666667, 0., .16666667,
1368-
0., .33333333],
1369-
[0., .16666667, 0.,
1370-
0., .16666667],
1371-
[.16666667, 0., 0.,
1372-
0., .16666667],
1373-
[0., .16666667, 0.,
1374-
.16666667, .33333333],
1375-
[0.33333333, .33333333, .16666667,
1376-
.16666667, 1.]])
1377-
expected_allnorm = pd.DataFrame(expected_data_allnorm,
1378-
index=expected_index_indexnorm,
1379-
columns=expected_col_colnorm)
1380-
1381-
result_colnorm = pd.crosstab([a, b], [c, d], normalize='columns',
1382-
margins=True)
1383-
result_indexnorm = pd.crosstab([a, b], [c, d], normalize='index',
1384-
margins=True)
1385-
result_allnorm = pd.crosstab([a, b], [c, d], normalize='all',
1386-
margins=True)
1387-
1388-
tm.assert_frame_equal(result_colnorm, expected_colnorm)
1389-
tm.assert_frame_equal(result_indexnorm, expected_indexnorm)
1390-
tm.assert_frame_equal(result_allnorm, expected_allnorm)
1331+
1332+
# test for normalize == 'columns'
1333+
expected_columns = MultiIndex(levels=[['All', 'dull', 'shiny'],
1334+
['', 'a', 'b']],
1335+
labels=[[1, 1, 2, 2, 0],
1336+
[1, 2, 1, 2, 0]],
1337+
names=['col_0', 'col_1'])
1338+
expected_index = MultiIndex(levels=[['All', 'bar', 'foo'],
1339+
['', 'one', 'two']],
1340+
labels=[[1, 1, 2, 2],
1341+
[1, 2, 1, 2]],
1342+
names=['row_0', 'row_1'])
1343+
expected_data = np.array([[.5, 0., 1., 0., .333333],
1344+
[0., .5, 0., 0., .166667],
1345+
[.5, 0., 0., 0., .166667],
1346+
[0., .5, 0., 1., .333333]])
1347+
expected = pd.DataFrame(expected_data,
1348+
index=expected_index,
1349+
columns=expected_columns)
1350+
result = pd.crosstab([a, b], [c, d], normalize='columns',
1351+
margins=True)
1352+
tm.assert_frame_equal(result, expected)
1353+
1354+
# test for normalize == 'index'
1355+
expected_columns = MultiIndex(levels=[['All', 'dull', 'shiny'],
1356+
['', 'a', 'b']],
1357+
labels=[[1, 1, 2, 2],
1358+
[1, 2, 1, 2]],
1359+
names=['col_0', 'col_1'])
1360+
expected_index = MultiIndex(levels=[['All', 'bar', 'foo'],
1361+
['', 'one', 'two']],
1362+
labels=[[1, 1, 2, 2, 0],
1363+
[1, 2, 1, 2, 0]],
1364+
names=['row_0', 'row_1'])
1365+
expected_data = np.array([[.5, 0., .5, 0.],
1366+
[0., 1., 0., 0.],
1367+
[1., 0., 0., 0.],
1368+
[0., .5, 0., .5],
1369+
[.33333333, .33333333,
1370+
.16666667, .16666667]])
1371+
expected = pd.DataFrame(expected_data,
1372+
index=expected_index,
1373+
columns=expected_columns)
1374+
result = pd.crosstab([a, b], [c, d], normalize='index',
1375+
margins=True)
1376+
tm.assert_frame_equal(result, expected)
1377+
1378+
# test for normalize == 'all'
1379+
expected_columns = MultiIndex(levels=[['All', 'dull', 'shiny'],
1380+
['', 'a', 'b']],
1381+
labels=[[1, 1, 2, 2, 0],
1382+
[1, 2, 1, 2, 0]],
1383+
names=['col_0', 'col_1'])
1384+
expected_index = MultiIndex(levels=[['All', 'bar', 'foo'],
1385+
['', 'one', 'two']],
1386+
labels=[[1, 1, 2, 2, 0],
1387+
[1, 2, 1, 2, 0]],
1388+
names=['row_0', 'row_1'])
1389+
expected_data = np.array([[0.16666667, 0., .16666667,
1390+
0., .33333333],
1391+
[0., .16666667, 0.,
1392+
0., .16666667],
1393+
[.16666667, 0., 0.,
1394+
0., .16666667],
1395+
[0., .16666667, 0.,
1396+
.16666667, .33333333],
1397+
[0.33333333, .33333333, .16666667,
1398+
.16666667, 1.]])
1399+
expected = pd.DataFrame(expected_data,
1400+
index=expected_index,
1401+
columns=expected_columns)
1402+
result = pd.crosstab([a, b], [c, d], normalize='all',
1403+
margins=True)
1404+
tm.assert_frame_equal(result, expected)
13911405

13921406
def test_crosstab_with_empties(self):
13931407
# Check handling of empties

0 commit comments

Comments
 (0)