Skip to content

Commit 3139eb3

Browse files
committed
Implemented NDFrame.argsort() and NDFrame.ordering().
1 parent 247fe07 commit 3139eb3

File tree

5 files changed

+134
-55
lines changed

5 files changed

+134
-55
lines changed

doc/source/api.rst

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,8 +398,9 @@ Reshaping, sorting
398398
.. autosummary::
399399
:toctree: generated/
400400

401-
Series.argsort
402401
Series.reorder_levels
402+
Series.argsort
403+
Series.ordering
403404
Series.sort_values
404405
Series.sort_index
405406
Series.sortlevel
@@ -909,6 +910,8 @@ Reshaping, sorting, transposing
909910

910911
DataFrame.pivot
911912
DataFrame.reorder_levels
913+
DataFrame.argsort
914+
DataFrame.ordering
912915
DataFrame.sort_values
913916
DataFrame.sort_index
914917
DataFrame.sortlevel
@@ -1181,6 +1184,9 @@ Reshaping, sorting, transposing
11811184
.. autosummary::
11821185
:toctree: generated/
11831186

1187+
Panel.argsort
1188+
Panel.ordering
1189+
Panel.sort_values
11841190
Panel.sort_index
11851191
Panel.swaplevel
11861192
Panel.transpose
@@ -1271,6 +1277,15 @@ Conversion
12711277
Panel4D.isnull
12721278
Panel4D.notnull
12731279

1280+
Reshaping, sorting, transposing
1281+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1282+
.. autosummary::
1283+
:toctree: generated/
1284+
1285+
Panel4D.argsort
1286+
Panel4D.ordering
1287+
Panel4D.sort_values
1288+
12741289
.. _api.index:
12751290

12761291
Index

pandas/core/generic.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,8 @@ def _get_axis_number(self, axis):
307307
if com.is_integer(axis):
308308
if axis in self._AXIS_NAMES:
309309
return axis
310+
elif self.ndim + axis in self._AXIS_NAMES:
311+
return self.ndim + axis
310312
else:
311313
try:
312314
return self._AXIS_NUMBERS[axis]
@@ -931,6 +933,68 @@ def to_dense(self):
931933
# compat
932934
return self
933935

936+
# ----------------------------------------------------------------------
937+
# sorting
938+
939+
# Docstring template for ``argsort``. ``%(klass)s`` and ``%(argsort_args)s``
# are substituted from ``_shared_doc_kwargs`` when the method is decorated.
_shared_docs['argsort'] = """
Returns the indices that would sort the %(klass)s.
Equivalent to ``self.values.argsort(axis, kind, order)``.

Parameters
----------
%(argsort_args)s

Returns
-------
index_array : numpy.ndarray
    Array of indices that sort the %(klass)s along the specified axis.

See also
--------
numpy.ndarray.argsort
"""

# Parameter descriptions reused by every docstring template that references
# ``%(argsort_args)s``.
_shared_doc_kwargs['argsort_args'] = """
axis : int or axis name, default -1
    Axis along which to sort.
kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
    Sorting algorithm. See np.sort for more information.
    'mergesort' is the only stable algorithm.
order : default None
    Passed through unchanged to the ``argsort`` of the underlying values.
"""


@Appender(_shared_docs['argsort'] % _shared_doc_kwargs)
def argsort(self, axis=-1, kind='quicksort', order=None):
    # Translate the axis argument into a positional axis number before
    # delegating to the underlying values.
    axis_number = self._get_axis_number(axis)
    # Arguments stay positional so the call is exactly the one the
    # docstring advertises: ``self.values.argsort(axis, kind, order)``.
    return self.values.argsort(axis_number, kind, order)
969+
970+
# Docstring template for ``ordering``. ``%(klass)s`` and ``%(argsort_args)s``
# are substituted from ``_shared_doc_kwargs`` when the method is decorated.
_shared_docs['ordering'] = """
Returns the order of each entry in the %(klass)s along the specified axis.

Parameters
----------
%(argsort_args)s
fill_value : scalar, default -1
    Value to place in locations of NA/null values.

Returns
-------
ordering : %(klass)s
    %(klass)s with the same shape and axes, with values equal to
    the order of each entry along the specified axis.

See also
--------
%(klass)s.argsort
"""


@Appender(_shared_docs['ordering'] % _shared_doc_kwargs)
def ordering(self, axis=-1, kind='quicksort', order=None, fill_value=-1):
    axis = self._get_axis_number(axis)
    # ``argsort`` answers "which position holds the k-th entry in sorted
    # order"; argsorting that answer inverts it into "what is the order of
    # the entry at each position".
    sorter = self.argsort(axis, kind, order)
    ranks = sorter.argsort(axis, kind, order)
    # Rebuild on the same axes; ``__finalize__`` propagates metadata from
    # ``self`` so the result does not silently lose it.
    result = self._constructor(ranks, *self.axes).__finalize__(self)
    # NOTE(review): this relies on boolean-mask assignment being supported
    # by every NDFrame subclass that inherits the method -- confirm for the
    # higher-dimensional containers.
    result[self.isnull()] = fill_value
    return result
997+
934998
# ----------------------------------------------------------------------
935999
# Picklability
9361000

pandas/core/series.py

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1802,42 +1802,6 @@ def order(self, na_last=None, ascending=True, kind='quicksort',
18021802
return self.sort_values(ascending=ascending, kind=kind,
18031803
na_position=na_position, inplace=inplace)
18041804

1805-
def argsort(self, axis=0, kind='quicksort', order=None):
1806-
"""
1807-
Overrides ndarray.argsort. Argsorts the value, omitting NA/null values,
1808-
and places the result in the same locations as the non-NA values
1809-
1810-
Parameters
1811-
----------
1812-
axis : int (can only be zero)
1813-
kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
1814-
Choice of sorting algorithm. See np.sort for more
1815-
information. 'mergesort' is the only stable algorithm
1816-
order : ignored
1817-
1818-
Returns
1819-
-------
1820-
argsorted : Series, with -1 indicated where nan values are present
1821-
1822-
See also
1823-
--------
1824-
numpy.ndarray.argsort
1825-
"""
1826-
values = self._values
1827-
mask = isnull(values)
1828-
1829-
if mask.any():
1830-
result = Series(-1, index=self.index, name=self.name,
1831-
dtype='int64')
1832-
notmask = ~mask
1833-
result[notmask] = np.argsort(values[notmask], kind=kind)
1834-
return self._constructor(result,
1835-
index=self.index).__finalize__(self)
1836-
else:
1837-
return self._constructor(
1838-
np.argsort(values, kind=kind), index=self.index,
1839-
dtype='int64').__finalize__(self)
1840-
18411805
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
18421806
False: 'first'})
18431807
def nlargest(self, n=5, keep='first'):

pandas/tests/frame/test_sorting.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def test_sort_index(self):
9393

9494
# by column
9595
sorted_df = frame.sort_values(by='A')
96-
indexer = frame['A'].argsort().values
96+
indexer = frame['A'].argsort()
9797
expected = frame.ix[frame.index[indexer]]
9898
assert_frame_equal(sorted_df, expected)
9999

pandas/tests/series/test_analytics.py

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
import numpy as np
1212
import pandas as pd
1313

14-
from pandas import (Index, Series, DataFrame, isnull, notnull, bdate_range,
15-
date_range, _np_version_under1p9)
14+
from pandas import (Index, Series, DataFrame, Panel, Panel4D, isnull, notnull,
15+
bdate_range, date_range, _np_version_under1p9)
1616
from pandas.core.index import MultiIndex
1717
from pandas.tseries.index import Timestamp
1818
from pandas.tseries.tdi import Timedelta
@@ -262,11 +262,7 @@ def test_kurt(self):
262262
self.assertEqual(0, s.kurt())
263263
self.assertTrue((df.kurt() == 0).all())
264264

265-
def test_argsort(self):
266-
self._check_accum_op('argsort')
267-
argsorted = self.ts.argsort()
268-
self.assertTrue(issubclass(argsorted.dtype.type, np.integer))
269-
265+
def test_argsort_timestamps(self):
270266
# GH 2967 (introduced bug in 0.11-dev I think)
271267
s = Series([Timestamp('201301%02d' % (i + 1)) for i in range(5)])
272268
self.assertEqual(s.dtype, 'datetime64[ns]')
@@ -275,24 +271,64 @@ def test_argsort(self):
275271
self.assertTrue(isnull(shifted[4]))
276272

277273
result = s.argsort()
278-
expected = Series(lrange(5), dtype='int64')
274+
expected = np.arange(5, dtype=np.int64)
279275
assert_series_equal(result, expected)
280276

281277
result = shifted.argsort()
282-
expected = Series(lrange(4) + [-1], dtype='int64')
278+
expected = np.arange(5, dtype=np.int64)
283279
assert_series_equal(result, expected)
284280

285-
def test_argsort_stable(self):
281+
def test_argsort_and_ordering(self):
    # argsort must always produce integer positions
    argsorted = self.ts.argsort()
    self.assertTrue(issubclass(argsorted.dtype.type, np.integer))

    # The same 10000 values (with NaNs sprinkled in) viewed as 1-d to 4-d
    # containers, so every dimensionality goes through the shared code.
    s = Series(np.random.randint(0, 100, size=10000))
    s[::21] = nan
    df = DataFrame(s.values.reshape(100, 100))
    p = Panel(s.values.reshape(100, 10, 10))
    p4d = Panel4D(s.values.reshape(10, 10, 10, 10))

    for x in [s, df, p, p4d]:
        # Exercise the default (-1), every positional axis number and
        # every axis name.
        axes_to_try = [-1] + list(x._AXIS_NAMES) + list(x._AXIS_NUMBERS)
        for axis in axes_to_try:
            axis_number = x._get_axis_number(axis)
            for kind in ['quicksort', 'mergesort', 'heapsort']:

                result = x.argsort(axis=axis, kind=kind)
                expected_indexer = x.values.argsort(axis=axis_number,
                                                    kind=kind)
                self.assert_numpy_array_equal(result, expected_indexer)

                result = x.ordering(axis=axis, kind=kind)
                expected = x._constructor(
                    expected_indexer.argsort(axis=axis_number, kind=kind),
                    *x.axes)
                expected[x.isnull()] = -1
                # ``assertEqual`` would evaluate ``result == expected`` in
                # a boolean context, which is not meaningful for
                # element-wise comparisons; check type, values and axes
                # explicitly instead.
                self.assertIsInstance(result, type(expected))
                self.assert_numpy_array_equal(result.values,
                                              expected.values)
                for result_axis, expected_axis in zip(result.axes,
                                                      expected.axes):
                    self.assertTrue(result_axis.equals(expected_axis))

    # Hand-checked 1-d example: the NaN sorts last and gets the fill value
    s = Series([1, 5, nan, 0, 4], index=list('abcde'))
    result = s.argsort()
    expected = np.array([3, 0, 4, 1, 2], dtype=np.int64)
    self.assert_numpy_array_equal(result, expected)

    # NOTE(review): this relies on the test-case class providing
    # ``assert_series_equal``; other tests in this file call the
    # module-level function of the same name -- confirm it exists here.
    result = s.ordering()
    expected = Series([1, 3, -1, 0, 2], index=list('abcde'))
    self.assert_series_equal(result, expected)

    # Hand-checked 2-d example: the default axis (-1) sorts within each row
    df = DataFrame([[1, 5, nan, 0, 4],
                    [8, 2, 6, 9, 7]],
                   index=list('xy'), columns=list('abcde'))
    result = df.argsort()
    expected = np.array([[3, 0, 4, 1, 2],
                         [1, 2, 4, 0, 3]],
                        dtype=np.int64)
    self.assert_numpy_array_equal(result, expected)

    # NOTE(review): same caveat as above for ``assert_frame_equal``.
    result = df.ordering()
    expected = DataFrame([[1, 3, -1, 0, 2],
                          [3, 0, 1, 4, 2]],
                         index=list('xy'), columns=list('abcde'))
    self.assert_frame_equal(result, expected)
296332

297333
def test_cumsum(self):
298334
self._check_accum_op('cumsum')

0 commit comments

Comments
 (0)