Skip to content

ENH: ExtensionArray.unique #19869

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 13, 2018
13 changes: 5 additions & 8 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
maybe_promote, construct_1d_object_array_from_listlike)
from pandas.core.dtypes.generic import (
ABCSeries, ABCIndex,
ABCIndexClass, ABCCategorical)
ABCIndexClass)
from pandas.core.dtypes.common import (
is_array_like,
is_unsigned_integer_dtype, is_signed_integer_dtype,
is_integer_dtype, is_complex_dtype,
is_object_dtype,
Expand Down Expand Up @@ -168,8 +169,7 @@ def _ensure_arraylike(values):
"""
ensure that we are arraylike if not already
"""
if not isinstance(values, (np.ndarray, ABCCategorical,
ABCIndexClass, ABCSeries)):
if not is_array_like(values):
inferred = lib.infer_dtype(values)
if inferred in ['mixed', 'string', 'unicode']:
if isinstance(values, tuple):
Expand Down Expand Up @@ -353,11 +353,8 @@ def unique(values):

values = _ensure_arraylike(values)

# categorical is a fast-path
# this will coerce Categorical, CategoricalIndex,
# and category dtypes Series to same return of Category
if is_categorical_dtype(values):
values = getattr(values, '.values', values)
if is_extension_array_dtype(values):
# Dispatch to extension dtype's unique.
return values.unique()

original = values
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,18 @@ def isna(self):
"""
raise AbstractMethodError(self)

def unique(self):
"""Compute the ExtensionArray of unique values.

Returns
-------
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A future PR should probably add some examples here (and to the other docstrings).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True :) The only problem is that for ExtensionArray we don't have a direct working example, as you first need to subclass it (unless we use one of the existing ones like Categorical, but that also seems a bit strange)

uniques : ExtensionArray
"""
from pandas import unique

uniques = unique(self.astype(object))
return self._constructor_from_sequence(uniques)

# ------------------------------------------------------------------------
# Indexing methods
# ------------------------------------------------------------------------
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/extension/base/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,14 @@ def test_count(self, data_missing):
def test_apply_simple_series(self, data):
    """Applying a function element-wise to a Series built from ``data``
    must hand back a ``pd.Series``.

    Parameters
    ----------
    data : fixture
        Values wrapped in ``pd.Series``; presumably an ExtensionArray
        supplied by the test suite's fixtures (not visible here).
    """
    series = pd.Series(data)
    # The builtin ``id`` serves as an arbitrary per-element callable; only
    # the container type of the outcome is checked.
    applied = series.apply(id)
    assert isinstance(applied, pd.Series)

@pytest.mark.parametrize('box', [pd.Series, lambda x: x])
@pytest.mark.parametrize('method', [lambda x: x.unique(), pd.unique])
def test_unique(self, data, box, method):
    """A two-element array holding the same value twice must reduce to a
    single unique element of the same array type.

    Parameters
    ----------
    data : fixture
        Array whose first element is duplicated; it must provide
        ``_constructor_from_sequence``.
    box : callable
        Either ``pd.Series`` or the identity, so both the wrapped and the
        bare array are exercised.
    method : callable
        Either the object's own ``.unique()`` or the top-level
        ``pd.unique``.
    """
    # Build an array that contains the first element of ``data`` twice.
    pair = [data[0], data[0]]
    repeated = data._constructor_from_sequence(pair)
    duplicated = box(repeated)

    uniques = method(duplicated)

    # Exactly one value survives, the container type is preserved, and the
    # surviving value equals the one that was duplicated.
    assert len(uniques) == 1
    assert isinstance(uniques, type(data))
    assert uniques[0] == duplicated[0]
7 changes: 7 additions & 0 deletions pandas/tests/extension/json/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ def take(self, indexer, allow_fill=True, fill_value=None):
def copy(self, deep=False):
    """Return a new instance of the same class built from a slice-copy of
    ``self.data``.

    Parameters
    ----------
    deep : bool, default False
        Accepted as part of the signature but not consulted by this
        implementation; the result is the same for either value.

    Returns
    -------
    A new object of ``type(self)`` constructed from ``self.data[:]``.
    """
    cls = type(self)
    # ``[:]`` yields a new outer container (presumably a list); the
    # elements themselves are shared with the original.
    duplicated = self.data[:]
    return cls(duplicated)

def unique(self):
# Parent method doesn't work since np.array will try to infer
# a 2-dim object.
return type(self)([
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use self._constructor rather than type(self) generally

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would change this too

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the comment above: define `_constructor_from_sequence`.

dict(x) for x in list(set(tuple(d.items()) for d in self.data))
])

@property
def _na_value(self):
    """Return a fresh empty ``dict``.

    Judging by the name, this is the value the array uses to represent a
    missing entry (not confirmed from the visible code).
    """
    empty = dict()
    return empty
Expand Down