Skip to content

Unify Index._dir_* with Series implementation #17117

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 29, 2017
49 changes: 30 additions & 19 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,35 @@ def __repr__(self):
return str(self)


class PandasObject(StringMixin):
class DirNamesMixin(object):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move this class to core/accessor.py

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, thanks.

_accessors = frozenset([])

def _dir_deletions(self):
""" delete unwanted __dir__ for this object """
return self._accessors

def _dir_additions(self):
""" add addtional __dir__ for this object """
rv = set()
for accessor in self._accessors:
try:
getattr(self, accessor)
rv.add(accessor)
except AttributeError:
pass
return rv

def __dir__(self):
"""
Provide method name lookup and completion
Only provide 'public' methods
"""
rv = set(dir(type(self)))
rv = (rv - self._dir_deletions()) | self._dir_additions()
return sorted(rv)


class PandasObject(StringMixin, DirNamesMixin):

"""baseclass for various pandas objects"""

Expand All @@ -91,23 +119,6 @@ def __unicode__(self):
# Should be overwritten by base classes
return object.__repr__(self)

def _dir_additions(self):
""" add addtional __dir__ for this object """
return set()

def _dir_deletions(self):
""" delete unwanted __dir__ for this object """
return set()

def __dir__(self):
"""
Provide method name lookup and completion
Only provide 'public' methods
"""
rv = set(dir(type(self)))
rv = (rv - self._dir_deletions()) | self._dir_additions()
return sorted(rv)

def _reset_cache(self, key=None):
"""
Reset cached properties. If ``key`` is passed, only clears that key.
Expand Down Expand Up @@ -140,7 +151,7 @@ class NoNewAttributesMixin(object):

Prevents additional attributes via xxx.attribute = "something" after a
call to `self.__freeze()`. Mainly used to prevent the user from using
wrong attrirbutes on a accessor (`Series.cat/.str/.dt`).
wrong attributes on a accessor (`Series.cat/.str/.dt`).

If you really want to add a new attribute at a later time, you need to use
`object.__setattr__(self, key, value)`.
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,9 @@ def __unicode__(self):

def _dir_additions(self):
""" add the string-like attributes from the info_axis """
return set([c for c in self._info_axis
if isinstance(c, string_types) and isidentifier(c)])
additions = set([c for c in self._info_axis
if isinstance(c, string_types) and isidentifier(c)])
return PandasObject._dir_additions(self).union(additions)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason not to use super ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really. I developed the habit working in statsmodels where inheritance chains can get pretty messy. Happy to change it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

super is the idiom. Calling specifically named methods usually indicates something odd in the inheritance chain.


@property
def _constructor_sliced(self):
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
import pandas.core.sorting as sorting
from pandas.io.formats.printing import pprint_thing
from pandas.core.ops import _comp_method_OBJECT_ARRAY
from pandas.core.strings import StringAccessorMixin
from pandas.core import strings
from pandas.core.config import get_option


Expand Down Expand Up @@ -98,7 +98,7 @@ def _new_Index(cls, d):
return cls.__new__(cls, **d)


class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
class Index(IndexOpsMixin, PandasObject):
"""
Immutable ndarray implementing an ordered, sliceable set. The basic object
storing axis labels for all pandas objects
Expand Down Expand Up @@ -151,6 +151,12 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):

_engine_type = libindex.ObjectEngine

_accessors = frozenset(['dt', 'str', 'cat'])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe leave out 'dt' and 'cat' until they are actually added as accessor? (in a next PR I suppose)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, but they are not yet really added as accessor (so no user visible change), only 'removed' from __dir__ (but not really removed since they are not there yet), correct?
In that case not that important.

I suppose we already have tests to check that 'str' is only present on the correct index types?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose we already have tests to check that 'str' is only present on the correct index types?

I'm not sure off the top, but I wrote some tests for #17204 that can be adapted pretty readily.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pls remove dt/cat, leave for another PR. str is covered I believe.


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there are no tests for the non string accessors in Index

# String Methods
str = base.AccessorProperty(strings.StringMethods,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rm this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then should I revert deletion of StringAccessorMixin and mix that back in to Index?

strings.StringMethods._make_accessor)

def __new__(cls, data=None, dtype=None, copy=False, name=None,
fastpath=False, tupleize_cols=True, **kwargs):

Expand Down
9 changes: 9 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,15 @@ def ceil(self, freq):
class DatetimeIndexOpsMixin(object):
""" common ops mixin to support a unified inteface datetimelike Index """

@property
def dt(self):
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rm this

For a datetime-like Index object, `self.dt` returns `self` so that
datetime-like attributes can be accessed symmetrically for Index
and Series objects.
"""
return self

def equals(self, other):
"""
Determines if two Index objects contain the same elements.
Expand Down
18 changes: 4 additions & 14 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,7 @@ def wrapper(self):
# Series class


class Series(base.IndexOpsMixin, strings.StringAccessorMixin,
generic.NDFrame,):
class Series(base.IndexOpsMixin, generic.NDFrame):
"""
One-dimensional ndarray with axis labels (including time series).

Expand Down Expand Up @@ -2934,18 +2933,9 @@ def _make_cat_accessor(self):

cat = base.AccessorProperty(CategoricalAccessor, _make_cat_accessor)

def _dir_deletions(self):
return self._accessors

def _dir_additions(self):
rv = set()
for accessor in self._accessors:
try:
getattr(self, accessor)
rv.add(accessor)
except AttributeError:
pass
return rv
# String Methods
str = base.AccessorProperty(strings.StringMethods,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is ok

strings.StringMethods._make_accessor)


Series._setup_axes(['index'], info_axis=0, stat_axis=0, aliases={'rows': 0})
Expand Down
37 changes: 11 additions & 26 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from pandas.core.algorithms import take_1d
import pandas.compat as compat
from pandas.core.base import AccessorProperty, NoNewAttributesMixin
from pandas.core.base import NoNewAttributesMixin
from pandas.util._decorators import Appender
import re
import pandas._libs.lib as lib
Expand Down Expand Up @@ -1890,18 +1890,15 @@ def rindex(self, sub, start=0, end=None):
docstring=_shared_docs['ismethods'] %
_shared_docs['isdecimal'])


class StringAccessorMixin(object):
""" Mixin to add a `.str` acessor to the class."""

# string methods
def _make_str_accessor(self):
@classmethod
def _make_accessor(cls, data):
from pandas.core.index import Index

if (isinstance(self, ABCSeries) and
not ((is_categorical_dtype(self.dtype) and
is_object_dtype(self.values.categories)) or
(is_object_dtype(self.dtype)))):
if (isinstance(data, ABCSeries) and
not ((is_categorical_dtype(data.dtype) and
is_object_dtype(data.values.categories)) or
(is_object_dtype(data.dtype)))):
# it's neither a string series not a categorical series with
# strings inside the categories.
# this really should exclude all series with any non-string values
Expand All @@ -1910,30 +1907,18 @@ def _make_str_accessor(self):
raise AttributeError("Can only use .str accessor with string "
"values, which use np.object_ dtype in "
"pandas")
elif isinstance(self, Index):
elif isinstance(data, Index):
# can't use ABCIndex to exclude non-str

# see scc/inferrence.pyx which can contain string values
allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer')
if self.inferred_type not in allowed_types:
if data.inferred_type not in allowed_types:
message = ("Can only use .str accessor with string values "
"(i.e. inferred_type is 'string', 'unicode' or "
"'mixed')")
raise AttributeError(message)
if self.nlevels > 1:
if data.nlevels > 1:
message = ("Can only use .str accessor with Index, not "
"MultiIndex")
raise AttributeError(message)
return StringMethods(self)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is ok


str = AccessorProperty(StringMethods, _make_str_accessor)

def _dir_additions(self):
return set()

def _dir_deletions(self):
try:
getattr(self, 'str')
except AttributeError:
return set(['str'])
return set()
return cls(data)