-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Add type hints to dtypes/dtypes.py (CategoricalDtype) #26327
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8be18a0
8f6c2e0
0fe837a
d0f150d
86666f0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
""" define extension dtypes """ | ||
import re | ||
from typing import Any, Dict, Optional, Tuple, Type | ||
from typing import Any, Dict, List, Optional, Tuple, Type, Union | ||
import warnings | ||
|
||
import numpy as np | ||
|
@@ -18,7 +18,8 @@ | |
str_type = str | ||
|
||
|
||
def register_extension_dtype(cls): | ||
def register_extension_dtype(cls: Type[ExtensionDtype], | ||
) -> Type[ExtensionDtype]: | ||
""" | ||
Register an ExtensionType with pandas as class decorator. | ||
|
||
|
@@ -60,9 +61,9 @@ class Registry: | |
These are tried in order. | ||
""" | ||
def __init__(self): | ||
self.dtypes = [] | ||
self.dtypes = [] # type: List[Type[ExtensionDtype]] | ||
|
||
def register(self, dtype): | ||
def register(self, dtype: Type[ExtensionDtype]) -> None: | ||
""" | ||
Parameters | ||
---------- | ||
|
@@ -73,11 +74,13 @@ def register(self, dtype): | |
|
||
self.dtypes.append(dtype) | ||
|
||
def find(self, dtype): | ||
def find(self, | ||
dtype: Union[Type[ExtensionDtype], str], | ||
) -> Optional[Type[ExtensionDtype]]: | ||
""" | ||
Parameters | ||
---------- | ||
dtype : PandasExtensionDtype or string | ||
dtype : Type[ExtensionDtype] or string | ||
|
||
Returns | ||
------- | ||
|
@@ -126,28 +129,28 @@ class PandasExtensionDtype(ExtensionDtype): | |
isnative = 0 | ||
_cache = {} # type: Dict[str_type, 'PandasExtensionDtype'] | ||
|
||
def __str__(self): | ||
def __str__(self) -> str_type: | ||
""" | ||
Return a string representation for a particular Object | ||
""" | ||
return self.name | ||
|
||
def __repr__(self): | ||
def __repr__(self) -> str_type: | ||
""" | ||
Return a string representation for a particular object. | ||
""" | ||
return str(self) | ||
|
||
def __hash__(self): | ||
def __hash__(self) -> int: | ||
raise NotImplementedError("sub-classes should implement an __hash__ " | ||
"method") | ||
|
||
def __getstate__(self): | ||
def __getstate__(self) -> Dict[str_type, Any]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would like to minimize the use of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If the type hints are important to be specific to the ExtensionsDtype, maybe subclass |
||
# pickle support; we don't want to pickle the cache | ||
return {k: getattr(self, k, None) for k in self._metadata} | ||
|
||
@classmethod | ||
def reset_cache(cls): | ||
def reset_cache(cls) -> None: | ||
""" clear the cache """ | ||
cls._cache = {} | ||
|
||
|
@@ -211,17 +214,24 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): | |
_metadata = ('categories', 'ordered') | ||
_cache = {} # type: Dict[str_type, PandasExtensionDtype] | ||
|
||
def __init__(self, categories=None, ordered=None): | ||
def __init__(self, categories=None, ordered: bool = None): | ||
WillAyd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self._finalize(categories, ordered, fastpath=False) | ||
|
||
@classmethod | ||
def _from_fastpath(cls, categories=None, ordered=None): | ||
def _from_fastpath(cls, | ||
categories=None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you annotate categories? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only imprecisely. The most similar would be a plain I'm a bit apprehensive about accepting wrong types, but maybe that would be the correct compromise. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. categories should be ListLike which is ArrayLike + list + tuple + Iterable (define it in _typing) |
||
ordered: bool = None | ||
) -> 'CategoricalDtype': | ||
self = cls.__new__(cls) | ||
self._finalize(categories, ordered, fastpath=True) | ||
return self | ||
|
||
@classmethod | ||
def _from_categorical_dtype(cls, dtype, categories=None, ordered=None): | ||
def _from_categorical_dtype(cls, | ||
dtype: 'CategoricalDtype', | ||
categories=None, | ||
ordered: bool = None, | ||
) -> 'CategoricalDtype': | ||
if categories is ordered is None: | ||
return dtype | ||
if categories is None: | ||
|
@@ -231,8 +241,12 @@ def _from_categorical_dtype(cls, dtype, categories=None, ordered=None): | |
return cls(categories, ordered) | ||
|
||
@classmethod | ||
def _from_values_or_dtype(cls, values=None, categories=None, ordered=None, | ||
dtype=None): | ||
def _from_values_or_dtype(cls, | ||
values=None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you annotate values / categories? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See comment above. The most precise here would again be |
||
categories=None, | ||
ordered: bool = None, | ||
dtype: 'CategoricalDtype' = None, | ||
) -> 'CategoricalDtype': | ||
""" | ||
Construct dtype from the input parameters used in :class:`Categorical`. | ||
|
||
|
@@ -314,7 +328,11 @@ def _from_values_or_dtype(cls, values=None, categories=None, ordered=None, | |
|
||
return dtype | ||
|
||
def _finalize(self, categories, ordered, fastpath=False): | ||
def _finalize(self, | ||
categories, | ||
ordered: Optional[bool], | ||
fastpath: bool = False, | ||
) -> None: | ||
|
||
if ordered is not None: | ||
self.validate_ordered(ordered) | ||
|
@@ -326,14 +344,14 @@ def _finalize(self, categories, ordered, fastpath=False): | |
self._categories = categories | ||
self._ordered = ordered | ||
|
||
def __setstate__(self, state): | ||
def __setstate__(self, state: Dict[str_type, Any]) -> None: | ||
# for pickle compat. __get_state__ is defined in the | ||
# PandasExtensionDtype superclass and uses the public properties to | ||
# pickle -> need to set the settable private ones here (see GH26067) | ||
self._categories = state.pop('categories', None) | ||
self._ordered = state.pop('ordered', False) | ||
|
||
def __hash__(self): | ||
def __hash__(self) -> int: | ||
# _hash_categories returns a uint64, so use the negative | ||
# space for when we have unknown categories to avoid a conflict | ||
if self.categories is None: | ||
|
@@ -344,7 +362,7 @@ def __hash__(self): | |
# We *do* want to include the real self.ordered here | ||
return int(self._hash_categories(self.categories, self.ordered)) | ||
|
||
def __eq__(self, other): | ||
def __eq__(self, other: Any) -> bool: | ||
""" | ||
Rules for CDT equality: | ||
1) Any CDT is equal to the string 'category' | ||
|
@@ -391,7 +409,7 @@ def __repr__(self): | |
return tpl.format(data, self.ordered) | ||
|
||
@staticmethod | ||
def _hash_categories(categories, ordered=True): | ||
def _hash_categories(categories, ordered: Optional[bool] = True) -> int: | ||
from pandas.core.util.hashing import ( | ||
hash_array, _combine_hash_arrays, hash_tuples | ||
) | ||
|
@@ -441,7 +459,7 @@ def construct_array_type(cls): | |
return Categorical | ||
|
||
@staticmethod | ||
def validate_ordered(ordered): | ||
def validate_ordered(ordered: bool) -> None: | ||
""" | ||
Validates that we have a valid ordered parameter. If | ||
it is not a boolean, a TypeError will be raised. | ||
|
@@ -461,7 +479,7 @@ def validate_ordered(ordered): | |
raise TypeError("'ordered' must either be 'True' or 'False'") | ||
|
||
@staticmethod | ||
def validate_categories(categories, fastpath=False): | ||
def validate_categories(categories, fastpath: bool = False): | ||
""" | ||
Validates that we have good categories | ||
|
||
|
@@ -475,7 +493,7 @@ def validate_categories(categories, fastpath=False): | |
------- | ||
categories : Index | ||
""" | ||
from pandas import Index | ||
from pandas.core.indexes.base import Index | ||
|
||
if not fastpath and not is_list_like(categories): | ||
msg = "Parameter 'categories' must be list-like, was {!r}" | ||
|
@@ -496,7 +514,7 @@ def validate_categories(categories, fastpath=False): | |
|
||
return categories | ||
|
||
def update_dtype(self, dtype): | ||
def update_dtype(self, dtype: 'CategoricalDtype') -> 'CategoricalDtype': | ||
""" | ||
Returns a CategoricalDtype with categories and ordered taken from dtype | ||
if specified, otherwise falling back to self if unspecified | ||
|
@@ -538,14 +556,14 @@ def categories(self): | |
return self._categories | ||
|
||
@property | ||
def ordered(self): | ||
def ordered(self) -> Optional[bool]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this optional? I thought ordered was always sure or values (categories are optional). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes: In [1]: pd.CategoricalDtype()
Out[1]: CategoricalDtype(categories=None, ordered=None) I think this has somwthing to to with updating dtypes (don't update None, but do update False). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. grr, this isn't needed, and isn't how we document things. #26336 |
||
""" | ||
Whether the categories have an ordered relationship. | ||
""" | ||
return self._ordered | ||
|
||
@property | ||
def _is_boolean(self): | ||
def _is_boolean(self) -> bool: | ||
from pandas.core.dtypes.common import is_bool_dtype | ||
|
||
return is_bool_dtype(self.categories) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@WillAyd on your how-to-type-annotate can you add when / why to use
str_type