Skip to content

Commit ec2064a

Browse files
committed
BUG: Categorical doesn't show tzinfo properly
1 parent e9b1a10 commit ec2064a

File tree

7 files changed

+653
-21
lines changed

7 files changed

+653
-21
lines changed

Diff for: doc/source/whatsnew/v0.17.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,9 @@ Bug Fixes
606606

607607

608608
- Bug in ``read_stata`` when reading a file with a different order set in ``columns`` (:issue:`10757`)
609+
- Bug in ``Categorical`` where the repr may not be displayed properly when the categories contain ``tz`` or ``Period`` values (:issue:`10713`)
610+
- Bug in ``Categorical.__iter__`` where it may not return correct ``datetime`` and ``Period`` values (:issue:`10713`)
611+
609612

610613
- Reading "famafrench" data via ``DataReader`` results in an HTTP 404 error because the website URL has changed (:issue:`10591`).
611614
- Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`)

Diff for: pandas/core/categorical.py

+16-17
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import pandas.core.common as com
1313
from pandas.util.decorators import cache_readonly, deprecate_kwarg
1414

15-
from pandas.core.common import (CategoricalDtype, ABCSeries, ABCIndexClass, ABCPeriodIndex, ABCCategoricalIndex,
15+
from pandas.core.common import (CategoricalDtype, ABCSeries, ABCIndexClass, ABCCategoricalIndex,
1616
isnull, notnull, is_dtype_equal,
1717
is_categorical_dtype, is_integer_dtype, is_object_dtype,
1818
_possibly_infer_to_datetimelike, get_dtype_kinds,
@@ -1053,15 +1053,12 @@ def get_values(self):
10531053
Returns
10541054
-------
10551055
values : numpy array
1056-
A numpy array of the same dtype as categorical.categories.dtype or dtype string if
1057-
periods
1056+
A numpy array of the same dtype as categorical.categories.dtype or
1057+
Index if datetime / periods
10581058
"""
1059-
1060-
# if we are a period index, return a string repr
1061-
if isinstance(self.categories, ABCPeriodIndex):
1062-
return take_1d(np.array(self.categories.to_native_types(), dtype=object),
1063-
self._codes)
1064-
1059+
# if we are a datetime and period index, return Index to keep metadata
1060+
if com.is_datetimelike(self.categories):
1061+
return self.categories.take(self._codes)
10651062
return np.array(self)
10661063

10671064
def check_for_ordered(self, op):
@@ -1308,7 +1305,7 @@ def __len__(self):
13081305

13091306
def __iter__(self):
13101307
"""Returns an Iterator over the values of this Categorical."""
1311-
return iter(np.array(self))
1308+
return iter(self.get_values())
13121309

13131310
def _tidy_repr(self, max_vals=10, footer=True):
13141311
""" a short repr displaying only max_vals and an optional (but default footer) """
@@ -1328,7 +1325,7 @@ def _repr_categories(self):
13281325
max_categories = (10 if get_option("display.max_categories") == 0
13291326
else get_option("display.max_categories"))
13301327
from pandas.core import format as fmt
1331-
category_strs = fmt.format_array(self.categories.get_values(), None)
1328+
category_strs = fmt.format_array(self.categories, None)
13321329
if len(category_strs) > max_categories:
13331330
num = max_categories // 2
13341331
head = category_strs[:num]
@@ -1343,22 +1340,24 @@ def _repr_categories_info(self):
13431340
""" Returns a string representation of the footer."""
13441341

13451342
category_strs = self._repr_categories()
1346-
levheader = "Categories (%d, %s): " % (len(self.categories),
1347-
self.categories.dtype)
1343+
dtype = getattr(self.categories, 'dtype_str', str(self.categories.dtype))
1344+
1345+
levheader = "Categories (%d, %s): " % (len(self.categories), dtype)
13481346
width, height = get_terminal_size()
13491347
max_width = get_option("display.width") or width
13501348
if com.in_ipython_frontend():
13511349
# 0 = no breaks
13521350
max_width = 0
13531351
levstring = ""
13541352
start = True
1355-
cur_col_len = len(levheader)
1353+
cur_col_len = len(levheader) # header
13561354
sep_len, sep = (3, " < ") if self.ordered else (2, ", ")
1355+
linesep = sep.rstrip() + "\n" # remove whitespace
13571356
for val in category_strs:
13581357
if max_width != 0 and cur_col_len + sep_len + len(val) > max_width:
1359-
levstring += "\n" + (" "* len(levheader))
1360-
cur_col_len = len(levheader)
1361-
if not start:
1358+
levstring += linesep + (" " * (len(levheader) + 1))
1359+
cur_col_len = len(levheader) + 1 # header + a whitespace
1360+
elif not start:
13621361
levstring += sep
13631362
cur_col_len += len(val)
13641363
levstring += val

Diff for: pandas/core/format.py

+33-4
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ def _get_formatted_index(self):
207207
return fmt_index, have_header
208208

209209
def _get_formatted_values(self):
210-
return format_array(self.tr_series.get_values(), None,
210+
return format_array(self.tr_series.values, None,
211211
float_format=self.float_format,
212212
na_rep=self.na_rep)
213213

@@ -681,7 +681,7 @@ def _format_col(self, i):
681681
frame = self.tr_frame
682682
formatter = self._get_formatter(i)
683683
return format_array(
684-
(frame.iloc[:, i]).get_values(),
684+
frame.iloc[:, i].values,
685685
formatter, float_format=self.float_format, na_rep=self.na_rep,
686686
space=self.col_space
687687
)
@@ -1895,8 +1895,13 @@ def get_formatted_cells(self):
18951895

18961896
def format_array(values, formatter, float_format=None, na_rep='NaN',
18971897
digits=None, space=None, justify='right'):
1898-
if com.is_float_dtype(values.dtype):
1898+
1899+
if com.is_categorical_dtype(values):
1900+
fmt_klass = CategoricalArrayFormatter
1901+
elif com.is_float_dtype(values.dtype):
18991902
fmt_klass = FloatArrayFormatter
1903+
elif com.is_period_arraylike(values):
1904+
fmt_klass = PeriodArrayFormatter
19001905
elif com.is_integer_dtype(values.dtype):
19011906
fmt_klass = IntArrayFormatter
19021907
elif com.is_datetime64_dtype(values.dtype):
@@ -1963,6 +1968,8 @@ def _format(x):
19631968
return '%s' % formatter(x)
19641969

19651970
vals = self.values
1971+
if isinstance(vals, Index):
1972+
vals = vals.values
19661973

19671974
is_float = lib.map_infer(vals, com.is_float) & notnull(vals)
19681975
leading_space = is_float.any()
@@ -2076,8 +2083,30 @@ def _format_strings(self):
20762083
values = values.asobject
20772084
is_dates_only = _is_dates_only(values)
20782085
formatter = (self.formatter or _get_format_datetime64(is_dates_only, values, date_format=self.date_format))
2079-
fmt_values = [ formatter(x) for x in self.values ]
2086+
fmt_values = [ formatter(x) for x in values ]
2087+
2088+
return fmt_values
2089+
20802090

2091+
class PeriodArrayFormatter(IntArrayFormatter):
2092+
2093+
def _format_strings(self):
2094+
values = np.array(self.values.to_native_types(), dtype=object)
2095+
formatter = self.formatter or (lambda x: '%s' % x)
2096+
fmt_values = [formatter(x) for x in values]
2097+
return fmt_values
2098+
2099+
2100+
class CategoricalArrayFormatter(GenericArrayFormatter):
2101+
2102+
def __init__(self, values, *args, **kwargs):
2103+
GenericArrayFormatter.__init__(self, values, *args, **kwargs)
2104+
2105+
def _format_strings(self):
2106+
fmt_values = format_array(self.values.get_values(), self.formatter,
2107+
float_format=self.float_format,
2108+
na_rep=self.na_rep, digits=self.digits,
2109+
space=self.space, justify=self.justify)
20812110
return fmt_values
20822111

20832112

Diff for: pandas/core/index.py

+9
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,11 @@ def dtype(self):
276276
""" return the dtype object of the underlying data """
277277
return self._data.dtype
278278

279+
@cache_readonly
280+
def dtype_str(self):
281+
""" return the dtype str of the underlying data """
282+
return str(self.dtype)
283+
279284
@property
280285
def values(self):
281286
""" return the underlying data as an ndarray """
@@ -2994,6 +2999,10 @@ def equals(self, other):
29942999

29953000
return False
29963001

3002+
@property
3003+
def _formatter_func(self):
3004+
return self.categories._formatter_func
3005+
29973006
def _format_attrs(self):
29983007
"""
29993008
Return a list of tuples of the (attr,formatted_value)

0 commit comments

Comments
 (0)