Skip to content

Commit 840ae13

Browse files
committed
WIP: Index no longer an ndarray sub-class (GH5080)
CLN: add searchsorted to core/base (GH6712, GH7447, GH6469); fixup tests in test_timeseries for reverse ndarray/datetimeindex comparisons; fix algos / multi-index repeat (essentially this is a bug-fix). ENH: add NumericIndex and operators, related (GH7439)
1 parent 8d80a42 commit 840ae13

38 files changed

+841
-533
lines changed

pandas/compat/pickle_compat.py

+30-39
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,30 @@
77
import pickle as pkl
88
from pandas import compat
99
from pandas.compat import u, string_types
10-
from pandas.core.series import Series, TimeSeries
11-
from pandas.sparse.series import SparseSeries, SparseTimeSeries
12-
1310

1411
def load_reduce(self):
1512
stack = self.stack
1613
args = stack.pop()
1714
func = stack[-1]
15+
1816
if type(args[0]) is type:
1917
n = args[0].__name__
20-
if n == u('DeprecatedSeries') or n == u('DeprecatedTimeSeries'):
21-
stack[-1] = object.__new__(Series)
22-
return
23-
elif (n == u('DeprecatedSparseSeries') or
24-
n == u('DeprecatedSparseTimeSeries')):
25-
stack[-1] = object.__new__(SparseSeries)
26-
return
2718

2819
try:
29-
value = func(*args)
30-
except:
20+
stack[-1] = func(*args)
21+
return
22+
except Exception as e:
23+
24+
# if we have a deprecated function
25+
# try to replace and try again
26+
27+
if '_reconstruct: First argument must be a sub-type of ndarray' in str(e):
28+
try:
29+
cls = args[0]
30+
stack[-1] = object.__new__(cls)
31+
return
32+
except:
33+
pass
3134

3235
# try to reencode the arguments
3336
if getattr(self,'encoding',None) is not None:
@@ -57,6 +60,21 @@ class Unpickler(pkl.Unpickler):
5760
Unpickler.dispatch = copy.copy(Unpickler.dispatch)
5861
Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
5962

63+
def load_newobj(self):
64+
args = self.stack.pop()
65+
cls = self.stack[-1]
66+
67+
try:
68+
obj = cls.__new__(cls, *args)
69+
except (Exception) as e:
70+
71+
# replace the default newobj creator to
72+
# handle an odd issue with 0-len series
73+
if not len(args):
74+
obj = object.__new__(cls)
75+
76+
self.stack[-1] = obj
77+
Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj
6078

6179
def load(fh, encoding=None, compat=False, is_verbose=False):
6280
"""load a pickle, with a provided encoding
@@ -74,11 +92,6 @@ def load(fh, encoding=None, compat=False, is_verbose=False):
7492
"""
7593

7694
try:
77-
if compat:
78-
pandas.core.series.Series = DeprecatedSeries
79-
pandas.core.series.TimeSeries = DeprecatedTimeSeries
80-
pandas.sparse.series.SparseSeries = DeprecatedSparseSeries
81-
pandas.sparse.series.SparseTimeSeries = DeprecatedSparseTimeSeries
8295
fh.seek(0)
8396
if encoding is not None:
8497
up = Unpickler(fh, encoding=encoding)
@@ -89,25 +102,3 @@ def load(fh, encoding=None, compat=False, is_verbose=False):
89102
return up.load()
90103
except:
91104
raise
92-
finally:
93-
if compat:
94-
pandas.core.series.Series = Series
95-
pandas.core.series.Series = TimeSeries
96-
pandas.sparse.series.SparseSeries = SparseSeries
97-
pandas.sparse.series.SparseTimeSeries = SparseTimeSeries
98-
99-
100-
class DeprecatedSeries(np.ndarray, Series):
101-
pass
102-
103-
104-
class DeprecatedTimeSeries(DeprecatedSeries):
105-
pass
106-
107-
108-
class DeprecatedSparseSeries(DeprecatedSeries):
109-
pass
110-
111-
112-
class DeprecatedSparseTimeSeries(DeprecatedSparseSeries):
113-
pass

pandas/core/base.py

+54-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pandas.core import common as com
99
import pandas.core.nanops as nanops
1010
import pandas.tslib as tslib
11-
from pandas.util.decorators import cache_readonly
11+
from pandas.util.decorators import Appender, cache_readonly
1212

1313
class StringMixin(object):
1414

@@ -205,6 +205,19 @@ def __unicode__(self):
205205
quote_strings=True)
206206
return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype)
207207

208+
def _unbox(func):
209+
@Appender(func.__doc__)
210+
def f(self, *args, **kwargs):
211+
result = func(self.values, *args, **kwargs)
212+
from pandas.core.index import Index
213+
if isinstance(result, (np.ndarray, com.ABCSeries, Index)) and result.ndim == 0:
214+
# return NumPy type
215+
return result.dtype.type(result.item())
216+
else: # pragma: no cover
217+
return result
218+
f.__name__ = func.__name__
219+
return f
220+
208221
class IndexOpsMixin(object):
209222
""" common ops mixin to support a unified interface / docs for Series / Index """
210223

@@ -340,6 +353,20 @@ def factorize(self, sort=False, na_sentinel=-1):
340353
from pandas.core.algorithms import factorize
341354
return factorize(self, sort=sort, na_sentinel=na_sentinel)
342355

356+
def searchsorted(self, key, side='left'):
357+
""" np.ndarray searchsorted compat """
358+
359+
### FIXME ###
360+
#### needs coercion on the key (DatetimeIndex does already)
361+
#### needs tests/doc-string ###
362+
return self.values.searchsorted(key, side=side)
363+
364+
#----------------------------------------------------------------------
365+
# unbox reductions
366+
367+
all = _unbox(np.ndarray.all)
368+
any = _unbox(np.ndarray.any)
369+
343370
# facilitate the properties on the wrapped ops
344371
def _field_accessor(name, docstring=None):
345372
op_accessor = '_{0}'.format(name)
@@ -456,6 +483,19 @@ def min(self, axis=None):
456483
except ValueError:
457484
return self._na_value
458485

486+
def argmin(self, axis=None):
487+
"""
488+
return the minimum argument indexer
489+
490+
FIXME: need some tests (what to do if all NaT?)
491+
"""
492+
i8 = self.asi8
493+
if self.hasnans:
494+
mask = i8 == tslib.iNaT
495+
i8 = i8.copy()
496+
i8[mask] = np.iinfo('int64').max
497+
return i8.argmin()
498+
459499
def max(self, axis=None):
460500
"""
461501
Overridden ndarray.max to return an object
@@ -477,6 +517,19 @@ def max(self, axis=None):
477517
except ValueError:
478518
return self._na_value
479519

520+
def argmax(self, axis=None):
521+
"""
522+
return the maximum argument indexer
523+
524+
FIXME: need some tests (what to do if all NaT?)
525+
"""
526+
i8 = self.asi8
527+
if self.hasnans:
528+
mask = i8 == tslib.iNaT
529+
i8 = i8.copy()
530+
i8[mask] = 0
531+
return i8.argmax()
532+
480533
@property
481534
def _formatter_func(self):
482535
"""

pandas/core/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -939,6 +939,6 @@ def _get_codes_for_values(values, levels):
939939
levels = com._ensure_object(levels)
940940
(hash_klass, vec_klass), vals = _get_data_algo(values, _hashtables)
941941
t = hash_klass(len(levels))
942-
t.map_locations(levels)
942+
t.map_locations(com._values_from_object(levels))
943943
return com._ensure_platform_int(t.lookup(values))
944944

pandas/core/common.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def _isnull_new(obj):
205205
# hack (for now) because MI registers as ndarray
206206
elif isinstance(obj, pd.MultiIndex):
207207
raise NotImplementedError("isnull is not defined for MultiIndex")
208-
elif isinstance(obj, (ABCSeries, np.ndarray)):
208+
elif isinstance(obj, (ABCSeries, np.ndarray, pd.Index)):
209209
return _isnull_ndarraylike(obj)
210210
elif isinstance(obj, ABCGeneric):
211211
return obj._constructor(obj._data.isnull(func=isnull))
@@ -231,7 +231,7 @@ def _isnull_old(obj):
231231
# hack (for now) because MI registers as ndarray
232232
elif isinstance(obj, pd.MultiIndex):
233233
raise NotImplementedError("isnull is not defined for MultiIndex")
234-
elif isinstance(obj, (ABCSeries, np.ndarray)):
234+
elif isinstance(obj, (ABCSeries, np.ndarray, pd.Index)):
235235
return _isnull_ndarraylike_old(obj)
236236
elif isinstance(obj, ABCGeneric):
237237
return obj._constructor(obj._data.isnull(func=_isnull_old))
@@ -2024,8 +2024,7 @@ def _is_bool_indexer(key):
20242024
def _default_index(n):
20252025
from pandas.core.index import Int64Index
20262026
values = np.arange(n, dtype=np.int64)
2027-
result = values.view(Int64Index)
2028-
result.name = None
2027+
result = Int64Index(values,name=None)
20292028
result.is_unique = True
20302029
return result
20312030

pandas/core/format.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -1186,7 +1186,7 @@ def _helper_csv(self, writer, na_rep=None, cols=None,
11861186
if cols is None:
11871187
cols = self.columns
11881188

1189-
has_aliases = isinstance(header, (tuple, list, np.ndarray))
1189+
has_aliases = isinstance(header, (tuple, list, np.ndarray, Index))
11901190
if has_aliases or header:
11911191
if index:
11921192
# should write something for index label
@@ -1205,7 +1205,7 @@ def _helper_csv(self, writer, na_rep=None, cols=None,
12051205
else:
12061206
index_label = [index_label]
12071207
elif not isinstance(index_label,
1208-
(list, tuple, np.ndarray)):
1208+
(list, tuple, np.ndarray, Index)):
12091209
# given a string for a DF with Index
12101210
index_label = [index_label]
12111211

@@ -1327,7 +1327,7 @@ def _save_header(self):
13271327
header = self.header
13281328
encoded_labels = []
13291329

1330-
has_aliases = isinstance(header, (tuple, list, np.ndarray))
1330+
has_aliases = isinstance(header, (tuple, list, np.ndarray, Index))
13311331
if not (has_aliases or self.header):
13321332
return
13331333
if has_aliases:
@@ -1355,7 +1355,7 @@ def _save_header(self):
13551355
index_label = ['']
13561356
else:
13571357
index_label = [index_label]
1358-
elif not isinstance(index_label, (list, tuple, np.ndarray)):
1358+
elif not isinstance(index_label, (list, tuple, np.ndarray, Index)):
13591359
# given a string for a DF with Index
13601360
index_label = [index_label]
13611361

@@ -1520,7 +1520,7 @@ def _format_value(self, val):
15201520
return val
15211521

15221522
def _format_header_mi(self):
1523-
has_aliases = isinstance(self.header, (tuple, list, np.ndarray))
1523+
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
15241524
if not(has_aliases or self.header):
15251525
return
15261526

@@ -1566,7 +1566,7 @@ def _format_header_mi(self):
15661566
self.rowcounter = lnum
15671567

15681568
def _format_header_regular(self):
1569-
has_aliases = isinstance(self.header, (tuple, list, np.ndarray))
1569+
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
15701570
if has_aliases or self.header:
15711571
coloffset = 0
15721572

@@ -1611,7 +1611,7 @@ def _format_body(self):
16111611
return self._format_regular_rows()
16121612

16131613
def _format_regular_rows(self):
1614-
has_aliases = isinstance(self.header, (tuple, list, np.ndarray))
1614+
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
16151615
if has_aliases or self.header:
16161616
self.rowcounter += 1
16171617

@@ -1621,7 +1621,7 @@ def _format_regular_rows(self):
16211621
# check aliases
16221622
# if list only take first as this is not a MultiIndex
16231623
if self.index_label and isinstance(self.index_label,
1624-
(list, tuple, np.ndarray)):
1624+
(list, tuple, np.ndarray, Index)):
16251625
index_label = self.index_label[0]
16261626
# if string good to go
16271627
elif self.index_label and isinstance(self.index_label, str):
@@ -1661,7 +1661,7 @@ def _format_regular_rows(self):
16611661
yield ExcelCell(self.rowcounter + i, colidx + coloffset, val)
16621662

16631663
def _format_hierarchical_rows(self):
1664-
has_aliases = isinstance(self.header, (tuple, list, np.ndarray))
1664+
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
16651665
if has_aliases or self.header:
16661666
self.rowcounter += 1
16671667

@@ -1671,7 +1671,7 @@ def _format_hierarchical_rows(self):
16711671
index_labels = self.df.index.names
16721672
# check for aliases
16731673
if self.index_label and isinstance(self.index_label,
1674-
(list, tuple, np.ndarray)):
1674+
(list, tuple, np.ndarray, Index)):
16751675
index_labels = self.index_label
16761676

16771677
# if index labels are not empty go ahead and dump

0 commit comments

Comments
 (0)