-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
/
Copy pathcategoricals.py
114 lines (80 loc) · 3.03 KB
/
categoricals.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from .pandas_vb_common import *
try:
from pandas.api.types import union_categoricals
except ImportError:
try:
from pandas.types.concat import union_categoricals
except ImportError:
pass
class Categoricals(object):
    """Benchmarks for concatenating, unioning and constructing categoricals.

    ``setup`` builds every fixture; each ``time_*`` method performs exactly
    one operation on those fixtures and returns nothing.
    """

    # NOTE(review): presumably the per-benchmark target time read by the
    # benchmark runner -- confirm against the runner's documentation.
    goal_time = 0.2

    def setup(self):
        """Create the Series, Categorical and raw-value fixtures."""
        N = 100000
        self.s = pd.Series((list('aabbcd') * N)).astype('category')

        # Two categoricals whose category sets only partly overlap.
        self.a = pd.Categorical((list('aabbcd') * N))
        self.b = pd.Categorical((list('bbcdjk') * N))

        self.categories = list('abcde')
        self.cat_idx = Index(self.categories)
        self.values = np.tile(self.categories, N)
        self.codes = np.tile(range(len(self.categories)), N)

        self.datetimes = pd.Series(pd.date_range(
            '1995-01-01 00:00:00', periods=10000, freq='s'))
        # Prepared here, on a copy, so the benchmark method neither mutates
        # ``self.datetimes`` (also read by time_constructor_datetimes) nor
        # spends its time on the NaT assignment.
        self.datetimes_with_nat = self.datetimes.copy()
        self.datetimes_with_nat.iloc[-1] = pd.NaT

        # Plain list repetition keeps np.nan a real float NaN.  Passing the
        # mixed str/float list through np.tile would coerce it to a string
        # array, turning every NaN into the text 'nan'.
        self.values_some_nan = (self.categories + [np.nan]) * N
        self.values_all_nan = [np.nan] * len(self.values)

    def time_concat(self):
        """Concatenate the categorical Series with itself."""
        concat([self.s, self.s])

    def time_union(self):
        """Union two categoricals with partly different categories."""
        union_categoricals([self.a, self.b])

    def time_constructor_regular(self):
        """Construct from a values array plus an explicit category list."""
        Categorical(self.values, self.categories)

    def time_constructor_fastpath(self):
        """Construct from integer codes and an Index with fastpath=True."""
        Categorical(self.codes, self.cat_idx, fastpath=True)

    def time_constructor_datetimes(self):
        """Construct from a datetime Series."""
        Categorical(self.datetimes)

    def time_constructor_datetimes_with_nat(self):
        """Construct from a datetime Series whose last element is NaT."""
        Categorical(self.datetimes_with_nat)

    def time_constructor_with_nan(self):
        """Construct from a list mixing strings with NaN entries."""
        Categorical(self.values_some_nan)

    def time_constructor_all_nan(self):
        """Construct from a list consisting solely of NaN."""
        Categorical(self.values_all_nan)
class Categoricals2(object):
    """Benchmarks on one large categorical Series of generated labels.

    Covers value counting (with and without NaN dropping), string
    rendering of a one-row selection, and replacing the category set.
    """

    goal_time = 0.2

    def setup(self):
        """Build the seeded categorical Series and a single-row slice."""
        size = 500000
        # Fixed seed so the generated labels are the same on every run.
        np.random.seed(2718281)
        draws = np.random.randint(0, size // 10, size=size)
        labels = ['s%04d' % draw for draw in draws]

        categorical = Series(labels).astype('category')
        self.ts = categorical
        # List-based .loc lookup: yields a length-1 Series, not a scalar.
        self.sel = categorical.loc[[0]]

    def time_value_counts(self):
        """Count values, keeping NaN in the result."""
        series = self.ts
        series.value_counts(dropna=False)

    def time_value_counts_dropna(self):
        """Count values, dropping NaN from the result."""
        series = self.ts
        series.value_counts(dropna=True)

    def time_rendering(self):
        """Render the one-row selection as a string."""
        selection = self.sel
        str(selection)

    def time_set_categories(self):
        """Replace the categories with every second existing category."""
        accessor = self.ts.cat
        every_other = self.ts.cat.categories[::2]
        accessor.set_categories(every_other)
class Categoricals3(object):
    """Benchmarks for ``Series.rank`` on plain and categorical data.

    Each of two base Series (``s1`` and ``s2``) is ranked in three forms:
    as built, as an unordered categorical, and as an ordered categorical.
    """

    goal_time = 0.2

    def setup(self):
        """Build both base Series and their categorical variants."""
        N = 100000
        ncats = 100

        self.s1 = Series(np.array(tm.makeCategoricalIndex(N, ncats)))
        self.s1_cat = self.s1.astype('category')
        # ``astype('category', ordered=True)`` relied on a keyword that was
        # deprecated and later removed from ``astype``; marking the existing
        # categorical as ordered gives the same categories and ordering and
        # works on both old and new pandas.
        self.s1_cat_ordered = self.s1_cat.cat.as_ordered()

        self.s2 = Series(np.random.randint(0, ncats, size=N))
        self.s2_cat = self.s2.astype('category')
        self.s2_cat_ordered = self.s2_cat.cat.as_ordered()

    def time_rank_string(self):
        """Rank the ``s1`` Series as built."""
        self.s1.rank()

    def time_rank_string_cat(self):
        """Rank ``s1`` as an unordered categorical."""
        self.s1_cat.rank()

    def time_rank_string_cat_ordered(self):
        """Rank ``s1`` as an ordered categorical."""
        self.s1_cat_ordered.rank()

    def time_rank_int(self):
        """Rank the random-integer ``s2`` Series as built."""
        self.s2.rank()

    def time_rank_int_cat(self):
        """Rank ``s2`` as an unordered categorical."""
        self.s2_cat.rank()

    def time_rank_int_cat_ordered(self):
        """Rank ``s2`` as an ordered categorical."""
        self.s2_cat_ordered.rank()