Skip to content

Commit b02d69f

Browse files
noamher authored and Pingviinituutti committed
CLN: Refactor cython to use memory views (pandas-dev#24932)
1 parent f3de7ab commit b02d69f

21 files changed

+240
-214
lines changed

pandas/_libs/algos.pyx

+13 -13
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ class NegInfinity(object):
7676

7777
@cython.wraparound(False)
7878
@cython.boundscheck(False)
79-
cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
79+
cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr):
8080
"""
8181
Efficiently find the unique first-differences of the given array.
8282
@@ -150,7 +150,7 @@ def is_lexsorted(list_of_arrays: list) -> bint:
150150

151151
@cython.boundscheck(False)
152152
@cython.wraparound(False)
153-
def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
153+
def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
154154
"""
155155
compute a 1-d indexer that is an ordering of the passed index,
156156
ordered by the groups. This is a reverse of the label
@@ -230,7 +230,7 @@ def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric:
230230

231231
@cython.boundscheck(False)
232232
@cython.wraparound(False)
233-
def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):
233+
def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None):
234234
cdef:
235235
Py_ssize_t i, j, xi, yi, N, K
236236
bint minpv
@@ -294,7 +294,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):
294294

295295
@cython.boundscheck(False)
296296
@cython.wraparound(False)
297-
def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
297+
def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
298298
cdef:
299299
Py_ssize_t i, j, xi, yi, N, K
300300
ndarray[float64_t, ndim=2] result
@@ -435,8 +435,8 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
435435

436436
@cython.boundscheck(False)
437437
@cython.wraparound(False)
438-
def pad_inplace(ndarray[algos_t] values,
439-
ndarray[uint8_t, cast=True] mask,
438+
def pad_inplace(algos_t[:] values,
439+
const uint8_t[:] mask,
440440
limit=None):
441441
cdef:
442442
Py_ssize_t i, N
@@ -472,8 +472,8 @@ def pad_inplace(ndarray[algos_t] values,
472472

473473
@cython.boundscheck(False)
474474
@cython.wraparound(False)
475-
def pad_2d_inplace(ndarray[algos_t, ndim=2] values,
476-
ndarray[uint8_t, ndim=2] mask,
475+
def pad_2d_inplace(algos_t[:, :] values,
476+
const uint8_t[:, :] mask,
477477
limit=None):
478478
cdef:
479479
Py_ssize_t i, j, N, K
@@ -602,8 +602,8 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
602602

603603
@cython.boundscheck(False)
604604
@cython.wraparound(False)
605-
def backfill_inplace(ndarray[algos_t] values,
606-
ndarray[uint8_t, cast=True] mask,
605+
def backfill_inplace(algos_t[:] values,
606+
const uint8_t[:] mask,
607607
limit=None):
608608
cdef:
609609
Py_ssize_t i, N
@@ -639,8 +639,8 @@ def backfill_inplace(ndarray[algos_t] values,
639639

640640
@cython.boundscheck(False)
641641
@cython.wraparound(False)
642-
def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,
643-
ndarray[uint8_t, ndim=2] mask,
642+
def backfill_2d_inplace(algos_t[:, :] values,
643+
const uint8_t[:, :] mask,
644644
limit=None):
645645
cdef:
646646
Py_ssize_t i, j, N, K
@@ -678,7 +678,7 @@ def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,
678678

679679
@cython.wraparound(False)
680680
@cython.boundscheck(False)
681-
def arrmap(ndarray[algos_t] index, object func):
681+
def arrmap(algos_t[:] index, object func):
682682
cdef:
683683
Py_ssize_t length = index.shape[0]
684684
Py_ssize_t i = 0

pandas/_libs/groupby_helper.pxi.in

+46 -46
Original file line number | Diff line number | Diff line change
@@ -29,10 +29,10 @@ def get_dispatch(dtypes):
2929

3030
@cython.wraparound(False)
3131
@cython.boundscheck(False)
32-
def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out,
33-
ndarray[int64_t] counts,
34-
ndarray[{{c_type}}, ndim=2] values,
35-
ndarray[int64_t] labels,
32+
def group_add_{{name}}({{c_type}}[:, :] out,
33+
int64_t[:] counts,
34+
{{c_type}}[:, :] values,
35+
const int64_t[:] labels,
3636
Py_ssize_t min_count=0):
3737
"""
3838
Only aggregates on axis=0
@@ -76,10 +76,10 @@ def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out,
7676

7777
@cython.wraparound(False)
7878
@cython.boundscheck(False)
79-
def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out,
80-
ndarray[int64_t] counts,
81-
ndarray[{{c_type}}, ndim=2] values,
82-
ndarray[int64_t] labels,
79+
def group_prod_{{name}}({{c_type}}[:, :] out,
80+
int64_t[:] counts,
81+
{{c_type}}[:, :] values,
82+
const int64_t[:] labels,
8383
Py_ssize_t min_count=0):
8484
"""
8585
Only aggregates on axis=0
@@ -123,10 +123,10 @@ def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out,
123123
@cython.wraparound(False)
124124
@cython.boundscheck(False)
125125
@cython.cdivision(True)
126-
def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out,
127-
ndarray[int64_t] counts,
128-
ndarray[{{c_type}}, ndim=2] values,
129-
ndarray[int64_t] labels,
126+
def group_var_{{name}}({{c_type}}[:, :] out,
127+
int64_t[:] counts,
128+
{{c_type}}[:, :] values,
129+
const int64_t[:] labels,
130130
Py_ssize_t min_count=-1):
131131
cdef:
132132
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
@@ -175,10 +175,10 @@ def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out,
175175

176176
@cython.wraparound(False)
177177
@cython.boundscheck(False)
178-
def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out,
179-
ndarray[int64_t] counts,
180-
ndarray[{{c_type}}, ndim=2] values,
181-
ndarray[int64_t] labels,
178+
def group_mean_{{name}}({{c_type}}[:, :] out,
179+
int64_t[:] counts,
180+
{{c_type}}[:, :] values,
181+
const int64_t[:] labels,
182182
Py_ssize_t min_count=-1):
183183
cdef:
184184
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
@@ -220,11 +220,11 @@ def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out,
220220

221221
@cython.wraparound(False)
222222
@cython.boundscheck(False)
223-
def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out,
224-
ndarray[int64_t] counts,
225-
ndarray[{{c_type}}, ndim=2] values,
226-
ndarray[int64_t] labels,
227-
Py_ssize_t min_count=-1):
223+
def group_ohlc_{{name}}({{c_type}}[:, :] out,
224+
int64_t[:] counts,
225+
{{c_type}}[:, :] values,
226+
const int64_t[:] labels,
227+
Py_ssize_t min_count=-1):
228228
"""
229229
Only aggregates on axis=0
230230
"""
@@ -293,10 +293,10 @@ def get_dispatch(dtypes):
293293

294294
@cython.wraparound(False)
295295
@cython.boundscheck(False)
296-
def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out,
297-
ndarray[int64_t] counts,
298-
ndarray[{{c_type}}, ndim=2] values,
299-
ndarray[int64_t] labels,
296+
def group_last_{{name}}({{c_type}}[:, :] out,
297+
int64_t[:] counts,
298+
{{c_type}}[:, :] values,
299+
const int64_t[:] labels,
300300
Py_ssize_t min_count=-1):
301301
"""
302302
Only aggregates on axis=0
@@ -350,10 +350,10 @@ def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out,
350350

351351
@cython.wraparound(False)
352352
@cython.boundscheck(False)
353-
def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out,
354-
ndarray[int64_t] counts,
355-
ndarray[{{c_type}}, ndim=2] values,
356-
ndarray[int64_t] labels, int64_t rank,
353+
def group_nth_{{name}}({{c_type}}[:, :] out,
354+
int64_t[:] counts,
355+
{{c_type}}[:, :] values,
356+
const int64_t[:] labels, int64_t rank,
357357
Py_ssize_t min_count=-1):
358358
"""
359359
Only aggregates on axis=0
@@ -411,9 +411,9 @@ def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out,
411411

412412
@cython.boundscheck(False)
413413
@cython.wraparound(False)
414-
def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
415-
ndarray[{{c_type}}, ndim=2] values,
416-
ndarray[int64_t] labels,
414+
def group_rank_{{name}}(float64_t[:, :] out,
415+
{{c_type}}[:, :] values,
416+
const int64_t[:] labels,
417417
bint is_datetimelike, object ties_method,
418418
bint ascending, bint pct, object na_option):
419419
"""
@@ -606,10 +606,10 @@ ctypedef fused groupby_t:
606606

607607
@cython.wraparound(False)
608608
@cython.boundscheck(False)
609-
def group_max(ndarray[groupby_t, ndim=2] out,
610-
ndarray[int64_t] counts,
611-
ndarray[groupby_t, ndim=2] values,
612-
ndarray[int64_t] labels,
609+
def group_max(groupby_t[:, :] out,
610+
int64_t[:] counts,
611+
groupby_t[:, :] values,
612+
const int64_t[:] labels,
613613
Py_ssize_t min_count=-1):
614614
"""
615615
Only aggregates on axis=0
@@ -669,10 +669,10 @@ def group_max(ndarray[groupby_t, ndim=2] out,
669669

670670
@cython.wraparound(False)
671671
@cython.boundscheck(False)
672-
def group_min(ndarray[groupby_t, ndim=2] out,
673-
ndarray[int64_t] counts,
674-
ndarray[groupby_t, ndim=2] values,
675-
ndarray[int64_t] labels,
672+
def group_min(groupby_t[:, :] out,
673+
int64_t[:] counts,
674+
groupby_t[:, :] values,
675+
const int64_t[:] labels,
676676
Py_ssize_t min_count=-1):
677677
"""
678678
Only aggregates on axis=0
@@ -731,9 +731,9 @@ def group_min(ndarray[groupby_t, ndim=2] out,
731731

732732
@cython.boundscheck(False)
733733
@cython.wraparound(False)
734-
def group_cummin(ndarray[groupby_t, ndim=2] out,
735-
ndarray[groupby_t, ndim=2] values,
736-
ndarray[int64_t] labels,
734+
def group_cummin(groupby_t[:, :] out,
735+
groupby_t[:, :] values,
736+
const int64_t[:] labels,
737737
bint is_datetimelike):
738738
"""
739739
Only transforms on axis=0
@@ -779,9 +779,9 @@ def group_cummin(ndarray[groupby_t, ndim=2] out,
779779

780780
@cython.boundscheck(False)
781781
@cython.wraparound(False)
782-
def group_cummax(ndarray[groupby_t, ndim=2] out,
783-
ndarray[groupby_t, ndim=2] values,
784-
ndarray[int64_t] labels,
782+
def group_cummax(groupby_t[:, :] out,
783+
groupby_t[:, :] values,
784+
const int64_t[:] labels,
785785
bint is_datetimelike):
786786
"""
787787
Only transforms on axis=0

pandas/_libs/hashtable.pyx

+10 -8
Original file line number | Diff line number | Diff line change
@@ -52,9 +52,10 @@ include "hashtable_class_helper.pxi"
5252
include "hashtable_func_helper.pxi"
5353

5454
cdef class Factorizer:
55-
cdef public PyObjectHashTable table
56-
cdef public ObjectVector uniques
57-
cdef public Py_ssize_t count
55+
cdef public:
56+
PyObjectHashTable table
57+
ObjectVector uniques
58+
Py_ssize_t count
5859

5960
def __init__(self, size_hint):
6061
self.table = PyObjectHashTable(size_hint)
@@ -96,9 +97,10 @@ cdef class Factorizer:
9697

9798

9899
cdef class Int64Factorizer:
99-
cdef public Int64HashTable table
100-
cdef public Int64Vector uniques
101-
cdef public Py_ssize_t count
100+
cdef public:
101+
Int64HashTable table
102+
Int64Vector uniques
103+
Py_ssize_t count
102104

103105
def __init__(self, size_hint):
104106
self.table = Int64HashTable(size_hint)
@@ -140,7 +142,7 @@ cdef class Int64Factorizer:
140142

141143
@cython.wraparound(False)
142144
@cython.boundscheck(False)
143-
def unique_label_indices(ndarray[int64_t, ndim=1] labels):
145+
def unique_label_indices(const int64_t[:] labels):
144146
"""
145147
indices of the first occurrences of the unique labels
146148
*excluding* -1. equivalent to:
@@ -168,6 +170,6 @@ def unique_label_indices(ndarray[int64_t, ndim=1] labels):
168170
kh_destroy_int64(table)
169171

170172
arr = idx.to_array()
171-
arr = arr[labels[arr].argsort()]
173+
arr = arr[np.asarray(labels)[arr].argsort()]
172174

173175
return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr

pandas/_libs/hashtable_class_helper.pxi.in

+1 -1
Original file line number | Diff line number | Diff line change
@@ -322,7 +322,7 @@ cdef class {{name}}HashTable(HashTable):
322322
self.table.vals[k] = <Py_ssize_t>values[i]
323323

324324
@cython.boundscheck(False)
325-
def map_locations(self, ndarray[{{dtype}}_t, ndim=1] values):
325+
def map_locations(self, const {{dtype}}_t[:] values):
326326
cdef:
327327
Py_ssize_t i, n = len(values)
328328
int ret = 0

pandas/_libs/internals.pyx

+4 -3
Original file line number | Diff line number | Diff line change
@@ -23,10 +23,11 @@ from pandas._libs.algos import ensure_int64
2323

2424
cdef class BlockPlacement:
2525
# __slots__ = '_as_slice', '_as_array', '_len'
26-
cdef slice _as_slice
27-
cdef object _as_array
26+
cdef:
27+
slice _as_slice
28+
object _as_array
2829

29-
cdef bint _has_slice, _has_array, _is_known_slice_like
30+
bint _has_slice, _has_array, _is_known_slice_like
3031

3132
def __init__(self, val):
3233
cdef:

0 commit comments

Comments
 (0)