Skip to content

Commit 22af130

Browse files
committed
Merge pull request #1 from insertinterestingnamehere/fused_cleanup
Fused cleanup
2 parents: f0706b1 + 8bb17cb; commit 22af130

File tree

3 files changed

+64
-70
lines changed

3 files changed

+64
-70
lines changed

pandas/core/algorithms.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
232232
values = PeriodIndex(values, name=name)
233233

234234
values = values.view(np.int64)
235-
keys, counts = htable.value_count_int64(values)
235+
keys, counts = htable.value_count_scalar64(values, dropna)
236236

237237
if dropna:
238238
from pandas.tslib import iNaT
@@ -244,10 +244,10 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
244244

245245
elif com.is_integer_dtype(dtype):
246246
values = com._ensure_int64(values)
247-
keys, counts = htable.value_count_int64(values)
247+
keys, counts = htable.value_count_scalar64(values, dropna)
248248
elif com.is_float_dtype(dtype):
249249
values = com._ensure_float64(values)
250-
keys, counts = htable.value_count_float64(values, dropna)
250+
keys, counts = htable.value_count_scalar64(values, dropna)
251251

252252
else:
253253
values = com._ensure_object(values)

pandas/core/categorical.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1030,7 +1030,7 @@ def value_counts(self, dropna=True):
10301030
from pandas.core.index import CategoricalIndex
10311031

10321032
cat = self.dropna() if dropna else self
1033-
keys, counts = htable.value_count_int64(com._ensure_int64(cat._codes))
1033+
keys, counts = htable.value_count_scalar64(com._ensure_int64(cat._codes), dropna)
10341034
result = Series(counts, index=keys)
10351035

10361036
ix = np.arange(len(cat.categories), dtype='int64')

pandas/hashtable.pyx

+60 -66
Original file line number | Diff line number | Diff line change
@@ -866,96 +866,90 @@ cdef class Int64Factorizer:
866866
self.count = len(self.uniques)
867867
return labels
868868

869+
ctypedef fused kh_scalar64:
870+
kh_int64_t
871+
kh_float64_t
872+
869873
@cython.boundscheck(False)
870-
cdef build_count_table_float64(float64_t[:] values, kh_float64_t *table, bint dropna):
874+
cdef build_count_table_scalar64(sixty_four_bit_scalar[:] values,
875+
kh_scalar64 *table, bint dropna):
871876
cdef:
872877
khiter_t k
873878
Py_ssize_t i, n = len(values)
874-
float64_t val
879+
sixty_four_bit_scalar val
875880
int ret = 0
876881

877-
with nogil:
878-
kh_resize_float64(table, n)
882+
if sixty_four_bit_scalar is float64_t and kh_scalar64 is kh_float64_t:
883+
with nogil:
884+
kh_resize_float64(table, n)
879885

880-
for i in range(n):
881-
val = values[i]
882-
if val == val or not dropna:
883-
k = kh_get_float64(table, val)
886+
for i in range(n):
887+
val = values[i]
888+
if val == val or not dropna:
889+
k = kh_get_float64(table, val)
890+
if k != table.n_buckets:
891+
table.vals[k] += 1
892+
else:
893+
k = kh_put_float64(table, val, &ret)
894+
table.vals[k] = 1
895+
elif sixty_four_bit_scalar is int64_t and kh_scalar64 is kh_int64_t:
896+
with nogil:
897+
kh_resize_int64(table, n)
898+
899+
for i in range(n):
900+
val = values[i]
901+
k = kh_get_int64(table, val)
884902
if k != table.n_buckets:
885903
table.vals[k] += 1
886904
else:
887-
k = kh_put_float64(table, val, &ret)
905+
k = kh_put_int64(table, val, &ret)
888906
table.vals[k] = 1
907+
else:
908+
raise ValueError("Table type must match scalar type.")
909+
910+
889911

890912
@cython.boundscheck(False)
891-
cpdef value_count_float64(float64_t[:] values, bint dropna):
913+
cpdef value_count_scalar64(sixty_four_bit_scalar[:] values, bint dropna):
892914
cdef:
893915
Py_ssize_t i
894-
kh_float64_t * table
895-
float64_t[:] result_keys
916+
kh_float64_t *ftable
917+
kh_int64_t *itable
918+
sixty_four_bit_scalar[:] result_keys
896919
int64_t[:] result_counts
897920
int k
898921

899-
table = kh_init_float64()
900-
build_count_table_float64(values, table, dropna)
901-
902922
i = 0
903-
result_keys = np.empty(table.n_occupied, dtype=np.float64)
904-
result_counts = np.zeros(table.n_occupied, dtype=np.int64)
905923

906-
with nogil:
907-
for k in range(table.n_buckets):
908-
if kh_exist_float64(table, k):
909-
result_keys[i] = table.keys[k]
910-
result_counts[i] = table.vals[k]
911-
i += 1
912-
kh_destroy_float64(table)
924+
if sixty_four_bit_scalar is float64_t:
925+
ftable = kh_init_float64()
926+
build_count_table_scalar64(values, ftable, dropna)
913927

914-
return np.asarray(result_keys), np.asarray(result_counts)
928+
result_keys = np.empty(ftable.n_occupied, dtype=np.float64)
929+
result_counts = np.zeros(ftable.n_occupied, dtype=np.int64)
915930

916-
@cython.boundscheck(False)
917-
cdef build_count_table_int64(int64_t[:] values, kh_int64_t *table):
918-
cdef:
919-
khiter_t k
920-
Py_ssize_t i, n = len(values)
921-
int64_t val
922-
int ret = 0
923-
924-
with nogil:
925-
kh_resize_int64(table, n)
926-
927-
for i in range(n):
928-
val = values[i]
929-
k = kh_get_int64(table, val)
930-
if k != table.n_buckets:
931-
table.vals[k] += 1
932-
else:
933-
k = kh_put_int64(table, val, &ret)
934-
table.vals[k] = 1
935-
936-
937-
@cython.boundscheck(False)
938-
cpdef value_count_int64(int64_t[:] values):
939-
cdef:
940-
Py_ssize_t i
941-
kh_int64_t *table
942-
int64_t[:] result_keys, result_counts
943-
int k
931+
with nogil:
932+
for k in range(ftable.n_buckets):
933+
if kh_exist_float64(ftable, k):
934+
result_keys[i] = ftable.keys[k]
935+
result_counts[i] = ftable.vals[k]
936+
i += 1
937+
kh_destroy_float64(ftable)
944938

945-
table = kh_init_int64()
946-
build_count_table_int64(values, table)
939+
elif sixty_four_bit_scalar is int64_t:
940+
itable = kh_init_int64()
941+
build_count_table_scalar64(values, itable, dropna)
947942

948-
i = 0
949-
result_keys = np.empty(table.n_occupied, dtype=np.int64)
950-
result_counts = np.zeros(table.n_occupied, dtype=np.int64)
943+
result_keys = np.empty(itable.n_occupied, dtype=np.int64)
944+
result_counts = np.zeros(itable.n_occupied, dtype=np.int64)
951945

952-
with nogil:
953-
for k in range(table.n_buckets):
954-
if kh_exist_int64(table, k):
955-
result_keys[i] = table.keys[k]
956-
result_counts[i] = table.vals[k]
957-
i += 1
958-
kh_destroy_int64(table)
946+
with nogil:
947+
for k in range(itable.n_buckets):
948+
if kh_exist_int64(itable, k):
949+
result_keys[i] = itable.keys[k]
950+
result_counts[i] = itable.vals[k]
951+
i += 1
952+
kh_destroy_int64(itable)
959953

960954
return np.asarray(result_keys), np.asarray(result_counts)
961955

@@ -1047,7 +1041,7 @@ def mode_int64(int64_t[:] values):
10471041

10481042
table = kh_init_int64()
10491043

1050-
build_count_table_int64(values, table)
1044+
build_count_table_scalar64(values, table, 0)
10511045

10521046
modes = np.empty(table.n_buckets, dtype=np.int64)
10531047

0 commit comments

Comments
 (0)