# cython: infer_types=True
# Fast swap step in the PAM algorithm for k-medoids.
# Author: Timothée Mathieu
# License: 3-clause BSD

cimport cython
import numpy as np
cimport numpy as np

from sklearn.utils.extmath import row_norms
from cython cimport floating

# Fixed-width integer types, instead of platform-dependent int and long.
from libc.stdint cimport int32_t, int64_t


import sys
from time import time

from libc.math cimport exp, log, sqrt, pow, fabs
from numpy.math cimport INFINITY


# Modified from sklearn.cluster._k_means_fast.pyx
np.import_array()

cdef floating _euclidean_dense_dense(
        floating* a,  # IN
        floating* b,  # IN
        int32_t n_features) nogil:
    """Squared Euclidean distance between dense vectors a and b."""
    cdef:
        int32_t i
        int32_t n = n_features // 4
        int32_t rem = n_features % 4
        floating result = 0

    # We manually unroll the loop for better cache optimization.
    for i in range(n):
        result += ((a[0] - b[0]) * (a[0] - b[0])
                   + (a[1] - b[1]) * (a[1] - b[1])
                   + (a[2] - b[2]) * (a[2] - b[2])
                   + (a[3] - b[3]) * (a[3] - b[3]))
        a += 4; b += 4

    for i in range(rem):
        result += (a[i] - b[i]) * (a[i] - b[i])

    return result
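

# Illustrative cross-check (not part of the original file): the helper above
# returns the *squared* Euclidean distance, so a plain NumPy reference for
# Python-level tests could be:
def _euclidean_dense_dense_ref(a, b):
    # Squared Euclidean distance between two 1-D arrays.
    d = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return np.dot(d, d)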


cpdef np.ndarray[floating] _kmeans_loss(np.ndarray[floating, ndim=2, mode='c'] X,
                                        int32_t[:] labels):
    """Compute the inertia:

    the squared distance between each sample and its assigned center.
    """
    if floating is float:
        dtype = np.float32
    elif floating is double:
        dtype = np.double

    cdef:
        int32_t n_samples = X.shape[0]
        int32_t n_features = X.shape[1]
        int32_t i, j
        int32_t n_classes = len(np.unique(labels))
        np.ndarray[floating, ndim=2] centers = np.zeros([n_classes,
                                                         n_features],
                                                        dtype=dtype)
        np.ndarray[np.int32_t] num_in_cluster = np.zeros(n_classes,
                                                         dtype=np.int32)
        np.ndarray[floating] inertias = np.zeros(n_samples, dtype=dtype)

    # Accumulate per-cluster sums and counts, then turn sums into means.
    for i in range(n_samples):
        for j in range(n_features):
            centers[labels[i], j] += X[i, j]
        num_in_cluster[labels[i]] += 1

    for i in range(n_classes):
        for j in range(n_features):
            centers[i, j] /= num_in_cluster[i]

    # Squared distance of each sample to its assigned (mean) center.
    for i in range(n_samples):
        j = labels[i]
        inertias[i] = _euclidean_dense_dense(&X[i, 0], &centers[j, 0],
                                             n_features)
    return inertias
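

# Illustrative sketch (not part of the original file): a vectorized NumPy
# equivalent of `_kmeans_loss`, usable as a test oracle. It assumes `labels`
# holds the values 0 .. n_classes - 1 with no empty cluster.
def _kmeans_loss_ref(X, labels):
    n_classes = len(np.unique(labels))
    # Per-cluster mean centers.
    centers = np.array([X[labels == k].mean(axis=0) for k in range(n_classes)])
    # Squared distance from each sample to its assigned center.
    diff = X - centers[labels]
    return np.einsum('ij,ij->i', diff, diff)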


# Regression and Classification losses, from scikit-learn.


# ----------------------------------------
# Extension Types for Loss Functions
# ----------------------------------------

cdef class LossFunction:
    """Base class for convex loss functions"""

    cdef double loss(self, double p, double y) nogil:
        """Evaluate the loss function.

        Parameters
        ----------
        p : double
            The prediction, p = w^T x
        y : double
            The true value (aka target)

        Returns
        -------
        double
            The loss evaluated at `p` and `y`.
        """
        return 0.

    def py_dloss(self, double p, double y):
        """Python version of `dloss` for testing.

        Pytest needs a python function and can't use cdef functions.
        """
        return self.dloss(p, y)

    def py_loss(self, double p, double y):
        """Python version of `loss` for testing.

        Pytest needs a python function and can't use cdef functions.
        """
        return self.loss(p, y)

    cdef double dloss(self, double p, double y) nogil:
        """Evaluate the derivative of the loss function with respect to
        the prediction `p`.

        Parameters
        ----------
        p : double
            The prediction, p = w^T x
        y : double
            The true value (aka target)

        Returns
        -------
        double
            The derivative of the loss function with regards to `p`.
        """
        return 0.
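

# Illustrative sketch (not part of the original file): a concrete subclass
# only needs to override the `loss`/`dloss` pair. For the squared error
# l(p, y) = 0.5 * (p - y)**2, the derivative with respect to p is (p - y):
cdef class _SquaredLossSketch(LossFunction):
    """Squared error loss, for illustration only."""

    cdef double loss(self, double p, double y) nogil:
        return 0.5 * (p - y) * (p - y)

    cdef double dloss(self, double p, double y) nogil:
        return p - y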


cdef class Regression(LossFunction):
    """Base class for loss functions for regression"""

    cdef double loss(self, double p, double y) nogil:
        return 0.

# [... intervening loss subclasses elided (diff hunk @@ -336,4 +180,4 @@);
#  the lines below close cdef class Huber(Regression) ...]
        return -self.c

    def __reduce__(self):
        return Huber, (self.c,)
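
# `__reduce__` makes these cdef extension types picklable, e.g. when an
# estimator holding a loss object is cloned or shipped to joblib workers.
# Illustrative round-trip (assumes Huber.__init__ takes the threshold `c`):
#
#     import pickle
#     loss = Huber(1.35)
#     restored = pickle.loads(pickle.dumps(loss))  # rebuilt as Huber(c)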