|
1 |
| -from typing import Hashable, Tuple |
| 1 | +from functools import wraps |
| 2 | +from typing import Callable, Hashable, Tuple |
2 | 3 |
|
3 | 4 | import dask.array as da
|
4 | 5 | import numpy as np
|
5 | 6 | import xarray as xr
|
6 | 7 | from dask.array import Array
|
| 8 | +from numba import guvectorize |
7 | 9 | from xarray import DataArray, Dataset
|
8 | 10 |
|
9 | 11 | from ..typing import ArrayLike
|
@@ -109,3 +111,210 @@ def map_blocks_asnumpy(x: Array) -> Array:
|
109 | 111 |
|
110 | 112 | x = x.map_blocks(cp.asnumpy)
|
111 | 113 | return x
|
| 114 | + |
| 115 | + |
def cohort_reduction(gufunc: Callable) -> Callable:
    """Wrap a cohort-reducing generalized ufunc as a dask-backed reduction.

    Parameters
    ----------
    gufunc
        Generalized ufunc whose layout signature is read from
        ``gufunc.ufunc.signature``. It is invoked with three inputs:
        the values, the cohort assignments and a placeholder array of
        length ``n``.

    Returns
    -------
    A function ``func(x, cohort, n, axis=-1)`` which carries the
    metadata of ``gufunc`` (copied by ``functools.wraps``) and applies
    ``gufunc`` through ``dask.array.apply_gufunc`` along ``axis`` of
    ``x``.
    """

    @wraps(gufunc)
    def func(x: ArrayLike, cohort: ArrayLike, n: int, axis: int = -1) -> ArrayLike:
        signature = gufunc.ufunc.signature
        # Move the requested axis to the final position before applying
        # the gufunc.
        values = da.swapaxes(x, axis, -1)
        # np.empty leaves the contents uninitialised, so the only
        # information this array carries is its length, n.
        placeholder = np.empty(n, np.int8)
        reduced = da.apply_gufunc(gufunc, signature, values, cohort, placeholder)
        # Swap the final axis of the result back to where the caller
        # asked for it.
        return da.swapaxes(reduced, axis, -1)

    return func
| 129 | + |
| 130 | + |
@cohort_reduction
@guvectorize(
    [
        "(uint8[:], int64[:], int8[:], uint64[:])",
        "(uint64[:], int64[:], int8[:], uint64[:])",
        "(int8[:], int64[:], int8[:], int64[:])",
        "(int64[:], int64[:], int8[:], int64[:])",
        "(float32[:], int64[:], int8[:], float32[:])",
        "(float64[:], int64[:], int8[:], float64[:])",
    ],
    "(n),(n),(c)->(c)",
)
def cohort_sum(
    x: ArrayLike, cohort: ArrayLike, _: ArrayLike, out: ArrayLike
) -> ArrayLike:
    """Sum the values of the samples belonging to each cohort.

    Parameters
    ----------
    x
        Array of values, one per sample.
    cohort
        Array of integer cohort indices, one per sample. Samples
        with a negative index belong to no cohort and are skipped.
    n
        Number of cohorts.
    axis
        The axis of array x that runs over samples (the final
        axis by default).

    Returns
    -------
    An array with as many dimensions as x, in which the sample
    axis is replaced by a cohort axis of size n.

    Notes
    -----
    ``n`` and ``axis`` are arguments of the function produced by
    the ``cohort_reduction`` decorator. The kernel itself receives
    a placeholder array ``_`` with one element per cohort and
    writes its result into ``out``.
    """
    out[:] = 0
    for sample in range(len(x)):
        group = cohort[sample]
        if group < 0:
            # Negative index: this sample is not part of any cohort.
            continue
        out[group] += x[sample]
| 174 | + |
| 175 | + |
@cohort_reduction
@guvectorize(
    [
        "(uint8[:], int64[:], int8[:], uint64[:])",
        "(uint64[:], int64[:], int8[:], uint64[:])",
        "(int8[:], int64[:], int8[:], int64[:])",
        "(int64[:], int64[:], int8[:], int64[:])",
        "(float32[:], int64[:], int8[:], float32[:])",
        "(float64[:], int64[:], int8[:], float64[:])",
    ],
    "(n),(n),(c)->(c)",
)
def cohort_nansum(
    x: ArrayLike, cohort: ArrayLike, _: ArrayLike, out: ArrayLike
) -> ArrayLike:
    """Sum the values of the samples in each cohort, skipping nan values.

    Parameters
    ----------
    x
        Array of values, one per sample.
    cohort
        Array of integer cohort indices, one per sample. Samples
        with a negative index belong to no cohort and are skipped.
    n
        Number of cohorts.
    axis
        The axis of array x that runs over samples (the final
        axis by default).

    Returns
    -------
    An array with as many dimensions as x, in which the sample
    axis is replaced by a cohort axis of size n.

    Notes
    -----
    ``n`` and ``axis`` are arguments of the function produced by
    the ``cohort_reduction`` decorator. The kernel itself receives
    a placeholder array ``_`` with one element per cohort and
    writes its result into ``out``.
    """
    out[:] = 0
    for sample in range(len(x)):
        group = cohort[sample]
        value = x[sample]
        if np.isnan(value) or group < 0:
            # Skip nan values and samples that are in no cohort.
            continue
        out[group] += value
| 220 | + |
| 221 | + |
@cohort_reduction
@guvectorize(
    [
        "(uint8[:], int64[:], int8[:], float64[:])",
        "(uint64[:], int64[:], int8[:], float64[:])",
        "(int8[:], int64[:], int8[:], float64[:])",
        "(int64[:], int64[:], int8[:], float64[:])",
        "(float32[:], int64[:], int8[:], float32[:])",
        "(float64[:], int64[:], int8[:], float64[:])",
    ],
    "(n),(n),(c)->(c)",
)
def cohort_mean(
    x: ArrayLike, cohort: ArrayLike, _: ArrayLike, out: ArrayLike
) -> ArrayLike:
    """Average the values of the samples belonging to each cohort.

    Parameters
    ----------
    x
        Array of values, one per sample.
    cohort
        Array of integer cohort indices, one per sample. Samples
        with a negative index belong to no cohort and are skipped.
    n
        Number of cohorts.
    axis
        The axis of array x that runs over samples (the final
        axis by default).

    Returns
    -------
    An array with as many dimensions as x, in which the sample
    axis is replaced by a cohort axis of size n.

    Notes
    -----
    ``n`` and ``axis`` are arguments of the function produced by
    the ``cohort_reduction`` decorator. The kernel itself receives
    a placeholder array ``_`` with one element per cohort and
    writes its result into ``out``.

    A cohort without any member is divided by a count of zero.
    NOTE(review): the resulting value depends on how numba handles
    division by zero inside a gufunc -- confirm.
    """
    out[:] = 0
    n_samples = len(x)
    n_cohorts = len(_)
    # Per-cohort number of samples that contributed to the running sum.
    members = np.zeros(n_cohorts)
    for sample in range(n_samples):
        group = cohort[sample]
        if group < 0:
            # Negative index: this sample is not part of any cohort.
            continue
        out[group] += x[sample]
        members[group] += 1
    # Turn the accumulated sums into means.
    for group in range(n_cohorts):
        out[group] /= members[group]
| 270 | + |
| 271 | + |
@cohort_reduction
@guvectorize(
    [
        "(uint8[:], int64[:], int8[:], float64[:])",
        "(uint64[:], int64[:], int8[:], float64[:])",
        "(int8[:], int64[:], int8[:], float64[:])",
        "(int64[:], int64[:], int8[:], float64[:])",
        "(float32[:], int64[:], int8[:], float32[:])",
        "(float64[:], int64[:], int8[:], float64[:])",
    ],
    "(n),(n),(c)->(c)",
)
def cohort_nanmean(
    x: ArrayLike, cohort: ArrayLike, _: ArrayLike, out: ArrayLike
) -> ArrayLike:
    """Average the values of the samples in each cohort, skipping nan values.

    Parameters
    ----------
    x
        Array of values, one per sample.
    cohort
        Array of integer cohort indices, one per sample. Samples
        with a negative index belong to no cohort and are skipped.
    n
        Number of cohorts.
    axis
        The axis of array x that runs over samples (the final
        axis by default).

    Returns
    -------
    An array with as many dimensions as x, in which the sample
    axis is replaced by a cohort axis of size n.

    Notes
    -----
    ``n`` and ``axis`` are arguments of the function produced by
    the ``cohort_reduction`` decorator. The kernel itself receives
    a placeholder array ``_`` with one element per cohort and
    writes its result into ``out``.

    A cohort without any non-nan member is divided by a count of
    zero. NOTE(review): the resulting value depends on how numba
    handles division by zero inside a gufunc -- confirm.
    """
    out[:] = 0
    n_samples = len(x)
    n_cohorts = len(_)
    # Per-cohort number of non-nan samples that contributed to the sum.
    members = np.zeros(n_cohorts)
    for sample in range(n_samples):
        group = cohort[sample]
        value = x[sample]
        if np.isnan(value) or group < 0:
            # Skip nan values and samples that are in no cohort.
            continue
        out[group] += value
        members[group] += 1
    # Turn the accumulated sums into means.
    for group in range(n_cohorts):
        out[group] /= members[group]
0 commit comments