|
1 |
| -from typing import Hashable, Tuple |
| 1 | +from functools import wraps |
| 2 | +from typing import Callable, Hashable, Tuple |
2 | 3 |
|
3 | 4 | import dask.array as da
|
4 | 5 | import numpy as np
|
5 | 6 | import xarray as xr
|
6 | 7 | from dask.array import Array
|
| 8 | +from numba import guvectorize |
7 | 9 | from xarray import DataArray, Dataset
|
8 | 10 |
|
9 | 11 | from ..typing import ArrayLike
|
@@ -109,3 +111,215 @@ def map_blocks_asnumpy(x: Array) -> Array:
|
109 | 111 |
|
110 | 112 | x = x.map_blocks(cp.asnumpy)
|
111 | 113 | return x
|
| 114 | + |
| 115 | + |
def cohort_reduction(gufunc: Callable) -> Callable:
    """Decorator adapting a per-cohort gufunc to operate on dask arrays.

    Parameters
    ----------
    gufunc
        Callable applied to each block as ``gufunc(x, cohort, dummy)``,
        where ``dummy`` is an uninitialised ``int8`` array of length ``n``
        passed alongside the data.

    Returns
    -------
    A function ``func(x, cohort, n, axis=-1)`` carrying the metadata of
    ``gufunc`` (via ``functools.wraps``). It converts ``x`` to a dask
    array, applies ``gufunc`` with ``dask.array.map_blocks`` and returns
    an array in which the axis given by ``axis`` has length ``n``.
    """

    @wraps(gufunc)
    def func(x: ArrayLike, cohort: ArrayLike, n: int, axis: int = -1) -> ArrayLike:
        # Bring the axis being reduced to the final position.
        samples_last = da.swapaxes(da.asarray(x), axis, -1)
        last_axis = samples_last.ndim - 1
        reduced = da.map_blocks(
            gufunc,
            samples_last,
            cohort,
            np.empty(n, np.int8),
            # Leading chunks are unchanged; the final axis becomes one chunk of size n.
            chunks=samples_last.chunks[:-1] + (n,),
            # The final axis is dropped and re-created at the same position.
            drop_axis=last_axis,
            new_axis=last_axis,
        )
        # Undo the initial swap so the new axis sits where ``axis`` was.
        return da.swapaxes(reduced, axis, -1)

    return func
| 134 | + |
| 135 | + |
@cohort_reduction
@guvectorize(
    [
        "(uint8[:], int64[:], int8[:], uint64[:])",
        "(uint64[:], int64[:], int8[:], uint64[:])",
        "(int8[:], int64[:], int8[:], int64[:])",
        "(int64[:], int64[:], int8[:], int64[:])",
        "(float32[:], int64[:], int8[:], float32[:])",
        "(float64[:], int64[:], int8[:], float64[:])",
    ],
    "(n),(n),(c)->(c)",
)
def cohort_sum(
    x: ArrayLike, cohort: ArrayLike, _: ArrayLike, out: ArrayLike
) -> None:  # pragma: no cover
    """Sum of values by cohort.

    The parameters below describe the decorated function, whose signature
    is ``(x, cohort, n, axis=-1)`` as produced by ``cohort_reduction``.

    Parameters
    ----------
    x
        Array of values, one per sample along the sample axis.
    cohort
        Integer array giving the cohort index of each sample; a
        negative value means the sample belongs to no cohort.
    n
        Number of cohorts.
    axis
        Axis of ``x`` that corresponds to samples (defaults to the
        final axis).

    Returns
    -------
    An array with the same number of dimensions as ``x`` in which the
    sample axis has been replaced by a cohort axis of size ``n``.

    Notes
    -----
    The undecorated kernel receives ``(x, cohort, _, out)``: ``_`` is
    not read here and only supplies the cohort dimension ``c`` of the
    gufunc layout, and ``out`` is filled in place.
    """
    out[:] = 0
    for idx in range(len(x)):
        group = cohort[idx]
        if group < 0:
            # Negative index: sample is not assigned to any cohort.
            continue
        out[group] += x[idx]
| 179 | + |
| 180 | + |
@cohort_reduction
@guvectorize(
    [
        "(uint8[:], int64[:], int8[:], uint64[:])",
        "(uint64[:], int64[:], int8[:], uint64[:])",
        "(int8[:], int64[:], int8[:], int64[:])",
        "(int64[:], int64[:], int8[:], int64[:])",
        "(float32[:], int64[:], int8[:], float32[:])",
        "(float64[:], int64[:], int8[:], float64[:])",
    ],
    "(n),(n),(c)->(c)",
)
def cohort_nansum(
    x: ArrayLike, cohort: ArrayLike, _: ArrayLike, out: ArrayLike
) -> None:  # pragma: no cover
    """Sum of values by cohort ignoring nan values.

    The parameters below describe the decorated function, whose signature
    is ``(x, cohort, n, axis=-1)`` as produced by ``cohort_reduction``.

    Parameters
    ----------
    x
        Array of values, one per sample along the sample axis.
    cohort
        Integer array giving the cohort index of each sample; a
        negative value means the sample belongs to no cohort.
    n
        Number of cohorts.
    axis
        Axis of ``x`` that corresponds to samples (defaults to the
        final axis).

    Returns
    -------
    An array with the same number of dimensions as ``x`` in which the
    sample axis has been replaced by a cohort axis of size ``n``.

    Notes
    -----
    The undecorated kernel receives ``(x, cohort, _, out)``: ``_`` is
    not read here and only supplies the cohort dimension ``c`` of the
    gufunc layout, and ``out`` is filled in place.
    """
    out[:] = 0
    for idx in range(len(x)):
        group = cohort[idx]
        value = x[idx]
        if group < 0 or np.isnan(value):
            # Skip samples without a cohort and nan values.
            continue
        out[group] += value
| 225 | + |
| 226 | + |
@cohort_reduction
@guvectorize(
    [
        "(uint8[:], int64[:], int8[:], float64[:])",
        "(uint64[:], int64[:], int8[:], float64[:])",
        "(int8[:], int64[:], int8[:], float64[:])",
        "(int64[:], int64[:], int8[:], float64[:])",
        "(float32[:], int64[:], int8[:], float32[:])",
        "(float64[:], int64[:], int8[:], float64[:])",
    ],
    "(n),(n),(c)->(c)",
)
def cohort_mean(
    x: ArrayLike, cohort: ArrayLike, _: ArrayLike, out: ArrayLike
) -> None:  # pragma: no cover
    """Mean of values by cohort.

    The parameters below describe the decorated function, whose signature
    is ``(x, cohort, n, axis=-1)`` as produced by ``cohort_reduction``.

    Parameters
    ----------
    x
        Array of values, one per sample along the sample axis.
    cohort
        Integer array giving the cohort index of each sample; a
        negative value means the sample belongs to no cohort.
    n
        Number of cohorts.
    axis
        Axis of ``x`` that corresponds to samples (defaults to the
        final axis).

    Returns
    -------
    An array with the same number of dimensions as ``x`` in which the
    sample axis has been replaced by a cohort axis of size ``n``.

    Notes
    -----
    The undecorated kernel receives ``(x, cohort, _, out)``: only the
    length of ``_`` is used (as the number of cohorts), and ``out`` is
    filled in place.
    """
    n_cohorts = len(_)
    counts = np.zeros(n_cohorts)
    out[:] = 0
    # First pass: accumulate per-cohort totals and sample counts.
    for idx in range(len(x)):
        group = cohort[idx]
        if group < 0:
            # Negative index: sample is not assigned to any cohort.
            continue
        out[group] += x[idx]
        counts[group] += 1
    # Second pass: turn totals into means.
    # NOTE(review): a cohort with no counted samples divides by zero here;
    # the resulting value depends on numba's error model - confirm.
    for k in range(n_cohorts):
        out[k] = out[k] / counts[k]
| 275 | + |
| 276 | + |
@cohort_reduction
@guvectorize(
    [
        "(uint8[:], int64[:], int8[:], float64[:])",
        "(uint64[:], int64[:], int8[:], float64[:])",
        "(int8[:], int64[:], int8[:], float64[:])",
        "(int64[:], int64[:], int8[:], float64[:])",
        "(float32[:], int64[:], int8[:], float32[:])",
        "(float64[:], int64[:], int8[:], float64[:])",
    ],
    "(n),(n),(c)->(c)",
)
def cohort_nanmean(
    x: ArrayLike, cohort: ArrayLike, _: ArrayLike, out: ArrayLike
) -> None:  # pragma: no cover
    """Mean of values by cohort ignoring nan values.

    The parameters below describe the decorated function, whose signature
    is ``(x, cohort, n, axis=-1)`` as produced by ``cohort_reduction``.

    Parameters
    ----------
    x
        Array of values, one per sample along the sample axis.
    cohort
        Integer array giving the cohort index of each sample; a
        negative value means the sample belongs to no cohort.
    n
        Number of cohorts.
    axis
        Axis of ``x`` that corresponds to samples (defaults to the
        final axis).

    Returns
    -------
    An array with the same number of dimensions as ``x`` in which the
    sample axis has been replaced by a cohort axis of size ``n``.

    Notes
    -----
    The undecorated kernel receives ``(x, cohort, _, out)``: only the
    length of ``_`` is used (as the number of cohorts), and ``out`` is
    filled in place. Nan values contribute to neither the total nor the
    count of their cohort.
    """
    n_cohorts = len(_)
    counts = np.zeros(n_cohorts)
    out[:] = 0
    # First pass: accumulate per-cohort totals and counts of non-nan samples.
    for idx in range(len(x)):
        group = cohort[idx]
        value = x[idx]
        if group < 0 or np.isnan(value):
            # Skip samples without a cohort and nan values.
            continue
        out[group] += value
        counts[group] += 1
    # Second pass: turn totals into means.
    # NOTE(review): a cohort with no counted samples divides by zero here;
    # the resulting value depends on numba's error model - confirm.
    for k in range(n_cohorts):
        out[k] = out[k] / counts[k]
0 commit comments