1
1
import numpy as np
2
-
3
2
from xarray import DataArray
3
+ from xrspatial .utils import ngjit , lnglat_to_meters
4
+ from numba import prange
5
+ import re
6
+ import warnings
7
+
8
+ warnings .simplefilter ('default' )
9
+
10
+ DEFAULT_UNIT = 'meter'
11
+
12
+
13
+ # TODO: Make convolution more generic with numba first-class functions.
14
+
15
+
16
+ def is_number (s ):
17
+ try :
18
+ float (s )
19
+ return True
20
+ except ValueError :
21
+ return False
22
+
23
+
24
+ # modified from https://stackoverflow.com/questions/3943752/the-dateutil-parser-parse-of-distance-strings
25
+ class Distance (object ):
26
+ METER = 1
27
+ FOOT = 0.3048
28
+ KILOMETER = 1000
29
+ MILE = 1609.344
30
+ UNITS = {'meter' : METER ,
31
+ 'meters' : METER ,
32
+ 'm' : METER ,
33
+ 'feet' : FOOT ,
34
+ 'foot' : FOOT ,
35
+ 'ft' : FOOT ,
36
+ 'miles' : MILE ,
37
+ 'mls' : MILE ,
38
+ 'ml' : MILE ,
39
+ 'kilometer' : KILOMETER ,
40
+ 'kilometers' : KILOMETER ,
41
+ 'km' : KILOMETER ,
42
+ }
43
+
44
+ def __init__ (self , s ):
45
+ self .number , unit = self ._get_distance_unit (s )
46
+ self ._convert (unit )
47
+
48
+ def _get_distance_unit (self , s ):
49
+ # spit string into numbers and text
50
+ splits = [x for x in re .split (r'(-?\d*\.?\d+)' , s ) if x != '' ]
51
+ if len (splits ) not in [1 , 2 ]:
52
+ raise ValueError ("Invalid distance." )
53
+
54
+ number = splits [0 ]
55
+ unit = DEFAULT_UNIT
56
+ if len (splits ) == 1 :
57
+ warnings .warn ('Raster distance unit not provided. '
58
+ 'Use meter as default.' , Warning )
59
+ elif len (splits ) == 2 :
60
+ unit = splits [1 ]
61
+
62
+ unit = unit .lower ()
63
+ unit = unit .replace (' ' , '' )
64
+ if unit not in self .UNITS :
65
+ raise ValueError (
66
+ "Invalid value.\n "
67
+ "Distance unit should be one of the following: \n "
68
+ "meter (meter, meters, m),\n "
69
+ "kilometer (kilometer, kilometers, km),\n "
70
+ "foot (foot, feet, ft),\n "
71
+ "mile (mile, miles, ml, mls)" )
72
+ return number , unit
73
+
74
+ def _convert (self , unit ):
75
+ self .number = float (self .number )
76
+ if self .UNITS [unit ] != 1 :
77
+ self .number *= self .UNITS [unit ]
78
+
79
+ @property
80
+ def meters (self ):
81
+ return self .number
82
+
83
+ @meters .setter
84
+ def meters (self , v ):
85
+ self .number = float (v )
86
+
87
+ @property
88
+ def miles (self ):
89
+ return self .number / self .MILE
90
+
91
+ @miles .setter
92
+ def miles (self , v ):
93
+ self .number = v
94
+ self ._convert ('miles' )
95
+
96
+ @property
97
+ def feet (self ):
98
+ return self .number / self .FOOT
99
+
100
+ @feet .setter
101
+ def feet (self , v ):
102
+ self .number = v
103
+ self ._convert ('feet' )
104
+
105
+ @property
106
+ def kilometers (self ):
107
+ return self .number / self .KILOMETER
108
+
109
+ @kilometers .setter
110
+ def kilometers (self , v ):
111
+ self .number = v
112
+ self ._convert ('KILOMETER' )
4
113
5
- from xrspatial .utils import ngjit
6
114
115
+ def _calc_cell_size (raster ):
116
+ if 'unit' in raster .attrs :
117
+ unit = raster .attrs ['unit' ]
118
+ else :
119
+ unit = DEFAULT_UNIT
120
+ warnings .warn ('Raster distance unit not provided. '
121
+ 'Use meter as default.' , Warning )
7
122
8
- #TODO: Make convolution more generic with numba first-class functions.
123
+ cell_size_x = 1
124
+ cell_size_y = 1
125
+
126
+ # calculate cell size from input `raster`
127
+ for dim in raster .dims :
128
+ if (dim .lower ().count ('x' )) > 0 :
129
+ # dimension of x-coordinates
130
+ if len (raster [dim ]) > 1 :
131
+ cell_size_x = raster [dim ].values [1 ] - raster [dim ].values [0 ]
132
+ elif (dim .lower ().count ('y' )) > 0 :
133
+ # dimension of y-coordinates
134
+ if len (raster [dim ]) > 1 :
135
+ cell_size_y = raster [dim ].values [1 ] - raster [dim ].values [0 ]
136
+
137
+ lon0 , lon1 , lat0 , lat1 = None , None , None , None
138
+ for dim in raster .dims :
139
+ if (dim .lower ().count ('lon' )) > 0 :
140
+ # dimension of x-coordinates
141
+ if len (raster [dim ]) > 1 :
142
+ lon0 , lon1 = raster [dim ].values [0 ], raster [dim ].values [1 ]
143
+ elif (dim .lower ().count ('lat' )) > 0 :
144
+ # dimension of y-coordinates
145
+ if len (raster [dim ]) > 1 :
146
+ lat0 , lat1 = raster [dim ].values [0 ], raster [dim ].values [1 ]
147
+
148
+ # convert lat-lon to meters
149
+ if (lon0 , lon1 , lat0 , lat1 ) != (None , None , None , None ):
150
+ mx0 , my0 = lnglat_to_meters (lon0 , lat0 )
151
+ mx1 , my1 = lnglat_to_meters (lon1 , lat1 )
152
+ cell_size_x = mx1 - mx0
153
+ cell_size_y = my1 - my0
154
+ unit = DEFAULT_UNIT
155
+
156
+ sx = Distance (str (cell_size_x ) + unit )
157
+ sy = Distance (str (cell_size_y ) + unit )
158
+ return sx , sy
159
+
160
+
161
+ def _gen_ellipse_kernel (half_w , half_h ):
162
+ # x values of interest
163
+ x = np .linspace (- half_w , half_w , 2 * half_w + 1 )
164
+ # y values of interest, as a "column" array
165
+ y = np .linspace (- half_h , half_h , 2 * half_h + 1 )[:, None ]
166
+
167
+ # True for points inside the ellipse
168
+ # (x / a)^2 + (y / b)^2 <= 1, avoid division to avoid rounding issue
169
+ ellipse = (x * half_h ) ** 2 + (y * half_w ) ** 2 <= (half_w * half_h ) ** 2
170
+
171
+ return ellipse .astype (float )
172
+
173
+
174
+ class Kernel :
175
+ def __init__ (self , shape = 'circle' , radius = 10000 ):
176
+ self .shape = shape
177
+ self .radius = radius
178
+ self ._validate_shape ()
179
+ self ._validate_radius ()
180
+
181
+ def _validate_shape (self ):
182
+ # validate shape
183
+ if self .shape not in ['circle' ]:
184
+ raise ValueError (
185
+ "Kernel shape must be \' circle\' " )
186
+
187
+ def _validate_radius (self ):
188
+ # try to convert into Distance object
189
+ d = Distance (str (self .radius ))
190
+
191
+ def to_array (self , raster ):
192
+ # calculate cell size over the x and y axis
193
+ sx , sy = _calc_cell_size (raster )
194
+ # create Distance object of radius
195
+ sr = Distance (str (self .radius ))
196
+ if self .shape == 'circle' :
197
+ # convert radius (meter) to pixel
198
+ kernel_half_w = int (sr .meters / sx .meters )
199
+ kernel_half_h = int (sr .meters / sy .meters )
200
+ kernel = _gen_ellipse_kernel (kernel_half_w , kernel_half_h )
201
+ return kernel
9
202
10
203
11
204
@ngjit
@@ -30,6 +223,7 @@ def _mean(data, excludes):
30
223
out [y , x ] = data [y , x ]
31
224
return out
32
225
226
+
33
227
# TODO: add optional name parameter `name='mean'`
34
228
def mean (agg , passes = 1 , excludes = [np .nan ]):
35
229
"""
@@ -53,3 +247,164 @@ def mean(agg, passes=1, excludes=[np.nan]):
53
247
54
248
return DataArray (out , name = 'mean' ,
55
249
dims = agg .dims , coords = agg .coords , attrs = agg .attrs )
250
+
251
+
252
+ @ngjit
253
+ def calc_mean (array ):
254
+ return np .nanmean (array )
255
+
256
+
257
+ @ngjit
258
+ def calc_sum (array ):
259
+ return np .nansum (array )
260
+
261
+
262
+ @ngjit
263
+ def upper_bound_p_value (zscore ):
264
+ if abs (zscore ) >= 2.33 :
265
+ return 0.0099
266
+ if abs (zscore ) >= 1.65 :
267
+ return 0.0495
268
+ if abs (zscore ) >= 1.29 :
269
+ return 0.0985
270
+ return 1
271
+
272
+
273
+ @ngjit
274
+ def _hot_cold (zscore ):
275
+ if zscore > 0 :
276
+ return 1
277
+ if zscore < 0 :
278
+ return - 1
279
+ return 0
280
+
281
+
282
+ @ngjit
283
+ def _confidence (zscore ):
284
+ p_value = upper_bound_p_value (zscore )
285
+ if abs (zscore ) > 2.58 and p_value < 0.01 :
286
+ return 99
287
+ if abs (zscore ) > 1.96 and p_value < 0.05 :
288
+ return 95
289
+ if abs (zscore ) > 1.65 and p_value < 0.1 :
290
+ return 90
291
+ return 0
292
+
293
+
294
+ @ngjit
295
+ def _apply (data , kernel_array , func ):
296
+ out = np .zeros_like (data )
297
+ rows , cols = data .shape
298
+ krows , kcols = kernel_array .shape
299
+ hrows , hcols = int (krows / 2 ), int (kcols / 2 )
300
+ kernel_values = np .zeros_like (kernel_array , dtype = data .dtype )
301
+
302
+ for y in prange (rows ):
303
+ for x in prange (cols ):
304
+ # kernel values are all nans at the beginning of each step
305
+ kernel_values .fill (np .nan )
306
+ for ky in range (y - hrows , y + hrows + 1 ):
307
+ for kx in range (x - hcols , x + hcols + 1 ):
308
+ if ky >= 0 and kx >= 0 :
309
+ if ky >= 0 and ky < rows and kx >= 0 and kx < cols :
310
+ if kernel_array [ky - (y - hrows ), kx - (x - hcols )] == 1 :
311
+ kernel_values [ky - (y - hrows ), kx - (x - hcols )] = data [ky , kx ]
312
+ out [y , x ] = func (kernel_values )
313
+ return out
314
+
315
+
316
+ def apply (raster , kernel , func = calc_mean ):
317
+ # validate raster
318
+ if not isinstance (raster , DataArray ):
319
+ raise TypeError ("`raster` must be instance of DataArray" )
320
+
321
+ if raster .ndim != 2 :
322
+ raise ValueError ("`raster` must be 2D" )
323
+
324
+ if not (issubclass (raster .values .dtype .type , np .integer ) or
325
+ issubclass (raster .values .dtype .type , np .float )):
326
+ raise ValueError (
327
+ "`raster` must be an array of integers or float" )
328
+
329
+ # create kernel mask array
330
+ kernel_values = kernel .to_array (raster )
331
+ # apply kernel to raster values
332
+ out = _apply (raster .values .astype (float ), kernel_values , func )
333
+
334
+ result = DataArray (out ,
335
+ coords = raster .coords ,
336
+ dims = raster .dims ,
337
+ attrs = raster .attrs )
338
+
339
+ return result
340
+
341
+
342
+ @ngjit
343
+ def _hotspots (z_array ):
344
+ out = np .zeros_like (z_array , dtype = np .int8 )
345
+ rows , cols = z_array .shape
346
+ for y in prange (rows ):
347
+ for x in prange (cols ):
348
+ out [y , x ] = _hot_cold (z_array [y , x ]) * _confidence (z_array [y , x ])
349
+ return out
350
+
351
+
352
+ def hotspots (raster , kernel ):
353
+ """Identify statistically significant hot spots and cold spots in an input
354
+ raster. To be a statistically significant hot spot, a feature will have a
355
+ high value and be surrounded by other features with high values as well.
356
+ Neighborhood of a feature defined by the input kernel, which currently
357
+ support a shape of circle and a radius in meters.
358
+
359
+ The result should be a raster with the following 7 values:
360
+ 90 for 90% confidence high value cluster
361
+ 95 for 95% confidence high value cluster
362
+ 99 for 99% confidence high value cluster
363
+ -90 for 90% confidence low value cluster
364
+ -95 for 95% confidence low value cluster
365
+ -99 for 99% confidence low value cluster
366
+ 0 for no significance
367
+
368
+ Parameters
369
+ ----------
370
+ raster: xarray.DataArray
371
+ Input raster image with shape=(height, width)
372
+ kernel: Kernel
373
+
374
+ Returns
375
+ -------
376
+ hotspots: xarray.DataArray
377
+ """
378
+
379
+ # validate raster
380
+ if not isinstance (raster , DataArray ):
381
+ raise TypeError ("`raster` must be instance of DataArray" )
382
+
383
+ if raster .ndim != 2 :
384
+ raise ValueError ("`raster` must be 2D" )
385
+
386
+ if not (issubclass (raster .values .dtype .type , np .integer ) or
387
+ issubclass (raster .values .dtype .type , np .float )):
388
+ raise ValueError (
389
+ "`raster` must be an array of integers or float" )
390
+
391
+ # create kernel mask array
392
+ kernel_values = kernel .to_array (raster )
393
+ # apply kernel to raster values
394
+ mean_array = _apply (raster .values .astype (float ), kernel_values , calc_mean )
395
+
396
+ # calculate z-scores
397
+ global_mean = np .nanmean (raster .values )
398
+ global_std = np .nanstd (raster .values )
399
+ if global_std == 0 :
400
+ raise ZeroDivisionError ("Standard deviation "
401
+ "of the input raster values is 0." )
402
+ z_array = (mean_array - global_mean ) / global_std
403
+
404
+ out = _hotspots (z_array )
405
+ result = DataArray (out ,
406
+ coords = raster .coords ,
407
+ dims = raster .dims ,
408
+ attrs = raster .attrs )
409
+
410
+ return result
0 commit comments