From cfd6d5e04df61525bfd41676f488d3d7b6cbbb20 Mon Sep 17 00:00:00 2001
From: Alexander Soklev <alexander.soklev@software-supreme.com>
Date: Wed, 29 Sep 2021 21:47:11 +0300
Subject: [PATCH 1/3] Optimize natural_break on large inputs

---
 xrspatial/classify.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/xrspatial/classify.py b/xrspatial/classify.py
index 13f642de..f9903e1a 100755
--- a/xrspatial/classify.py
+++ b/xrspatial/classify.py
@@ -13,7 +13,7 @@ class cupy(object):
 from datashader.colors import rgb
 from xarray import DataArray
 
-from numba import cuda
+import numba as nb
 import dask.array as da
 
 from numpy.random import RandomState
@@ -112,7 +112,7 @@ def _run_dask_numpy_bin(data, bins, new_values):
     return out
 
 
-@cuda.jit(device=True)
+@nb.cuda.jit(device=True)
 def _gpu_bin(data, bins, new_values):
     nbins = len(bins)
     val = data[0, 0]
@@ -139,9 +139,9 @@ def _gpu_bin(data, bins, new_values):
     return out
 
 
-@cuda.jit
+@nb.cuda.jit
 def _run_gpu_bin(data, bins, new_values, out):
-    i, j = cuda.grid(2)
+    i, j = nb.cuda.grid(2)
     if (i >= 0 and i < out.shape[0] and j >= 0 and j < out.shape[1]):
         out[i, j] = _gpu_bin(data[i:i+1, j:j+1], bins, new_values)
 
@@ -440,7 +440,7 @@ def quantile(agg: xr.DataArray,
                      attrs=agg.attrs)
 
 
-@ngjit
+@nb.jit(nopython=True, parallel=True)
 def _run_numpy_jenks_matrices(data, n_classes):
     n_data = data.shape[0]
     lower_class_limits = np.zeros((n_data + 1, n_classes + 1),
@@ -453,7 +453,7 @@ def _run_numpy_jenks_matrices(data, n_classes):
     nl = data.shape[0] + 1
     variance = 0.0
 
-    for l in range(2, nl): # noqa
+    for l in nb.prange(2, nl): # noqa
         sum = 0.0
         sum_squares = 0.0
         w = 0.0
@@ -494,7 +494,7 @@ def _run_numpy_jenks_matrices(data, n_classes):
     return lower_class_limits, var_combinations
 
 
-@ngjit
+@nb.jit(nopython=True)
 def _run_numpy_jenks(data, n_classes):
     # ported from existing cython implementation:
     # https://github.com/perrygeo/jenks/blob/master/jenks.pyx
@@ -526,7 +526,7 @@ def _run_numpy_natural_break(data, num_sample, k):
         # randomly select sample from the whole dataset
         # create a pseudo random number generator
         generator = RandomState(1234567890)
-        idx = [i for i in range(0, data.size)]
+        idx = np.linspace(0, data.size, data.size, endpoint=False, dtype=np.uint32)
         generator.shuffle(idx)
         sample_idx = idx[:num_sample]
         sample_data = data.flatten()[sample_idx]
@@ -544,7 +544,8 @@ def _run_numpy_natural_break(data, num_sample, k):
                           Warning)
 
     # only include non-nan values
-    sample_data = np.asarray([i for i in sample_data if np.isfinite(i)])
+    if not isinstance(sample_data, np.ndarray):
+        sample_data = np.asarray(sample_data)
 
     uv = np.unique(sample_data[np.isfinite(sample_data)])
     uvk = len(uv)

From b5b094d888d8c881e73ab5e1f34aab28d5ab1f84 Mon Sep 17 00:00:00 2001
From: Alexander Soklev <a.soklev@gmail.com>
Date: Wed, 29 Sep 2021 22:29:23 +0300
Subject: [PATCH 2/3] Split a long line to adhere to linter rules

---
 xrspatial/classify.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/xrspatial/classify.py b/xrspatial/classify.py
index f9903e1a..6a67440e 100755
--- a/xrspatial/classify.py
+++ b/xrspatial/classify.py
@@ -526,7 +526,9 @@ def _run_numpy_natural_break(data, num_sample, k):
         # randomly select sample from the whole dataset
         # create a pseudo random number generator
         generator = RandomState(1234567890)
-        idx = np.linspace(0, data.size, data.size, endpoint=False, dtype=np.uint32)
+        idx = np.linspace(
+            0, data.size, data.size, endpoint=False, dtype=np.uint32
+        )
         generator.shuffle(idx)
         sample_idx = idx[:num_sample]
         sample_data = data.flatten()[sample_idx]

From 93771caf6c418faf690d082e0d2352b8178f3f97 Mon Sep 17 00:00:00 2001
From: Alexander Soklev <alexander.soklev@software-supreme.com>
Date: Thu, 30 Sep 2021 18:59:40 +0300
Subject: [PATCH 3/3] Fix a bug in natural_breaks

A uv array is being calculated that strips all NaNs and Infs from the input data,
but then instead of the uv array we pass on the original data.
---
 xrspatial/classify.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xrspatial/classify.py b/xrspatial/classify.py
index 6a67440e..bd2fa329 100755
--- a/xrspatial/classify.py
+++ b/xrspatial/classify.py
@@ -563,7 +563,7 @@ def _run_numpy_natural_break(data, num_sample, k):
         uv.sort()
         bins = uv
     else:
-        centroids = _run_numpy_jenks(sample_data, k)
+        centroids = _run_numpy_jenks(uv, k)
         bins = np.array(centroids[1:])
 
     out = _bin(data, bins, np.arange(uvk))