makepath
diff --git a/‎examples/user_guide/0_Getting_Setup.ipynb
+75 b/‎examples/user_guide/0_Getting_Setup.ipynb
+75
diff --git a/‎examples/user-guide.ipynb renamed to ‎examples/user_guide/1_Surface.ipynb
+167-1,646 b/‎examples/user-guide.ipynb renamed to ‎examples/user_guide/1_Surface.ipynb
+167-1,646
diff --git a/‎examples/user_guide/2_Proximity.ipynb
+557 b/‎examples/user_guide/2_Proximity.ipynb
+557
diff --git a/‎examples/user_guide/3_Zonal.ipynb
+488 b/‎examples/user_guide/3_Zonal.ipynb
+488
diff --git a/‎examples/user_guide/5_Classification.ipynb
+396 b/‎examples/user_guide/5_Classification.ipynb
+396
diff --git a/‎examples/user_guide/8_Remote_Sensing.ipynb
+343 b/‎examples/user_guide/8_Remote_Sensing.ipynb
+343
diff --git a/‎examples/user_guide/9_Pathfinding.ipynb
+438 b/‎examples/user_guide/9_Pathfinding.ipynb
+438
diff --git a/‎setup.py
-1 b/‎setup.py
-1
diff --git a/‎xrspatial/classify.py
+34-10 b/‎xrspatial/classify.py
+34-10
@@ -0,0 +1,75 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Xarray-spatial\n",
+    "### User Guide: Getting setup"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Installation\n",
+    "\n",
+    "The package can be installed easily via conda or pip. \n",
+    "\n",
+    "#### To install this package with conda run:\n",
+    "- conda install -c conda-forge xarray-spatial\n",
+    "- conda install -c makepath xarray-spatial\n",
+    "\n",
+    "#### To install with pip run:\n",
+    "- pip install xarray-spatial\n",
+    "\n",
+    "#### To verify whether the installation succeeded, open a Python session and import the package:\n",
+    "- import xrspatial"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.10"
+  },
+  "nbTranslate": {
+   "displayLangs": [
+    "*"
+   ],
+   "hotkey": "alt-t",
+   "langInMainMenu": true,
+   "sourceLang": "en",
+   "targetLang": "fr",
+   "useGoogleTranslate": true
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
@@ -18,7 +18,6 @@
         'pandas',
         'pillow',
         'requests',
-        'scikit-learn',
         'scipy',
         'xarray',
         ],
 
@@ -4,6 +4,9 @@
 from datashader.colors import rgb
 from datashader.utils import ngjit
 from xarray import DataArray
+
+from numpy.random import RandomState
+
 import warnings
 warnings.simplefilter('default')
 
@@ -238,12 +241,7 @@ def _jenks(data, n_classes):
     return kclass
 
 
-def _kmeans(agg, k=5):
-    centroids = _jenks(agg.data.flatten(), k)
-    return centroids[1:]
-
-
-def natural_breaks(agg, name='natural_breaks', k=5):
+def natural_breaks(agg, num_sample=None, name='natural_breaks', k=5):
     """
     Calculate Jenks natural breaks (a.k.a kmeans in one dimension)
     for an input raster xarray.
@@ -252,9 +250,14 @@ def natural_breaks(agg, name='natural_breaks', k=5):
     ----------
     agg : xarray.DataArray
         xarray.DataArray of values to bin
+    num_sample: int (optional)
+        Number of sample data points used to fit the model.
+        Natural Breaks (Jenks) classification has O(n²) complexity,
+        where n is the total number of data points, i.e: agg.size
+        When n is large, we should fit the model on a small sub-sample
+        of the data instead of using the whole dataset.
     k: int
         Number of classes
-
     Returns
     -------
     natural_breaks_agg: xarray.DataArray
@@ -283,7 +286,28 @@ def natural_breaks(agg, name='natural_breaks', k=5):
            [4., 4., 4.]]
     """
 
-    uv = np.unique(agg.data)
+    num_data = agg.size
+
+    if num_sample is not None and num_sample < num_data:
+        # randomly select sample from the whole dataset
+        # create a pseudo random number generator
+        generator = RandomState(1234567890)
+        idx = [i for i in range(0, agg.size)]
+        generator.shuffle(idx)
+        sample_idx = idx[:num_sample]
+        sample_data = agg.data.flatten()[sample_idx]
+    else:
+        sample_data = agg.data.flatten()
+
+    # warn when the number of data points used to fit the model is >= 40k
+    if sample_data.size >= 40000:
+        warnings.warn('natural_breaks Warning: Natural break classification '
+                      '(Jenks) has a complexity of O(n^2), '
+                      'your classification with {} data points may take '
+                      'a long time.'.format(sample_data.size),
+                      Warning)
+
+    uv = np.unique(sample_data)
     uvk = len(uv)
 
     if uvk < k:
@@ -295,8 +319,8 @@ def natural_breaks(agg, name='natural_breaks', k=5):
         uv.sort()
         bins = uv
     else:
-        res0 = _kmeans(agg, k)
-        bins = np.array(res0)
+        centroids = _jenks(sample_data, k)
+        bins = np.array(centroids[1:])
 
     return DataArray(_bin(agg.data, bins, np.arange(uvk)),
                      name=name,