makepath · brendancol · Apr 15, 2021 · Apr 14, 2021 · Apr 14, 2021 · Apr 14, 2021
diff --git a/examples/cloudless-mosaic-sentinel2.ipynb b/examples/cloudless-mosaic-sentinel2.ipynb
@@ -17,7 +17,9 @@
     "\n",
     "SENTINEL-2 (https://sentinel.esa.int/web/sentinel/user-guides/sentinel-2-msi/overview) is a wide-swath, high-resolution, multi-spectral imaging mission, supporting Copernicus Land Monitoring studies, including the monitoring of vegetation, soil and water cover, as well as observation of inland waterways and coastal areas.\n",
     "\n",
-    "## 2. Imports"
+    "## 2. Environment setup\n",
+    "\n",
+    "The libraries required by this notebook are imported below."
    ]
   },
   {
@@ -26,12 +28,53 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import numpy as np\n",
+    "import xarray as xr\n",
+    "import datashader as ds\n",
+    "from datashader import Canvas\n",
+    "from datashader.transfer_functions import shade, Images\n",
+    "\n",
     "import stackstac\n",
     "from satsearch import Search\n",
     "\n",
-    "import xrspatial.multispectral as ms\n",
+    "import xrspatial.multispectral as ms"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dask_gateway import GatewayCluster\n",
+    "from dask_gateway import Gateway\n",
+    "from distributed import Client\n",
+    "from dask.distributed import PipInstall\n",
     "\n",
-    "import matplotlib.pyplot as plt"
+    "plugin = PipInstall(packages=[\"stackstac\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's create a new cluster that is configured to use Dask-Gateway, and a new client that executes all Dask computations on the cluster. We also put the cluster in adaptive mode so that it resizes itself automatically based on the workload."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cluster = GatewayCluster()  # Creates the Dask Scheduler. Might take a minute.\n",
+    "\n",
+    "client = cluster.get_client()\n",
+    "client.register_worker_plugin(plugin)\n",
+    "\n",
+    "cluster.adapt(minimum=8, maximum=100)\n",
+    "\n",
+    "client"
    ]
   },
   {
@@ -40,7 +83,7 @@
    "source": [
     "## 3. Load Sentinel 2 data\n",
     "\n",
-    "In this example, we use data from `sentinel-s2-l2a-cogs` collection within a bounding box of `[-93.112301, 29.649001, -92.075965, 30.719868]`, and the time range considered is from `2019-07-01` to `2020-06-30`. And the collected data has less than 25% cloud coverage."
+    "In this example, we use data from the `sentinel-s2-l2a-cogs` collection within a bounding box of `[-97.185642, 27.569157, -95.117574, 29.500710]`, over the time range from `2019-07-01` to `2020-06-30`. Only scenes with less than 25% cloud coverage are collected."
    ]
   },
   {
@@ -51,7 +94,7 @@
    "source": [
     "items = Search(\n",
     "    url=\"https://earth-search.aws.element84.com/v0\",\n",
-    "    bbox=[-93.112301, 29.649001, -92.075965, 30.719868],\n",
+    "    bbox=[-97.185642, 27.569157, -95.117574, 29.500710],\n",
     "    collections=[\"sentinel-s2-l2a-cogs\"],\n",
     "    query={'eo:cloud_cover': {'lt': 25}},\n",
     "    datetime=\"2019-07-01/2020-06-30\"\n",
@@ -67,7 +110,7 @@
     "Let's combine all the above STAC items into a lazy xarray with following settings:\n",
     "- projection: epsg=32613\n",
     "- resolution: 100m\n",
-    "- bands: green (B02), red (B03), blue (B04), NIR (B08), SWIR1 (B11)"
+    "- bands: red (B04), green (B03), blue (B02)"
    ]
   },
   {
@@ -76,10 +119,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "bands = ['B02', 'B03', 'B04', 'B08', 'B11']\n",
-    "\n",
     "stack_ds = stackstac.stack(\n",
-    "    items, epsg=32613, resolution=100, assets=bands\n",
+    "    items, epsg=32613, resolution=100, assets=['B04', 'B03', 'B02']\n",
     ")\n",
     "\n",
     "stack_ds"
@@ -99,20 +140,10 @@
    "outputs": [],
    "source": [
     "monthly = stack_ds.resample(time=\"MS\").median(\"time\", keep_attrs=True)\n",
+    "monthly.data = monthly.data.rechunk({-2: 1024, -1: 1024})  # 1024x1024 chunks on the two trailing axes; rechunk's 2nd positional arg is `threshold`, not a chunk size\n",
     "monthly"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import dask.diagnostics\n",
-    "with dask.diagnostics.ProgressBar():\n",
-    "    monthly = monthly.compute()"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -128,44 +159,30 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "median_scene = monthly.median(dim=['time'])"
+    "median_scene = monthly.median(dim=['time'])\n",
+    "median_scene.data = median_scene.data.rechunk({-2: 2048, -1: 2048})  # 2048x2048 chunks on the two trailing axes; rechunk's 2nd positional arg is `threshold`, not a chunk size\n",
+    "median_scene"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "With 3 bands: red, green, blue, let's see the true color using the `true_color` function from `xrspatial.multispectral module` for each separate month and the median layer."
+    "## 5. Save median layer to Azure"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "bands_mapping = {v: i for i, v in enumerate(bands)}\n",
-    "\n",
-    "band_blue = bands_mapping['B02']\n",
-    "band_green = bands_mapping['B03']\n",
-    "band_red = bands_mapping['B04']"
+    "## 6. Downsample for visualization"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "months = 12\n",
-    "imgs = []\n",
-    "for month in range(months):\n",
-    "    # True color\n",
-    "    r = monthly[month][band_red]\n",
-    "    g = monthly[month][band_green]\n",
-    "    b = monthly[month][band_blue]\n",
-    "    img = ms.true_color(r, g, b)\n",
-    "    imgs.append(img)"
+    "With the three bands (red, green, blue), let's visualize the cloud-free scene we just constructed using the `true_color` function from the `xrspatial.multispectral` module."
    ]
   },
   {
@@ -174,21 +191,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Utility function for displaying images\n",
+    "h, w = 600, 800\n",
+    "canvas = Canvas(plot_height=h, plot_width=w)\n",
+    "resampled_agg = canvas.raster(median_scene)\n",
     "\n",
-    "def display_images(images, columns=2, width=50, height=50):\n",
-    "    height = max(height, int(len(images)/columns) * height)\n",
-    "    plt.figure(figsize=(width, height))\n",
-    "    for i, image in enumerate(images):\n",
-    "        plt.subplot(len(images) / columns + 1, columns, i + 1)\n",
-    "        plt.imshow(image)"
+    "resampled_agg"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### Monthly data"
+    "The `true_color` function takes three bands (red, green, blue) as inputs and returns a `PIL.Image` object."
    ]
   },
   {
@@ -197,15 +211,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# takes some time to run\n",
-    "display_images(imgs)"
+    "image = ms.true_color(resampled_agg[0], resampled_agg[1], resampled_agg[2])  # bands were stacked as B04 (red), B03 (green), B02 (blue)\n",
+    "\n",
+    "image"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### Median layer"
+    "Finally, close the client and the cluster."
    ]
   },
   {
@@ -214,24 +229,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "median_scene = monthly.median(dim=['time'])\n",
-    "\n",
-    "median_red_agg = median_scene[band_red]\n",
-    "median_green_agg = median_scene[band_green]\n",
-    "median_blue_agg = median_scene[band_blue]\n",
-    "\n",
-    "median_img = ms.true_color(median_red_agg, median_green_agg, median_blue_agg)\n",
-    "\n",
-    "median_img"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### References\n",
-    "\n",
-    "- https://stackstac.readthedocs.io/en/latest/basic.html"
+    "client.close()\n",
+    "cluster.close()"
    ]
   }
  ],