-
-
Notifications
You must be signed in to change notification settings - Fork 6
Add Topographic data source #150
Changes from 34 commits
ef21a70
460f895
7b3b25a
7bb892a
002ca09
b269930
11c33db
11b75a8
a6d9370
3a90c98
734693a
55f2faf
58715fc
9503301
d296272
4fb3ddc
b0ca3b0
adae2a9
a26ee82
c625278
bf3542c
8bdfaec
77c8f1d
c2aa243
c09c242
8622f5d
fae1bef
118a805
a72fc78
015a621
89aab00
b2c14c2
3a81924
2938874
5774d01
2601554
cfedd43
d7a1922
966b83d
182c5d6
9b6c2dd
cbc41f7
56e874c
2d7e1c1
afe9990
59b953e
49311d8
fca7984
6e63ba1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
from nowcasting_dataset.data_sources.data_source import ImageDataSource | ||
from nowcasting_dataset.dataset.example import Example | ||
from nowcasting_dataset.consts import TOPOGRAPHIC_DATA | ||
from nowcasting_dataset.geospatial import OSGB | ||
from rasterio.warp import Resampling | ||
from dataclasses import dataclass, InitVar | ||
from numbers import Number | ||
import pandas as pd | ||
import xarray as xr | ||
import numpy as np | ||
import rioxarray | ||
|
||
# Means computed with | ||
# out_fp = "europe_dem_1km.tif" | ||
# out = rasterio.open(out_fp) | ||
# data = out.read(masked=True) | ||
# print(np.mean(data)) | ||
# print(np.std(data)) | ||
TOPO_MEAN = xr.DataArray( | ||
data=[ | ||
365.486887, | ||
], | ||
dims=["variable"], | ||
coords={"variable": [TOPOGRAPHIC_DATA]}, | ||
).astype(np.float32) | ||
|
||
TOPO_STD = xr.DataArray( | ||
data=[ | ||
478.841369, | ||
], | ||
dims=["variable"], | ||
coords={"variable": [TOPOGRAPHIC_DATA]}, | ||
).astype(np.float32) | ||
|
||
|
||
@dataclass | ||
class TopographicDataSource(ImageDataSource): | ||
"""Add topographic/elevation map features.""" | ||
|
||
filename: str = None | ||
normalize: bool = True | ||
|
||
def __post_init__(self, image_size_pixels: int, meters_per_pixel: int): | ||
super().__post_init__(image_size_pixels, meters_per_pixel) | ||
self._shape_of_example = ( | ||
image_size_pixels, | ||
image_size_pixels, | ||
) | ||
self._data = rioxarray.open_rasterio( | ||
filename=self.filename, parse_coordinates=True, masked=True | ||
) | ||
self._data = self._data.fillna(0) # Set nodata values to 0 (mostly should be ocean) | ||
# Add CRS for later, topo maps are assumed to be in OSGB | ||
self._data.attrs["crs"] = OSGB | ||
print(self._data.shape) | ||
jacobbieker marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# Distance between pixels, giving their spatial extant, in meters | ||
self._stored_pixel_size_meters = abs(self._data.coords["x"][1] - self._data.coords["x"][0]) | ||
self._meters_per_pixel = meters_per_pixel | ||
|
||
def get_example( | ||
self, t0_dt: pd.Timestamp, x_meters_center: Number, y_meters_center: Number | ||
) -> Example: | ||
""" | ||
Get a single example | ||
|
||
Args: | ||
t0_dt: Current datetime for the example, unused | ||
x_meters_center: Center of the example in meters in the x direction in OSGB coordinates | ||
y_meters_center: Center of the example in meters in the y direction in OSGB coordinates | ||
|
||
Returns: | ||
Example containing topographic data for the selected area | ||
""" | ||
|
||
bounding_box = self._square.bounding_box_centered_on( | ||
x_meters_center=x_meters_center, y_meters_center=y_meters_center | ||
) | ||
selected_data = self._data.sel( | ||
x=slice(bounding_box.left, bounding_box.right), | ||
y=slice(bounding_box.top, bounding_box.bottom), | ||
) | ||
if self._stored_pixel_size_meters != self._meters_per_pixel: | ||
# Rescale here to the exact size, assumes that the above is good slice | ||
# Useful if using different spatially sized grids | ||
selected_data = selected_data.rio.reproject( | ||
dst_crs=selected_data.attrs["crs"], | ||
shape=(self._square.size_pixels, self._square.size_pixels), | ||
resampling=Resampling.bilinear, | ||
) | ||
|
||
# selected_sat_data is likely to have 1 too many pixels in x and y | ||
jacobbieker marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# because sel(x=slice(a, b)) is [a, b], not [a, b). So trim: | ||
selected_data = selected_data.isel( | ||
x=slice(0, self._square.size_pixels), y=slice(0, self._square.size_pixels) | ||
) | ||
|
||
selected_data = self._post_process_example(selected_data, t0_dt) | ||
if selected_data.shape != self._shape_of_example: | ||
raise RuntimeError( | ||
"Example is wrong shape! " | ||
f"x_meters_center={x_meters_center}\n" | ||
f"y_meters_center={y_meters_center}\n" | ||
f"t0_dt={t0_dt}\n" | ||
f"expected shape={self._shape_of_example}\n" | ||
f"actual shape {selected_data.shape}" | ||
) | ||
|
||
return self._put_data_into_example(selected_data) | ||
|
||
def _put_data_into_example(self, selected_data: xr.DataArray) -> Example: | ||
""" | ||
Insert the data and coordinates into an Example | ||
|
||
Args: | ||
selected_data: DataArray containing the data to insert | ||
|
||
Returns: | ||
Example containing the Topographic data | ||
""" | ||
return Example( | ||
topo_data=selected_data, | ||
topo_x_coords=selected_data.x, | ||
topo_y_coords=selected_data.y, | ||
) | ||
|
||
def _post_process_example( | ||
self, selected_data: xr.DataArray, t0_dt: pd.Timestamp | ||
) -> xr.DataArray: | ||
""" | ||
Post process the topographical data, removing an extra dim and optionally | ||
normalizing | ||
|
||
Args: | ||
selected_data: DataArray containing the topographic data | ||
t0_dt: Unused | ||
|
||
Returns: | ||
DataArray with optionally normalized data, and removed first dimension | ||
""" | ||
if self.normalize: | ||
selected_data = selected_data - TOPO_MEAN | ||
selected_data = selected_data / TOPO_STD | ||
# Shrink extra dims | ||
selected_data = selected_data.squeeze() | ||
return selected_data |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,3 +27,4 @@ plotly | |
tqdm | ||
black | ||
pre-commit | ||
rioxarray |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
""" | ||
Script that takes the SRTM 30m worldwide elevation maps and creates a | ||
single representation of the area in NetCDF and GeoTIFF formats | ||
|
||
The SRTM files can be downloaded from NASA, and are in CRS EPSG:4326 | ||
""" | ||
jacobbieker marked this conversation as resolved.
Show resolved
Hide resolved
|
||
import os.path | ||
import glob | ||
import rasterio | ||
from rasterio.merge import merge | ||
from rasterio.warp import Resampling | ||
from nowcasting_dataset.geospatial import OSGB | ||
import rioxarray | ||
|
||
dst_crs = OSGB | ||
|
||
# Go through, open all the files, combined by coords, then save out to NetCDF, or GeoTIFF | ||
files = glob.glob("/run/media/jacob/data/SRTM/*.tif") | ||
out_dir = "/run/media/jacob/data/SRTM1KM/" | ||
jacobbieker marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
upscale_factor = 0.12 # 30m to 250m-ish, just making it small enough files to actually merge | ||
for f in files: | ||
with rasterio.open(f) as dataset: | ||
|
||
# resample data to target shape | ||
data = dataset.read( | ||
out_shape=( | ||
dataset.count, | ||
int(dataset.height * upscale_factor), | ||
int(dataset.width * upscale_factor), | ||
), | ||
resampling=Resampling.bilinear, | ||
) | ||
|
||
# scale image transform | ||
transform = dataset.transform * dataset.transform.scale( | ||
(dataset.width / data.shape[-1]), (dataset.height / data.shape[-2]) | ||
) | ||
name = f.split("/")[-1] | ||
# Set the nodata values to 0, as nearly all ocean. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps in a later PR, it might be nice to also give the ML model a binary map of "ocean versus land". |
||
out_meta = dataset.meta.copy() | ||
out_meta.update( | ||
{ | ||
"driver": "GTiff", | ||
"height": data.shape[1], | ||
"width": data.shape[2], | ||
"transform": transform, | ||
} | ||
) | ||
with rasterio.open(os.path.join(out_dir, name), "w", **out_meta) as dest: | ||
dest.write(data) | ||
files = glob.glob("/run/media/jacob/data/SRTM1KM/*.tif") | ||
src = rasterio.open(files[0]) | ||
|
||
mosaic, out_trans = merge(files) | ||
out_meta = src.meta.copy() | ||
out_meta.update( | ||
{ | ||
"driver": "GTiff", | ||
"height": mosaic.shape[1], | ||
"width": mosaic.shape[2], | ||
"transform": out_trans, | ||
} | ||
) | ||
out_fp = "europe_dem_250m.tif" | ||
with rasterio.open(out_fp, "w", **out_meta) as dest: | ||
dest.write(mosaic) | ||
|
||
xds = rioxarray.open_rasterio(out_fp, parse_coordinates=True) | ||
# Reproject to exactly 1km pixels now | ||
with rasterio.open(out_fp) as src: | ||
src_crs = src.crs | ||
xds.attrs["crs"] = src_crs | ||
|
||
xds_resampled = xds.rio.reproject(dst_crs=dst_crs, resolution=500, resampling=Resampling.bilinear) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry if I've missed something but where does the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, just in case we want a higher resolution map to downsample or whatever later. Its still only about 240mb I think? So still quite small. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cool beans. It might be worth commenting on what the 500 means in the code? No big worries if not though! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added it! |
||
print(xds_resampled) | ||
print(abs(xds_resampled.coords["x"][1] - xds_resampled.coords["x"][0])) | ||
xds_resampled.rio.to_raster("europe_dem_500m_osgb.tif") | ||
xds_resampled = xds.rio.reproject(dst_crs=dst_crs, resolution=1000, resampling=Resampling.bilinear) | ||
print(xds_resampled) | ||
print(abs(xds_resampled.coords["x"][1] - xds_resampled.coords["x"][0])) | ||
xds_resampled.rio.to_raster("europe_dem_1km_osgb.tif") | ||
xds_resampled = xds.rio.reproject(dst_crs=dst_crs, resolution=2000, resampling=Resampling.bilinear) | ||
print(xds_resampled) | ||
print(abs(xds_resampled.coords["x"][1] - xds_resampled.coords["x"][0])) | ||
xds_resampled.rio.to_raster("europe_dem_2km_osgb.tif") |
Uh oh!
There was an error while loading. Please reload this page.