Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit 9ca6f47

Browse files
committed
Address PR comments
1 parent 5c72247 commit 9ca6f47

File tree

2 files changed

+3
-113
lines changed

2 files changed

+3
-113
lines changed

nowcasting_dataset/data_sources/optical_flow/optical_flow_data_source.py

+2 -113
Original file line number | Diff line number | Diff line change
@@ -9,19 +9,18 @@
99
import pandas as pd
1010
import xarray as xr
1111

12-
import nowcasting_dataset.time as nd_time
1312
from nowcasting_dataset.consts import SAT_VARIABLE_NAMES
14-
from nowcasting_dataset.data_sources.data_source import ZarrDataSource
1513
from nowcasting_dataset.data_sources.datasource_output import DataSourceOutput
1614
from nowcasting_dataset.data_sources.optical_flow.optical_flow_model import OpticalFlow
15+
from nowcasting_dataset.data_sources.satellite.satellite_data_source import SatelliteDataSource
1716

1817
_LOG = logging.getLogger("nowcasting_dataset")
1918

2019
IMAGE_BUFFER_SIZE = 16
2120

2221

2322
@dataclass
24-
class OpticalFlowDataSource(ZarrDataSource):
23+
class OpticalFlowDataSource(SatelliteDataSource):
2524
"""
2625
Optical Flow Data Source, computing flow between Satellite data
2726
@@ -45,21 +44,6 @@ def __post_init__(self, image_size_pixels: int, meters_per_pixel: int):
4544
n_channels,
4645
)
4746

48-
def open(self) -> None:
49-
"""
50-
Open Satellite data
51-
52-
We don't want to open_sat_data in __init__.
53-
If we did that, then we couldn't copy SatelliteDataSource
54-
instances into separate processes. Instead,
55-
call open() _after_ creating separate processes.
56-
"""
57-
self._data = self._open_data()
58-
self._data = self._data.sel(variable=list(self.channels))
59-
60-
def _open_data(self) -> xr.DataArray:
61-
return open_sat_data(zarr_path=self.zarr_path, consolidated=self.consolidated)
62-
6347
def get_example(
6448
self, t0_dt: pd.Timestamp, x_meters_center: Number, y_meters_center: Number
6549
) -> DataSourceOutput:
@@ -282,101 +266,6 @@ def _remap_image(self, image: np.ndarray, flow: np.ndarray) -> np.ndarray:
282266
def _dataset_to_data_source_output(output: xr.Dataset) -> OpticalFlow:
283267
return OpticalFlow(output)
284268

285-
def _get_time_slice(self, t0_dt: pd.Timestamp) -> xr.DataArray:
286-
start_dt = self._get_start_dt(t0_dt)
287-
end_dt = self._get_end_dt(t0_dt)
288-
data = self.data.sel(time=slice(start_dt, end_dt))
289-
return data
290-
291-
def datetime_index(self, remove_night: bool = True) -> pd.DatetimeIndex:
292-
"""Returns a complete list of all available datetimes
293-
294-
Args:
295-
remove_night: If True then remove datetimes at night.
296-
We're interested in forecasting solar power generation, so we
297-
don't care about nighttime data :)
298-
299-
In the UK in summer, the sun rises first in the north east, and
300-
sets last in the north west [1]. In summer, the north gets more
301-
hours of sunshine per day.
302-
303-
In the UK in winter, the sun rises first in the south east, and
304-
sets last in the south west [2]. In winter, the south gets more
305-
hours of sunshine per day.
306-
307-
| | Summer | Winter |
308-
| ---: | :---: | :---: |
309-
| Sun rises first in | N.E. | S.E. |
310-
| Sun sets last in | N.W. | S.W. |
311-
| Most hours of sunlight | North | South |
312-
313-
Before training, we select timesteps which have at least some
314-
sunlight. We do this by computing the clearsky global horizontal
315-
irradiance (GHI) for the four corners of the satellite imagery,
316-
and for all the timesteps in the dataset. We only use timesteps
317-
where the maximum global horizontal irradiance across all four
318-
corners is above some threshold.
319-
320-
The 'clearsky solar irradiance' is the amount of sunlight we'd
321-
expect on a clear day at a specific time and location. The SI unit
322-
of irradiance is watt per square meter. The 'global horizontal
323-
irradiance' (GHI) is the total sunlight that would hit a
324-
horizontal surface on the surface of the Earth. The GHI is the
325-
sum of the direct irradiance (sunlight which takes a direct path
326-
from the Sun to the Earth's surface) and the diffuse horizontal
327-
irradiance (the sunlight scattered from the atmosphere). For more
328-
info, see: https://en.wikipedia.org/wiki/Solar_irradiance
329-
330-
References:
331-
1. [Video of June 2019](https://www.youtube.com/watch?v=IOp-tj-IJpk)
332-
2. [Video of Jan 2019](https://www.youtube.com/watch?v=CJ4prUVa2nQ)
333-
"""
334-
if self._data is None:
335-
sat_data = self._open_data()
336-
else:
337-
sat_data = self._data
338-
339-
datetime_index = pd.DatetimeIndex(sat_data.time.values)
340-
341-
if remove_night:
342-
border_locations = self.geospatial_border()
343-
datetime_index = nd_time.select_daylight_datetimes(
344-
datetimes=datetime_index, locations=border_locations
345-
)
346-
347-
return datetime_index
348-
349-
350-
def open_sat_data(zarr_path: str, consolidated: bool) -> xr.DataArray:
351-
"""Lazily opens the Zarr store.
352-
353-
Adds 1 minute to the 'time' coordinates, so the timestamps
354-
are at 00, 05, ..., 55 past the hour.
355-
356-
Args:
357-
zarr_path: Cloud URL or local path. If GCP URL, must start with 'gs://'
358-
consolidated: Whether or not the Zarr metadata is consolidated.
359-
"""
360-
_LOG.debug("Opening satellite data: %s", zarr_path)
361-
362-
# We load using chunks=None so xarray *doesn't* use Dask to
363-
# load the Zarr chunks from disk. Using Dask to load the data
364-
# seems to slow things down a lot if the Zarr store has more than
365-
# about a million chunks.
366-
# See https://github.com/openclimatefix/nowcasting_dataset/issues/23
367-
dataset = xr.open_dataset(zarr_path, engine="zarr", mode="r", chunks=None)
368-
369-
data_array = dataset["stacked_eumetsat_data"]
370-
del dataset
371-
372-
# The 'time' dimension is at 04, 09, ..., 59 minutes past the hour.
373-
# To make it easier to align the satellite data with other data sources
374-
# (which are at 00, 05, ..., 55 minutes past the hour) we add 1 minute to
375-
# the time dimension.
376-
# TODO Remove this as new Zarr already has the time fixed
377-
data_array["time"] = data_array.time + pd.Timedelta("1 minute")
378-
return data_array
379-
380269

381270
def crop_center(img, cropx, cropy):
382271
"""

nowcasting_dataset/data_sources/satellite/satellite_data_source.py

+1
Original file line number | Diff line number | Diff line change
@@ -145,5 +145,6 @@ def open_sat_data(zarr_path: str, consolidated: bool) -> xr.DataArray:
145145
# (which are at 00, 05, ..., 55 minutes past the hour) we add 1 minute to
146146
# the time dimension.
147147
# TODO Remove this as new Zarr already has the time fixed
148+
# See https://github.com/openclimatefix/nowcasting_dataset/issues/313
148149
data_array["time"] = data_array.time + pd.Timedelta("1 minute")
149150
return data_array

0 commit comments

Comments
 (0)