9
9
import pandas as pd
10
10
import xarray as xr
11
11
12
- import nowcasting_dataset .time as nd_time
13
12
from nowcasting_dataset .consts import SAT_VARIABLE_NAMES
14
- from nowcasting_dataset .data_sources .data_source import ZarrDataSource
15
13
from nowcasting_dataset .data_sources .datasource_output import DataSourceOutput
16
14
from nowcasting_dataset .data_sources .optical_flow .optical_flow_model import OpticalFlow
15
+ from nowcasting_dataset .data_sources .satellite .satellite_data_source import SatelliteDataSource
17
16
18
17
# Module-level logger, registered under the "nowcasting_dataset" name.
_LOG = logging .getLogger ("nowcasting_dataset" )
19
18
20
19
# NOTE(review): presumably a number of extra pixels of padding around the
# requested image; its usage is outside this view - confirm before relying on it.
IMAGE_BUFFER_SIZE = 16
21
20
22
21
23
22
@dataclass
24
- class OpticalFlowDataSource (ZarrDataSource ):
23
+ class OpticalFlowDataSource (SatelliteDataSource ):
25
24
"""
26
25
Optical Flow Data Source, computing flow between Satellite data
27
26
@@ -45,21 +44,6 @@ def __post_init__(self, image_size_pixels: int, meters_per_pixel: int):
45
44
n_channels ,
46
45
)
47
46
48
def open(self) -> None:
    """
    Load the satellite data and restrict it to the configured channels.

    The data is intentionally not opened in __init__: doing so would make
    it impossible to copy instances into separate processes. Create the
    processes first, then call open() inside each of them.
    """
    self._data = self._open_data()
    wanted_channels = list(self.channels)
    # Keep only the variables named in self.channels.
    self._data = self._data.sel(variable=wanted_channels)
59
-
60
def _open_data(self) -> xr.DataArray:
    """Open the satellite data at self.zarr_path, honouring self.consolidated."""
    sat_data = open_sat_data(
        zarr_path=self.zarr_path,
        consolidated=self.consolidated,
    )
    return sat_data
62
-
63
47
def get_example (
64
48
self , t0_dt : pd .Timestamp , x_meters_center : Number , y_meters_center : Number
65
49
) -> DataSourceOutput :
@@ -282,101 +266,6 @@ def _remap_image(self, image: np.ndarray, flow: np.ndarray) -> np.ndarray:
282
266
def _dataset_to_data_source_output(output: xr.Dataset) -> OpticalFlow:
    """Wrap an xarray Dataset in the OpticalFlow output type.

    NOTE(review): takes no ``self``; presumably declared as a static method
    by a decorator outside this view - confirm.
    """
    optical_flow_output = OpticalFlow(output)
    return optical_flow_output
284
268
285
- def _get_time_slice (self , t0_dt : pd .Timestamp ) -> xr .DataArray :
286
- start_dt = self ._get_start_dt (t0_dt )
287
- end_dt = self ._get_end_dt (t0_dt )
288
- data = self .data .sel (time = slice (start_dt , end_dt ))
289
- return data
290
-
291
def datetime_index(self, remove_night: bool = True) -> pd.DatetimeIndex:
    """Return all datetimes available in the satellite data.

    If the data has not been opened yet it is opened just for this call;
    the result is not stored on the instance.

    Args:
        remove_night: If True, drop the datetimes that fall at night.
            The aim is forecasting solar power generation, so nighttime
            data is of no interest.

    Background on the daylight selection:

    In the UK in summer the sun rises first in the north east and sets
    last in the north west [1], so the north gets more hours of sunshine
    per day. In winter the sun rises first in the south east and sets
    last in the south west [2], so the south gets more hours of sunshine
    per day.

    |                        | Summer | Winter |
    |                   ---: | :---:  | :---:  |
    | Sun rises first in     | N.E.   | S.E.   |
    | Sun sets last in       | N.W.   | S.W.   |
    | Most hours of sunlight | North  | South  |

    Before training, only timesteps with at least some sunlight are kept.
    This is done by computing the clearsky global horizontal irradiance
    (GHI) for the four corners of the satellite imagery, for every
    timestep in the dataset, and keeping the timesteps where the maximum
    GHI across the four corners is above some threshold.

    'Clearsky solar irradiance' is the amount of sunlight expected on a
    clear day at a given time and place; the SI unit of irradiance is
    watt per square meter. GHI is the total sunlight that would hit a
    horizontal surface on the surface of the Earth: the sum of the direct
    irradiance (sunlight taking a direct path from the Sun to the Earth's
    surface) and the diffuse horizontal irradiance (sunlight scattered
    from the atmosphere). For more info, see:
    https://en.wikipedia.org/wiki/Solar_irradiance

    References:
      1. [Video of June 2019](https://www.youtube.com/watch?v=IOp-tj-IJpk)
      2. [Video of Jan 2019](https://www.youtube.com/watch?v=CJ4prUVa2nQ)
    """
    sat_data = self._open_data() if self._data is None else self._data
    all_datetimes = pd.DatetimeIndex(sat_data.time.values)

    if not remove_night:
        return all_datetimes

    # Daylight is judged at the border locations of this data source.
    border_locations = self.geospatial_border()
    return nd_time.select_daylight_datetimes(
        datetimes=all_datetimes, locations=border_locations
    )
348
-
349
-
350
def open_sat_data(zarr_path: str, consolidated: bool) -> xr.DataArray:
    """Lazily opens the Zarr store.

    Adds 1 minute to the 'time' coordinates, so the timestamps
    are at 00, 05, ..., 55 past the hour.

    Args:
        zarr_path: Cloud URL or local path.  If GCP URL, must start with 'gs://'
        consolidated: Whether or not the Zarr metadata is consolidated.
            Forwarded to the Zarr backend when opening the store.

    Returns:
        The "stacked_eumetsat_data" variable of the store, with its 'time'
        coordinate shifted forward by one minute.
    """
    _LOG.debug("Opening satellite data: %s", zarr_path)

    # We load using chunks=None so xarray *doesn't* use Dask to
    # load the Zarr chunks from disk.  Using Dask to load the data
    # seems to slow things down a lot if the Zarr store has more than
    # about a million chunks.
    # See https://github.com/openclimatefix/nowcasting_dataset/issues/23
    #
    # `consolidated` is passed through so the caller's setting is honoured
    # rather than silently ignored.
    dataset = xr.open_dataset(
        zarr_path,
        engine="zarr",
        consolidated=consolidated,
        mode="r",
        chunks=None,
    )

    data_array = dataset["stacked_eumetsat_data"]
    del dataset

    # The 'time' dimension is at 04, 09, ..., 59 minutes past the hour.
    # To make it easier to align the satellite data with other data sources
    # (which are at 00, 05, ..., 55 minutes past the hour) we add 1 minute to
    # the time dimension.
    # TODO Remove this as new Zarr already has the time fixed
    data_array["time"] = data_array.time + pd.Timedelta("1 minute")
    return data_array
379
-
380
269
381
270
def crop_center (img , cropx , cropy ):
382
271
"""
0 commit comments