Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit d64f242

Browse files
Merge pull request #258 from openclimatefix/issue/233-data-validation
Issue/233 data validation
2 parents 8d31624 + 9c9ad04 commit d64f242

File tree

13 files changed

+155
-17
lines changed

13 files changed

+155
-17
lines changed

nowcasting_dataset/data_sources/fake.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,8 @@ def create_gsp_pv_dataset(
241241
data["x_coords"] = x_coords
242242
data["y_coords"] = y_coords
243243

244+
data.__setitem__("data", data.data.clip(min=0))
245+
244246
return data
245247

246248

@@ -275,6 +277,9 @@ def create_sun_dataset(
275277
sun = data.rename({"data": "elevation"})
276278
sun["azimuth"] = data.data
277279

280+
sun.__setitem__("azimuth", sun.azimuth.clip(min=0, max=360))
281+
sun.__setitem__("elevation", sun.elevation.clip(min=-90, max=90))
282+
278283
return sun
279284

280285

nowcasting_dataset/data_sources/gsp/gsp_data_source.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,9 @@ def get_example(
234234
gsp["x_coords"] = gsp_x_coords
235235
gsp["y_coords"] = gsp_y_coords
236236

237-
# pad out so that there are always 32 gsp
237+
# pad out so that there are always 32 gsp, fill with 0
238238
pad_n = self.n_gsp_per_example - len(gsp.id_index)
239-
gsp = gsp.pad(id_index=(0, pad_n), data=((0, 0), (0, pad_n)))
239+
gsp = gsp.pad(id_index=(0, pad_n), data=((0, 0), (0, pad_n)), constant_values=0)
240240

241241
gsp.__setitem__("id_index", range(self.n_gsp_per_example))
242242

nowcasting_dataset/data_sources/gsp/gsp_model.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
""" Model for output of GSP data """
22
import logging
3+
from xarray.ufuncs import isnan, isinf
34

45
from nowcasting_dataset.data_sources.datasource_output import (
56
DataSourceOutput,
@@ -15,4 +16,11 @@ class GSP(DataSourceOutput):
1516
__slots__ = ()
1617
_expected_dimensions = ("time", "id")
1718

18-
# todo add validation here - https://github.com/openclimatefix/nowcasting_dataset/issues/233
19+
@classmethod
20+
def model_validation(cls, v):
21+
""" Check that all values are not NaN, not infinite and not negative """
22+
assert (~isnan(v.data)).all(), f"Some gsp data values are NaNs"
23+
assert (~isinf(v.data)).all(), f"Some gsp data values are Infinite"
24+
assert (v.data >= 0).all(), f"Some gsp data values are below 0 {v.data.min()}"
25+
26+
return v

nowcasting_dataset/data_sources/nwp/nwp_model.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import logging
55

6-
import numpy as np
6+
from xarray.ufuncs import isnan, isinf
77

88
from nowcasting_dataset.data_sources.datasource_output import (
99
DataSourceOutput,
@@ -24,5 +24,6 @@ class NWP(DataSourceOutput):
2424
@classmethod
2525
def model_validation(cls, v):
2626
""" Check that all values are not NaNs """
27-
assert (v.data != np.nan).all(), "Some nwp data values are NaNs"
27+
assert (~isnan(v.data)).all(), "Some nwp data values are NaNs"
28+
assert (~isinf(v.data)).all(), f"Some nwp data values are Infinite"
2829
return v

nowcasting_dataset/data_sources/pv/pv_data_source.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,9 +268,9 @@ def get_example(
268268
pv["x_coords"] = x_coords
269269
pv["y_coords"] = y_coords
270270

271-
# pad out so that there are always 32 gsp
271+
# pad out so that there are always n_pv_systems_per_example pv systems, pad with zeros
272272
pad_n = self.n_pv_systems_per_example - len(pv.id_index)
273-
pv = pv.pad(id_index=(0, pad_n), data=((0, 0), (0, pad_n)))
273+
pv = pv.pad(id_index=(0, pad_n), data=((0, 0), (0, pad_n)), constant_values=0)
274274

275275
pv.__setitem__("id_index", range(self.n_pv_systems_per_example))
276276

nowcasting_dataset/data_sources/pv/pv_model.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import logging
33

44
import numpy as np
5+
from xarray.ufuncs import isnan, isinf
56
from pydantic import Field, validator
67

78
from nowcasting_dataset.consts import (
@@ -29,4 +30,12 @@ class PV(DataSourceOutput):
2930
__slots__ = ()
3031
_expected_dimensions = ("time", "id")
3132

32-
# todo add validation here - https://github.com/openclimatefix/nowcasting_dataset/issues/233
33+
@classmethod
34+
def model_validation(cls, v):
35+
""" Check that all values are not NaN, not infinite and not negative """
36+
assert (~isnan(v.data)).all(), f"Some pv data values are NaNs"
37+
assert (~isinf(v.data)).all(), f"Some pv data values are Infinite"
38+
39+
assert (v.data >= 0).all(), f"Some pv data values are below 0"
40+
41+
return v

nowcasting_dataset/data_sources/satellite/satellite_model.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import logging
55

66
import numpy as np
7-
import xarray as xr
7+
from xarray.ufuncs import isnan, isinf
88
from pydantic import Field
99

1010
from nowcasting_dataset.consts import Array
@@ -27,5 +27,7 @@ class Satellite(DataSourceOutput):
2727
@classmethod
2828
def model_validation(cls, v):
2929
""" Check that all values are not NaN, not infinite and not -1 """
30-
assert (v.data != np.NaN).all(), f"Some satellite data values are NaNs"
30+
assert (~isnan(v.data)).all(), f"Some satellite data values are NaNs"
31+
assert (~isinf(v.data)).all(), f"Some satellite data values are Infinite"
32+
assert (v.data != -1).all(), f"Some satellite data values are -1's"
3133
return v

nowcasting_dataset/data_sources/sun/sun_model.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import logging
33

44
import numpy as np
5+
from xarray.ufuncs import isnan, isinf
56
from pydantic import Field, validator
67

78
from nowcasting_dataset.consts import Array, SUN_AZIMUTH_ANGLE, SUN_ELEVATION_ANGLE
@@ -20,4 +21,25 @@ class Sun(DataSourceOutput):
2021
__slots__ = ()
2122
_expected_dimensions = ("time",)
2223

23-
# todo add validation here - https://github.com/openclimatefix/nowcasting_dataset/issues/233
24+
@classmethod
25+
def model_validation(cls, v):
26+
""" Check that elevation and azimuth are not NaN or infinite, with azimuth in [0, 360] and elevation in [-90, 90] """
27+
assert (~isnan(v.elevation)).all(), f"Some elevation data values are NaNs"
28+
assert (~isinf(v.elevation)).all(), f"Some elevation data values are Infinite"
29+
30+
assert (~isnan(v.azimuth)).all(), f"Some azimuth data values are NaNs"
31+
assert (~isinf(v.azimuth)).all(), f"Some azimuth data values are Infinite"
32+
33+
assert (0 <= v.azimuth).all(), f"Some azimuth data values are lower 0, {v.azimuth.min()}"
34+
assert (
35+
v.azimuth <= 360
36+
).all(), f"Some azimuth data values are greater than 360, {v.azimuth.max()}"
37+
38+
assert (
39+
-90 <= v.elevation
40+
).all(), f"Some elevation data values are lower -90, {v.elevation.min()}"
41+
assert (
42+
v.elevation <= 90
43+
).all(), f"Some elevation data values are greater than 90, {v.elevation.max()}"
44+
45+
return v

nowcasting_dataset/data_sources/topographic/topographic_model.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import logging
33

44
import numpy as np
5+
from xarray.ufuncs import isnan, isinf
56
from pydantic import Field, validator
67

78
from nowcasting_dataset.consts import Array
@@ -21,5 +22,6 @@ class Topographic(DataSourceOutput):
2122
@classmethod
2223
def model_validation(cls, v):
2324
""" Check that all values are non NaNs """
24-
assert (v.data != np.NaN).all(), f"Some topological data values are NaNs"
25+
assert (~isnan(v.data)).all(), f"Some topological data values are NaNs"
26+
assert (~isinf(v.data)).all(), f"Some topological data values are Infinite"
2527
return v
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import os
2+
import tempfile
3+
import pytest
4+
import numpy as np
5+
6+
from nowcasting_dataset.data_sources.fake import gsp_fake
7+
from nowcasting_dataset.data_sources.gsp.gsp_model import GSP
8+
9+
10+
def test_gsp_init():
11+
_ = gsp_fake(batch_size=4, seq_length_30=5, n_gsp_per_batch=6)
12+
13+
14+
def test_gsp_validation():
15+
gsp = gsp_fake(batch_size=4, seq_length_30=5, n_gsp_per_batch=6)
16+
17+
GSP.model_validation(gsp)
18+
19+
gsp.data[0, 0] = np.nan
20+
with pytest.raises(Exception):
21+
GSP.model_validation(gsp)
22+
23+
24+
def test_gsp_save():
25+
26+
with tempfile.TemporaryDirectory() as dirpath:
27+
gsp = gsp_fake(batch_size=4, seq_length_30=5, n_gsp_per_batch=6)
28+
gsp.save_netcdf(path=dirpath, batch_i=0)
29+
30+
assert os.path.exists(f"{dirpath}/gsp/0.nc")

tests/data_sources/satellite/test_satellite_model.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,24 @@
11
import os
22
import tempfile
3+
import pytest
4+
import numpy as np
35

46
from nowcasting_dataset.data_sources.fake import satellite_fake
7+
from nowcasting_dataset.data_sources.satellite.satellite_model import Satellite
58

69

710
def test_satellite_init():
8-
_ = satellite_fake
11+
_ = satellite_fake()
12+
13+
14+
def test_satellite_validation():
15+
sat = satellite_fake()
16+
17+
Satellite.model_validation(sat)
18+
19+
sat.data[0, 0] = np.nan
20+
with pytest.raises(Exception):
21+
Satellite.model_validation(sat)
922

1023

1124
def test_satellite_save():

tests/data_sources/sun/test_sun_data_source.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
11
from nowcasting_dataset.data_sources.sun.sun_data_source import SunDataSource
2-
from datetime import datetime
3-
4-
# from nowcasting_dataset.dataset.example import Example
5-
from nowcasting_dataset.consts import SUN_ELEVATION_ANGLE, SUN_AZIMUTH_ANGLE
62
import pandas as pd
73

84

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import os
2+
import tempfile
3+
import pytest
4+
import numpy as np
5+
6+
from nowcasting_dataset.data_sources.fake import sun_fake
7+
from nowcasting_dataset.data_sources.sun.sun_model import Sun
8+
9+
10+
def test_sun_init():
11+
_ = sun_fake(batch_size=4, seq_length_5=17)
12+
13+
14+
def test_sun_validation():
15+
sun = sun_fake(batch_size=4, seq_length_5=17)
16+
17+
Sun.model_validation(sun)
18+
19+
sun.elevation[0, 0] = np.nan
20+
with pytest.raises(Exception):
21+
Sun.model_validation(sun)
22+
23+
24+
def test_sun_validation_elevation():
25+
sun = sun_fake(batch_size=4, seq_length_5=17)
26+
27+
Sun.model_validation(sun)
28+
29+
sun.elevation[0, 0] = 1000
30+
with pytest.raises(Exception):
31+
Sun.model_validation(sun)
32+
33+
34+
def test_sun_validation_azimuth():
35+
sun = sun_fake(batch_size=4, seq_length_5=17)
36+
37+
Sun.model_validation(sun)
38+
39+
sun.azimuth[0, 0] = 1000
40+
with pytest.raises(Exception):
41+
Sun.model_validation(sun)
42+
43+
44+
def test_sun_save():
45+
46+
with tempfile.TemporaryDirectory() as dirpath:
47+
sun = sun_fake(batch_size=4, seq_length_5=17)
48+
sun.save_netcdf(path=dirpath, batch_i=0)
49+
50+
assert os.path.exists(f"{dirpath}/sun/0.nc")

0 commit comments

Comments
 (0)