-
-
Notifications
You must be signed in to change notification settings - Fork 6
Issue/233 data validation #258
Changes from 7 commits
54ae844
52ea668
20311b9
b963934
db36595
931da61
40e9960
581c380
9c9ad04
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -24,5 +24,5 @@ class NWP(DataSourceOutput): | |||||||||
@classmethod | ||||||||||
def model_validation(cls, v): | ||||||||||
""" Check that all values are not NaNs """ | ||||||||||
assert (v.data != np.nan).all(), "Some nwp data values are NaNs" | ||||||||||
assert (~np.isnan(v.data)).all(), "Some nwp data values are NaNs" | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about checking for positive and negative infinity? with
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I agree about using
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm gona go with both isnan and isinf. I this leads to slightly more verbose error messages For example |
||||||||||
return v |
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -29,4 +29,12 @@ class PV(DataSourceOutput): | |||||||
__slots__ = () | ||||||||
_expected_dimensions = ("time", "id") | ||||||||
|
||||||||
# todo add validation here - https://github.com/openclimatefix/nowcasting_dataset/issues/233 | ||||||||
@classmethod | ||||||||
def model_validation(cls, v): | ||||||||
""" Check that all values are non NaNs """ | ||||||||
assert (~np.isnan(v.data)).all(), f"Some pv data values are NaNs" | ||||||||
assert (v.data != np.Inf).all(), f"Some pv data values are Infinite" | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again, possibly combining the two might be a bit cleaner
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See comment above, think I will keep them separate |
||||||||
|
||||||||
assert (v.data >= 0).all(), f"Some pv data values are below 0" | ||||||||
|
||||||||
return v |
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -27,5 +27,6 @@ class Satellite(DataSourceOutput): | |||||||||
@classmethod | ||||||||||
def model_validation(cls, v): | ||||||||||
""" Check that all values are non negative """ | ||||||||||
assert (v.data != np.NaN).all(), f"Some satellite data values are NaNs" | ||||||||||
assert (~np.isnan(v.data)).all(), f"Some satellite data values are NaNs" | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as for NWP
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above 🙂
Suggested change
|
||||||||||
assert (v.data != -1).all(), f"Some satellite data values are -1's" | ||||||||||
return v |
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -20,4 +20,25 @@ class Sun(DataSourceOutput): | |||||||||||
__slots__ = () | ||||||||||||
_expected_dimensions = ("time",) | ||||||||||||
|
||||||||||||
# todo add validation here - https://github.com/openclimatefix/nowcasting_dataset/issues/233 | ||||||||||||
@classmethod | ||||||||||||
def model_validation(cls, v): | ||||||||||||
""" Check that all values are non NaNs """ | ||||||||||||
assert (~np.isnan(v.elevation)).all(), f"Some elevation data values are NaNs" | ||||||||||||
assert (v.elevation != np.Inf).all(), f"Some elevation data values are Infinite" | ||||||||||||
|
||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Possibly combine these two, or if we do want to keep them separate, changing it to
Suggested change
as I believe np.Inf only counts as positive infinity, and wouldn't check for negative infinity There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See comment above, think I will keep them separate There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cool, good idea! |
||||||||||||
assert (~np.isnan(v.azimuth)).all(), f"Some azimuth data values are NaNs" | ||||||||||||
assert (v.azimuth != np.Inf).all(), f"Some azimuth data values are Infinite" | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above |
||||||||||||
|
||||||||||||
assert (0 <= v.azimuth).all(), f"Some azimuth data values are lower 0, {v.azimuth.min()}" | ||||||||||||
assert ( | ||||||||||||
v.azimuth <= 360 | ||||||||||||
).all(), f"Some azimuth data values are greater than 360, {v.azimuth.max()}" | ||||||||||||
|
||||||||||||
assert ( | ||||||||||||
-90 <= v.elevation | ||||||||||||
).all(), f"Some elevation data values are lower -90, {v.elevation.min()}" | ||||||||||||
assert ( | ||||||||||||
v.elevation <= 90 | ||||||||||||
).all(), f"Some elevation data values are greater than 90, {v.elevation.max()}" | ||||||||||||
|
||||||||||||
return v |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -21,5 +21,6 @@ class Topographic(DataSourceOutput): | |||||
@classmethod | ||||||
def model_validation(cls, v): | ||||||
""" Check that all values are non NaNs """ | ||||||
assert (v.data != np.NaN).all(), f"Some topological data values are NaNs" | ||||||
assert (~np.isnan(v.data)).all(), f"Some topological data values are NaNs" | ||||||
assert (v.data != np.Inf).all(), f"Some topological data values are Infinite" | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Possibly just combining the two asserts might be a bit simpler?
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See comment above, think I will keep them separate |
||||||
return v |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import os | ||
import tempfile | ||
import pytest | ||
import numpy as np | ||
|
||
from nowcasting_dataset.data_sources.fake import gsp_fake | ||
from nowcasting_dataset.data_sources.gsp.gsp_model import GSP | ||
|
||
|
||
def test_gsp_init(): | ||
_ = gsp_fake(batch_size=4, seq_length_30=5, n_gsp_per_batch=6) | ||
|
||
|
||
def test_gsp_validation(): | ||
gsp = gsp_fake(batch_size=4, seq_length_30=5, n_gsp_per_batch=6) | ||
|
||
GSP.model_validation(gsp) | ||
|
||
gsp.data[0, 0] = np.nan | ||
with pytest.raises(Exception): | ||
GSP.model_validation(gsp) | ||
|
||
|
||
def test_gsp_save(): | ||
|
||
with tempfile.TemporaryDirectory() as dirpath: | ||
gsp = gsp_fake(batch_size=4, seq_length_30=5, n_gsp_per_batch=6) | ||
gsp.save_netcdf(path=dirpath, batch_i=0) | ||
|
||
assert os.path.exists(f"{dirpath}/gsp/0.nc") |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import os | ||
import tempfile | ||
import pytest | ||
import numpy as np | ||
|
||
from nowcasting_dataset.data_sources.fake import sun_fake | ||
from nowcasting_dataset.data_sources.sun.sun_model import Sun | ||
|
||
|
||
def test_sun_init(): | ||
_ = sun_fake(batch_size=4, seq_length_5=17) | ||
|
||
|
||
def test_sun_validation(): | ||
sun = sun_fake(batch_size=4, seq_length_5=17) | ||
|
||
Sun.model_validation(sun) | ||
|
||
sun.elevation[0, 0] = np.nan | ||
with pytest.raises(Exception): | ||
Sun.model_validation(sun) | ||
|
||
|
||
def test_sun_validation_elevation(): | ||
sun = sun_fake(batch_size=4, seq_length_5=17) | ||
|
||
Sun.model_validation(sun) | ||
|
||
sun.elevation[0, 0] = 1000 | ||
with pytest.raises(Exception): | ||
Sun.model_validation(sun) | ||
|
||
|
||
def test_sun_validation_azimuth(): | ||
sun = sun_fake(batch_size=4, seq_length_5=17) | ||
|
||
Sun.model_validation(sun) | ||
|
||
sun.azimuth[0, 0] = 1000 | ||
with pytest.raises(Exception): | ||
Sun.model_validation(sun) | ||
|
||
|
||
def test_sun_save(): | ||
|
||
with tempfile.TemporaryDirectory() as dirpath: | ||
sun = sun_fake(batch_size=4, seq_length_5=17) | ||
sun.save_netcdf(path=dirpath, batch_i=0) | ||
|
||
assert os.path.exists(f"{dirpath}/sun/0.nc") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Possibly combine the two asserts with
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See comment above, think I will keep them separate