Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Issue/233 data validation #258

Merged
merged 9 commits into from
Oct 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions nowcasting_dataset/data_sources/fake.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,8 @@ def create_gsp_pv_dataset(
data["x_coords"] = x_coords
data["y_coords"] = y_coords

data.__setitem__("data", data.data.clip(min=0))

return data


Expand Down Expand Up @@ -275,6 +277,9 @@ def create_sun_dataset(
sun = data.rename({"data": "elevation"})
sun["azimuth"] = data.data

sun.__setitem__("azimuth", sun.azimuth.clip(min=0, max=360))
sun.__setitem__("elevation", sun.elevation.clip(min=-90, max=90))

return sun


Expand Down
4 changes: 2 additions & 2 deletions nowcasting_dataset/data_sources/gsp/gsp_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,9 @@ def get_example(
gsp["x_coords"] = gsp_x_coords
gsp["y_coords"] = gsp_y_coords

# pad out so that there are always 32 gsp
# pad out so that there are always 32 gsp, fill with 0
pad_n = self.n_gsp_per_example - len(gsp.id_index)
gsp = gsp.pad(id_index=(0, pad_n), data=((0, 0), (0, pad_n)))
gsp = gsp.pad(id_index=(0, pad_n), data=((0, 0), (0, pad_n)), constant_values=0)

gsp.__setitem__("id_index", range(self.n_gsp_per_example))

Expand Down
10 changes: 9 additions & 1 deletion nowcasting_dataset/data_sources/gsp/gsp_model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Model for output of GSP data """
import logging
from xarray.ufuncs import isnan, isinf

from nowcasting_dataset.data_sources.datasource_output import (
DataSourceOutput,
Expand All @@ -15,4 +16,11 @@ class GSP(DataSourceOutput):
__slots__ = ()
_expected_dimensions = ("time", "id")

# todo add validation here - https://github.com/openclimatefix/nowcasting_dataset/issues/233
@classmethod
def model_validation(cls, v):
    """Check that the GSP data is finite and non-negative.

    Args:
        v: object exposing a ``data`` attribute holding the GSP values.

    Returns:
        ``v`` unchanged, when every check passes.

    Raises:
        AssertionError: if any value of ``v.data`` is NaN, infinite or below 0.
    """
    # Raise explicitly instead of using ``assert`` statements so the checks
    # still run when Python is started with -O (which strips asserts).
    # AssertionError is kept so callers see the same exception type as before.
    if not (~isnan(v.data)).all():
        raise AssertionError("Some gsp data values are NaNs")
    if not (~isinf(v.data)).all():
        raise AssertionError("Some gsp data values are Infinite")
    if not (v.data >= 0).all():
        raise AssertionError(f"Some gsp data values are below 0 {v.data.min()}")

    return v
5 changes: 3 additions & 2 deletions nowcasting_dataset/data_sources/nwp/nwp_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import logging

import numpy as np
from xarray.ufuncs import isnan, isinf

from nowcasting_dataset.data_sources.datasource_output import (
DataSourceOutput,
Expand All @@ -24,5 +24,6 @@ class NWP(DataSourceOutput):
@classmethod
def model_validation(cls, v):
    """Check that the NWP data contains no NaN or infinite values.

    Args:
        v: object exposing a ``data`` attribute holding the NWP values.

    Returns:
        ``v`` unchanged, when every check passes.

    Raises:
        AssertionError: if any value of ``v.data`` is NaN or infinite.
    """
    # A ``v.data != np.nan`` comparison cannot detect NaNs: NaN compares
    # unequal to every value (itself included), so that test is always true.
    # ``isnan`` is the check that actually works.
    #
    # Raise explicitly instead of using ``assert`` statements so the checks
    # still run when Python is started with -O (which strips asserts).
    if not (~isnan(v.data)).all():
        raise AssertionError("Some nwp data values are NaNs")
    if not (~isinf(v.data)).all():
        raise AssertionError("Some nwp data values are Infinite")
    return v
4 changes: 2 additions & 2 deletions nowcasting_dataset/data_sources/pv/pv_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@ def get_example(
pv["x_coords"] = x_coords
pv["y_coords"] = y_coords

# pad out so that there are always 32 gsp
# pad out so that there are always n_pv_systems_per_example pv systems, pad with zeros
pad_n = self.n_pv_systems_per_example - len(pv.id_index)
pv = pv.pad(id_index=(0, pad_n), data=((0, 0), (0, pad_n)))
pv = pv.pad(id_index=(0, pad_n), data=((0, 0), (0, pad_n)), constant_values=0)

pv.__setitem__("id_index", range(self.n_pv_systems_per_example))

Expand Down
11 changes: 10 additions & 1 deletion nowcasting_dataset/data_sources/pv/pv_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging

import numpy as np
from xarray.ufuncs import isnan, isinf
from pydantic import Field, validator

from nowcasting_dataset.consts import (
Expand Down Expand Up @@ -29,4 +30,12 @@ class PV(DataSourceOutput):
__slots__ = ()
_expected_dimensions = ("time", "id")

# todo add validation here - https://github.com/openclimatefix/nowcasting_dataset/issues/233
@classmethod
def model_validation(cls, v):
    """Check that the PV data is finite and non-negative.

    Args:
        v: object exposing a ``data`` attribute holding the PV values.

    Returns:
        ``v`` unchanged, when every check passes.

    Raises:
        AssertionError: if any value of ``v.data`` is NaN, infinite or below 0.
    """
    # Raise explicitly instead of using ``assert`` statements so the checks
    # still run when Python is started with -O (which strips asserts).
    # AssertionError is kept so callers see the same exception type as before.
    if not (~isnan(v.data)).all():
        raise AssertionError("Some pv data values are NaNs")
    if not (~isinf(v.data)).all():
        raise AssertionError("Some pv data values are Infinite")

    if not (v.data >= 0).all():
        raise AssertionError("Some pv data values are below 0")

    return v
6 changes: 4 additions & 2 deletions nowcasting_dataset/data_sources/satellite/satellite_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import logging

import numpy as np
import xarray as xr
from xarray.ufuncs import isnan, isinf
from pydantic import Field

from nowcasting_dataset.consts import Array
Expand All @@ -27,5 +27,7 @@ class Satellite(DataSourceOutput):
@classmethod
def model_validation(cls, v):
    """Check that the satellite data is finite and contains no -1 values.

    Args:
        v: object exposing a ``data`` attribute holding the satellite values.

    Returns:
        ``v`` unchanged, when every check passes.

    Raises:
        AssertionError: if any value of ``v.data`` is NaN, infinite or equal
            to -1.
    """
    # A ``v.data != np.NaN`` comparison cannot detect NaNs: NaN compares
    # unequal to every value (itself included), so that test is always true.
    # ``isnan`` is the check that actually works.
    #
    # Raise explicitly instead of using ``assert`` statements so the checks
    # still run when Python is started with -O (which strips asserts).
    if not (~isnan(v.data)).all():
        raise AssertionError("Some satellite data values are NaNs")
    if not (~isinf(v.data)).all():
        raise AssertionError("Some satellite data values are Infinite")
    # NOTE(review): presumably -1 is a missing-data sentinel - confirm.
    if not (v.data != -1).all():
        raise AssertionError("Some satellite data values are -1's")
    return v
24 changes: 23 additions & 1 deletion nowcasting_dataset/data_sources/sun/sun_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging

import numpy as np
from xarray.ufuncs import isnan, isinf
from pydantic import Field, validator

from nowcasting_dataset.consts import Array, SUN_AZIMUTH_ANGLE, SUN_ELEVATION_ANGLE
Expand All @@ -20,4 +21,25 @@ class Sun(DataSourceOutput):
__slots__ = ()
_expected_dimensions = ("time",)

# todo add validation here - https://github.com/openclimatefix/nowcasting_dataset/issues/233
@classmethod
def model_validation(cls, v):
    """Check that the sun angles are finite and inside their allowed ranges.

    Azimuth must lie in [0, 360] and elevation in [-90, 90].

    Args:
        v: object exposing ``elevation`` and ``azimuth`` attributes.

    Returns:
        ``v`` unchanged, when every check passes.

    Raises:
        AssertionError: if any elevation or azimuth value is NaN, infinite
            or outside its allowed range.
    """
    # Raise explicitly instead of using ``assert`` statements so the checks
    # still run when Python is started with -O (which strips asserts).
    # The order of the checks is unchanged, so the first reported problem
    # is the same as before.
    if not (~isnan(v.elevation)).all():
        raise AssertionError("Some elevation data values are NaNs")
    if not (~isinf(v.elevation)).all():
        raise AssertionError("Some elevation data values are Infinite")

    if not (~isnan(v.azimuth)).all():
        raise AssertionError("Some azimuth data values are NaNs")
    if not (~isinf(v.azimuth)).all():
        raise AssertionError("Some azimuth data values are Infinite")

    if not (0 <= v.azimuth).all():
        raise AssertionError(f"Some azimuth data values are lower 0, {v.azimuth.min()}")
    if not (v.azimuth <= 360).all():
        raise AssertionError(
            f"Some azimuth data values are greater than 360, {v.azimuth.max()}"
        )

    if not (-90 <= v.elevation).all():
        raise AssertionError(
            f"Some elevation data values are lower -90, {v.elevation.min()}"
        )
    if not (v.elevation <= 90).all():
        raise AssertionError(
            f"Some elevation data values are greater than 90, {v.elevation.max()}"
        )

    return v
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging

import numpy as np
from xarray.ufuncs import isnan, isinf
from pydantic import Field, validator

from nowcasting_dataset.consts import Array
Expand All @@ -21,5 +22,6 @@ class Topographic(DataSourceOutput):
@classmethod
def model_validation(cls, v):
    """Check that the topographic data contains no NaN or infinite values.

    Args:
        v: object exposing a ``data`` attribute holding the topographic values.

    Returns:
        ``v`` unchanged, when every check passes.

    Raises:
        AssertionError: if any value of ``v.data`` is NaN or infinite.
    """
    # A ``v.data != np.NaN`` comparison cannot detect NaNs: NaN compares
    # unequal to every value (itself included), so that test is always true.
    # ``isnan`` is the check that actually works.
    #
    # Raise explicitly instead of using ``assert`` statements so the checks
    # still run when Python is started with -O (which strips asserts).
    if not (~isnan(v.data)).all():
        raise AssertionError("Some topological data values are NaNs")
    if not (~isinf(v.data)).all():
        raise AssertionError("Some topological data values are Infinite")
    return v
30 changes: 30 additions & 0 deletions tests/data_sources/gsp/test_gsp_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
import tempfile
import pytest
import numpy as np

from nowcasting_dataset.data_sources.fake import gsp_fake
from nowcasting_dataset.data_sources.gsp.gsp_model import GSP


def test_gsp_init():
    """Smoke test: building a fake GSP batch must not raise."""
    gsp_fake(batch_size=4, seq_length_30=5, n_gsp_per_batch=6)


def test_gsp_validation():
    """Fake GSP data passes validation; injecting a NaN makes it fail."""
    gsp = gsp_fake(batch_size=4, seq_length_30=5, n_gsp_per_batch=6)

    # untouched fake data must validate cleanly
    GSP.model_validation(gsp)

    gsp.data[0, 0] = np.nan
    # Validation failures surface as AssertionError. Expecting that exact type
    # (not any Exception) keeps unrelated errors from passing the test.
    with pytest.raises(AssertionError):
        GSP.model_validation(gsp)


def test_gsp_save():
    """Saving a fake GSP batch writes ``gsp/0.nc`` below the target directory."""
    fake_batch = gsp_fake(batch_size=4, seq_length_30=5, n_gsp_per_batch=6)

    with tempfile.TemporaryDirectory() as dirpath:
        fake_batch.save_netcdf(path=dirpath, batch_i=0)

        # checked inside the ``with``: the directory is removed on exit
        expected_file = f"{dirpath}/gsp/0.nc"
        assert os.path.exists(expected_file)
15 changes: 14 additions & 1 deletion tests/data_sources/satellite/test_satellite_model.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@
import os
import tempfile
import pytest
import numpy as np

from nowcasting_dataset.data_sources.fake import satellite_fake
from nowcasting_dataset.data_sources.satellite.satellite_model import Satellite


def test_satellite_init():
    """Smoke test: building a fake satellite batch must not raise."""
    # The factory has to be *called*; merely referencing ``satellite_fake``
    # builds nothing and would let the test pass vacuously.
    _ = satellite_fake()


def test_satellite_validation():
    """Fake satellite data passes validation; injecting a NaN makes it fail."""
    sat = satellite_fake()

    # untouched fake data must validate cleanly
    Satellite.model_validation(sat)

    sat.data[0, 0] = np.nan
    # Validation failures surface as AssertionError. Expecting that exact type
    # (not any Exception) keeps unrelated errors from passing the test.
    with pytest.raises(AssertionError):
        Satellite.model_validation(sat)


def test_satellite_save():
Expand Down
4 changes: 0 additions & 4 deletions tests/data_sources/sun/test_sun_data_source.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
from nowcasting_dataset.data_sources.sun.sun_data_source import SunDataSource
from datetime import datetime

# from nowcasting_dataset.dataset.example import Example
from nowcasting_dataset.consts import SUN_ELEVATION_ANGLE, SUN_AZIMUTH_ANGLE
import pandas as pd


Expand Down
50 changes: 50 additions & 0 deletions tests/data_sources/sun/test_sun_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import os
import tempfile
import pytest
import numpy as np

from nowcasting_dataset.data_sources.fake import sun_fake
from nowcasting_dataset.data_sources.sun.sun_model import Sun


def test_sun_init():
    """Smoke test: building a fake sun batch must not raise."""
    sun_fake(batch_size=4, seq_length_5=17)


def test_sun_validation():
    """Fake sun data passes validation; a NaN elevation makes it fail."""
    sun = sun_fake(batch_size=4, seq_length_5=17)

    # untouched fake data must validate cleanly
    Sun.model_validation(sun)

    sun.elevation[0, 0] = np.nan
    # Validation failures surface as AssertionError. Expecting that exact type
    # (not any Exception) keeps unrelated errors from passing the test.
    with pytest.raises(AssertionError):
        Sun.model_validation(sun)


def test_sun_validation_elevation():
    """An elevation above the allowed maximum of 90 is rejected."""
    sun = sun_fake(batch_size=4, seq_length_5=17)

    # untouched fake data must validate cleanly
    Sun.model_validation(sun)

    sun.elevation[0, 0] = 1000
    # Validation failures surface as AssertionError. Expecting that exact type
    # (not any Exception) keeps unrelated errors from passing the test.
    with pytest.raises(AssertionError):
        Sun.model_validation(sun)


def test_sun_validation_azimuth():
    """An azimuth above the allowed maximum of 360 is rejected."""
    sun = sun_fake(batch_size=4, seq_length_5=17)

    # untouched fake data must validate cleanly
    Sun.model_validation(sun)

    sun.azimuth[0, 0] = 1000
    # Validation failures surface as AssertionError. Expecting that exact type
    # (not any Exception) keeps unrelated errors from passing the test.
    with pytest.raises(AssertionError):
        Sun.model_validation(sun)


def test_sun_save():
    """Saving a fake sun batch writes ``sun/0.nc`` below the target directory."""
    fake_batch = sun_fake(batch_size=4, seq_length_5=17)

    with tempfile.TemporaryDirectory() as dirpath:
        fake_batch.save_netcdf(path=dirpath, batch_i=0)

        # checked inside the ``with``: the directory is removed on exit
        expected_file = f"{dirpath}/sun/0.nc"
        assert os.path.exists(expected_file)