diff --git a/docs/sphinx/source/reference/iotools.rst b/docs/sphinx/source/reference/iotools.rst index 514aeac2f5..191001f2fb 100644 --- a/docs/sphinx/source/reference/iotools.rst +++ b/docs/sphinx/source/reference/iotools.rst @@ -20,8 +20,6 @@ of sources and file formats relevant to solar energy modeling. iotools.read_surfrad iotools.read_midc iotools.read_midc_raw_data_from_nrel - iotools.read_ecmwf_macc - iotools.get_ecmwf_macc iotools.read_crn iotools.read_solrad iotools.get_psm3 diff --git a/docs/sphinx/source/user_guide/clearsky.rst b/docs/sphinx/source/user_guide/clearsky.rst index 6e51981e95..573580b869 100644 --- a/docs/sphinx/source/user_guide/clearsky.rst +++ b/docs/sphinx/source/user_guide/clearsky.rst @@ -376,7 +376,8 @@ derived from surface relative humidity using functions such as :py:func:`pvlib.atmosphere.gueymard94_pw`. Numerous gridded products from satellites, weather models, and climate models contain one or both of aerosols and precipitable water. Consider data -from the `ECMWF `_ +from the `ECMWF ERA5 `_, +`NASA MERRA-2 `_, and `SoDa `_. Aerosol optical depth (AOD) is a function of wavelength, and the Simplified diff --git a/docs/sphinx/source/whatsnew/v0.9.6.rst b/docs/sphinx/source/whatsnew/v0.9.6.rst index 7d1271086f..579622fafd 100644 --- a/docs/sphinx/source/whatsnew/v0.9.6.rst +++ b/docs/sphinx/source/whatsnew/v0.9.6.rst @@ -7,6 +7,11 @@ v0.9.6 (Anticipated June 2023) Deprecations ~~~~~~~~~~~~ +* Removed the ``get_ecmwf_macc`` and ``read_ecmwf_macc`` iotools functions as the + MACC dataset has been `removed by ECMWF `_ + (data period 2003-2012). Instead, ECMWF recommends to use CAMS global + reanalysis (EAC4) from the Atmosphere Data Store (ADS). See also :py:func:`pvlib.iotools.get_cams`. + (:issue:`1691`, :pull:`1654`) Enhancements diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py index b02ce243ae..7653934535 100644 --- a/pvlib/iotools/__init__.py +++ b/pvlib/iotools/__init__.py @@ -5,8 +5,6 @@ from pvlib.iotools.surfrad import read_surfrad # noqa: F401 from pvlib.iotools.midc import read_midc # noqa: F401 from pvlib.iotools.midc import read_midc_raw_data_from_nrel # noqa: F401 -from pvlib.iotools.ecmwf_macc import read_ecmwf_macc # noqa: F401 -from pvlib.iotools.ecmwf_macc import get_ecmwf_macc # noqa: F401 from pvlib.iotools.crn import read_crn # noqa: F401 from pvlib.iotools.solrad import read_solrad # noqa: F401 from pvlib.iotools.psm3 import get_psm3 # noqa: F401 diff --git a/pvlib/iotools/ecmwf_macc.py b/pvlib/iotools/ecmwf_macc.py deleted file mode 100644 index 96902b2a9b..0000000000 --- a/pvlib/iotools/ecmwf_macc.py +++ /dev/null @@ -1,312 +0,0 @@ -""" -Read data from ECMWF MACC Reanalysis. -""" - -import threading -import pandas as pd - -try: - import netCDF4 -except ImportError: - class netCDF4: - @staticmethod - def Dataset(*a, **kw): - raise ImportError( - 'Reading ECMWF data requires netCDF4 to be installed.') - -try: - from ecmwfapi import ECMWFDataServer -except ImportError: - def ECMWFDataServer(*a, **kw): - raise ImportError( - 'To download data from ECMWF requires the API client.\nSee https:/' - '/confluence.ecmwf.int/display/WEBAPI/Access+ECMWF+Public+Datasets' - ) - -#: map of ECMWF MACC parameter keynames and codes used in API -PARAMS = { - "tcwv": "137.128", - "aod550": "207.210", - 'aod469': '213.210', - 'aod670': '214.210', - 'aod865': '215.210', - "aod1240": "216.210", -} - - -def _ecmwf(server, startdate, enddate, params, targetname): - # see http://apps.ecmwf.int/datasets/data/macc-reanalysis/levtype=sfc/ - server.retrieve({ - "class": "mc", - "dataset": "macc", - "date": "%s/to/%s" % (startdate, enddate), - "expver": "rean", - "grid": "0.75/0.75", - "levtype": "sfc", - "param": params, - "step": "3/6/9/12/15/18/21/24", - "stream": "oper", - "format": "netcdf", - "time": "00:00:00", - "type": "fc", - "target": targetname, - }) - - -def get_ecmwf_macc(filename, params, start, end, lookup_params=True, - server=None, target=_ecmwf): - """ - Download data from ECMWF MACC Reanalysis API. - - Parameters - ---------- - filename : str - full path of file where to save data, ``.nc`` appended if not given - params : str or sequence of str - keynames of parameter[s] to download - start : datetime.datetime or datetime.date - UTC date - end : datetime.datetime or datetime.date - UTC date - lookup_params : bool, default True - optional flag, if ``False``, then codes are already formatted - server : ecmwfapi.api.ECMWFDataServer - optionally provide a server object, default is ``None`` - target : callable - optional function that calls ``server.retrieve`` to pass to thread - - Returns - ------- - t : thread - a thread object, use it to check status by calling `t.is_alive()` - - Notes - ----- - To download data from ECMWF requires the API client and a registration - key. Please read the documentation in `Access ECMWF Public Datasets - `_. - Follow the instructions in step 4 and save the ECMWF registration key - as `$HOME/.ecmwfapirc` or set `ECMWF_API_KEY` as the path to the key. - - This function returns a daemon thread that runs in the background. Exiting - Python will kill this thread, however this thread will not block the main - thread or other threads. This thread will terminate when the file is - downloaded or if the thread raises an unhandled exception. You may submit - multiple requests simultaneously to break up large downloads. You can also - check the status and retrieve downloads online at - http://apps.ecmwf.int/webmars/joblist/. This is useful if you kill the - thread. Downloads expire after 24 hours. - - .. warning:: Your request may be queued online for an hour or more before - it begins to download - - Precipitable water :math:`P_{wat}` is equivalent to the total column of - water vapor (TCWV), but the units given by ECMWF MACC Reanalysis are kg/m^2 - at STP (1-atm, 25-C). Divide by ten to convert to centimeters of - precipitable water: - - .. math:: - P_{wat} \\left( \\text{cm} \\right) \ - = TCWV \\left( \\frac{\\text{kg}}{\\text{m}^2} \\right) \ - \\frac{100 \\frac{\\text{cm}}{\\text{m}}} \ - {1000 \\frac{\\text{kg}}{\\text{m}^3}} - - The keynames available for the ``params`` argument are given by - :const:`pvlib.iotools.ecmwf_macc.PARAMS` which maps the keys to codes used - in the API. The following keynames are available: - - ======= ========================================= - keyname description - ======= ========================================= - tcwv total column water vapor in kg/m^2 at STP - aod550 aerosol optical depth measured at 550-nm - aod469 aerosol optical depth measured at 469-nm - aod670 aerosol optical depth measured at 670-nm - aod865 aerosol optical depth measured at 865-nm - aod1240 aerosol optical depth measured at 1240-nm - ======= ========================================= - - If ``lookup_params`` is ``False`` then ``params`` must contain the codes - preformatted according to the ECMWF MACC Reanalysis API. This is useful if - you want to retrieve codes that are not mapped in - :const:`pvlib.iotools.ecmwf_macc.PARAMS`. - - Specify a custom ``target`` function to modify how the ECMWF API function - ``server.retrieve`` is called. The ``target`` function must have the - following signature in which the parameter definitions are similar to - :func:`pvlib.iotools.get_ecmwf_macc`. :: - - - target(server, startdate, enddate, params, filename) -> None - - Examples - -------- - Retrieve the AOD measured at 550-nm and the total column of water vapor for - November 1, 2012. - - >>> from datetime import date - >>> from pvlib.iotools import get_ecmwf_macc - >>> filename = 'aod_tcwv_20121101.nc' # .nc extension added if missing - >>> params = ('aod550', 'tcwv') - >>> start = end = date(2012, 11, 1) - >>> t = get_ecmwf_macc(filename, params, start, end) - >>> t.is_alive() - True - - """ - if not filename.endswith('nc'): - filename += '.nc' - if lookup_params: - try: - params = '/'.join(PARAMS.get(p) for p in params) - except TypeError: - params = PARAMS.get(params) - startdate = start.strftime('%Y-%m-%d') - enddate = end.strftime('%Y-%m-%d') - if not server: - server = ECMWFDataServer() - t = threading.Thread(target=target, daemon=True, - args=(server, startdate, enddate, params, filename)) - t.start() - return t - - -class ECMWF_MACC(object): - """container for ECMWF MACC reanalysis data""" - - TCWV = 'tcwv' # total column water vapor in kg/m^2 at (1-atm,25-degC) - - def __init__(self, filename): - self.data = netCDF4.Dataset(filename) - # data variables and dimensions - variables = set(self.data.variables.keys()) - dimensions = set(self.data.dimensions.keys()) - self.keys = tuple(variables - dimensions) - # size of lat/lon dimensions - self.lat_size = self.data.dimensions['latitude'].size - self.lon_size = self.data.dimensions['longitude'].size - # spatial resolution in degrees - self.delta_lat = -180.0 / (self.lat_size - 1) # from north to south - self.delta_lon = 360.0 / self.lon_size # from west to east - # time resolution in hours - self.time_size = self.data.dimensions['time'].size - self.start_time = self.data['time'][0] - self.end_time = self.data['time'][-1] - self.time_range = self.end_time - self.start_time - self.delta_time = self.time_range / (self.time_size - 1) - - def get_nearest_indices(self, latitude, longitude): - """ - Get nearest indices to (latitude, longitude). - - Parmaeters - ---------- - latitude : float - Latitude in degrees - longitude : float - Longitude in degrees - - Returns - ------- - idx_lat : int - index of nearest latitude - idx_lon : int - index of nearest longitude - """ - # index of nearest latitude - idx_lat = int(round((latitude - 90.0) / self.delta_lat)) - # avoid out of bounds latitudes - if idx_lat < 0: - idx_lat = 0 # if latitude == 90, north pole - elif idx_lat > self.lat_size: - idx_lat = self.lat_size # if latitude == -90, south pole - # adjust longitude from -180/180 to 0/360 - longitude = longitude % 360.0 - # index of nearest longitude - idx_lon = int(round(longitude / self.delta_lon)) % self.lon_size - return idx_lat, idx_lon - - def interp_data(self, latitude, longitude, utc_time, param): - """ - Interpolate ``param`` values to ``utc_time`` using indices nearest to - (``latitude, longitude``). - - Parmaeters - ---------- - latitude : float - Latitude in degrees - longitude : float - Longitude in degrees - utc_time : datetime.datetime or datetime.date - Naive or UTC date or datetime to interpolate - param : str - Name of the parameter to interpolate from the data - - Returns - ------- - Interpolated ``param`` value at (``utc_time, latitude, longitude``) - - Examples - -------- - Use this to get a single value of a parameter in the data at a specific - time and set of (latitude, longitude) coordinates. - - >>> from datetime import datetime - >>> from pvlib.iotools import ecmwf_macc - >>> data = ecmwf_macc.ECMWF_MACC('aod_tcwv_20121101.nc') - >>> dt = datetime(2012, 11, 1, 11, 33, 1) - >>> data.interp_data(38.2, -122.1, dt, 'aod550') - """ - nctime = self.data['time'] # time - ilat, ilon = self.get_nearest_indices(latitude, longitude) - # time index before - before = netCDF4.date2index(utc_time, nctime, select='before') - fbefore = self.data[param][before, ilat, ilon] - fafter = self.data[param][before + 1, ilat, ilon] - dt_num = netCDF4.date2num(utc_time, nctime.units) - time_ratio = (dt_num - nctime[before]) / self.delta_time - return fbefore + (fafter - fbefore) * time_ratio - - -def read_ecmwf_macc(filename, latitude, longitude, utc_time_range=None): - """ - Read data from ECMWF MACC reanalysis netCDF4 file. - - Parameters - ---------- - filename : string - full path to netCDF4 data file. - latitude : float - latitude in degrees - longitude : float - longitude in degrees - utc_time_range : sequence of datetime.datetime - pair of start and end naive or UTC date-times - - Returns - ------- - data : pandas.DataFrame - dataframe for specified range of UTC date-times - """ - ecmwf_macc = ECMWF_MACC(filename) - try: - ilat, ilon = ecmwf_macc.get_nearest_indices(latitude, longitude) - nctime = ecmwf_macc.data['time'] - if utc_time_range: - start_idx = netCDF4.date2index( - utc_time_range[0], nctime, select='nearest') - end_idx = netCDF4.date2index( - utc_time_range[-1], nctime, select='nearest') - time_slice = slice(start_idx, end_idx + 1) - else: - time_slice = slice(0, ecmwf_macc.time_size) - times = netCDF4.num2date(nctime[time_slice], nctime.units) - df = {k: ecmwf_macc.data[k][time_slice, ilat, ilon] - for k in ecmwf_macc.keys} - if ECMWF_MACC.TCWV in df: - # convert total column water vapor in kg/m^2 at (1-atm, 25-degC) to - # precipitable water in cm - df['precipitable_water'] = df[ECMWF_MACC.TCWV] / 10.0 - finally: - ecmwf_macc.data.close() - return pd.DataFrame(df, index=times.astype('datetime64[s]')) diff --git a/pvlib/tests/iotools/test_ecmwf_macc.py b/pvlib/tests/iotools/test_ecmwf_macc.py deleted file mode 100644 index bec3d0aafd..0000000000 --- a/pvlib/tests/iotools/test_ecmwf_macc.py +++ /dev/null @@ -1,162 +0,0 @@ -""" -tests for :mod:`pvlib.iotools.ecmwf_macc` -""" - -import os -import datetime -import numpy as np -import pandas as pd -import pytest -from ..conftest import requires_netCDF4, DATA_DIR, assert_index_equal -from pvlib.iotools import ecmwf_macc - -TESTDATA = 'aod550_tcwv_20121101_test.nc' - -# for creating test data -START = END = datetime.date(2012, 11, 1) -DATAFILE = 'aod550_tcwv_20121101.nc' -RESIZE = 4 -LON_BND = (0, 360.0) -LAT_BND = (90, -90) - - -@pytest.fixture -def expected_test_data(): - return DATA_DIR / TESTDATA - - -@requires_netCDF4 -def test_get_nearest_indices(expected_test_data): - """Test getting indices given latitude, longitude from ECMWF_MACC data.""" - data = ecmwf_macc.ECMWF_MACC(expected_test_data) - ilat, ilon = data.get_nearest_indices(38, -122) - assert ilat == 17 - assert ilon == 79 - - -@requires_netCDF4 -def test_interp_data(expected_test_data): - """Test interpolating UTC time from ECMWF_MACC data.""" - data = ecmwf_macc.ECMWF_MACC(expected_test_data) - test9am = data.interp_data( - 38, -122, datetime.datetime(2012, 11, 1, 9, 0, 0), 'aod550') - assert np.isclose(test9am, data.data.variables['aod550'][2, 17, 79]) - test12pm = data.interp_data( - 38, -122, datetime.datetime(2012, 11, 1, 12, 0, 0), 'aod550') - assert np.isclose(test12pm, data.data.variables['aod550'][3, 17, 79]) - test113301 = data.interp_data( - 38, -122, datetime.datetime(2012, 11, 1, 11, 33, 1), 'aod550') - expected = test9am + (2 + (33 + 1 / 60) / 60) / 3 * (test12pm - test9am) - assert np.isclose(test113301, expected) # 0.15515305836696536 - - -@requires_netCDF4 -def test_read_ecmwf_macc(expected_test_data): - """Test reading ECMWF_MACC data from netCDF4 file.""" - data = ecmwf_macc.read_ecmwf_macc( - expected_test_data, 38, -122) - expected_times = pd.date_range('2012-11-01 03:00', '2012-11-02 00:00', - freq='3h').astype("datetime64[ns]") - assert_index_equal(data.index.astype("datetime64[ns]"), expected_times) - expected_aod = np.array([ - 0.39531226, 0.22371339, 0.18373083, 0.15010143, 0.130809, 0.11172834, - 0.09741255, 0.0921606]) - expected_tcwv = np.array([ - 26.56172238, 22.75563109, 19.37884304, 16.19186269, 13.31990346, - 11.65635338, 10.94879802, 10.55725756]) - assert np.allclose(data.aod550.values, expected_aod) - assert np.allclose(data.tcwv.values, expected_tcwv) - assert np.allclose(data.precipitable_water.values, expected_tcwv / 10.0) - datetimes = (datetime.datetime(2012, 11, 1, 9, 0, 0), - datetime.datetime(2012, 11, 1, 12, 0, 0)) - data_9am_12pm = ecmwf_macc.read_ecmwf_macc( - expected_test_data, 38, -122, datetimes) - assert np.allclose(data_9am_12pm.aod550.values, expected_aod[2:4]) - assert np.allclose(data_9am_12pm.tcwv.values, expected_tcwv[2:4]) - - -def _create_test_data(datafile=DATAFILE, testfile=TESTDATA, start=START, - end=END, resize=RESIZE): # pragma: no cover - """ - Create test data from downloaded data. - - Downloaded data from ECMWF for a single day is 3MB. This creates a subset - of the downloaded data that is only 100kb. - """ - - import netCDF4 - - if not os.path.exists(datafile): - ecmwf_macc.get_ecmwf_macc(datafile, ("aod550", "tcwv"), start, end) - - data = netCDF4.Dataset(datafile) - testdata = netCDF4.Dataset(testfile, 'w', format="NETCDF3_64BIT_OFFSET") - - # attributes - testdata.Conventions = data.Conventions - testdata.history = "intentionally blank" - - # longitude - lon_name = 'longitude' - lon_test = data.variables[lon_name][::resize] - lon_size = lon_test.size - lon = testdata.createDimension(lon_name, lon_size) - assert not lon.isunlimited() - assert lon_test[0] == LON_BND[0] - assert (LON_BND[-1] - lon_test[-1]) == (LON_BND[-1] / lon_size) - longitudes = testdata.createVariable(lon_name, "f4", (lon_name,)) - longitudes.units = data.variables[lon_name].units - longitudes.long_name = lon_name - longitudes[:] = lon_test - - # latitude - lat_name = 'latitude' - lat_test = data.variables[lat_name][::resize] - lat = testdata.createDimension(lat_name, lat_test.size) - assert not lat.isunlimited() - assert lat_test[0] == LAT_BND[0] - assert lat_test[-1] == LAT_BND[-1] - latitudes = testdata.createVariable(lat_name, "f4", (lat_name,)) - latitudes.units = data.variables[lat_name].units - latitudes.long_name = lat_name - latitudes[:] = lat_test - - # time - time_name = 'time' - time_test = data.variables[time_name][:] - time = testdata.createDimension(time_name, None) - assert time.isunlimited() - times = testdata.createVariable(time_name, 'i4', (time_name,)) - times.units = data.variables[time_name].units - times.long_name = time_name - times.calendar = data.variables[time_name].calendar - times[:] = time_test - - # aod - aod_name = 'aod550' - aod_vars = data.variables[aod_name] - aod_dims = (time_name, lat_name, lon_name) - aod_fill_value = getattr(aod_vars, '_FillValue') - aods = testdata.createVariable( - aod_name, 'i2', aod_dims, fill_value=aod_fill_value) - for attr in aod_vars.ncattrs(): - if attr.startswith('_'): - continue - setattr(aods, attr, getattr(aod_vars, attr)) - aods[:] = aod_vars[:, ::resize, ::resize] - - # tcwv - tcwv_name = 'tcwv' - tcwv_vars = data.variables[tcwv_name] - tcwv_dims = (time_name, lat_name, lon_name) - tcwv_fill_value = getattr(tcwv_vars, '_FillValue') - tcwvs = testdata.createVariable( - tcwv_name, 'i2', tcwv_dims, fill_value=tcwv_fill_value) - for attr in tcwv_vars.ncattrs(): - if attr.startswith('_'): - continue - setattr(tcwvs, attr, getattr(tcwv_vars, attr)) - tcwvs[:] = tcwv_vars[:, ::resize, ::resize] - - data.close() - testdata.close() diff --git a/pyproject.toml b/pyproject.toml index d60aea67f6..11e8dc1ad3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,5 @@ filterwarnings =[ "ignore:`np.int` is a deprecated alias:DeprecationWarning:.*(numba|scipy):", "ignore:`np.bool` is a deprecated alias:DeprecationWarning:.*numba:", # warnings from netcdf4, but reported as coming from pvlib - "ignore:`np.bool` is a deprecated alias:DeprecationWarning:.*(ecmwf_macc|forecast):", - "ignore:tostring() is deprecated:DeprecationWarning:.*ecmwf_macc:" + "ignore:`np.bool` is a deprecated alias:DeprecationWarning:.*forecast:" ] \ No newline at end of file