diff --git a/ci/azure/conda_linux.yml b/ci/azure/conda_linux.yml
index 583ac71226..3bf8215cc4 100644
--- a/ci/azure/conda_linux.yml
+++ b/ci/azure/conda_linux.yml
@@ -38,6 +38,8 @@ jobs:
- script: |
source activate test_env
export NREL_API_KEY=$(nrelApiKey)
+ export BSRN_FTP_USERNAME=$(BSRN_FTP_USERNAME)
+ export BSRN_FTP_PASSWORD=$(BSRN_FTP_PASSWORD)
pytest pvlib --remote-data --junitxml=junit/test-results.xml --cov --cov-report=xml --cov-report=html
displayName: 'pytest'
- task: PublishTestResults@2
diff --git a/docs/sphinx/source/api.rst b/docs/sphinx/source/api.rst
index 4bd52c0fea..29ba841546 100644
--- a/docs/sphinx/source/api.rst
+++ b/docs/sphinx/source/api.rst
@@ -488,7 +488,9 @@ of sources and file formats relevant to solar energy modeling.
iotools.read_pvgis_tmy
iotools.get_pvgis_hourly
iotools.read_pvgis_hourly
+ iotools.get_bsrn
iotools.read_bsrn
+ iotools.parse_bsrn
iotools.get_cams
iotools.read_cams
iotools.parse_cams
diff --git a/docs/sphinx/source/whatsnew/v0.9.0.rst b/docs/sphinx/source/whatsnew/v0.9.0.rst
index d7286a6a58..f5c7accc11 100644
--- a/docs/sphinx/source/whatsnew/v0.9.0.rst
+++ b/docs/sphinx/source/whatsnew/v0.9.0.rst
@@ -107,8 +107,9 @@ Enhancements
:func:`~pvlib.iotools.get_pvgis_hourly` for reading and retrieving hourly
solar radiation data and PV power output from PVGIS. (:pull:`1186`,
:issue:`849`)
-* Add :func:`~pvlib.iotools.read_bsrn` for reading BSRN solar radiation data
- files. (:pull:`1145`, :issue:`1015`)
+* Add :func:`~pvlib.iotools.get_bsrn` and :func:`~pvlib.iotools.read_bsrn`
+ for retrieving and reading BSRN solar radiation data files.
+ (:pull:`1254`, :pull:`1145`, :issue:`1015`)
* Add :func:`~pvlib.iotools.get_cams`,
:func:`~pvlib.iotools.parse_cams`, and
:func:`~pvlib.iotools.read_cams`
diff --git a/pvlib/data/variables_style_rules.csv b/pvlib/data/variables_style_rules.csv
index 34190625bb..a56dddd161 100644
--- a/pvlib/data/variables_style_rules.csv
+++ b/pvlib/data/variables_style_rules.csv
@@ -7,6 +7,7 @@ dni_extra;direct normal irradiance at top of atmosphere (extraterrestrial)
dhi;diffuse horizontal irradiance
bhi;beam/direct horizontal irradiance
ghi;global horizontal irradiance
+gri;ground-reflected irradiance
aoi;angle of incidence between :math:`90\deg` and :math:`90\deg`
aoi_projection;cos(aoi)
airmass;airmass
diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py
index 69786f8630..b02ce243ae 100644
--- a/pvlib/iotools/__init__.py
+++ b/pvlib/iotools/__init__.py
@@ -15,7 +15,9 @@
from pvlib.iotools.pvgis import get_pvgis_tmy, read_pvgis_tmy # noqa: F401
from pvlib.iotools.pvgis import read_pvgis_hourly # noqa: F401
from pvlib.iotools.pvgis import get_pvgis_hourly # noqa: F401
+from pvlib.iotools.bsrn import get_bsrn # noqa: F401
from pvlib.iotools.bsrn import read_bsrn # noqa: F401
+from pvlib.iotools.bsrn import parse_bsrn # noqa: F401
from pvlib.iotools.sodapro import get_cams # noqa: F401
from pvlib.iotools.sodapro import read_cams # noqa: F401
from pvlib.iotools.sodapro import parse_cams # noqa: F401
diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py
index 0d3b31eb2c..c9c3abb93d 100644
--- a/pvlib/iotools/bsrn.py
+++ b/pvlib/iotools/bsrn.py
@@ -4,139 +4,455 @@
import pandas as pd
import gzip
+import ftplib
+import warnings
+import io
+import os
-COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27), (27, 32), (32, 39),
- (39, 45), (45, 50), (50, 55), (55, 64), (64, 70), (70, 75)]
+BSRN_FTP_URL = "ftp.bsrn.awi.de"
-BSRN_COLUMNS = ['day', 'minute',
- 'ghi', 'ghi_std', 'ghi_min', 'ghi_max',
- 'dni', 'dni_std', 'dni_min', 'dni_max',
- 'empty', 'empty', 'empty', 'empty', 'empty',
- 'dhi', 'dhi_std', 'dhi_min', 'dhi_max',
- 'lwd', 'lwd_std', 'lwd_min', 'lwd_max',
- 'temp_air', 'relative_humidity', 'pressure']
+BSRN_LR0100_COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27),
+ (27, 32), (32, 39), (39, 45), (45, 50), (50, 55),
+ (55, 64), (64, 70), (70, 75)]
+BSRN_LR0300_COL_SPECS = [(1, 3), (4, 9), (10, 16), (16, 22), (22, 27),
+ (27, 31), (31, 38), (38, 44), (44, 49), (49, 54),
+ (54, 61), (61, 67), (67, 72), (72, 78)]
-def read_bsrn(filename):
+BSRN_LR0500_COL_SPECS = [(0, 3), (3, 8), (8, 14), (14, 20), (20, 26), (26, 32),
+ (32, 38), (38, 44), (44, 50), (50, 56), (56, 62),
+ (62, 68), (68, 74), (74, 80)]
+
+BSRN_LR0100_COLUMNS = ['day', 'minute',
+ 'ghi', 'ghi_std', 'ghi_min', 'ghi_max',
+ 'dni', 'dni_std', 'dni_min', 'dni_max',
+ 'empty', 'empty', 'empty', 'empty', 'empty',
+ 'dhi', 'dhi_std', 'dhi_min', 'dhi_max',
+ 'lwd', 'lwd_std', 'lwd_min', 'lwd_max',
+ 'temp_air', 'relative_humidity', 'pressure']
+
+BSRN_LR0300_COLUMNS = ['day', 'minute', 'gri', 'gri_std', 'gri_min', 'gri_max',
+ 'lwu', 'lwu_std', 'lwu_min', 'lwu_max', 'net_radiation',
+ 'net_radiation_std', 'net_radiation_min',
+ 'net_radiation_max']
+
+BSRN_LR0500_COLUMNS = ['day', 'minute', 'uva_global', 'uva_global_std',
+ 'uva_global_min', 'uva_global_max', 'uvb_direct',
+ 'uvb_direct_std', 'uvb_direct_min', 'uvb_direct_max',
+ 'empty', 'empty', 'empty', 'empty',
+ 'uvb_global', 'uvb_global_std', 'uvb_global_min',
+ 'uvb_global_max', 'uvb_diffuse', 'uvb_diffuse_std',
+ 'uvb_diffuse', 'uvb_diffuse_std',
+ 'uvb_diffuse_min', 'uvb_diffuse_max',
+ 'uvb_reflected', 'uvb_reflected_std',
+ 'uvb_reflected_min', 'uvb_reflected_max']
+
+BSRN_COLUMNS = {'0100': BSRN_LR0100_COLUMNS, '0300': BSRN_LR0300_COLUMNS,
+ '0500': BSRN_LR0500_COLUMNS}
+
+
+def _empty_dataframe_from_logical_records(logical_records):
+ # Create an empty DataFrame with the column names corresponding to the
+ # requested logical records
+ columns = []
+ for lr in logical_records:
+ columns += BSRN_COLUMNS[lr][2:]
+ return pd.DataFrame(columns=columns)
+
+
+def get_bsrn(start, end, station, username, password,
+ logical_records=('0100',), local_path=None):
"""
- Read a BSRN station-to-archive file into a DataFrame.
+ Retrieve ground measured irradiance data from the BSRN FTP server.
The BSRN (Baseline Surface Radiation Network) is a world wide network
of high-quality solar radiation monitoring stations as described in [1]_.
- The function only parses the basic measurements (LR0100), which include
- global, diffuse, direct and downwelling long-wave radiation [2]_. Future
- updates may include parsing of additional data and meta-data.
+ Data is retrieved from the BSRN FTP server [2]_.
+
+ Data is returned for the entire months between and including start and end.
+
+ Parameters
+ ----------
+ start: datetime-like
+ First day of the requested period
+ end: datetime-like
+ Last day of the requested period
+ station: str
+ 3-letter BSRN station abbreviation
+ username: str
+ username for accessing the BSRN FTP server
+ password: str
+ password for accessing the BSRN FTP server
+ logical_records: list or tuple, default: ('0100',)
+ List of the logical records (LR) to parse. Options include: '0100',
+ '0300', and '0500'.
+ local_path: str or path-like, optional
+ If specified, path (abs. or relative) of where to save files
+
+ Returns
+ -------
+ data: DataFrame
+ timeseries data from the BSRN archive, see
+ :func:`pvlib.iotools.read_bsrn` for fields. An empty DataFrame is
+ returned if no data was found for the time period.
+ metadata: dict
+ metadata for the last available monthly file.
+
+ Raises
+ ------
+ KeyError
+ If the specified station does not exist on the FTP server.
- BSRN files are freely available and can be accessed via FTP [3]_. Required
+ Warns
+ -----
+ UserWarning
+ If one or more requested files are missing a UserWarning is returned
+ with a list of the filenames missing. If no files match the specified
+    station and timeframe a separate UserWarning is given.
+
+ Notes
+ -----
+ The username and password for the BSRN FTP server can be obtained for free
+ as described in the BSRN's Data Release Guidelines [3]_.
+
+ Currently only parsing of logical records 0100, 0300 and 0500 is supported.
+ Note not all stations measure LR0300 and LR0500. However, LR0100 is
+    mandatory as it contains the basic irradiance and auxiliary measurements.
+ See [4]_ for a description of the different logical records. Future updates
+ may include parsing of additional data and metadata.
+
+ Important
+ ---------
+ While data from the BSRN is generally of high-quality, measurement data
+ should always be quality controlled before usage!
+
+ Examples
+ --------
+ >>> # Retrieve two months irradiance data from the Cabauw BSRN station
+ >>> data, metadata = pvlib.iotools.get_bsrn( # doctest: +SKIP
+ >>> start=pd.Timestamp(2020,1,1), end=pd.Timestamp(2020,12,1), # doctest: +SKIP
+ >>> station='cab', username='yourusername', password='yourpassword') # doctest: +SKIP
+
+ See Also
+ --------
+ pvlib.iotools.read_bsrn, pvlib.iotools.parse_bsrn
+
+ References
+ ----------
+ .. [1] `World Radiation Monitoring Center - Baseline Surface Radiation
+ Network (BSRN)
+ `_
+ .. [2] `BSRN Data Retrieval via FTP
+ `_
+    .. [3] `BSRN Data Release Guidelines
+       `_
+    .. [4] `Update of the Technical Plan for BSRN Data Management, 2013,
+       Global Climate Observing System (GCOS) GCOS-174.
+       `_
+ """ # noqa: E501
+ # The FTP server uses lowercase station abbreviations
+ station = station.lower()
- username and password are easily obtainable as described in the BSRN's
- Data Release Guidelines [4]_.
+ # Generate list files to download based on start/end (SSSMMYY.dat.gz)
+ filenames = pd.date_range(
+ start, end.replace(day=1) + pd.DateOffset(months=1), freq='1M')\
+ .strftime(f"{station}%m%y.dat.gz").tolist()
+ # Create FTP connection
+ with ftplib.FTP(BSRN_FTP_URL, username, password) as ftp:
+ # Change to station sub-directory (checks that the station exists)
+ try:
+ ftp.cwd(f'/{station}')
+ except ftplib.error_perm as e:
+ raise KeyError('Station sub-directory does not exist. Specified '
+ 'station is probably not a proper three letter '
+ 'station abbreviation.') from e
+ dfs = [] # Initialize list for monthly dataframes
+        non_existing_files = []  # Initialize list of files that were not found
+ for filename in filenames:
+ try:
+ bio = io.BytesIO() # Initialize BytesIO object
+ # Retrieve binary file from server and write to BytesIO object
+                response = ftp.retrbinary(f'RETR {filename}', bio.write)
+                # Check that transfer was successful
+ if not response.startswith('226 Transfer complete'):
+ raise ftplib.Error(response)
+ # Save file locally if local_path is specified
+ if local_path is not None:
+ # Create local file
+ with open(os.path.join(local_path, filename), 'wb') as f:
+ f.write(bio.getbuffer()) # Write local file
+ # Open gzip file and convert to StringIO
+ bio.seek(0) # reset buffer to start of file
+ gzip_file = io.TextIOWrapper(gzip.GzipFile(fileobj=bio),
+ encoding='latin1')
+ dfi, metadata = parse_bsrn(gzip_file, logical_records)
+ dfs.append(dfi)
+ # FTP client raises an error if the file does not exist on server
+ except ftplib.error_perm as e:
+ if str(e) == '550 Failed to open file.':
+ non_existing_files.append(filename)
+ else:
+ raise ftplib.error_perm(e)
+ ftp.quit() # Close and exit FTP connection
+ # Raise user warnings
+ if not dfs: # If no files were found
+ warnings.warn('No files were available for the specified timeframe.')
+ elif non_existing_files: # If only some files were missing
+ warnings.warn(f'The following files were not found: {non_existing_files}') # noqa: E501
+
+ # Concatenate monthly dataframes to one dataframe
+ if len(dfs):
+ data = pd.concat(dfs, axis='rows')
+ else: # Return empty dataframe
+ data = _empty_dataframe_from_logical_records(logical_records)
+ metadata = {}
+ # Return dataframe and metadata (metadata belongs to last available file)
+ return data, metadata
+
+
+def parse_bsrn(fbuf, logical_records=('0100',)):
+ """
+ Parse a file-like buffer of a BSRN station-to-archive file.
Parameters
----------
- filename: str
- A relative or absolute file path.
+ fbuf: file-like buffer
+ Buffer of a BSRN station-to-archive data file
+ logical_records: list or tuple, default: ('0100',)
+ List of the logical records (LR) to parse. Options include: '0100',
+ '0300', and '0500'.
Returns
-------
data: DataFrame
- A DataFrame with the columns as described below. For more extensive
- description of the variables, consult [2]_.
+ timeseries data from the BSRN archive, see
+ :func:`pvlib.iotools.read_bsrn` for fields. An empty DataFrame is
+ returned if the specified logical records were not found.
+ metadata: dict
+ Dictionary containing metadata (primarily from LR0004).
+
+ See Also
+ --------
+ pvlib.iotools.read_bsrn, pvlib.iotools.get_bsrn
+
+ """
+ # Parse metadata
+ fbuf.readline() # first line should be *U0001, so read it and discard
+ date_line = fbuf.readline() # second line contains important metadata
+ start_date = pd.Timestamp(year=int(date_line[7:11]),
+ month=int(date_line[3:6]), day=1,
+ tz='UTC') # BSRN timestamps are UTC
+
+    metadata = {}  # Initialize dictionary containing metadata
+ metadata['start date'] = start_date
+ metadata['station identification number'] = int(date_line[:3])
+ metadata['version of data'] = int(date_line.split()[-1])
+ for line in fbuf:
+ if line[2:6] == '0004': # stop once LR0004 has been reached
+ break
+ elif line == '':
+ raise ValueError('Mandatory record LR0004 not found.')
+ metadata['date when station description changed'] = fbuf.readline().strip()
+ metadata['surface type'] = int(fbuf.readline(3))
+ metadata['topography type'] = int(fbuf.readline())
+ metadata['address'] = fbuf.readline().strip()
+ metadata['telephone no. of station'] = fbuf.readline(20).strip()
+ metadata['FAX no. of station'] = fbuf.readline().strip()
+ metadata['TCP/IP no. of station'] = fbuf.readline(15).strip()
+ metadata['e-mail address of station'] = fbuf.readline().strip()
+ metadata['latitude_bsrn'] = float(fbuf.readline(8)) # BSRN convention
+ metadata['latitude'] = metadata['latitude_bsrn'] - 90 # ISO 19115
+ metadata['longitude_bsrn'] = float(fbuf.readline(8)) # BSRN convention
+ metadata['longitude'] = metadata['longitude_bsrn'] - 180 # ISO 19115
+ metadata['altitude'] = int(fbuf.readline(5))
+ metadata['identification of "SYNOP" station'] = fbuf.readline().strip()
+ metadata['date when horizon changed'] = fbuf.readline().strip()
+ # Pass last section of LR0004 containing the horizon elevation data
+ horizon = [] # list for raw horizon elevation data
+ while True:
+ line = fbuf.readline()
+ if ('*' in line) | (line == ''):
+ break
+ else:
+ horizon += [int(i) for i in line.split()]
+ horizon = pd.Series(horizon[1::2], horizon[::2], name='horizon_elevation',
+ dtype=int).drop(-1, errors='ignore').sort_index()
+ horizon.index.name = 'azimuth'
+ metadata['horizon'] = horizon
+
+ # Read file and store the starting line number and number of lines for
+ # each logical record (LR)
+ fbuf.seek(0) # reset buffer to start of file
+ lr_startrow = {} # Dictionary of starting line number for each LR
+ lr_nrows = {} # Dictionary of end line number for each LR
+ for num, line in enumerate(fbuf):
+ if line.startswith('*'): # Find start of all logical records
+ if len(lr_startrow) >= 1:
+ lr_nrows[lr] = num - lr_startrow[lr] - 1 # noqa: F821
+ lr = line[2:6] # string of 4 digit LR number
+ lr_startrow[lr] = num
+ lr_nrows[lr] = num - lr_startrow[lr]
+
+ for lr in logical_records:
+ if lr not in ['0100', '0300', '0500']:
+ raise ValueError(f"Logical record {lr} not in "
+ "['0100', '0300','0500'].")
+ dfs = [] # Initialize empty list for dataframe
+
+ # Parse LR0100 - basic measurements including GHI, DNI, DHI and temperature
+ if ('0100' in lr_startrow.keys()) & ('0100' in logical_records):
+ fbuf.seek(0) # reset buffer to start of file
+ LR_0100 = pd.read_fwf(fbuf, skiprows=lr_startrow['0100'] + 1,
+ nrows=lr_nrows['0100'], header=None,
+ colspecs=BSRN_LR0100_COL_SPECS,
+ na_values=[-999.0, -99.9])
+ # Create multi-index and unstack, resulting in 1 col for each variable
+ LR_0100 = LR_0100.set_index([LR_0100.index // 2, LR_0100.index % 2])
+ LR_0100 = LR_0100.unstack(level=1).swaplevel(i=0, j=1, axis='columns')
+ # Sort columns to match original order and assign column names
+ LR_0100 = LR_0100.reindex(sorted(LR_0100.columns), axis='columns')
+ LR_0100.columns = BSRN_LR0100_COLUMNS
+ # Set datetime index
+ LR_0100.index = (start_date+pd.to_timedelta(LR_0100['day']-1, unit='d')
+ + pd.to_timedelta(LR_0100['minute'], unit='T'))
+ # Drop empty, minute, and day columns
+ LR_0100 = LR_0100.drop(columns=['empty', 'day', 'minute'])
+ dfs.append(LR_0100)
+
+ # Parse LR0300 - other time series data, including upward and net radiation
+ if ('0300' in lr_startrow.keys()) & ('0300' in logical_records):
+ fbuf.seek(0) # reset buffer to start of file
+ LR_0300 = pd.read_fwf(fbuf, skiprows=lr_startrow['0300']+1,
+ nrows=lr_nrows['0300'], header=None,
+ na_values=[-999.0, -99.9],
+ colspecs=BSRN_LR0300_COL_SPECS,
+ names=BSRN_LR0300_COLUMNS)
+ LR_0300.index = (start_date+pd.to_timedelta(LR_0300['day']-1, unit='d')
+ + pd.to_timedelta(LR_0300['minute'], unit='T'))
+ LR_0300 = LR_0300.drop(columns=['day', 'minute']).astype(float)
+ dfs.append(LR_0300)
+
+ # Parse LR0500 - UV measurements
+ if ('0500' in lr_startrow.keys()) & ('0500' in logical_records):
+ fbuf.seek(0) # reset buffer to start of file
+ LR_0500 = pd.read_fwf(fbuf, skiprows=lr_startrow['0500']+1,
+ nrows=lr_nrows['0500'], na_values=[-99.9],
+ header=None, colspecs=BSRN_LR0500_COL_SPECS)
+ # Create multi-index and unstack, resulting in 1 col for each variable
+ LR_0500 = LR_0500.set_index([LR_0500.index // 2, LR_0500.index % 2])
+ LR_0500 = LR_0500.unstack(level=1).swaplevel(i=0, j=1, axis='columns')
+ # Sort columns to match original order and assign column names
+ LR_0500 = LR_0500.reindex(sorted(LR_0500.columns), axis='columns')
+ LR_0500.columns = BSRN_LR0500_COLUMNS
+ LR_0500.index = (start_date+pd.to_timedelta(LR_0500['day']-1, unit='d')
+ + pd.to_timedelta(LR_0500['minute'], unit='T'))
+ LR_0500 = LR_0500.drop(columns=['empty', 'day', 'minute'])
+ dfs.append(LR_0500)
+
+ if len(dfs):
+ data = pd.concat(dfs, axis='columns')
+ else:
+ data = _empty_dataframe_from_logical_records(logical_records)
+ metadata = {}
+ return data, metadata
+
+
+def read_bsrn(filename, logical_records=('0100',)):
+ """
+ Read a BSRN station-to-archive file into a DataFrame.
+
+ The BSRN (Baseline Surface Radiation Network) is a world wide network
+ of high-quality solar radiation monitoring stations as described in [1]_.
+ The function is able to parse logical records (LR) 0100, 0300, and 0500.
+ LR0100 contains the basic measurements, which include global, diffuse, and
+ direct irradiance, as well as downwelling long-wave radiation [2]_. Future
+ updates may include parsing of additional data and metadata.
+
+ BSRN files are freely available and can be accessed via FTP [3]_. The
+ username and password for the BSRN FTP server can be obtained for free as
+ described in the BSRN's Data Release Guidelines [3]_.
+
+ Parameters
+ ----------
+ filename: str or path-like
+ Name or path of a BSRN station-to-archive data file
+ logical_records: list or tuple, default: ('0100',)
+ List of the logical records (LR) to parse. Options include: '0100',
+ '0300', and '0500'.
+
+ Returns
+ -------
+ data: DataFrame
+ A DataFrame with the columns as described below. For a more extensive
+ description of the variables, consult [2]_. An empty DataFrame is
+ returned if the specified logical records were not found.
+ metadata: dict
+ Dictionary containing metadata (primarily from LR0004).
Notes
-----
- The data DataFrame includes the following fields:
+ The data DataFrame for LR0100 includes the following fields:
======================= ====== ==========================================
Key Format Description
======================= ====== ==========================================
- day int Day of the month 1-31
- minute int Minute of the day 0-1439
- ghi float Mean global horizontal irradiance [W/m^2]
- ghi_std float Std. global horizontal irradiance [W/m^2]
- ghi_min float Min. global horizontal irradiance [W/m^2]
- ghi_max float Max. global horizontal irradiance [W/m^2]
- dni float Mean direct normal irradiance [W/m^2]
- dni_std float Std. direct normal irradiance [W/m^2]
- dni_min float Min. direct normal irradiance [W/m^2]
- dni_max float Max. direct normal irradiance [W/m^2]
- dhi float Mean diffuse horizontal irradiance [W/m^2]
- dhi_std float Std. diffuse horizontal irradiance [W/m^2]
- dhi_min float Min. diffuse horizontal irradiance [W/m^2]
- dhi_max float Max. diffuse horizontal irradiance [W/m^2]
- lwd float Mean. downward long-wave radiation [W/m^2]
- lwd_std float Std. downward long-wave radiation [W/m^2]
- lwd_min float Min. downward long-wave radiation [W/m^2]
- lwd_max float Max. downward long-wave radiation [W/m^2]
+ **Logical record 0100**
+ ---------------------------------------------------------------------------
+ ghi† float Mean global horizontal irradiance [W/m^2]
+ dni† float Mean direct normal irradiance [W/m^2]
+ dhi† float Mean diffuse horizontal irradiance [W/m^2]
+ lwd† float Mean. downward long-wave radiation [W/m^2]
temp_air float Air temperature [°C]
relative_humidity float Relative humidity [%]
pressure float Atmospheric pressure [hPa]
+ ----------------------- ------ ------------------------------------------
+ **Logical record 0300**
+ ---------------------------------------------------------------------------
+ gri† float Mean ground-reflected irradiance [W/m^2]
+ lwu† float Mean long-wave upwelling irradiance [W/m^2]
+ net_radiation† float Mean net radiation (net radiometer) [W/m^2]
+ ----------------------- ------ ------------------------------------------
+ **Logical record 0500**
+ ---------------------------------------------------------------------------
+ uva_global† float Mean UV-A global irradiance [W/m^2]
+ uvb_direct† float Mean UV-B direct irradiance [W/m^2]
+ uvb_global† float Mean UV-B global irradiance [W/m^2]
+ uvb_diffuse† float Mean UV-B diffuse irradiance [W/m^2]
+ uvb_reflected† float Mean UV-B reflected irradiance [W/m^2]
======================= ====== ==========================================
+ † Marked variables have corresponding columns for the standard deviation
+ (_std), minimum (_min), and maximum (_max) calculated from the 60 samples
+    that are averaged into each 1-minute measurement.
+
+ Hint
+ ----
+ According to [2]_ "All time labels in the station-to-archive files denote
+ the start of a time interval." This corresponds to left bin edge labeling.
+
+ See Also
+ --------
+ pvlib.iotools.parse_bsrn, pvlib.iotools.get_bsrn
+
References
----------
.. [1] `World Radiation Monitoring Center - Baseline Surface Radiation
Network (BSRN)
`_
.. [2] `Update of the Technical Plan for BSRN Data Management, 2013,
- Global Climate Observing System (GCOS) GCOS-172.
+ Global Climate Observing System (GCOS) GCOS-174.
`_
.. [3] `BSRN Data Retrieval via FTP
`_
.. [4] `BSRN Data Release Guidelines
`_
- """
-
- # Read file and store the starting line number for each logical record (LR)
- line_no_dict = {}
+ """ # noqa: E501
if str(filename).endswith('.gz'): # check if file is a gzipped (.gz) file
open_func, mode = gzip.open, 'rt'
else:
open_func, mode = open, 'r'
with open_func(filename, mode) as f:
- f.readline() # first line should be *U0001, so read it and discard
- line_no_dict['0001'] = 0
- date_line = f.readline() # second line contains the year and month
- start_date = pd.Timestamp(year=int(date_line[7:11]),
- month=int(date_line[3:6]), day=1,
- tz='UTC') # BSRN timestamps are UTC
- for num, line in enumerate(f, start=2):
- if line.startswith('*'): # Find start of all logical records
- line_no_dict[line[2:6]] = num # key is 4 digit LR number
-
- # Determine start and end line of logical record LR0100 to be parsed
- start_row = line_no_dict['0100'] + 1 # Start line number
- # If LR0100 is the last logical record, then read rest of file
- if start_row-1 == max(line_no_dict.values()):
- end_row = num # then parse rest of the file
- else: # otherwise parse until the beginning of the next logical record
- end_row = min([i for i in line_no_dict.values() if i > start_row]) - 1
- nrows = end_row-start_row+1
-
- # Read file as a fixed width file (fwf)
- data = pd.read_fwf(filename, skiprows=start_row, nrows=nrows, header=None,
- colspecs=COL_SPECS, na_values=[-999.0, -99.9],
- compression='infer')
-
- # Create multi-index and unstack, resulting in one column for each variable
- data = data.set_index([data.index // 2, data.index % 2])
- data = data.unstack(level=1).swaplevel(i=0, j=1, axis='columns')
-
- # Sort columns to match original order and assign column names
- data = data.reindex(sorted(data.columns), axis='columns')
- data.columns = BSRN_COLUMNS
- # Drop empty columns
- data = data.drop('empty', axis='columns')
-
- # Change day and minute type to integer
- data['day'] = data['day'].astype('Int64')
- data['minute'] = data['minute'].astype('Int64')
-
- # Set datetime index
- data.index = (start_date
- + pd.to_timedelta(data['day']-1, unit='d')
- + pd.to_timedelta(data['minute'], unit='T'))
-
- return data
+ content = parse_bsrn(f, logical_records)
+ return content
diff --git a/pvlib/tests/conftest.py b/pvlib/tests/conftest.py
index 95d0f725d9..a3cba1e7b8 100644
--- a/pvlib/tests/conftest.py
+++ b/pvlib/tests/conftest.py
@@ -3,6 +3,7 @@
import warnings
import pandas as pd
+import os
from pkg_resources import parse_version
import pytest
from functools import wraps
@@ -82,6 +83,18 @@ def assert_frame_equal(left, right, **kwargs):
reason='does not run on windows')
+try:
+ # Attempt to load BSRN credentials used for testing pvlib.iotools.get_bsrn
+ bsrn_username = os.environ["BSRN_FTP_USERNAME"]
+ bsrn_password = os.environ["BSRN_FTP_PASSWORD"]
+ has_bsrn_credentials = True
+except KeyError:
+ has_bsrn_credentials = False
+
+requires_bsrn_credentials = pytest.mark.skipif(
+ not has_bsrn_credentials, reason='requires bsrn credentials')
+
+
try:
import statsmodels # noqa: F401
has_statsmodels = True
diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py
index 18d4be60f7..412cbd5e8f 100644
--- a/pvlib/tests/iotools/test_bsrn.py
+++ b/pvlib/tests/iotools/test_bsrn.py
@@ -2,25 +2,123 @@
tests for :mod:`pvlib.iotools.bsrn`
"""
-
import pandas as pd
import pytest
+import os
+from pvlib.iotools import read_bsrn, get_bsrn
+from ..conftest import (DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal,
+ requires_bsrn_credentials)
+
+
+@pytest.fixture(scope="module")
+def bsrn_credentials():
+ """Supplies the BSRN FTP credentials for testing purposes.
-from pvlib.iotools import bsrn
-from ..conftest import DATA_DIR, assert_index_equal
+ Users should obtain their own credentials as described in the `read_bsrn`
+ documentation."""
+ bsrn_username = os.environ["BSRN_FTP_USERNAME"]
+ bsrn_password = os.environ["BSRN_FTP_PASSWORD"]
+ return bsrn_username, bsrn_password
-@pytest.mark.parametrize('testfile,expected_index', [
- ('bsrn-pay0616.dat.gz',
- pd.date_range(start='20160601', periods=43200, freq='1min', tz='UTC')),
- ('bsrn-lr0100-pay0616.dat',
- pd.date_range(start='20160601', periods=43200, freq='1min', tz='UTC')),
+@pytest.fixture
+def expected_index():
+ return pd.date_range(start='20160601', periods=43200, freq='1min',
+ tz='UTC')
+
+
+@pytest.mark.parametrize('testfile', [
+ ('bsrn-pay0616.dat.gz'),
+ ('bsrn-lr0100-pay0616.dat'),
])
def test_read_bsrn(testfile, expected_index):
- data = bsrn.read_bsrn(DATA_DIR / testfile)
+ data, metadata = read_bsrn(DATA_DIR / testfile)
+ assert_index_equal(expected_index, data.index)
+ assert 'ghi' in data.columns
+ assert 'dni_std' in data.columns
+ assert 'dhi_min' in data.columns
+ assert 'lwd_max' in data.columns
+ assert 'relative_humidity' in data.columns
+
+
+def test_read_bsrn_logical_records(expected_index):
+    # Test if logical records 0300 and 0500 are correctly parsed
+    # and that 0100 is not parsed when not specified
+ data, metadata = read_bsrn(DATA_DIR / 'bsrn-pay0616.dat.gz',
+ logical_records=['0300', '0500'])
+ assert_index_equal(expected_index, data.index)
+ assert 'lwu' in data.columns
+ assert 'uva_global' in data.columns
+ assert 'uvb_reflected_std' in data.columns
+ assert 'ghi' not in data.columns
+
+
+def test_read_bsrn_bad_logical_record():
+ # Test if ValueError is raised if an unsupported logical record is passed
+ with pytest.raises(ValueError, match='not in'):
+ read_bsrn(DATA_DIR / 'bsrn-lr0100-pay0616.dat',
+ logical_records=['dummy'])
+
+
+def test_read_bsrn_logical_records_not_found():
+ # Test if an empty dataframe is returned if specified LRs are not present
+ data, metadata = read_bsrn(DATA_DIR / 'bsrn-lr0100-pay0616.dat',
+ logical_records=['0300', '0500'])
+ assert data.empty # assert that the dataframe is empty
+ assert 'uva_global' in data.columns
+ assert 'uvb_reflected_std' in data.columns
+ assert 'uva_global_max' in data.columns
+ assert 'dni' not in data.columns
+ assert 'day' not in data.columns
+
+
+@requires_bsrn_credentials
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_bsrn(expected_index, bsrn_credentials):
+ # Retrieve irradiance data from the BSRN FTP server
+ # the TAM station is chosen due to its small file sizes
+ username, password = bsrn_credentials
+ data, metadata = get_bsrn(
+ start=pd.Timestamp(2016, 6, 1),
+ end=pd.Timestamp(2016, 6, 29),
+ station='tam',
+ username=username,
+ password=password,
+ local_path='')
assert_index_equal(expected_index, data.index)
assert 'ghi' in data.columns
assert 'dni_std' in data.columns
assert 'dhi_min' in data.columns
assert 'lwd_max' in data.columns
assert 'relative_humidity' in data.columns
+
+
+@requires_bsrn_credentials
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_bsrn_bad_station(bsrn_credentials):
+ # Test if KeyError is raised if a bad station name is passed
+ username, password = bsrn_credentials
+ with pytest.raises(KeyError, match='sub-directory does not exist'):
+ get_bsrn(
+ start=pd.Timestamp(2016, 6, 1),
+ end=pd.Timestamp(2016, 6, 29),
+ station='not_a_station_name',
+ username=username,
+ password=password)
+
+
+@requires_bsrn_credentials
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_bsrn_no_files(bsrn_credentials):
+ username, password = bsrn_credentials
+ # Test if Warning is given if no files are found for the entire time frame
+ with pytest.warns(UserWarning, match='No files'):
+ get_bsrn(
+ start=pd.Timestamp(1990, 6, 1),
+ end=pd.Timestamp(1990, 6, 29),
+ station='tam',
+ username=username,
+ password=password)