diff --git a/ci/azure/conda_linux.yml b/ci/azure/conda_linux.yml index 583ac71226..3bf8215cc4 100644 --- a/ci/azure/conda_linux.yml +++ b/ci/azure/conda_linux.yml @@ -38,6 +38,8 @@ jobs: - script: | source activate test_env export NREL_API_KEY=$(nrelApiKey) + export BSRN_FTP_USERNAME=$(BSRN_FTP_USERNAME) + export BSRN_FTP_PASSWORD=$(BSRN_FTP_PASSWORD) pytest pvlib --remote-data --junitxml=junit/test-results.xml --cov --cov-report=xml --cov-report=html displayName: 'pytest' - task: PublishTestResults@2 diff --git a/docs/sphinx/source/api.rst b/docs/sphinx/source/api.rst index 4bd52c0fea..29ba841546 100644 --- a/docs/sphinx/source/api.rst +++ b/docs/sphinx/source/api.rst @@ -488,7 +488,9 @@ of sources and file formats relevant to solar energy modeling. iotools.read_pvgis_tmy iotools.get_pvgis_hourly iotools.read_pvgis_hourly + iotools.get_bsrn iotools.read_bsrn + iotools.parse_bsrn iotools.get_cams iotools.read_cams iotools.parse_cams diff --git a/docs/sphinx/source/whatsnew/v0.9.0.rst b/docs/sphinx/source/whatsnew/v0.9.0.rst index d7286a6a58..f5c7accc11 100644 --- a/docs/sphinx/source/whatsnew/v0.9.0.rst +++ b/docs/sphinx/source/whatsnew/v0.9.0.rst @@ -107,8 +107,9 @@ Enhancements :func:`~pvlib.iotools.get_pvgis_hourly` for reading and retrieving hourly solar radiation data and PV power output from PVGIS. (:pull:`1186`, :issue:`849`) -* Add :func:`~pvlib.iotools.read_bsrn` for reading BSRN solar radiation data - files. (:pull:`1145`, :issue:`1015`) +* Add :func:`~pvlib.iotools.get_bsrn` and :func:`~pvlib.iotools.read_bsrn` + for retrieving and reading BSRN solar radiation data files. 
+ (:pull:`1254`, :pull:`1145`, :issue:`1015`) * Add :func:`~pvlib.iotools.get_cams`, :func:`~pvlib.iotools.parse_cams`, and :func:`~pvlib.iotools.read_cams` diff --git a/pvlib/data/variables_style_rules.csv b/pvlib/data/variables_style_rules.csv index 34190625bb..a56dddd161 100644 --- a/pvlib/data/variables_style_rules.csv +++ b/pvlib/data/variables_style_rules.csv @@ -7,6 +7,7 @@ dni_extra;direct normal irradiance at top of atmosphere (extraterrestrial) dhi;diffuse horizontal irradiance bhi;beam/direct horizontal irradiance ghi;global horizontal irradiance +gri;ground-reflected irradiance aoi;angle of incidence between :math:`90\deg` and :math:`90\deg` aoi_projection;cos(aoi) airmass;airmass diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py index 69786f8630..b02ce243ae 100644 --- a/pvlib/iotools/__init__.py +++ b/pvlib/iotools/__init__.py @@ -15,7 +15,9 @@ from pvlib.iotools.pvgis import get_pvgis_tmy, read_pvgis_tmy # noqa: F401 from pvlib.iotools.pvgis import read_pvgis_hourly # noqa: F401 from pvlib.iotools.pvgis import get_pvgis_hourly # noqa: F401 +from pvlib.iotools.bsrn import get_bsrn # noqa: F401 from pvlib.iotools.bsrn import read_bsrn # noqa: F401 +from pvlib.iotools.bsrn import parse_bsrn # noqa: F401 from pvlib.iotools.sodapro import get_cams # noqa: F401 from pvlib.iotools.sodapro import read_cams # noqa: F401 from pvlib.iotools.sodapro import parse_cams # noqa: F401 diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 0d3b31eb2c..c9c3abb93d 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -4,139 +4,455 @@ import pandas as pd import gzip +import ftplib +import warnings +import io +import os -COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27), (27, 32), (32, 39), - (39, 45), (45, 50), (50, 55), (55, 64), (64, 70), (70, 75)] +BSRN_FTP_URL = "ftp.bsrn.awi.de" -BSRN_COLUMNS = ['day', 'minute', - 'ghi', 'ghi_std', 'ghi_min', 'ghi_max', - 'dni', 'dni_std', 'dni_min', 'dni_max', - 'empty', 'empty', 
'empty', 'empty', 'empty', - 'dhi', 'dhi_std', 'dhi_min', 'dhi_max', - 'lwd', 'lwd_std', 'lwd_min', 'lwd_max', - 'temp_air', 'relative_humidity', 'pressure'] +BSRN_LR0100_COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27), + (27, 32), (32, 39), (39, 45), (45, 50), (50, 55), + (55, 64), (64, 70), (70, 75)] +BSRN_LR0300_COL_SPECS = [(1, 3), (4, 9), (10, 16), (16, 22), (22, 27), + (27, 31), (31, 38), (38, 44), (44, 49), (49, 54), + (54, 61), (61, 67), (67, 72), (72, 78)] -def read_bsrn(filename): +BSRN_LR0500_COL_SPECS = [(0, 3), (3, 8), (8, 14), (14, 20), (20, 26), (26, 32), + (32, 38), (38, 44), (44, 50), (50, 56), (56, 62), + (62, 68), (68, 74), (74, 80)] + +BSRN_LR0100_COLUMNS = ['day', 'minute', + 'ghi', 'ghi_std', 'ghi_min', 'ghi_max', + 'dni', 'dni_std', 'dni_min', 'dni_max', + 'empty', 'empty', 'empty', 'empty', 'empty', + 'dhi', 'dhi_std', 'dhi_min', 'dhi_max', + 'lwd', 'lwd_std', 'lwd_min', 'lwd_max', + 'temp_air', 'relative_humidity', 'pressure'] + +BSRN_LR0300_COLUMNS = ['day', 'minute', 'gri', 'gri_std', 'gri_min', 'gri_max', + 'lwu', 'lwu_std', 'lwu_min', 'lwu_max', 'net_radiation', + 'net_radiation_std', 'net_radiation_min', + 'net_radiation_max'] + +BSRN_LR0500_COLUMNS = ['day', 'minute', 'uva_global', 'uva_global_std', + 'uva_global_min', 'uva_global_max', 'uvb_direct', + 'uvb_direct_std', 'uvb_direct_min', 'uvb_direct_max', + 'empty', 'empty', 'empty', 'empty', + 'uvb_global', 'uvb_global_std', 'uvb_global_min', + 'uvb_global_max', 'uvb_diffuse', 'uvb_diffuse_std', + 'uvb_diffuse', 'uvb_diffuse_std', + 'uvb_diffuse_min', 'uvb_diffuse_max', + 'uvb_reflected', 'uvb_reflected_std', + 'uvb_reflected_min', 'uvb_reflected_max'] + +BSRN_COLUMNS = {'0100': BSRN_LR0100_COLUMNS, '0300': BSRN_LR0300_COLUMNS, + '0500': BSRN_LR0500_COLUMNS} + + +def _empty_dataframe_from_logical_records(logical_records): + # Create an empty DataFrame with the column names corresponding to the + # requested logical records + columns = [] + for lr in logical_records: + 
columns += BSRN_COLUMNS[lr][2:] + return pd.DataFrame(columns=columns) + + +def get_bsrn(start, end, station, username, password, + logical_records=('0100',), local_path=None): """ - Read a BSRN station-to-archive file into a DataFrame. + Retrieve ground measured irradiance data from the BSRN FTP server. The BSRN (Baseline Surface Radiation Network) is a world wide network of high-quality solar radiation monitoring stations as described in [1]_. - The function only parses the basic measurements (LR0100), which include - global, diffuse, direct and downwelling long-wave radiation [2]_. Future - updates may include parsing of additional data and meta-data. + Data is retrieved from the BSRN FTP server [2]_. + + Data is returned for the entire months between and including start and end. + + Parameters + ---------- + start: datetime-like + First day of the requested period + end: datetime-like + Last day of the requested period + station: str + 3-letter BSRN station abbreviation + username: str + username for accessing the BSRN FTP server + password: str + password for accessing the BSRN FTP server + logical_records: list or tuple, default: ('0100',) + List of the logical records (LR) to parse. Options include: '0100', + '0300', and '0500'. + local_path: str or path-like, optional + If specified, path (abs. or relative) of where to save files + + Returns + ------- + data: DataFrame + timeseries data from the BSRN archive, see + :func:`pvlib.iotools.read_bsrn` for fields. An empty DataFrame is + returned if no data was found for the time period. + metadata: dict + metadata for the last available monthly file. + + Raises + ------ + KeyError + If the specified station does not exist on the FTP server. - BSRN files are freely available and can be accessed via FTP [3]_. Required + Warns + ----- + UserWarning + If one or more requested files are missing a UserWarning is returned + with a list of the filenames missing. 
If no files match the specified + station and timeframe a separate UserWarning is given. + + Notes + ----- + The username and password for the BSRN FTP server can be obtained for free + as described in the BSRN's Data Release Guidelines [3]_. + + Currently only parsing of logical records 0100, 0300 and 0500 is supported. + Note not all stations measure LR0300 and LR0500. However, LR0100 is + mandatory as it contains the basic irradiance and auxiliary measurements. + See [4]_ for a description of the different logical records. Future updates + may include parsing of additional data and metadata. + + Important + --------- + While data from the BSRN is generally of high-quality, measurement data + should always be quality controlled before usage! + + Examples + -------- + >>> # Retrieve two months irradiance data from the Cabauw BSRN station + >>> data, metadata = pvlib.iotools.get_bsrn( # doctest: +SKIP + >>> start=pd.Timestamp(2020,1,1), end=pd.Timestamp(2020,12,1), # doctest: +SKIP + >>> station='cab', username='yourusername', password='yourpassword') # doctest: +SKIP + + See Also + -------- + pvlib.iotools.read_bsrn, pvlib.iotools.parse_bsrn + + References + ---------- + .. [1] `World Radiation Monitoring Center - Baseline Surface Radiation + Network (BSRN) + `_ + .. [2] `BSRN Data Retrieval via FTP + `_ + .. [3] `BSRN Data Release Guidelines + `_ + .. [4] `Update of the Technical Plan for BSRN Data Management, 2013, + Global Climate Observing System (GCOS) GCOS-174. + `_ + """ # noqa: E501 + # The FTP server uses lowercase station abbreviations + station = station.lower() - username and password are easily obtainable as described in the BSRN's - Data Release Guidelines [4]_. 
+ # Generate list files to download based on start/end (SSSMMYY.dat.gz) + filenames = pd.date_range( + start, end.replace(day=1) + pd.DateOffset(months=1), freq='1M')\ + .strftime(f"{station}%m%y.dat.gz").tolist() + # Create FTP connection + with ftplib.FTP(BSRN_FTP_URL, username, password) as ftp: + # Change to station sub-directory (checks that the station exists) + try: + ftp.cwd(f'/{station}') + except ftplib.error_perm as e: + raise KeyError('Station sub-directory does not exist. Specified ' + 'station is probably not a proper three letter ' + 'station abbreviation.') from e + dfs = [] # Initialize list for monthly dataframes + non_existing_files = [] # Initialize list of files that were not found + for filename in filenames: + try: + bio = io.BytesIO() # Initialize BytesIO object + # Retrieve binary file from server and write to BytesIO object + response = ftp.retrbinary(f'RETR {filename}', bio.write) + # Check that transfer was successful + if not response.startswith('226 Transfer complete'): + raise ftplib.Error(response) + # Save file locally if local_path is specified + if local_path is not None: + # Create local file + with open(os.path.join(local_path, filename), 'wb') as f: + f.write(bio.getbuffer()) # Write local file + # Open gzip file and convert to StringIO + bio.seek(0) # reset buffer to start of file + gzip_file = io.TextIOWrapper(gzip.GzipFile(fileobj=bio), + encoding='latin1') + dfi, metadata = parse_bsrn(gzip_file, logical_records) + dfs.append(dfi) + # FTP client raises an error if the file does not exist on server + except ftplib.error_perm as e: + if str(e) == '550 Failed to open file.': + non_existing_files.append(filename) + else: + raise ftplib.error_perm(e) + ftp.quit() # Close and exit FTP connection + # Raise user warnings + if not dfs: # If no files were found + warnings.warn('No files were available for the specified timeframe.') + elif non_existing_files: # If only some files were missing + warnings.warn(f'The following files were 
not found: {non_existing_files}') # noqa: E501 + + # Concatenate monthly dataframes to one dataframe + if len(dfs): + data = pd.concat(dfs, axis='rows') + else: # Return empty dataframe + data = _empty_dataframe_from_logical_records(logical_records) + metadata = {} + # Return dataframe and metadata (metadata belongs to last available file) + return data, metadata + + +def parse_bsrn(fbuf, logical_records=('0100',)): + """ + Parse a file-like buffer of a BSRN station-to-archive file. Parameters ---------- - filename: str - A relative or absolute file path. + fbuf: file-like buffer + Buffer of a BSRN station-to-archive data file + logical_records: list or tuple, default: ('0100',) + List of the logical records (LR) to parse. Options include: '0100', + '0300', and '0500'. Returns ------- data: DataFrame - A DataFrame with the columns as described below. For more extensive - description of the variables, consult [2]_. + timeseries data from the BSRN archive, see + :func:`pvlib.iotools.read_bsrn` for fields. An empty DataFrame is + returned if the specified logical records were not found. + metadata: dict + Dictionary containing metadata (primarily from LR0004). 
+ + See Also + -------- + pvlib.iotools.read_bsrn, pvlib.iotools.get_bsrn + + """ + # Parse metadata + fbuf.readline() # first line should be *U0001, so read it and discard + date_line = fbuf.readline() # second line contains important metadata + start_date = pd.Timestamp(year=int(date_line[7:11]), + month=int(date_line[3:6]), day=1, + tz='UTC') # BSRN timestamps are UTC + + metadata = {} # Initilize dictionary containing metadata + metadata['start date'] = start_date + metadata['station identification number'] = int(date_line[:3]) + metadata['version of data'] = int(date_line.split()[-1]) + for line in fbuf: + if line[2:6] == '0004': # stop once LR0004 has been reached + break + elif line == '': + raise ValueError('Mandatory record LR0004 not found.') + metadata['date when station description changed'] = fbuf.readline().strip() + metadata['surface type'] = int(fbuf.readline(3)) + metadata['topography type'] = int(fbuf.readline()) + metadata['address'] = fbuf.readline().strip() + metadata['telephone no. of station'] = fbuf.readline(20).strip() + metadata['FAX no. of station'] = fbuf.readline().strip() + metadata['TCP/IP no. 
of station'] = fbuf.readline(15).strip() + metadata['e-mail address of station'] = fbuf.readline().strip() + metadata['latitude_bsrn'] = float(fbuf.readline(8)) # BSRN convention + metadata['latitude'] = metadata['latitude_bsrn'] - 90 # ISO 19115 + metadata['longitude_bsrn'] = float(fbuf.readline(8)) # BSRN convention + metadata['longitude'] = metadata['longitude_bsrn'] - 180 # ISO 19115 + metadata['altitude'] = int(fbuf.readline(5)) + metadata['identification of "SYNOP" station'] = fbuf.readline().strip() + metadata['date when horizon changed'] = fbuf.readline().strip() + # Pass last section of LR0004 containing the horizon elevation data + horizon = [] # list for raw horizon elevation data + while True: + line = fbuf.readline() + if ('*' in line) | (line == ''): + break + else: + horizon += [int(i) for i in line.split()] + horizon = pd.Series(horizon[1::2], horizon[::2], name='horizon_elevation', + dtype=int).drop(-1, errors='ignore').sort_index() + horizon.index.name = 'azimuth' + metadata['horizon'] = horizon + + # Read file and store the starting line number and number of lines for + # each logical record (LR) + fbuf.seek(0) # reset buffer to start of file + lr_startrow = {} # Dictionary of starting line number for each LR + lr_nrows = {} # Dictionary of end line number for each LR + for num, line in enumerate(fbuf): + if line.startswith('*'): # Find start of all logical records + if len(lr_startrow) >= 1: + lr_nrows[lr] = num - lr_startrow[lr] - 1 # noqa: F821 + lr = line[2:6] # string of 4 digit LR number + lr_startrow[lr] = num + lr_nrows[lr] = num - lr_startrow[lr] + + for lr in logical_records: + if lr not in ['0100', '0300', '0500']: + raise ValueError(f"Logical record {lr} not in " + "['0100', '0300','0500'].") + dfs = [] # Initialize empty list for dataframe + + # Parse LR0100 - basic measurements including GHI, DNI, DHI and temperature + if ('0100' in lr_startrow.keys()) & ('0100' in logical_records): + fbuf.seek(0) # reset buffer to start of file + 
LR_0100 = pd.read_fwf(fbuf, skiprows=lr_startrow['0100'] + 1, + nrows=lr_nrows['0100'], header=None, + colspecs=BSRN_LR0100_COL_SPECS, + na_values=[-999.0, -99.9]) + # Create multi-index and unstack, resulting in 1 col for each variable + LR_0100 = LR_0100.set_index([LR_0100.index // 2, LR_0100.index % 2]) + LR_0100 = LR_0100.unstack(level=1).swaplevel(i=0, j=1, axis='columns') + # Sort columns to match original order and assign column names + LR_0100 = LR_0100.reindex(sorted(LR_0100.columns), axis='columns') + LR_0100.columns = BSRN_LR0100_COLUMNS + # Set datetime index + LR_0100.index = (start_date+pd.to_timedelta(LR_0100['day']-1, unit='d') + + pd.to_timedelta(LR_0100['minute'], unit='T')) + # Drop empty, minute, and day columns + LR_0100 = LR_0100.drop(columns=['empty', 'day', 'minute']) + dfs.append(LR_0100) + + # Parse LR0300 - other time series data, including upward and net radiation + if ('0300' in lr_startrow.keys()) & ('0300' in logical_records): + fbuf.seek(0) # reset buffer to start of file + LR_0300 = pd.read_fwf(fbuf, skiprows=lr_startrow['0300']+1, + nrows=lr_nrows['0300'], header=None, + na_values=[-999.0, -99.9], + colspecs=BSRN_LR0300_COL_SPECS, + names=BSRN_LR0300_COLUMNS) + LR_0300.index = (start_date+pd.to_timedelta(LR_0300['day']-1, unit='d') + + pd.to_timedelta(LR_0300['minute'], unit='T')) + LR_0300 = LR_0300.drop(columns=['day', 'minute']).astype(float) + dfs.append(LR_0300) + + # Parse LR0500 - UV measurements + if ('0500' in lr_startrow.keys()) & ('0500' in logical_records): + fbuf.seek(0) # reset buffer to start of file + LR_0500 = pd.read_fwf(fbuf, skiprows=lr_startrow['0500']+1, + nrows=lr_nrows['0500'], na_values=[-99.9], + header=None, colspecs=BSRN_LR0500_COL_SPECS) + # Create multi-index and unstack, resulting in 1 col for each variable + LR_0500 = LR_0500.set_index([LR_0500.index // 2, LR_0500.index % 2]) + LR_0500 = LR_0500.unstack(level=1).swaplevel(i=0, j=1, axis='columns') + # Sort columns to match original order and assign 
column names + LR_0500 = LR_0500.reindex(sorted(LR_0500.columns), axis='columns') + LR_0500.columns = BSRN_LR0500_COLUMNS + LR_0500.index = (start_date+pd.to_timedelta(LR_0500['day']-1, unit='d') + + pd.to_timedelta(LR_0500['minute'], unit='T')) + LR_0500 = LR_0500.drop(columns=['empty', 'day', 'minute']) + dfs.append(LR_0500) + + if len(dfs): + data = pd.concat(dfs, axis='columns') + else: + data = _empty_dataframe_from_logical_records(logical_records) + metadata = {} + return data, metadata + + +def read_bsrn(filename, logical_records=('0100',)): + """ + Read a BSRN station-to-archive file into a DataFrame. + + The BSRN (Baseline Surface Radiation Network) is a world wide network + of high-quality solar radiation monitoring stations as described in [1]_. + The function is able to parse logical records (LR) 0100, 0300, and 0500. + LR0100 contains the basic measurements, which include global, diffuse, and + direct irradiance, as well as downwelling long-wave radiation [2]_. Future + updates may include parsing of additional data and metadata. + + BSRN files are freely available and can be accessed via FTP [3]_. The + username and password for the BSRN FTP server can be obtained for free as + described in the BSRN's Data Release Guidelines [3]_. + + Parameters + ---------- + filename: str or path-like + Name or path of a BSRN station-to-archive data file + logical_records: list or tuple, default: ('0100',) + List of the logical records (LR) to parse. Options include: '0100', + '0300', and '0500'. + + Returns + ------- + data: DataFrame + A DataFrame with the columns as described below. For a more extensive + description of the variables, consult [2]_. An empty DataFrame is + returned if the specified logical records were not found. + metadata: dict + Dictionary containing metadata (primarily from LR0004). 
Notes ----- - The data DataFrame includes the following fields: + The data DataFrame for LR0100 includes the following fields: ======================= ====== ========================================== Key Format Description ======================= ====== ========================================== - day int Day of the month 1-31 - minute int Minute of the day 0-1439 - ghi float Mean global horizontal irradiance [W/m^2] - ghi_std float Std. global horizontal irradiance [W/m^2] - ghi_min float Min. global horizontal irradiance [W/m^2] - ghi_max float Max. global horizontal irradiance [W/m^2] - dni float Mean direct normal irradiance [W/m^2] - dni_std float Std. direct normal irradiance [W/m^2] - dni_min float Min. direct normal irradiance [W/m^2] - dni_max float Max. direct normal irradiance [W/m^2] - dhi float Mean diffuse horizontal irradiance [W/m^2] - dhi_std float Std. diffuse horizontal irradiance [W/m^2] - dhi_min float Min. diffuse horizontal irradiance [W/m^2] - dhi_max float Max. diffuse horizontal irradiance [W/m^2] - lwd float Mean. downward long-wave radiation [W/m^2] - lwd_std float Std. downward long-wave radiation [W/m^2] - lwd_min float Min. downward long-wave radiation [W/m^2] - lwd_max float Max. downward long-wave radiation [W/m^2] + **Logical record 0100** + --------------------------------------------------------------------------- + ghi† float Mean global horizontal irradiance [W/m^2] + dni† float Mean direct normal irradiance [W/m^2] + dhi† float Mean diffuse horizontal irradiance [W/m^2] + lwd† float Mean. 
downward long-wave radiation [W/m^2] temp_air float Air temperature [°C] relative_humidity float Relative humidity [%] pressure float Atmospheric pressure [hPa] + ----------------------- ------ ------------------------------------------ + **Logical record 0300** + --------------------------------------------------------------------------- + gri† float Mean ground-reflected irradiance [W/m^2] + lwu† float Mean long-wave upwelling irradiance [W/m^2] + net_radiation† float Mean net radiation (net radiometer) [W/m^2] + ----------------------- ------ ------------------------------------------ + **Logical record 0500** + --------------------------------------------------------------------------- + uva_global† float Mean UV-A global irradiance [W/m^2] + uvb_direct† float Mean UV-B direct irradiance [W/m^2] + uvb_global† float Mean UV-B global irradiance [W/m^2] + uvb_diffuse† float Mean UV-B diffuse irradiance [W/m^2] + uvb_reflected† float Mean UV-B reflected irradiance [W/m^2] ======================= ====== ========================================== + † Marked variables have corresponding columns for the standard deviation + (_std), minimum (_min), and maximum (_max) calculated from the 60 samples + that are average into each 1-minute measurement. + + Hint + ---- + According to [2]_ "All time labels in the station-to-archive files denote + the start of a time interval." This corresponds to left bin edge labeling. + + See Also + -------- + pvlib.iotools.parse_bsrn, pvlib.iotools.get_bsrn + References ---------- .. [1] `World Radiation Monitoring Center - Baseline Surface Radiation Network (BSRN) `_ .. [2] `Update of the Technical Plan for BSRN Data Management, 2013, - Global Climate Observing System (GCOS) GCOS-172. + Global Climate Observing System (GCOS) GCOS-174. `_ .. [3] `BSRN Data Retrieval via FTP `_ .. 
[4] `BSRN Data Release Guidelines `_ - """ - - # Read file and store the starting line number for each logical record (LR) - line_no_dict = {} + """ # noqa: E501 if str(filename).endswith('.gz'): # check if file is a gzipped (.gz) file open_func, mode = gzip.open, 'rt' else: open_func, mode = open, 'r' with open_func(filename, mode) as f: - f.readline() # first line should be *U0001, so read it and discard - line_no_dict['0001'] = 0 - date_line = f.readline() # second line contains the year and month - start_date = pd.Timestamp(year=int(date_line[7:11]), - month=int(date_line[3:6]), day=1, - tz='UTC') # BSRN timestamps are UTC - for num, line in enumerate(f, start=2): - if line.startswith('*'): # Find start of all logical records - line_no_dict[line[2:6]] = num # key is 4 digit LR number - - # Determine start and end line of logical record LR0100 to be parsed - start_row = line_no_dict['0100'] + 1 # Start line number - # If LR0100 is the last logical record, then read rest of file - if start_row-1 == max(line_no_dict.values()): - end_row = num # then parse rest of the file - else: # otherwise parse until the beginning of the next logical record - end_row = min([i for i in line_no_dict.values() if i > start_row]) - 1 - nrows = end_row-start_row+1 - - # Read file as a fixed width file (fwf) - data = pd.read_fwf(filename, skiprows=start_row, nrows=nrows, header=None, - colspecs=COL_SPECS, na_values=[-999.0, -99.9], - compression='infer') - - # Create multi-index and unstack, resulting in one column for each variable - data = data.set_index([data.index // 2, data.index % 2]) - data = data.unstack(level=1).swaplevel(i=0, j=1, axis='columns') - - # Sort columns to match original order and assign column names - data = data.reindex(sorted(data.columns), axis='columns') - data.columns = BSRN_COLUMNS - # Drop empty columns - data = data.drop('empty', axis='columns') - - # Change day and minute type to integer - data['day'] = data['day'].astype('Int64') - data['minute'] = 
data['minute'].astype('Int64') - - # Set datetime index - data.index = (start_date - + pd.to_timedelta(data['day']-1, unit='d') - + pd.to_timedelta(data['minute'], unit='T')) - - return data + content = parse_bsrn(f, logical_records) + return content diff --git a/pvlib/tests/conftest.py b/pvlib/tests/conftest.py index 95d0f725d9..a3cba1e7b8 100644 --- a/pvlib/tests/conftest.py +++ b/pvlib/tests/conftest.py @@ -3,6 +3,7 @@ import warnings import pandas as pd +import os from pkg_resources import parse_version import pytest from functools import wraps @@ -82,6 +83,18 @@ def assert_frame_equal(left, right, **kwargs): reason='does not run on windows') +try: + # Attempt to load BSRN credentials used for testing pvlib.iotools.get_bsrn + bsrn_username = os.environ["BSRN_FTP_USERNAME"] + bsrn_password = os.environ["BSRN_FTP_PASSWORD"] + has_bsrn_credentials = True +except KeyError: + has_bsrn_credentials = False + +requires_bsrn_credentials = pytest.mark.skipif( + not has_bsrn_credentials, reason='requires bsrn credentials') + + try: import statsmodels # noqa: F401 has_statsmodels = True diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index 18d4be60f7..412cbd5e8f 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -2,25 +2,123 @@ tests for :mod:`pvlib.iotools.bsrn` """ - import pandas as pd import pytest +import os +from pvlib.iotools import read_bsrn, get_bsrn +from ..conftest import (DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal, + requires_bsrn_credentials) + + +@pytest.fixture(scope="module") +def bsrn_credentials(): + """Supplies the BSRN FTP credentials for testing purposes. 
-from pvlib.iotools import bsrn -from ..conftest import DATA_DIR, assert_index_equal + Users should obtain their own credentials as described in the `read_bsrn` + documentation.""" + bsrn_username = os.environ["BSRN_FTP_USERNAME"] + bsrn_password = os.environ["BSRN_FTP_PASSWORD"] + return bsrn_username, bsrn_password -@pytest.mark.parametrize('testfile,expected_index', [ - ('bsrn-pay0616.dat.gz', - pd.date_range(start='20160601', periods=43200, freq='1min', tz='UTC')), - ('bsrn-lr0100-pay0616.dat', - pd.date_range(start='20160601', periods=43200, freq='1min', tz='UTC')), +@pytest.fixture +def expected_index(): + return pd.date_range(start='20160601', periods=43200, freq='1min', + tz='UTC') + + +@pytest.mark.parametrize('testfile', [ + ('bsrn-pay0616.dat.gz'), + ('bsrn-lr0100-pay0616.dat'), ]) def test_read_bsrn(testfile, expected_index): - data = bsrn.read_bsrn(DATA_DIR / testfile) + data, metadata = read_bsrn(DATA_DIR / testfile) + assert_index_equal(expected_index, data.index) + assert 'ghi' in data.columns + assert 'dni_std' in data.columns + assert 'dhi_min' in data.columns + assert 'lwd_max' in data.columns + assert 'relative_humidity' in data.columns + + +def test_read_bsrn_logical_records(expected_index): + # Test if logical records 0300 and 0500 are correct parsed + # and that 0100 is not passed when not specified + data, metadata = read_bsrn(DATA_DIR / 'bsrn-pay0616.dat.gz', + logical_records=['0300', '0500']) + assert_index_equal(expected_index, data.index) + assert 'lwu' in data.columns + assert 'uva_global' in data.columns + assert 'uvb_reflected_std' in data.columns + assert 'ghi' not in data.columns + + +def test_read_bsrn_bad_logical_record(): + # Test if ValueError is raised if an unsupported logical record is passed + with pytest.raises(ValueError, match='not in'): + read_bsrn(DATA_DIR / 'bsrn-lr0100-pay0616.dat', + logical_records=['dummy']) + + +def test_read_bsrn_logical_records_not_found(): + # Test if an empty dataframe is returned if 
specified LRs are not present + data, metadata = read_bsrn(DATA_DIR / 'bsrn-lr0100-pay0616.dat', + logical_records=['0300', '0500']) + assert data.empty # assert that the dataframe is empty + assert 'uva_global' in data.columns + assert 'uvb_reflected_std' in data.columns + assert 'uva_global_max' in data.columns + assert 'dni' not in data.columns + assert 'day' not in data.columns + + +@requires_bsrn_credentials +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_get_bsrn(expected_index, bsrn_credentials): + # Retrieve irradiance data from the BSRN FTP server + # the TAM station is chosen due to its small file sizes + username, password = bsrn_credentials + data, metadata = get_bsrn( + start=pd.Timestamp(2016, 6, 1), + end=pd.Timestamp(2016, 6, 29), + station='tam', + username=username, + password=password, + local_path='') assert_index_equal(expected_index, data.index) assert 'ghi' in data.columns assert 'dni_std' in data.columns assert 'dhi_min' in data.columns assert 'lwd_max' in data.columns assert 'relative_humidity' in data.columns + + +@requires_bsrn_credentials +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_get_bsrn_bad_station(bsrn_credentials): + # Test if KeyError is raised if a bad station name is passed + username, password = bsrn_credentials + with pytest.raises(KeyError, match='sub-directory does not exist'): + get_bsrn( + start=pd.Timestamp(2016, 6, 1), + end=pd.Timestamp(2016, 6, 29), + station='not_a_station_name', + username=username, + password=password) + + +@requires_bsrn_credentials +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_get_bsrn_no_files(bsrn_credentials): + username, password = bsrn_credentials + # Test if Warning is given if no files are found for the entire time frame + with pytest.warns(UserWarning, match='No files'): + get_bsrn( + start=pd.Timestamp(1990, 6, 1), + 
end=pd.Timestamp(1990, 6, 29), + station='tam', + username=username, + password=password)