refactor get_psm3 code into parse_psm3, read_psm3 #842
@@ -0,0 +1,33 @@
+.. _whatsnew_0710:
+
+v0.7.1 (MONTH DAY, YEAR)
+------------------------
+
+
+API Changes
+~~~~~~~~~~~
+
+Enhancements
+~~~~~~~~~~~~
+* Added :py:func:`~pvlib.iotools.read_psm3` to read local NSRDB PSM3 files.
+  (:issue:`841`)
+* Added `leap_day` parameter to `iotools.get_psm3` instead of hardcoding it as
+  False.
+
+
+Bug fixes
+~~~~~~~~~
+* Changed the PSM3 API endpoint for TMY requests in `iotools.get_psm3`.
+
+Testing
+~~~~~~~
+* Added single-year PSM3 API test for `iotools.get_psm3`.
+* Added tests for `iotools.read_psm3`.
+
+Documentation
+~~~~~~~~~~~~~
+* Updated list of allowed years for `iotools.get_psm3`.
+
+Contributors
+~~~~~~~~~~~~
+* Kevin Anderson (:ghuser:`kanderso-nrel`)
Large diffs are not rendered by default.
@@ -9,7 +9,8 @@
 import pandas as pd
 from json import JSONDecodeError

-URL = "http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv"
+PSM_URL = "http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv"
+TMY_URL = "http://developer.nrel.gov/api/nsrdb_api/solar/nsrdb_psm3_tmy_download.csv"  # noqa

 # 'relative_humidity', 'total_precipitable_water' are not available
 ATTRIBUTES = [
@@ -19,9 +20,11 @@


 def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
-             full_name=PVLIB_PYTHON, affiliation=PVLIB_PYTHON, timeout=30):
+             leap_day=False, full_name=PVLIB_PYTHON, affiliation=PVLIB_PYTHON,
+             timeout=30):
     """
-    Get PSM3 data
+    Retrieve NSRDB [1]_ PSM3 timeseries weather data from the PSM3 API [2]_
+    [3]_.

     Parameters
     ----------
@@ -38,7 +41,11 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
         PSM3 API parameter specifying year or TMY variant to download, see notes
         below for options
     interval : int, default 60
-        interval size in minutes, can only be either 30 or 60
+        interval size in minutes, can only be either 30 or 60. Only used for
+        single-year requests (i.e., it is ignored for tmy/tgy/tdy requests).
+    leap_day : boolean, default False
+        include leap day in the results. Only used for single-year requests
+        (i.e., it is ignored for tmy/tgy/tdy requests).
     full_name : str, default 'pvlib python'
         optional
     affiliation : str, default 'pvlib python'
@@ -49,7 +56,8 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
     Returns
     -------
     headers : dict
-        metadata from NREL PSM3 about the record, see notes for fields
+        metadata from NREL PSM3 about the record, see
+        :func:`pvlib.iotools.read_psm3` for fields
     data : pandas.DataFrame
         timeseries data from NREL PSM3

@@ -74,50 +82,25 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,

        ['1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005',
         '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
-        '2014', '2015', '2016', '2017', 'tmy', 'tmy-2016', 'tmy-2017',
-        'tdy-2017', 'tgy-2017']
-
-    The return is a tuple with two items. The first item is a header with
-    metadata from NREL PSM3 about the record containing the following fields:
-
-    * Source
-    * Location ID
-    * City
-    * State
-    * Country
-    * Latitude
-    * Longitude
-    * Time Zone
-    * Elevation
-    * Local Time Zone
-    * Dew Point Units
-    * DHI Units
-    * DNI Units
-    * GHI Units
-    * Temperature Units
-    * Pressure Units
-    * Wind Direction Units
-    * Wind Speed
-    * Surface Albedo Units
-    * Version
-
-    The second item is a dataframe with the timeseries data downloaded.
+        '2014', '2015', '2016', '2017', '2018', 'tmy', 'tmy-2016', 'tmy-2017',
+        'tdy-2017', 'tgy-2017', 'tmy-2018', 'tdy-2018', 'tgy-2018']
Review comment: This list is going to be outdated in ~6 months, but I don't have a better idea for describing the values. The information on the NSRDB websites is also out of date.

Reply: I think these values are up to date in the developer docs at least: TMY endpoint and Single-Year endpoint. Side note, these pages are developed on GH: NREL/developer.nrel.gov@5c03e14. I just put in a request for the TMY Description to be updated to include 2018.

     .. warning:: PSM3 is limited to data found in the NSRDB, please consult the
        references below for locations with available data

     See Also
     --------
-    pvlib.iotools.read_tmy2, pvlib.iotools.read_tmy3
+    pvlib.iotools.read_psm3

     References
     ----------
-    .. [1] `NREL Developer Network - Physical Solar Model (PSM) v3
-       <https://developer.nrel.gov/docs/solar/nsrdb/psm3_data_download/>`_
-    .. [2] `NREL National Solar Radiation Database (NSRDB)
+    .. [1] `NREL National Solar Radiation Database (NSRDB)
        <https://nsrdb.nrel.gov/>`_
+    .. [2] `NREL Developer Network - Physical Solar Model (PSM) v3
+       <https://developer.nrel.gov/docs/solar/nsrdb/psm3_data_download/>`_
+    .. [3] `NREL Developer Network - Physical Solar Model (PSM) v3 TMY
+       <https://developer.nrel.gov/docs/solar/nsrdb/psm3_tmy_data_download/>`_
     """
     # The well-known text (WKT) representation of geometry notation is strict.
     # A POINT object is a string with longitude first, then the latitude, with
@@ -137,11 +120,15 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
         'wkt': 'POINT(%s %s)' % (longitude, latitude),
         'names': names,
         'attributes': ','.join(ATTRIBUTES),
-        'leap_day': 'false',
+        'leap_day': str(leap_day).lower(),
         'utc': 'false',
         'interval': interval
     }
     # request CSV download from NREL PSM3
+    if any(prefix in names for prefix in ('tmy', 'tgy', 'tdy')):
+        URL = TMY_URL
+    else:
+        URL = PSM_URL
     response = requests.get(URL, params=params, timeout=timeout)
     if not response.ok:
         # if the API key is rejected, then the response status will be 403
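The endpoint dispatch added in this hunk can be sketched in isolation. The helper name `select_endpoint` below is illustrative and not part of the PR; the URLs are the module constants from the diff:

```python
PSM_URL = "http://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv"
TMY_URL = "http://developer.nrel.gov/api/nsrdb_api/solar/nsrdb_psm3_tmy_download.csv"


def select_endpoint(names):
    """Pick the PSM3 API endpoint for a given `names` request value."""
    # TMY variants ('tmy', 'tmy-2017', 'tgy-2017', 'tdy-2017', ...) go to
    # the dedicated TMY endpoint; plain years ('2017') go to the PSM endpoint
    if any(prefix in names for prefix in ('tmy', 'tgy', 'tdy')):
        return TMY_URL
    return PSM_URL


print(select_endpoint('tgy-2017') == TMY_URL)  # True
print(select_endpoint('2017') == PSM_URL)      # True
```

Substring matching (`prefix in names`) rather than `startswith` mirrors the diff, so a value like `'tmy-2018'` also routes to the TMY endpoint.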
@@ -154,30 +141,132 @@ def get_psm3(latitude, longitude, api_key, email, names='tmy', interval=60,
     # the CSV is in the response content as a UTF-8 bytestring
     # to use pandas we need to create a file buffer from the response
     fbuf = io.StringIO(response.content.decode('utf-8'))
-    # The first 2 lines of the response are headers with metadata
-    header_fields = fbuf.readline().split(',')
-    header_fields[-1] = header_fields[-1].strip()  # strip trailing newline
-    header_values = fbuf.readline().split(',')
-    header_values[-1] = header_values[-1].strip()  # strip trailing newline
-    header = dict(zip(header_fields, header_values))
-    # the response is all strings, so set some header types to numbers
-    header['Local Time Zone'] = int(header['Local Time Zone'])
-    header['Time Zone'] = int(header['Time Zone'])
-    header['Latitude'] = float(header['Latitude'])
-    header['Longitude'] = float(header['Longitude'])
-    header['Elevation'] = int(header['Elevation'])
-    # get the column names so we can set the dtypes
-    columns = fbuf.readline().split(',')
-    columns[-1] = columns[-1].strip()  # strip trailing newline
-    dtypes = dict.fromkeys(columns, float)  # all floats except datevec
-    dtypes.update(Year=int, Month=int, Day=int, Hour=int, Minute=int)
-    data = pd.read_csv(
-        fbuf, header=None, names=columns, dtype=dtypes,
-        delimiter=',', lineterminator='\n')  # skip carriage returns \r
-    # the response 1st 5 columns are a date vector, convert to datetime
-    dtidx = pd.to_datetime(
-        data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
-    # in USA all timezones are integers
-    tz = 'Etc/GMT%+d' % -header['Time Zone']
-    data.index = pd.DatetimeIndex(dtidx).tz_localize(tz)
+    return read_psm3(fbuf)
+
+
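The timezone localization moved into `read_psm3` relies on the POSIX sign convention of the `Etc/GMT` zone names, which is inverted relative to the usual UTC-offset notation. A minimal sketch (the helper name `psm3_tz` is illustrative, not from the PR):

```python
def psm3_tz(utc_offset):
    # 'Etc/GMT+7' means UTC-7 (POSIX sign convention), hence the negation
    # of the NSRDB 'Time Zone' metadata value; %+d always emits a sign
    return 'Etc/GMT%+d' % -utc_offset


print(psm3_tz(-7))  # Etc/GMT+7
print(psm3_tz(3))   # Etc/GMT-3
```

This only works because, as the code comment notes, all US timezone offsets are integer hours; fractional offsets would need a different zone representation.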
+def read_psm3(filename):
+    """
+    Read an NSRDB [1]_ PSM3 weather file (formatted as SAM CSV [2]_).
+
+    Parameters
+    ----------
+    filename: string or file-like object
+        Filename or file-like object of data to read.
+
+    Returns
+    -------
+    headers : dict
+        metadata from NREL PSM3 about the record, see notes for fields
+    data : pandas.DataFrame
+        timeseries data from NREL PSM3
+
+    Notes
+    -----
+    The return is a tuple with two items. The first item is a header with
+    metadata from NREL PSM3 about the record containing the following fields:
+
+    * Source
+    * Location ID
+    * City
+    * State
+    * Country
+    * Latitude
+    * Longitude
+    * Time Zone
+    * Elevation
+    * Local Time Zone
+    * Clearsky DHI Units
+    * Clearsky DNI Units
+    * Clearsky GHI Units
+    * Dew Point Units
+    * DHI Units
+    * DNI Units
+    * GHI Units
+    * Solar Zenith Angle Units
+    * Temperature Units
+    * Pressure Units
+    * Relative Humidity Units
+    * Precipitable Water Units
+    * Wind Direction Units
+    * Wind Speed
+    * Cloud Type -15
+    * Cloud Type 0
+    * Cloud Type 1
+    * Cloud Type 2
+    * Cloud Type 3
+    * Cloud Type 4
+    * Cloud Type 5
+    * Cloud Type 6
+    * Cloud Type 7
+    * Cloud Type 8
+    * Cloud Type 9
+    * Cloud Type 10
+    * Cloud Type 11
+    * Cloud Type 12
+    * Fill Flag 0
+    * Fill Flag 1
+    * Fill Flag 2
+    * Fill Flag 3
+    * Fill Flag 4
+    * Fill Flag 5
+    * Surface Albedo Units
+    * Version
+
+    The second item is a dataframe with the PSM3 timeseries data.
+
+    See Also
+    --------
+    pvlib.iotools.get_psm3
+
+    References
+    ----------
+    .. [1] `NREL National Solar Radiation Database (NSRDB)
+       <https://nsrdb.nrel.gov/>`_
+    .. [2] `Standard Time Series Data File Format
+       <https://rredc.nrel.gov/solar/old_data/nsrdb/2005-2012/wfcsv.pdf>`_
+    """
+    if hasattr(filename, 'readline'):
+        # if passed a file-like object, not our job to close it
+        close = False
+        fbuf = filename
+    else:
+        close = True
+        fbuf = open(filename, 'r')
+
+    try:
+        # The first 2 lines of the response are headers with metadata
+        header_fields = fbuf.readline().split(',')
+        header_fields[-1] = header_fields[-1].strip()  # strip trailing newline
+        header_values = fbuf.readline().split(',')
+        header_values[-1] = header_values[-1].strip()  # strip trailing newline
+        header = dict(zip(header_fields, header_values))
+        # the response is all strings, so set some header types to numbers
+        header['Local Time Zone'] = int(header['Local Time Zone'])
+        header['Time Zone'] = int(header['Time Zone'])
+        header['Latitude'] = float(header['Latitude'])
+        header['Longitude'] = float(header['Longitude'])
+        header['Elevation'] = int(header['Elevation'])
+        # get the column names so we can set the dtypes
+        columns = fbuf.readline().split(',')
+        columns[-1] = columns[-1].strip()  # strip trailing newline
+        # Since the header has so many columns, excel saves blank cols in the
+        # data below the header lines.
+        columns = [col for col in columns if col != '']
+        dtypes = dict.fromkeys(columns, float)  # all floats except datevec
+        dtypes.update(Year=int, Month=int, Day=int, Hour=int, Minute=int)
+        dtypes['Cloud Type'] = int
+        dtypes['Fill Flag'] = int
+        data = pd.read_csv(
+            fbuf, header=None, names=columns, usecols=columns, dtype=dtypes,
+            delimiter=',', lineterminator='\n')  # skip carriage returns \r
+        # the response 1st 5 columns are a date vector, convert to datetime
+        dtidx = pd.to_datetime(
+            data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
+        # in USA all timezones are integers
+        tz = 'Etc/GMT%+d' % -header['Time Zone']
+        data.index = pd.DatetimeIndex(dtidx).tz_localize(tz)
+    finally:
+        if close:
+            fbuf.close()
+
+    return header, data
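The parsing steps above can be exercised on an in-memory sample without hitting the API. The sketch below replicates the same logic with plain pandas on a made-up, heavily truncated SAM-CSV-style buffer (the metadata and data values are invented for illustration, and most real PSM3 columns are omitted):

```python
import io
import pandas as pd

# Hypothetical sample in the SAM CSV layout read_psm3 expects:
# line 1 = metadata field names, line 2 = metadata values, line 3 = data
# column names, remaining lines = timeseries rows.
sample = (
    "Source,Location ID,Latitude,Longitude,Time Zone,Elevation\n"
    "NSRDB,12345,40.01,-105.25,-7,1600\n"
    "Year,Month,Day,Hour,Minute,GHI\n"
    "2017,1,1,0,0,0.0\n"
    "2017,1,1,1,0,0.0\n"
)
fbuf = io.StringIO(sample)

# parse the first two lines into a metadata dict, as in read_psm3
fields = fbuf.readline().strip().split(',')
values = fbuf.readline().strip().split(',')
header = dict(zip(fields, values))
header['Time Zone'] = int(header['Time Zone'])

# the rest of the buffer is an ordinary CSV with a date-vector prefix
data = pd.read_csv(fbuf)
dtidx = pd.to_datetime(data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
# note the sign flip: the NSRDB offset -7 (UTC-7) maps to 'Etc/GMT+7'
data.index = pd.DatetimeIndex(dtidx).tz_localize(
    'Etc/GMT%+d' % -header['Time Zone'])
print(data.index[0])  # 2017-01-01 00:00:00-07:00
```

The real function additionally forces dtypes, drops the blank columns Excel leaves behind the wide header, and uses `lineterminator='\n'` to tolerate carriage returns, but the header-dict/date-vector/localize flow is the same.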