-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Added PNC backend to xarray #1905
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 43 commits
eeeb3a5
5caea63
5ac4b6f
f73436d
9507303
ef22872
56f087c
3c023a0
5a3c62d
8eb427d
ca75c76
196c03f
751ba1e
282408f
b1890b1
eda629f
816c7da
c8b2ca3
85ac334
c46caeb
6838885
c3b7c82
7906492
4df9fba
e4900ab
ec95a3a
ad7b709
26dd0f9
d999de1
87e8612
dd94be5
2311701
9791b8a
1d7ad4a
229715a
68997e0
d007bc6
70968ca
c2788b2
1f3287e
abacc1d
a136ea3
7d8a8ee
24c8376
214f51c
5786291
066cdd5
9231e3f
80d03a7
d2c01de
e12288d
a179c25
eaa37fe
590e919
0df1e60
989fa4b
d71bb60
b9b64ca
10c9bfa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,3 +25,4 @@ dependencies: | |
- pytest-cov | ||
- pydap | ||
- lxml | ||
- PseudoNetCDF |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,9 +31,15 @@ What's New | |
v0.10.4 (unreleased) | ||
-------------------- | ||
|
||
Documentation | ||
~~~~~~~~~~~~~ | ||
|
||
Enhancements | ||
~~~~~~~~~~~~ | ||
|
||
- added a PseudoNetCDF backend for many Atmospheric data formats including | ||
GEOS-Chem, CAMx, NOAA ARL packed bit and many others. | ||
By `Barron Henderson <https://github.com/barronh>`_. | ||
- Support writing lists of strings as netCDF attributes (:issue:`2044`). | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please fix -- you are inadvertently editing release notes for 0.10.3. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry. Fixed it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
By `Dan Nowacki <https://github.com/dnowacki-usgs>`_. | ||
|
||
|
@@ -45,10 +51,11 @@ Bug fixes | |
|
||
.. _whats-new.0.10.3: | ||
|
||
v0.10.3 (April 13, 2018) | ||
------------------------ | ||
v0.10.3 (unreleased) | ||
-------------------- | ||
|
||
The minor release includes a number of bug-fixes and backwards compatible enhancements. | ||
Documentation | ||
~~~~~~~~~~~~~ | ||
|
||
Enhancements | ||
~~~~~~~~~~~~ | ||
|
@@ -75,21 +82,9 @@ Enhancements | |
Bug fixes | ||
~~~~~~~~~ | ||
|
||
- Fixed ``decode_cf`` function to operate lazily on dask arrays | ||
(:issue:`1372`). By `Ryan Abernathey <https://github.com/rabernat>`_. | ||
- Fixed labeled indexing with slice bounds given by xarray objects with | ||
datetime64 or timedelta64 dtypes (:issue:`1240`). | ||
By `Stephan Hoyer <https://github.com/shoyer>`_. | ||
- Attempting to convert an xarray.Dataset into a numpy array now raises an | ||
informative error message. | ||
By `Stephan Hoyer <https://github.com/shoyer>`_. | ||
- Fixed a bug in decode_cf_datetime where ``int32`` arrays weren't parsed | ||
correctly (:issue:`2002`). | ||
By `Fabien Maussion <https://github.com/fmaussion>`_. | ||
- When calling `xr.auto_combine()` or `xr.open_mfdataset()` with a `concat_dim`, | ||
the resulting dataset will have that one-element dimension (it was | ||
silently dropped, previously) (:issue:`1988`). | ||
By `Ben Root <https://github.com/WeatherGod>`_. | ||
|
||
.. _whats-new.0.10.2: | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
import functools | ||
|
||
import numpy as np | ||
|
||
from .. import Variable | ||
from ..core.pycompat import OrderedDict | ||
from ..core.utils import (FrozenOrderedDict, Frozen) | ||
from ..core import indexing | ||
|
||
from .common import AbstractDataStore, DataStorePickleMixin, BackendArray | ||
|
||
|
||
class PncArrayWrapper(BackendArray):
    """Lazy array wrapper exposing a PseudoNetCDF variable to xarray.

    Shape and dtype are captured eagerly at construction; the data itself
    is only read when the wrapper is indexed.
    """

    def __init__(self, variable_name, datastore):
        self.datastore = datastore
        self.variable_name = variable_name
        backend_var = self.get_array()
        self.shape = backend_var.shape
        self.dtype = np.dtype(backend_var.dtype)

    def get_array(self):
        # Fetch the underlying PNC variable; the store must currently be open.
        self.datastore.assert_open()
        return self.datastore.ds.variables[self.variable_name]

    def __getitem__(self, key):
        # Split the indexer: outer/1-vector indexing is handed to the
        # backend, anything fancier is applied afterwards in numpy.
        backend_key, numpy_key = indexing.decompose_indexer(
            key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR)

        with self.datastore.ensure_open(autoclose=True):
            data = self.get_array()[backend_key.tuple]

        if numpy_key.tuple:
            # apply the remaining indexing to the loaded np.ndarray
            data = indexing.NumpyIndexingAdapter(data)[numpy_key]
        return data
|
||
|
||
_genericncf = ('Dataset', 'netcdf', 'ncf', 'nc') | ||
|
||
|
||
class _notnetcdf: | ||
def __eq__(self, lhs): | ||
return lhs not in _genericncf | ||
|
||
|
||
class PseudoNetCDFDataStore(AbstractDataStore, DataStorePickleMixin):
    """Store for accessing datasets via PseudoNetCDF
    """
    @classmethod
    def open(cls, filename, format=None, writer=None,
             autoclose=False, **format_kwds):
        """Open ``filename`` with PseudoNetCDF and wrap it in a data store.

        Parameters
        ----------
        filename : str
            Path of the file to open.
        format : str, optional
            Name of a PseudoNetCDF reader; inferred by PNC when omitted.
        writer : optional
            Forwarded to the store constructor.
        autoclose : bool, optional
            If True, the file is closed after each access and reopened on
            demand via the constructed opener.
        **format_kwds
            Extra keyword arguments for the PNC reader; ``mode`` (default
            ``'r'``) is also used as the store's mode.

        Raises
        ------
        ValueError
            If the selected reader is one of the generic netCDF readers;
            plain netCDF files should use xarray's netCDF4 backend instead.
        """
        from PseudoNetCDF._getreader import getreader, getreaderdict

        readerdict = getreaderdict()
        reader = getreader(filename, format=format, **format_kwds)
        # Reject generic netCDF readers: xarray's own netCDF backends are
        # the supported path for plain netCDF files.
        _genreaders = tuple(readerdict[rn] for rn in _genericncf)
        # NOTE(review): ``getreader`` appears to return a reader *class*
        # (it is later called via functools.partial to produce the dataset);
        # if so, an identity/``issubclass`` check would be needed here --
        # confirm against PseudoNetCDF before relying on this branch.
        if isinstance(reader, _genreaders):
            raise ValueError('In xarray, PseudoNetCDF should not be used '
                             'to read netcdf files with unknown metadata. '
                             'Instead, use netcdf4. If this is a known '
                             'format, specify it using the format keyword '
                             '(or backend_kwargs={\'format\': <name>} from '
                             'open_dataset).')

        # Keep a re-opener around so autoclose can reopen the file later.
        opener = functools.partial(reader, filename, **format_kwds)
        ds = opener()
        mode = format_kwds.get('mode', 'r')
        return cls(ds, mode=mode, writer=writer, opener=opener,
                   autoclose=autoclose)

    def __init__(self, pnc_dataset, mode='r', writer=None, opener=None,
                 autoclose=False):
        # autoclose implies the file may need reopening, so an opener is
        # mandatory in that case.
        if autoclose and opener is None:
            raise ValueError('autoclose requires an opener')

        self._ds = pnc_dataset
        self._autoclose = autoclose
        self._isopen = True
        self._opener = opener
        self._mode = mode
        super(PseudoNetCDFDataStore, self).__init__()

    def open_store_variable(self, name, var):
        """Wrap a single PNC variable as a lazily-indexed xarray Variable."""
        with self.ensure_open(autoclose=False):
            data = indexing.LazilyOuterIndexedArray(
                PncArrayWrapper(name, self)
            )
            attrs = OrderedDict((k, getattr(var, k)) for k in var.ncattrs())
            return Variable(var.dimensions, data, attrs)

    def get_variables(self):
        with self.ensure_open(autoclose=False):
            return FrozenOrderedDict((k, self.open_store_variable(k, v))
                                     for k, v in self.ds.variables.items())

    def get_attrs(self):
        with self.ensure_open(autoclose=True):
            # dict comprehension instead of dict([...]) for clarity/speed
            return Frozen({k: getattr(self.ds, k)
                           for k in self.ds.ncattrs()})

    def get_dimensions(self):
        with self.ensure_open(autoclose=True):
            return Frozen(self.ds.dimensions)

    def get_encoding(self):
        # Only the unlimited dimensions are reported as encoding.
        return {
            'unlimited_dims': {
                k for k in self.ds.dimensions
                if self.ds.dimensions[k].isunlimited()
            }
        }

    def close(self):
        # Idempotent: only close the underlying dataset once.
        if self._isopen:
            self.ds.close()
            self._isopen = False
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you move this after PyNIO and before pandas? I think that would make a little more logical sense.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure.