Skip to content

Commit b2bb60e

Browse files
committed
Merge branch 'master' into file-manager-del
2 parents fd12b18 + 9352b3c commit b2bb60e

16 files changed

+312
-53
lines changed

.travis.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ matrix:
2929
- env: CONDA_ENV=py36-bottleneck-dev
3030
- env: CONDA_ENV=py36-condaforge-rc
3131
- env: CONDA_ENV=py36-pynio-dev
32-
- env: CONDA_ENV=py36-rasterio-0.36
32+
- env: CONDA_ENV=py36-rasterio
3333
- env: CONDA_ENV=py36-zarr-dev
3434
- env: CONDA_ENV=docs
3535
- env: CONDA_ENV=py36-hypothesis

ci/requirements-py36-rasterio-0.36.yml renamed to ci/requirements-py36-rasterio.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ dependencies:
1616
- scipy
1717
- seaborn
1818
- toolz
19-
- rasterio=0.36.0
19+
- rasterio>=1.0
2020
- bottleneck
2121
- pip:
2222
- coveralls

doc/installing.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ For netCDF and IO
3232
for accessing CAMx, GEOS-Chem (bpch), NOAA ARL files, ICARTT files
3333
(ffi1001) and many others.
3434
- `rasterio <https://github.com/mapbox/rasterio>`__: for reading GeoTiffs and
35-
other gridded raster datasets.
35+
other gridded raster datasets. (version 1.0 or later)
3636
- `iris <https://github.com/scitools/iris>`__: for conversion to and from iris'
3737
Cube objects
3838
- `cfgrib <https://github.com/ecmwf/cfgrib>`__: for reading GRIB files via the

doc/whats-new.rst

+19
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,15 @@ v0.11.1 (unreleased)
3333
Breaking changes
3434
~~~~~~~~~~~~~~~~
3535

36+
- Minimum rasterio version increased from 0.36 to 1.0 (for ``open_rasterio``)
37+
- Time bounds variables are now also decoded according to CF conventions
38+
(:issue:`2565`). The previous behavior was to decode them only if they
39+
had specific time attributes, now these attributes are copied
40+
automatically from the corresponding time coordinate. This might
41+
break downstream code that was relying on these variables to be
42+
not decoded.
43+
By `Fabien Maussion <https://github.com/fmaussion>`_.
44+
3645
Enhancements
3746
~~~~~~~~~~~~
3847

@@ -41,6 +50,10 @@ Enhancements
4150
- :py:class:`CFTimeIndex` uses slicing for string indexing when possible (like
4251
:py:class:`pandas.DatetimeIndex`), which avoids unnecessary copies.
4352
By `Stephan Hoyer <https://github.com/shoyer>`_
53+
- Enable passing ``rasterio.io.DatasetReader`` or ``rasterio.vrt.WarpedVRT`` to
54+
``open_rasterio`` instead of file path string. Allows for in-memory
55+
reprojection, see (:issue:`2588`).
56+
By `Scott Henderson <https://github.com/scottyhq>`_.
4457
- Like :py:class:`pandas.DatetimeIndex`, :py:class:`CFTimeIndex` now supports
4558
"dayofyear" and "dayofweek" accessors (:issue:`2597`). By `Spencer Clark
4659
<https://github.com/spencerkclark>`_.
@@ -50,6 +63,12 @@ Enhancements
5063
recommend enabling it in your test suites if you use xarray for IO.
5164
By `Stephan Hoyer <https://github.com/shoyer>`_
5265
- Support Dask ``HighLevelGraphs`` by `Matthew Rocklin <https://matthewrocklin.com>`_.
66+
- :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now supports the
67+
``loffset`` kwarg just like Pandas.
68+
By `Deepak Cherian <https://github.com/dcherian>`_
69+
- 0d slices of ndarrays are now obtained directly through indexing, rather than
70+
extracting and wrapping a scalar, avoiding unnecessary copying. By `Daniel
71+
Wennberg <https://github.com/danielwe>`_.
5372

5473
Bug fixes
5574
~~~~~~~~~

xarray/backends/rasterio_.py

+28-9
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import warnings
33
from collections import OrderedDict
44
from distutils.version import LooseVersion
5-
65
import numpy as np
76

87
from .. import DataArray
@@ -24,13 +23,16 @@
2423
class RasterioArrayWrapper(BackendArray):
2524
"""A wrapper around rasterio dataset objects"""
2625

27-
def __init__(self, manager, lock):
26+
def __init__(self, manager, lock, vrt_params=None):
27+
from rasterio.vrt import WarpedVRT
2828
self.manager = manager
2929
self.lock = lock
3030

3131
# cannot save riods as an attribute: this would break pickleability
3232
riods = manager.acquire()
33-
33+
if vrt_params is not None:
34+
riods = WarpedVRT(riods, **vrt_params)
35+
self.vrt_params = vrt_params
3436
self._shape = (riods.count, riods.height, riods.width)
3537

3638
dtypes = riods.dtypes
@@ -104,6 +106,7 @@ def _get_indexer(self, key):
104106
return band_key, tuple(window), tuple(squeeze_axis), tuple(np_inds)
105107

106108
def _getitem(self, key):
109+
from rasterio.vrt import WarpedVRT
107110
band_key, window, squeeze_axis, np_inds = self._get_indexer(key)
108111

109112
if not band_key or any(start == stop for (start, stop) in window):
@@ -114,6 +117,8 @@ def _getitem(self, key):
114117
else:
115118
with self.lock:
116119
riods = self.manager.acquire(needs_lock=False)
120+
if self.vrt_params is not None:
121+
riods = WarpedVRT(riods, **self.vrt_params)
117122
out = riods.read(band_key, window=window)
118123

119124
if squeeze_axis:
@@ -178,8 +183,8 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
178183
179184
Parameters
180185
----------
181-
filename : str
182-
Path to the file to open.
186+
filename : str, rasterio.io.DatasetReader, or rasterio.vrt.WarpedVRT
187+
Path to the file to open, or an already open rasterio dataset.
183188
parse_coordinates : bool, optional
184189
Whether to parse the x and y coordinates out of the file's
185190
``transform`` attribute or not. The default is to automatically
@@ -206,14 +211,28 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
206211
data : DataArray
207212
The newly created DataArray.
208213
"""
209-
210214
import rasterio
215+
from rasterio.vrt import WarpedVRT
216+
vrt_params = None
217+
if isinstance(filename, rasterio.io.DatasetReader):
218+
filename = filename.name
219+
elif isinstance(filename, rasterio.vrt.WarpedVRT):
220+
vrt = filename
221+
filename = vrt.src_dataset.name
222+
vrt_params = dict(crs=vrt.crs.to_string(),
223+
resampling=vrt.resampling,
224+
src_nodata=vrt.src_nodata,
225+
dst_nodata=vrt.dst_nodata,
226+
tolerance=vrt.tolerance,
227+
warp_extras=vrt.warp_extras)
211228

212229
if lock is None:
213230
lock = RASTERIO_LOCK
214231

215232
manager = CachingFileManager(rasterio.open, filename, lock=lock, mode='r')
216233
riods = manager.acquire()
234+
if vrt_params is not None:
235+
riods = WarpedVRT(riods, **vrt_params)
217236

218237
if cache is None:
219238
cache = chunks is None
@@ -287,14 +306,14 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
287306
for k, v in meta.items():
288307
# Add values as coordinates if they match the band count,
289308
# as attributes otherwise
290-
if (isinstance(v, (list, np.ndarray)) and
291-
len(v) == riods.count):
309+
if (isinstance(v, (list, np.ndarray))
310+
and len(v) == riods.count):
292311
coords[k] = ('band', np.asarray(v))
293312
else:
294313
attrs[k] = v
295314

296315
data = indexing.LazilyOuterIndexedArray(
297-
RasterioArrayWrapper(manager, lock))
316+
RasterioArrayWrapper(manager, lock, vrt_params))
298317

299318
# this lets you write arrays loaded with rasterio
300319
data = indexing.CopyOnWriteArray(data)

xarray/conventions.py

+33-1
Original file line numberDiff line numberDiff line change
@@ -320,11 +320,39 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True,
320320
return Variable(dimensions, data, attributes, encoding=encoding)
321321

322322

323+
def _update_bounds_attributes(variables):
324+
"""Adds time attributes to time bounds variables.
325+
326+
Variables handling time bounds ("Cell boundaries" in the CF
327+
conventions) do not necessarily carry the necessary attributes to be
328+
decoded. This copies the attributes from the time variable to the
329+
associated boundaries.
330+
331+
See Also:
332+
333+
http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/
334+
cf-conventions.html#cell-boundaries
335+
336+
https://github.com/pydata/xarray/issues/2565
337+
"""
338+
339+
# For all time variables with bounds
340+
for v in variables.values():
341+
attrs = v.attrs
342+
has_date_units = 'units' in attrs and 'since' in attrs['units']
343+
if has_date_units and 'bounds' in attrs:
344+
if attrs['bounds'] in variables:
345+
bounds_attrs = variables[attrs['bounds']].attrs
346+
bounds_attrs.setdefault('units', attrs['units'])
347+
if 'calendar' in attrs:
348+
bounds_attrs.setdefault('calendar', attrs['calendar'])
349+
350+
323351
def decode_cf_variables(variables, attributes, concat_characters=True,
324352
mask_and_scale=True, decode_times=True,
325353
decode_coords=True, drop_variables=None):
326354
"""
327-
Decode a several CF encoded variables.
355+
Decode several CF encoded variables.
328356
329357
See: decode_cf_variable
330358
"""
@@ -350,6 +378,10 @@ def stackable(dim):
350378
drop_variables = []
351379
drop_variables = set(drop_variables)
352380

381+
# Time bounds coordinates might miss the decoding attributes
382+
if decode_times:
383+
_update_bounds_attributes(variables)
384+
353385
new_vars = OrderedDict()
354386
for k, v in iteritems(variables):
355387
if k in drop_variables:

xarray/core/common.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
592592
center=center)
593593

594594
def resample(self, indexer=None, skipna=None, closed=None, label=None,
595-
base=0, keep_attrs=None, **indexer_kwargs):
595+
base=0, keep_attrs=None, loffset=None, **indexer_kwargs):
596596
"""Returns a Resample object for performing resampling operations.
597597
598598
Handles both downsampling and upsampling. If any intervals contain no
@@ -612,6 +612,9 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,
612612
For frequencies that evenly subdivide 1 day, the "origin" of the
613613
aggregated intervals. For example, for '24H' frequency, base could
614614
range from 0 through 23.
615+
loffset : timedelta or str, optional
616+
Offset used to adjust the resampled time labels. Some pandas date
617+
offset strings are supported.
615618
keep_attrs : bool, optional
616619
If True, the object's attributes (`attrs`) will be copied from
617620
the original object to the new one. If False (default), the new
@@ -700,7 +703,9 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,
700703

701704
group = DataArray(dim_coord, coords=dim_coord.coords,
702705
dims=dim_coord.dims, name=RESAMPLE_DIM)
703-
grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base)
706+
# TODO: to_offset() call required for pandas==0.19.2
707+
grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base,
708+
loffset=pd.tseries.frequencies.to_offset(loffset))
704709
resampler = self._resample_cls(self, group=group, dim=dim_name,
705710
grouper=grouper,
706711
resample_dim=RESAMPLE_DIM)

xarray/core/groupby.py

+28
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import functools
44
import warnings
55

6+
import datetime
67
import numpy as np
78
import pandas as pd
89

@@ -154,6 +155,32 @@ def _unique_and_monotonic(group):
154155
return index.is_unique and index.is_monotonic
155156

156157

158+
def _apply_loffset(grouper, result):
159+
"""
160+
(copied from pandas)
161+
if loffset is set, offset the result index
162+
163+
This is NOT an idempotent routine, it will be applied
164+
exactly once to the result.
165+
166+
Parameters
167+
----------
168+
result : Series or DataFrame
169+
the result of resample
170+
"""
171+
172+
needs_offset = (
173+
isinstance(grouper.loffset, (pd.DateOffset, datetime.timedelta))
174+
and isinstance(result.index, pd.DatetimeIndex)
175+
and len(result.index) > 0
176+
)
177+
178+
if needs_offset:
179+
result.index = result.index + grouper.loffset
180+
181+
grouper.loffset = None
182+
183+
157184
class GroupBy(SupportsArithmetic):
158185
"""An object that implements the split-apply-combine pattern.
159186
@@ -235,6 +262,7 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
235262
raise ValueError('index must be monotonic for resampling')
236263
s = pd.Series(np.arange(index.size), index)
237264
first_items = s.groupby(grouper).first()
265+
_apply_loffset(grouper, first_items)
238266
full_index = first_items.index
239267
if first_items.isnull().any():
240268
first_items = first_items.dropna()

xarray/core/indexing.py

+5-11
Original file line numberDiff line numberDiff line change
@@ -1142,15 +1142,6 @@ def __init__(self, array):
11421142
'Trying to wrap {}'.format(type(array)))
11431143
self.array = array
11441144

1145-
def _ensure_ndarray(self, value):
1146-
# We always want the result of indexing to be a NumPy array. If it's
1147-
# not, then it really should be a 0d array. Doing the coercion here
1148-
# instead of inside variable.as_compatible_data makes it less error
1149-
# prone.
1150-
if not isinstance(value, np.ndarray):
1151-
value = utils.to_0d_array(value)
1152-
return value
1153-
11541145
def _indexing_array_and_key(self, key):
11551146
if isinstance(key, OuterIndexer):
11561147
array = self.array
@@ -1160,7 +1151,10 @@ def _indexing_array_and_key(self, key):
11601151
key = key.tuple
11611152
elif isinstance(key, BasicIndexer):
11621153
array = self.array
1163-
key = key.tuple
1154+
# We want 0d slices rather than scalars. This is achieved by
1155+
# appending an ellipsis (see
1156+
# https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#detailed-notes). # noqa
1157+
key = key.tuple + (Ellipsis,)
11641158
else:
11651159
raise TypeError('unexpected key type: {}'.format(type(key)))
11661160

@@ -1171,7 +1165,7 @@ def transpose(self, order):
11711165

11721166
def __getitem__(self, key):
11731167
array, key = self._indexing_array_and_key(key)
1174-
return self._ensure_ndarray(array[key])
1168+
return array[key]
11751169

11761170
def __setitem__(self, key, value):
11771171
array, key = self._indexing_array_and_key(key)

0 commit comments

Comments
 (0)