Skip to content

Commit 54a5fce

Browse files
committed
Merge remote-tracking branch 'upstream/master' into integrate_dim_arg
* upstream/master: Bugfix in list_engine (pydata#4811) Add drop_isel (pydata#4819) Fix RST. Remove the references to `_file_obj` outside low level code paths, change to `_close` (pydata#4809)
2 parents 4614d76 + 7dbbdca commit 54a5fce

23 files changed

+327
-87
lines changed

doc/api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ Indexing
126126
Dataset.isel
127127
Dataset.sel
128128
Dataset.drop_sel
129+
Dataset.drop_isel
129130
Dataset.head
130131
Dataset.tail
131132
Dataset.thin
@@ -307,6 +308,7 @@ Indexing
307308
DataArray.isel
308309
DataArray.sel
309310
DataArray.drop_sel
311+
DataArray.drop_isel
310312
DataArray.head
311313
DataArray.tail
312314
DataArray.thin

doc/whats-new.rst

+5-2
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ Breaking changes
3939
always be set such that ``int64`` values can be used. In the past, no units
4040
finer than "seconds" were chosen, which would sometimes mean that ``float64``
4141
values were required, which would lead to inaccurate I/O round-trips.
42-
- remove deprecated ``autoclose`` kwargs from :py:func:`open_dataset` (:pull: `4725`).
43-
By `Aureliana Barghini <https://github.com/aurghs>`_
42+
- remove deprecated ``autoclose`` kwargs from :py:func:`open_dataset` (:pull:`4725`).
43+
By `Aureliana Barghini <https://github.com/aurghs>`_.
4444

4545
Deprecations
4646
~~~~~~~~~~~~
@@ -87,6 +87,7 @@ Bug fixes
8787
- Expand user directory paths (e.g. ``~/``) in :py:func:`open_mfdataset` and
8888
:py:meth:`Dataset.to_zarr` (:issue:`4783`, :pull:`4795`).
8989
By `Julien Seguinot <https://github.com/juseg>`_.
90+
- Add :py:meth:`Dataset.drop_isel` and :py:meth:`DataArray.drop_isel` (:issue:`4658`, :pull:`4819`). By `Daniel Mesejo <https://github.com/mesejo>`_.
9091

9192
Documentation
9293
~~~~~~~~~~~~~
@@ -115,6 +116,8 @@ Internal Changes
115116
By `Maximilian Roos <https://github.com/max-sixty>`_.
116117
- Speed up attribute style access (e.g. ``ds.somevar`` instead of ``ds["somevar"]``) and tab completion
117118
in ipython (:issue:`4741`, :pull:`4742`). By `Richard Kleijn <https://github.com/rhkleijn>`_.
119+
- Added the ``set_close`` method to ``Dataset`` and ``DataArray`` for backends to specify how to voluntarily release
120+
all resources (:pull:`4809`). By `Alessandro Amici <https://github.com/alexamici>`_.
118121

119122
.. _whats-new.0.16.2:
120123

xarray/backends/api.py

+9-16
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ def maybe_decode_store(store, chunks):
522522

523523
else:
524524
ds2 = ds
525-
ds2._file_obj = ds._file_obj
525+
ds2.set_close(ds._close)
526526
return ds2
527527

528528
filename_or_obj = _normalize_path(filename_or_obj)
@@ -701,7 +701,7 @@ def open_dataarray(
701701
else:
702702
(data_array,) = dataset.data_vars.values()
703703

704-
data_array._file_obj = dataset._file_obj
704+
data_array.set_close(dataset._close)
705705

706706
# Reset names if they were changed during saving
707707
# to ensure that we can 'roundtrip' perfectly
@@ -715,17 +715,6 @@ def open_dataarray(
715715
return data_array
716716

717717

718-
class _MultiFileCloser:
719-
__slots__ = ("file_objs",)
720-
721-
def __init__(self, file_objs):
722-
self.file_objs = file_objs
723-
724-
def close(self):
725-
for f in self.file_objs:
726-
f.close()
727-
728-
729718
def open_mfdataset(
730719
paths,
731720
chunks=None,
@@ -918,14 +907,14 @@ def open_mfdataset(
918907
getattr_ = getattr
919908

920909
datasets = [open_(p, **open_kwargs) for p in paths]
921-
file_objs = [getattr_(ds, "_file_obj") for ds in datasets]
910+
closers = [getattr_(ds, "_close") for ds in datasets]
922911
if preprocess is not None:
923912
datasets = [preprocess(ds) for ds in datasets]
924913

925914
if parallel:
926915
# calling compute here will return the datasets/file_objs lists,
927916
# the underlying datasets will still be stored as dask arrays
928-
datasets, file_objs = dask.compute(datasets, file_objs)
917+
datasets, closers = dask.compute(datasets, closers)
929918

930919
# Combine all datasets, closing them in case of a ValueError
931920
try:
@@ -963,7 +952,11 @@ def open_mfdataset(
963952
ds.close()
964953
raise
965954

966-
combined._file_obj = _MultiFileCloser(file_objs)
955+
def multi_file_closer():
956+
for closer in closers:
957+
closer()
958+
959+
combined.set_close(multi_file_closer)
967960

968961
# read global attributes from the attrs_file or from the first dataset
969962
if attrs_file is not None:

xarray/backends/apiv2.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def _dataset_from_backend_dataset(
9090
**extra_tokens,
9191
)
9292

93-
ds._file_obj = backend_ds._file_obj
93+
ds.set_close(backend_ds._close)
9494

9595
# Ensure source filename always stored in dataset object (GH issue #2550)
9696
if "source" not in ds.encoding:

xarray/backends/cfgrib_.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,23 @@
55
from ..core import indexing
66
from ..core.utils import Frozen, FrozenDict, close_on_error
77
from ..core.variable import Variable
8-
from .common import AbstractDataStore, BackendArray, BackendEntrypoint
8+
from .common import (
9+
BACKEND_ENTRYPOINTS,
10+
AbstractDataStore,
11+
BackendArray,
12+
BackendEntrypoint,
13+
)
914
from .locks import SerializableLock, ensure_lock
1015
from .store import open_backend_dataset_store
1116

17+
try:
18+
import cfgrib
19+
20+
has_cfgrib = True
21+
except ModuleNotFoundError:
22+
has_cfgrib = False
23+
24+
1225
# FIXME: Add a dedicated lock, even if ecCodes is supposed to be thread-safe
1326
# in most circumstances. See:
1427
# https://confluence.ecmwf.int/display/ECC/Frequently+Asked+Questions
@@ -38,7 +51,6 @@ class CfGribDataStore(AbstractDataStore):
3851
"""
3952

4053
def __init__(self, filename, lock=None, **backend_kwargs):
41-
import cfgrib
4254

4355
if lock is None:
4456
lock = ECCODES_LOCK
@@ -129,3 +141,7 @@ def open_backend_dataset_cfgrib(
129141
cfgrib_backend = BackendEntrypoint(
130142
open_dataset=open_backend_dataset_cfgrib, guess_can_open=guess_can_open_cfgrib
131143
)
144+
145+
146+
if has_cfgrib:
147+
BACKEND_ENTRYPOINTS["cfgrib"] = cfgrib_backend

xarray/backends/common.py

+4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
import time
33
import traceback
4+
from typing import Dict
45

56
import numpy as np
67

@@ -349,3 +350,6 @@ def __init__(self, open_dataset, open_dataset_parameters=None, guess_can_open=No
349350
self.open_dataset = open_dataset
350351
self.open_dataset_parameters = open_dataset_parameters
351352
self.guess_can_open = guess_can_open
353+
354+
355+
BACKEND_ENTRYPOINTS: Dict[str, BackendEntrypoint] = {}

xarray/backends/h5netcdf_.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,12 @@
88
from ..core import indexing
99
from ..core.utils import FrozenDict, is_remote_uri, read_magic_number
1010
from ..core.variable import Variable
11-
from .common import BackendEntrypoint, WritableCFDataStore, find_root_and_group
11+
from .common import (
12+
BACKEND_ENTRYPOINTS,
13+
BackendEntrypoint,
14+
WritableCFDataStore,
15+
find_root_and_group,
16+
)
1217
from .file_manager import CachingFileManager, DummyFileManager
1318
from .locks import HDF5_LOCK, combine_locks, ensure_lock, get_write_lock
1419
from .netCDF4_ import (
@@ -20,6 +25,13 @@
2025
)
2126
from .store import open_backend_dataset_store
2227

28+
try:
29+
import h5netcdf
30+
31+
has_h5netcdf = True
32+
except ModuleNotFoundError:
33+
has_h5netcdf = False
34+
2335

2436
class H5NetCDFArrayWrapper(BaseNetCDF4Array):
2537
def get_array(self, needs_lock=True):
@@ -85,8 +97,6 @@ class H5NetCDFStore(WritableCFDataStore):
8597

8698
def __init__(self, manager, group=None, mode=None, lock=HDF5_LOCK, autoclose=False):
8799

88-
import h5netcdf
89-
90100
if isinstance(manager, (h5netcdf.File, h5netcdf.Group)):
91101
if group is None:
92102
root, group = find_root_and_group(manager)
@@ -122,7 +132,6 @@ def open(
122132
invalid_netcdf=None,
123133
phony_dims=None,
124134
):
125-
import h5netcdf
126135

127136
if isinstance(filename, bytes):
128137
raise ValueError(
@@ -375,3 +384,6 @@ def open_backend_dataset_h5netcdf(
375384
h5netcdf_backend = BackendEntrypoint(
376385
open_dataset=open_backend_dataset_h5netcdf, guess_can_open=guess_can_open_h5netcdf
377386
)
387+
388+
if has_h5netcdf:
389+
BACKEND_ENTRYPOINTS["h5netcdf"] = h5netcdf_backend

xarray/backends/netCDF4_.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from ..core.utils import FrozenDict, close_on_error, is_remote_uri
1313
from ..core.variable import Variable
1414
from .common import (
15+
BACKEND_ENTRYPOINTS,
1516
BackendArray,
1617
BackendEntrypoint,
1718
WritableCFDataStore,
@@ -23,6 +24,14 @@
2324
from .netcdf3 import encode_nc3_attr_value, encode_nc3_variable
2425
from .store import open_backend_dataset_store
2526

27+
try:
28+
import netCDF4
29+
30+
has_netcdf4 = True
31+
except ModuleNotFoundError:
32+
has_netcdf4 = False
33+
34+
2635
# This lookup table maps from dtype.byteorder to a readable endian
2736
# string used by netCDF4.
2837
_endian_lookup = {"=": "native", ">": "big", "<": "little", "|": "native"}
@@ -298,7 +307,6 @@ class NetCDF4DataStore(WritableCFDataStore):
298307
def __init__(
299308
self, manager, group=None, mode=None, lock=NETCDF4_PYTHON_LOCK, autoclose=False
300309
):
301-
import netCDF4
302310

303311
if isinstance(manager, netCDF4.Dataset):
304312
if group is None:
@@ -335,7 +343,6 @@ def open(
335343
lock_maker=None,
336344
autoclose=False,
337345
):
338-
import netCDF4
339346

340347
if isinstance(filename, pathlib.Path):
341348
filename = os.fspath(filename)
@@ -563,3 +570,7 @@ def open_backend_dataset_netcdf4(
563570
netcdf4_backend = BackendEntrypoint(
564571
open_dataset=open_backend_dataset_netcdf4, guess_can_open=guess_can_open_netcdf4
565572
)
573+
574+
575+
if has_netcdf4:
576+
BACKEND_ENTRYPOINTS["netcdf4"] = netcdf4_backend

xarray/backends/plugins.py

+1-23
Original file line numberDiff line numberDiff line change
@@ -2,33 +2,11 @@
22
import inspect
33
import itertools
44
import logging
5-
import typing as T
65
import warnings
76

87
import pkg_resources
98

10-
from .cfgrib_ import cfgrib_backend
11-
from .common import BackendEntrypoint
12-
from .h5netcdf_ import h5netcdf_backend
13-
from .netCDF4_ import netcdf4_backend
14-
from .pseudonetcdf_ import pseudonetcdf_backend
15-
from .pydap_ import pydap_backend
16-
from .pynio_ import pynio_backend
17-
from .scipy_ import scipy_backend
18-
from .store import store_backend
19-
from .zarr import zarr_backend
20-
21-
BACKEND_ENTRYPOINTS: T.Dict[str, BackendEntrypoint] = {
22-
"store": store_backend,
23-
"netcdf4": netcdf4_backend,
24-
"h5netcdf": h5netcdf_backend,
25-
"scipy": scipy_backend,
26-
"pseudonetcdf": pseudonetcdf_backend,
27-
"zarr": zarr_backend,
28-
"cfgrib": cfgrib_backend,
29-
"pydap": pydap_backend,
30-
"pynio": pynio_backend,
31-
}
9+
from .common import BACKEND_ENTRYPOINTS
3210

3311

3412
def remove_duplicates(backend_entrypoints):

xarray/backends/pseudonetcdf_.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,24 @@
33
from ..core import indexing
44
from ..core.utils import Frozen, FrozenDict, close_on_error
55
from ..core.variable import Variable
6-
from .common import AbstractDataStore, BackendArray, BackendEntrypoint
6+
from .common import (
7+
BACKEND_ENTRYPOINTS,
8+
AbstractDataStore,
9+
BackendArray,
10+
BackendEntrypoint,
11+
)
712
from .file_manager import CachingFileManager
813
from .locks import HDF5_LOCK, NETCDFC_LOCK, combine_locks, ensure_lock
914
from .store import open_backend_dataset_store
1015

16+
try:
17+
from PseudoNetCDF import pncopen
18+
19+
has_pseudonetcdf = True
20+
except ModuleNotFoundError:
21+
has_pseudonetcdf = False
22+
23+
1124
# pseudonetcdf can invoke netCDF libraries internally
1225
PNETCDF_LOCK = combine_locks([HDF5_LOCK, NETCDFC_LOCK])
1326

@@ -40,7 +53,6 @@ class PseudoNetCDFDataStore(AbstractDataStore):
4053

4154
@classmethod
4255
def open(cls, filename, lock=None, mode=None, **format_kwargs):
43-
from PseudoNetCDF import pncopen
4456

4557
keywords = {"kwargs": format_kwargs}
4658
# only include mode if explicitly passed
@@ -138,3 +150,7 @@ def open_backend_dataset_pseudonetcdf(
138150
open_dataset=open_backend_dataset_pseudonetcdf,
139151
open_dataset_parameters=open_dataset_parameters,
140152
)
153+
154+
155+
if has_pseudonetcdf:
156+
BACKEND_ENTRYPOINTS["pseudonetcdf"] = pseudonetcdf_backend

xarray/backends/pydap_.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,22 @@
44
from ..core.pycompat import integer_types
55
from ..core.utils import Frozen, FrozenDict, close_on_error, is_dict_like, is_remote_uri
66
from ..core.variable import Variable
7-
from .common import AbstractDataStore, BackendArray, BackendEntrypoint, robust_getitem
7+
from .common import (
8+
BACKEND_ENTRYPOINTS,
9+
AbstractDataStore,
10+
BackendArray,
11+
BackendEntrypoint,
12+
robust_getitem,
13+
)
814
from .store import open_backend_dataset_store
915

16+
try:
17+
import pydap.client
18+
19+
has_pydap = True
20+
except ModuleNotFoundError:
21+
has_pydap = False
22+
1023

1124
class PydapArrayWrapper(BackendArray):
1225
def __init__(self, array):
@@ -74,7 +87,6 @@ def __init__(self, ds):
7487

7588
@classmethod
7689
def open(cls, url, session=None):
77-
import pydap.client
7890

7991
ds = pydap.client.open_url(url, session=session)
8092
return cls(ds)
@@ -133,3 +145,7 @@ def open_backend_dataset_pydap(
133145
pydap_backend = BackendEntrypoint(
134146
open_dataset=open_backend_dataset_pydap, guess_can_open=guess_can_open_pydap
135147
)
148+
149+
150+
if has_pydap:
151+
BACKEND_ENTRYPOINTS["pydap"] = pydap_backend

0 commit comments

Comments
 (0)