1
- import os
2
- from typing import Dict , Sequence
1
+ import pathlib
2
+ from typing import Sequence
3
3
4
- import netCDF4
5
4
from xarray import open_dataset
6
5
7
- from .datatree import DataNode , DataTree , PathType
6
+ from .datatree import DataTree , PathType
8
7
9
8
10
9
def _ds_or_none (ds ):
@@ -14,37 +13,87 @@ def _ds_or_none(ds):
14
13
return None
15
14
16
15
17
- def _open_group_children_recursively (filename , node , ncgroup , chunks , ** kwargs ):
18
- for g in ncgroup .groups .values ():
16
+ def _iter_zarr_groups (root , parrent = "" ):
17
+ parrent = pathlib .Path (parrent )
18
+ for path , group in root .groups ():
19
+ gpath = parrent / path
20
+ yield str (gpath )
21
+ yield from _iter_zarr_groups (group , parrent = gpath )
19
22
20
- # Open and add this node's dataset to the tree
21
- name = os .path .basename (g .path )
22
- ds = open_dataset (filename , group = g .path , chunks = chunks , ** kwargs )
23
- ds = _ds_or_none (ds )
24
- child_node = DataNode (name , ds )
25
- node .add_child (child_node )
26
23
27
- _open_group_children_recursively (filename , node [name ], g , chunks , ** kwargs )
24
+ def _iter_nc_groups (root , parrent = "" ):
25
+ parrent = pathlib .Path (parrent )
26
+ for path , group in root .groups .items ():
27
+ gpath = parrent / path
28
+ yield str (gpath )
29
+ yield from _iter_nc_groups (group , parrent = gpath )
28
30
29
31
30
- def open_datatree (filename : str , chunks : Dict = None , ** kwargs ) -> DataTree :
32
+ def _get_nc_dataset_class (engine ):
33
+ if engine == "netcdf4" :
34
+ from netCDF4 import Dataset
35
+ elif engine == "h5netcdf" :
36
+ from h5netcdf import Dataset
37
+ elif engine is None :
38
+ try :
39
+ from netCDF4 import Dataset
40
+ except ImportError :
41
+ from h5netcdf import Dataset
42
+ else :
43
+ raise ValueError (f"unsupported engine: { engine } " )
44
+ return Dataset
45
+
46
+
47
def open_datatree(filename_or_obj, engine=None, **kwargs) -> "DataTree":
    """
    Open and decode a dataset from a file or file-like object, creating one Tree node for each group in the file.

    Parameters
    ----------
    filename_or_obj : str, Path, file-like, or DataStore
        Strings and Path objects are interpreted as a path to a netCDF file or Zarr store.
    engine : str, optional
        Xarray backend engine to use. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`.
    kwargs :
        Additional keyword arguments passed to ``xarray.open_dataset`` for each group.

    Returns
    -------
    DataTree

    Raises
    ------
    ValueError
        If ``engine`` is not one of the supported engines.
    """
    if engine == "zarr":
        return _open_datatree_zarr(filename_or_obj, **kwargs)
    if engine in (None, "netcdf4", "h5netcdf"):
        return _open_datatree_netcdf(filename_or_obj, engine=engine, **kwargs)
    # Fail loudly instead of silently returning None for an unknown engine.
    raise ValueError(f"unsupported engine: {engine}")
69
+
70
+
71
def _open_datatree_netcdf(filename: str, **kwargs) -> DataTree:
    """
    Build a DataTree from a netCDF file, with one node per group.

    Parameters
    ----------
    filename : str
        Path of the netCDF file to read.
    kwargs :
        Forwarded unchanged to every ``open_dataset`` call. The optional
        ``engine`` entry also selects the library used to walk the groups.

    Returns
    -------
    DataTree
        Tree whose ``"root"`` entry holds the file's root dataset and whose
        other entries are keyed by group path.
    """
    dataset_cls = _get_nc_dataset_class(kwargs.get("engine", None))

    # The low-level handle is only used to discover the group hierarchy; the
    # data of each group is loaded through open_dataset.
    with dataset_cls(filename, mode="r") as nc_root:
        root_ds = _ds_or_none(open_dataset(filename, **kwargs))
        tree = DataTree(data_objects={"root": root_ds})
        for group_path in _iter_nc_groups(nc_root):
            group_ds = open_dataset(filename, group=group_path, **kwargs)
            tree[group_path] = _ds_or_none(group_ds)
    return tree
82
+
83
+
84
def _open_datatree_zarr(store, **kwargs) -> DataTree:
    """
    Build a DataTree from a Zarr store, with one node per group.

    Parameters
    ----------
    store :
        Zarr store (or path to one), passed both to ``zarr.open_group`` and to
        ``open_dataset``.
    kwargs :
        Forwarded unchanged to every ``open_dataset`` call.

    Returns
    -------
    DataTree
        Tree whose ``"root"`` entry holds the store's root dataset and whose
        other entries are keyed by group path. Groups for which zarr raises
        ``PathNotFoundError`` are stored as ``None``.
    """
    # zarr is imported lazily so it is only required when this engine is used.
    import zarr

    # The zarr handle is only used to discover the group hierarchy; the data
    # of each group is loaded through open_dataset.
    with zarr.open_group(store, mode="r") as zarr_root:
        root_ds = _ds_or_none(open_dataset(store, engine="zarr", **kwargs))
        tree = DataTree(data_objects={"root": root_ds})
        for group_path in _iter_zarr_groups(zarr_root):
            try:
                tree[group_path] = _ds_or_none(
                    open_dataset(store, engine="zarr", group=group_path, **kwargs)
                )
            except zarr.errors.PathNotFoundError:
                tree[group_path] = None
    return tree
49
98
50
99
@@ -80,8 +129,10 @@ def _maybe_extract_group_kwargs(enc, group):
80
129
return None
81
130
82
131
83
- def _create_empty_group (filename , group , mode ):
84
- with netCDF4 .Dataset (filename , mode = mode ) as rootgrp :
132
def _create_empty_netcdf_group(filename, group, mode, engine):
    """
    Create a group without any contents in a netCDF file.

    Parameters
    ----------
    filename :
        Path of the netCDF file to open.
    group :
        Name/path of the group to create.
    mode :
        File mode passed to the backend ``Dataset`` constructor.
    engine :
        Backend selector handed to ``_get_nc_dataset_class``.
    """
    dataset_cls = _get_nc_dataset_class(engine)
    with dataset_cls(filename, mode=mode) as nc_root:
        nc_root.createGroup(group)
86
137
87
138
@@ -91,13 +142,14 @@ def _datatree_to_netcdf(
91
142
mode : str = "w" ,
92
143
encoding = None ,
93
144
unlimited_dims = None ,
94
- ** kwargs
145
+ ** kwargs ,
95
146
):
96
147
97
148
if kwargs .get ("format" , None ) not in [None , "NETCDF4" ]:
98
149
raise ValueError ("to_netcdf only supports the NETCDF4 format" )
99
150
100
- if kwargs .get ("engine" , None ) not in [None , "netcdf4" , "h5netcdf" ]:
151
+ engine = kwargs .get ("engine" , None )
152
+ if engine not in [None , "netcdf4" , "h5netcdf" ]:
101
153
raise ValueError ("to_netcdf only supports the netcdf4 and h5netcdf engines" )
102
154
103
155
if kwargs .get ("group" , None ) is not None :
@@ -118,14 +170,52 @@ def _datatree_to_netcdf(
118
170
ds = node .ds
119
171
group_path = node .pathstr .replace (dt .root .pathstr , "" )
120
172
if ds is None :
121
- _create_empty_group (filepath , group_path , mode )
173
+ _create_empty_netcdf_group (filepath , group_path , mode , engine )
122
174
else :
175
+
123
176
ds .to_netcdf (
124
177
filepath ,
125
178
group = group_path ,
126
179
mode = mode ,
127
180
encoding = _maybe_extract_group_kwargs (encoding , dt .pathstr ),
128
181
unlimited_dims = _maybe_extract_group_kwargs (unlimited_dims , dt .pathstr ),
129
- ** kwargs
182
+ ** kwargs ,
130
183
)
131
184
mode = "a"
185
+
186
+
187
def _create_empty_zarr_group(store, group, mode):
    """
    Create a group without any contents in a Zarr store.

    Parameters
    ----------
    store :
        Zarr store (or path to one) to open.
    group :
        Name/path of the group to create; it is created with
        ``overwrite=True``.
    mode :
        Mode passed to ``zarr.open_group``.
    """
    # zarr is imported lazily so it is only required when actually writing.
    import zarr

    zarr.open_group(store, mode=mode).create_group(group, overwrite=True)
192
+
193
+
194
def _datatree_to_zarr(dt: DataTree, store, mode: str = "w", encoding=None, **kwargs):
    """
    Write every node of a DataTree to a Zarr store, one group per node.

    Parameters
    ----------
    dt : DataTree
        Tree to serialize; all nodes in ``dt.subtree`` are written.
    store :
        Target Zarr store (or path), passed to ``Dataset.to_zarr``.
    mode : str, default "w"
        Mode used for the first group. If it contains ``"w"`` it is switched
        to ``"a"`` afterwards so later groups do not wipe earlier ones.
    encoding : dict, optional
        Mapping from a node's path string to the encoding for that node's
        dataset. Nodes without an entry are written without explicit encoding
        (presumably ``_maybe_extract_group_kwargs`` returns None for missing
        keys -- only its final ``return None`` is visible here; confirm).
    kwargs :
        Additional keyword arguments forwarded to ``Dataset.to_zarr``.

    Raises
    ------
    NotImplementedError
        If a ``group`` is given or ``compute=False`` is requested.
    """
    if kwargs.get("group", None) is not None:
        raise NotImplementedError(
            "specifying a root group for the tree has not been implemented"
        )

    if not kwargs.get("compute", True):
        raise NotImplementedError("compute=False has not been implemented yet")

    if encoding is None:
        encoding = {}

    for node in dt.subtree:
        ds = node.ds
        # Path of this node relative to the root of the tree.
        group_path = node.pathstr.replace(dt.root.pathstr, "")
        if ds is None:
            _create_empty_zarr_group(store, group_path, mode)
        else:
            ds.to_zarr(
                store,
                group=group_path,
                mode=mode,
                # Look the encoding up under the path of the node being
                # written; using the tree's own path here would hand every
                # group the same (root) encoding.
                encoding=_maybe_extract_group_kwargs(encoding, node.pathstr),
                **kwargs,
            )
        # After the first write, append so existing groups are preserved.
        if "w" in mode:
            mode = "a"
0 commit comments