@@ -78,6 +78,71 @@ def __repr__(self) -> str:
78
78
)
79
79
80
80
81
class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin):
    """Decode arrays on the fly from non-native to native endianness

    This is useful for decoding arrays from netCDF3 files (which are all
    big endian) into native endianness, so they can be used with Cython
    functions, such as those found in bottleneck and pandas.

    >>> x = np.arange(5, dtype=">i2")

    >>> x.dtype
    dtype('>i2')

    >>> NativeEndiannessArray(x).dtype
    dtype('int16')

    >>> indexer = indexing.BasicIndexer((slice(None),))
    >>> NativeEndiannessArray(x)[indexer].dtype
    dtype('int16')
    """

    __slots__ = ("array",)

    def __init__(self, array) -> None:
        # Store the input after passing it through indexing.as_indexable.
        self.array = indexing.as_indexable(array)

    @property
    def dtype(self) -> np.dtype:
        """Native-byte-order counterpart of the wrapped array's dtype."""
        # Only flip the byte order. Rebuilding the dtype from
        # ``kind + str(itemsize)`` would drop datetime64/timedelta64 units
        # and miscount unicode widths (itemsize is in bytes, not characters).
        return np.dtype(self.array.dtype).newbyteorder("=")

    def __getitem__(self, key) -> np.ndarray:
        """Index the wrapped array and return the result in native byte order."""
        return np.asarray(self.array[key], dtype=self.dtype)
112
+
113
+
114
class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin):
    """Present an integer-typed array as booleans, converting on access.

    Handy when a netCDF variable stores boolean data with an integer type:
    the wrapper reports a ``bool`` dtype and casts each indexed result.

    >>> x = np.array([1, 0, 1, 1, 0], dtype="i1")

    >>> x.dtype
    dtype('int8')

    >>> BoolTypeArray(x).dtype
    dtype('bool')

    >>> indexer = indexing.BasicIndexer((slice(None),))
    >>> BoolTypeArray(x)[indexer].dtype
    dtype('bool')
    """

    __slots__ = ("array",)

    def __init__(self, array) -> None:
        indexable = indexing.as_indexable(array)
        self.array = indexable

    @property
    def dtype(self) -> np.dtype:
        """Always the boolean dtype, regardless of the wrapped array's dtype."""
        return np.dtype(bool)

    def __getitem__(self, key) -> np.ndarray:
        """Index the wrapped array and cast the selection to ``bool``."""
        selected = self.array[key]
        return np.asarray(selected, dtype=self.dtype)
144
+
145
+
81
146
def lazy_elemwise_func (array , func : Callable , dtype : np .typing .DTypeLike ):
82
147
"""Lazily apply an element-wise function to an array.
83
148
Parameters
@@ -159,27 +224,29 @@ def encode(self, variable: Variable, name: T_Name = None):
159
224
fv = encoding .get ("_FillValue" )
160
225
mv = encoding .get ("missing_value" )
161
226
162
- if (
163
- fv is not None
164
- and mv is not None
165
- and not duck_array_ops .allclose_or_equiv (fv , mv )
166
- ):
227
+ fv_exists = fv is not None
228
+ mv_exists = mv is not None
229
+
230
+ if not fv_exists and not mv_exists :
231
+ return variable
232
+
233
+ if fv_exists and mv_exists and not duck_array_ops .allclose_or_equiv (fv , mv ):
167
234
raise ValueError (
168
235
f"Variable { name !r} has conflicting _FillValue ({ fv } ) and missing_value ({ mv } ). Cannot encode data."
169
236
)
170
237
171
- if fv is not None :
238
+ if fv_exists :
172
239
# Ensure _FillValue is cast to same dtype as data's
173
240
encoding ["_FillValue" ] = dtype .type (fv )
174
241
fill_value = pop_to (encoding , attrs , "_FillValue" , name = name )
175
242
if not pd .isnull (fill_value ):
176
243
data = duck_array_ops .fillna (data , fill_value )
177
244
178
- if mv is not None :
245
+ if mv_exists :
179
246
# Ensure missing_value is cast to same dtype as data's
180
247
encoding ["missing_value" ] = dtype .type (mv )
181
248
fill_value = pop_to (encoding , attrs , "missing_value" , name = name )
182
- if not pd .isnull (fill_value ) and fv is None :
249
+ if not pd .isnull (fill_value ) and not fv_exists :
183
250
data = duck_array_ops .fillna (data , fill_value )
184
251
185
252
return Variable (dims , data , attrs , encoding , fastpath = True )
@@ -349,3 +416,99 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
349
416
return Variable (dims , data , attrs , encoding , fastpath = True )
350
417
else :
351
418
return variable
419
+
420
+
421
class DefaultFillvalueCoder(VariableCoder):
    """Give floating-point variables a NaN ``_FillValue`` when none is set."""

    def encode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Return ``variable`` with a NaN ``_FillValue`` attribute if it needs one.

        The variable is returned unchanged when ``_FillValue`` is already
        present in its attrs or encoding, or when its dtype is not floating.
        """
        dims, data, attrs, encoding = unpack_for_encoding(variable)

        fill_value_known = "_FillValue" in attrs or "_FillValue" in encoding
        if fill_value_known or not np.issubdtype(variable.dtype, np.floating):
            return variable

        # NaN is created with the variable's own scalar type.
        attrs["_FillValue"] = variable.dtype.type(np.nan)
        return Variable(dims, data, attrs, encoding, fastpath=True)

    def decode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Decoding is not provided by this coder."""
        raise NotImplementedError()
439
+
440
+
441
class BooleanCoder(VariableCoder):
    """Store booleans as ``i1`` integers tagged with a ``dtype`` attribute."""

    def encode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Cast a boolean variable to ``i1`` and record ``dtype="bool"`` in attrs.

        Variables that are not boolean, or that already carry a ``dtype``
        entry in their encoding or attrs, are returned unchanged.
        """
        if (
            not (variable.dtype == bool)
            or "dtype" in variable.encoding
            or "dtype" in variable.attrs
        ):
            return variable

        dims, data, attrs, encoding = unpack_for_encoding(variable)
        attrs["dtype"] = "bool"
        as_int8 = duck_array_ops.astype(data, dtype="i1", copy=True)
        return Variable(dims, as_int8, attrs, encoding, fastpath=True)

    def decode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Wrap data in ``BoolTypeArray`` when attrs carry ``dtype="bool"``.

        The ``dtype`` attribute is removed from the returned variable's attrs;
        variables without that marker are returned unchanged.
        """
        if variable.attrs.get("dtype", False) == "bool":
            dims, data, attrs, encoding = unpack_for_decoding(variable)
            attrs.pop("dtype")
            return Variable(dims, BoolTypeArray(data), attrs, encoding, fastpath=True)
        return variable
466
+
467
+
468
class EndianCoder(VariableCoder):
    """Decode Endianness to native."""

    def encode(
        self, variable: "Variable | None" = None, name: T_Name = None
    ) -> Variable:
        """Encoding is not provided by this coder.

        The parameters mirror the ``(variable, name)`` signature of the other
        coder methods, so a generic ``coder.encode(variable, name=name)`` call
        raises ``NotImplementedError`` rather than ``TypeError``. They stay
        optional so existing zero-argument calls behave as before.
        """
        raise NotImplementedError()

    def decode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Wrap non-native-endian data in ``NativeEndiannessArray``.

        Variables whose data dtype is already native are returned unchanged.
        """
        dims, data, attrs, encoding = unpack_for_decoding(variable)
        if not data.dtype.isnative:
            data = NativeEndiannessArray(data)
            return Variable(dims, data, attrs, encoding, fastpath=True)
        else:
            return variable
481
+
482
+
483
class NonStringCoder(VariableCoder):
    """Encode NonString variables if dtypes differ."""

    def encode(self, variable: Variable, name: T_Name = None) -> Variable:
        """Cast the data to the dtype requested in ``encoding["dtype"]``.

        Only acts when the encoding has a ``dtype`` entry that is neither
        ``"S1"`` nor ``str``; otherwise the variable is returned unchanged.
        The ``dtype`` entry is removed from the returned variable's encoding
        even when no cast turns out to be necessary.

        Warns with ``SerializationWarning`` when floating-point data is saved
        as an integer dtype and neither ``_FillValue`` nor ``missing_value``
        is present in the variable's attrs.
        """
        if "dtype" in variable.encoding and variable.encoding["dtype"] not in (
            "S1",
            str,
        ):
            dims, data, attrs, encoding = unpack_for_encoding(variable)
            dtype = np.dtype(encoding.pop("dtype"))
            if dtype != variable.dtype:
                if np.issubdtype(dtype, np.integer):
                    if (
                        np.issubdtype(variable.dtype, np.floating)
                        and "_FillValue" not in variable.attrs
                        and "missing_value" not in variable.attrs
                    ):
                        warnings.warn(
                            f"saving variable {name} with floating "
                            "point data as an integer dtype without "
                            "any _FillValue to use for NaNs",
                            SerializationWarning,
                            stacklevel=10,
                        )
                    # Round first so the integer cast goes to the nearest
                    # value instead of truncating.
                    data = np.around(data)
                data = data.astype(dtype=dtype)
            return Variable(dims, data, attrs, encoding, fastpath=True)
        else:
            return variable

    def decode(
        self, variable: "Variable | None" = None, name: T_Name = None
    ) -> Variable:
        """Decoding is not provided by this coder.

        The parameters mirror the ``(variable, name)`` signature of the other
        coder methods, so a generic ``coder.decode(variable, name=name)`` call
        raises ``NotImplementedError`` rather than ``TypeError``. They stay
        optional so existing zero-argument calls behave as before.
        """
        raise NotImplementedError()
0 commit comments