@@ -205,6 +205,25 @@ def _scale_offset_decoding(data, scale_factor, add_offset, dtype):
205
205
return data
206
206
207
207
208
+ def _choose_float_dtype (dtype , has_offset ):
209
+ """Return a float dtype that can losslessly represent `dtype` values."""
210
+ # Keep float32 as-is. Upcast half-precision to single-precision,
211
+ # because float16 is "intended for storage but not computation"
212
+ if dtype .itemsize <= 4 and np .issubdtype (dtype , np .floating ):
213
+ return np .float32
214
+ # float32 can exactly represent all integers up to 24 bits
215
+ if dtype .itemsize <= 2 and np .issubdtype (dtype , np .integer ):
216
+ # A scale factor is entirely safe (vanishing into the mantissa),
217
+ # but a large integer offset could lead to loss of precision.
218
+ # Sensitivity analysis can be tricky, so we just use a float64
219
+ # if there's any offset at all - better unoptimised than wrong!
220
+ if not has_offset :
221
+ return np .float32
222
+ # For all other types and circumstances, we just use float64.
223
+ # (safe because eg. complex numbers are not supported in NetCDF)
224
+ return np .float64
225
+
226
+
208
227
class CFScaleOffsetCoder (VariableCoder ):
209
228
"""Scale and offset variables according to CF conventions.
210
229
@@ -216,7 +235,8 @@ def encode(self, variable, name=None):
216
235
dims , data , attrs , encoding = unpack_for_encoding (variable )
217
236
218
237
if 'scale_factor' in encoding or 'add_offset' in encoding :
219
- data = data .astype (dtype = np .float64 , copy = True )
238
+ dtype = _choose_float_dtype (data .dtype , 'add_offset' in encoding )
239
+ data = data .astype (dtype = dtype , copy = True )
220
240
if 'add_offset' in encoding :
221
241
data -= pop_to (encoding , attrs , 'add_offset' , name = name )
222
242
if 'scale_factor' in encoding :
@@ -230,7 +250,7 @@ def decode(self, variable, name=None):
230
250
if 'scale_factor' in attrs or 'add_offset' in attrs :
231
251
scale_factor = pop_to (attrs , encoding , 'scale_factor' , name = name )
232
252
add_offset = pop_to (attrs , encoding , 'add_offset' , name = name )
233
- dtype = np . float64
253
+ dtype = _choose_float_dtype ( data . dtype , 'add_offset' in attrs )
234
254
transform = partial (_scale_offset_decoding ,
235
255
scale_factor = scale_factor ,
236
256
add_offset = add_offset ,
0 commit comments