@@ -147,23 +147,48 @@ def nan_safe_num2date(num):
147
147
return dates
148
148
149
149
150
- def guess_time_units (dates ):
151
- """Given an array of dates suitable for input to `pandas.DatetimeIndex`,
152
- returns a CF compatible time-unit string of the form "{time_unit} since
153
- {date[0]}", where `time_unit` is 'days', 'hours', 'minutes' or 'seconds'
154
- (the first one that can evenly divide all unique time deltas in `dates`)
150
def decode_cf_timedelta(num_timedeltas, units):
    """Given an array of numeric timedeltas in netCDF format, convert it into a
    numpy timedelta64[ns] array.

    Parameters
    ----------
    num_timedeltas : array-like
        Numeric durations counted in multiples of `units`. Any number of
        dimensions is accepted.
    units : str
        Name of the time unit. 'seconds', 'minutes' and 'hours' are translated
        to the pandas aliases 's', 'm' and 'h'; any other value is handed to
        pandas unchanged.

    Returns
    -------
    numpy.ndarray
        Array of dtype timedelta64[ns] with the same shape as the input.
    """
    num_timedeltas = np.asarray(num_timedeltas)
    # rename 'seconds', 'minutes' and 'hours' to formats pandas recognizes
    units = {'seconds': 's', 'minutes': 'm', 'hours': 'h'}.get(units, units)
    # pandas only converts 1-d input, so flatten first and restore the shape
    # afterwards. ``.values`` yields the plain numpy array that the removed
    # ``box=False`` keyword used to request.
    flat = pd.to_timedelta(num_timedeltas.ravel(), unit=units)
    return flat.values.astype('timedelta64[ns]').reshape(num_timedeltas.shape)
157
+
158
+
159
+ TIME_UNITS = set (['days' , 'hours' , 'minutes' , 'seconds' ])
160
+
161
+ def _infer_time_units_from_diff (unique_timedeltas ):
158
162
for time_unit , delta in [('days' , 86400 ), ('hours' , 3600 ),
159
163
('minutes' , 60 ), ('seconds' , 1 )]:
160
164
unit_delta = np .timedelta64 (10 ** 9 * delta , 'ns' )
161
165
diffs = unique_timedeltas / unit_delta
162
166
if np .all (diffs == diffs .astype (int )):
163
- break
164
- else :
165
- raise ValueError ('could not automatically determine time units' )
166
- return '%s since %s' % (time_unit , dates [0 ])
167
+ return time_unit
168
+ raise ValueError ('could not automatically determine time units' )
169
+
170
+
171
def infer_datetime_units(dates):
    """Given an array of datetimes, returns a CF compatible time-unit string of
    the form "{time_unit} since {reference}", where `time_unit` is 'days',
    'hours', 'minutes' or 'seconds' (the first one that can evenly divide all
    unique time deltas in `dates`).

    Parameters
    ----------
    dates : array-like
        Datetimes in any form accepted by `pandas.to_datetime`; any number of
        dimensions. Missing entries are ignored.

    Returns
    -------
    str
        The unit string. The reference is the first non-missing date, or
        1970-01-01 when there is no non-missing date at all.

    Raises
    ------
    ValueError
        Propagated from `_infer_time_units_from_diff` when no unit fits.
    """
    # Flatten before converting: pandas only accepts 1-d input. ``.values``
    # yields the plain numpy array that the removed ``box=False`` keyword used
    # to request.
    dates = pd.to_datetime(np.asarray(dates).ravel()).values
    valid = dates[pd.notnull(dates)]
    unique_timedeltas = np.unique(np.diff(valid))
    units = _infer_time_units_from_diff(unique_timedeltas)
    # Anchor on a real date; a leading missing value would otherwise produce
    # "... since NaT", and empty input would raise IndexError.
    reference = valid[0] if len(valid) > 0 else '1970-01-01'
    return '%s since %s' % (units, pd.Timestamp(reference))
181
+
182
+
183
def infer_timedelta_units(deltas):
    """Given an array of timedeltas, returns a CF compatible time-unit from
    {'days', 'hours', 'minutes', 'seconds'} (the first one that can evenly
    divide all unique time deltas in `deltas`).

    Parameters
    ----------
    deltas : array-like
        Timedeltas in any form accepted by `pandas.to_timedelta`; any number
        of dimensions. Missing entries are ignored.

    Returns
    -------
    str
        The inferred unit name.

    Raises
    ------
    ValueError
        Propagated from `_infer_time_units_from_diff` when no unit fits.
    """
    # Flatten before converting: pandas only accepts 1-d input. ``.values``
    # yields the plain numpy array that the removed ``box=False`` keyword used
    # to request.
    deltas = pd.to_timedelta(np.asarray(deltas).ravel()).values
    unique_timedeltas = np.unique(deltas[pd.notnull(deltas)])
    return _infer_time_units_from_diff(unique_timedeltas)
167
192
168
193
169
194
def nctime_to_nptime (times ):
@@ -193,7 +218,7 @@ def encode_cf_datetime(dates, units=None, calendar=None):
193
218
dates = np .asarray (dates )
194
219
195
220
if units is None :
196
- units = guess_time_units (dates )
221
+ units = infer_datetime_units (dates )
197
222
if calendar is None :
198
223
calendar = 'proleptic_gregorian'
199
224
@@ -211,6 +236,21 @@ def encode_datetime(d):
211
236
return (num , units , calendar )
212
237
213
238
239
def encode_cf_timedelta(timedeltas, units=None):
    """Encode timedeltas as plain numbers counted in a CF time unit.

    Parameters
    ----------
    timedeltas : array-like of timedelta64
        Durations to encode. Lists and other array-likes are coerced with
        `numpy.asarray`.
    units : {'seconds', 'minutes', 'hours', 'days'}, optional
        Unit to count in. When omitted it is chosen by
        `infer_timedelta_units`.

    Returns
    -------
    num : numpy.ndarray
        int64 counts when every non-missing value is a whole multiple of the
        unit and nothing is missing; otherwise float, with NaN marking
        missing entries.
    units : str
        The unit actually used.

    Raises
    ------
    KeyError
        If `units` is not one of the four supported names.
    """
    if units is None:
        units = infer_timedelta_units(timedeltas)

    np_unit = {'seconds': 's', 'minutes': 'm', 'hours': 'h', 'days': 'D'}[units]
    # Coerce up front so that plain sequences work as well as ndarrays.
    timedeltas = np.asarray(timedeltas, dtype='timedelta64[ns]')
    missing = pd.isnull(timedeltas)

    ticks = timedeltas.astype(np.int64)
    unit_ticks = int(np.timedelta64(1, np_unit)
                     .astype('timedelta64[ns]')
                     .astype(np.int64))
    if np.all(ticks[~missing] % unit_ticks == 0):
        num = ticks // unit_ticks
    else:
        # Explicitly requested units that do not divide the data evenly:
        # keep the fractional part rather than silently truncating it.
        num = ticks / float(unit_ticks)

    if np.any(missing):
        # Integers cannot represent a missing value, so switch to float.
        num = np.where(missing, np.nan, num)

    return (num, units)
252
+
253
+
214
254
class MaskedAndScaledArray (utils .NDArrayMixin ):
215
255
"""Wrapper around array-like objects to create a new indexable object where
216
256
values, when accessed, are automatically scaled and masked according to
@@ -288,6 +328,23 @@ def __getitem__(self, key):
288
328
calendar = self .calendar )
289
329
290
330
331
class DecodedCFTimedeltaArray(utils.NDArrayMixin):
    """Lazy view over an array of numeric CF timedeltas.

    Indexing the wrapper indexes the wrapped array and passes the selection
    through decode_cf_timedelta, so values come back as timedelta objects.
    """
    def __init__(self, array, units):
        # array: the wrapped array-like of numeric durations
        # units: unit name forwarded to decode_cf_timedelta on every access
        self.array, self.units = array, units

    @property
    def dtype(self):
        """Dtype reported for the decoded values (always timedelta64[ns])."""
        return np.dtype('timedelta64[ns]')

    def __getitem__(self, key):
        selected = self.array[key]
        return decode_cf_timedelta(selected, units=self.units)
346
+
347
+
291
348
class CharToStringArray (utils .NDArrayMixin ):
292
349
"""Wrapper around array-like objects to create a new indexable object where
293
350
values, when accessed, are automatically concatenated along the last
@@ -358,7 +415,7 @@ def char_to_string(arr):
358
415
return arr .view (kind + str (arr .shape [- 1 ]))[..., 0 ]
359
416
360
417
361
- def _safe_setitem (dest , key , value ):
418
+ def safe_setitem (dest , key , value ):
362
419
if key in dest :
363
420
raise ValueError ('Failed hard to prevent overwriting key %r' % key )
364
421
dest [key ] = value
@@ -370,9 +427,9 @@ def pop_to(source, dest, key, default=None):
370
427
None values are not passed on. If k already exists in dest an
371
428
error is raised.
372
429
"""
373
- value = source .pop (key , default )
430
+ value = source .pop (key , None )
374
431
if value is not None :
375
- _safe_setitem (dest , key , value )
432
+ safe_setitem (dest , key , value )
376
433
return value
377
434
378
435
@@ -384,16 +441,21 @@ def maybe_encode_datetime(var):
384
441
if (np .issubdtype (var .dtype , np .datetime64 )
385
442
or (var .dtype .kind == 'O'
386
443
and isinstance (var .values .flat [0 ], datetime ))):
387
-
388
444
dims , values , attrs , encoding = _var_as_tuple (var )
389
- if 'units' in attrs or 'calendar' in attrs :
390
- raise ValueError (
391
- "Failed hard to prevent overwriting 'units' or 'calendar'" )
392
-
393
445
(values , units , calendar ) = encode_cf_datetime (
394
446
values , encoding .pop ('units' , None ), encoding .pop ('calendar' , None ))
395
- attrs ['units' ] = units
396
- attrs ['calendar' ] = calendar
447
+ safe_setitem (attrs , 'units' , units )
448
+ safe_setitem (attrs , 'calendar' , calendar )
449
+ var = Variable (dims , values , attrs , encoding )
450
+ return var
451
+
452
+
453
def maybe_encode_timedelta(var):
    """Encode a timedelta64 variable as numbers plus a 'units' attribute.

    Variables whose dtype is not a timedelta64 subtype are returned as-is.
    Otherwise the values are converted with encode_cf_timedelta, using the
    'units' entry popped from the variable's encoding when present, and a new
    Variable carrying the resulting 'units' attribute is returned.
    safe_setitem raises ValueError if the attributes already hold 'units'.
    """
    if not np.issubdtype(var.dtype, np.timedelta64):
        return var

    dims, values, attrs, encoding = _var_as_tuple(var)
    requested_units = encoding.pop('units', None)
    values, units = encode_cf_timedelta(values, requested_units)
    safe_setitem(attrs, 'units', units)
    return Variable(dims, values, attrs, encoding)
399
461
@@ -452,7 +514,7 @@ def _infer_dtype(array):
452
514
else :
453
515
dtype = np .array (array .flat [0 ]).dtype
454
516
if dtype .kind in ['S' , 'U' ]:
455
- # don't just use inferred_dtype to avoid truncating arrays to
517
+ # don't just use inferred dtype to avoid truncating arrays to
456
518
# the length of their first element
457
519
dtype = np .dtype (dtype .kind )
458
520
elif dtype .kind == 'O' :
@@ -511,6 +573,7 @@ def encode_cf_variable(var, needs_copy=True):
511
573
A variable which has been encoded as described above.
512
574
"""
513
575
var = maybe_encode_datetime (var )
576
+ var = maybe_encode_timedelta (var )
514
577
var , needs_copy = maybe_encode_offset_and_scale (var , needs_copy )
515
578
var , needs_copy = maybe_encode_fill_value (var , needs_copy )
516
579
var = maybe_encode_dtype (var , needs_copy )
@@ -585,11 +648,16 @@ def decode_cf_variable(var, concat_characters=True, mask_and_scale=True,
585
648
data = MaskedAndScaledArray (data , fill_value , scale_factor ,
586
649
add_offset , dtype )
587
650
588
- if decode_times :
589
- if 'units' in attributes and 'since' in attributes ['units' ]:
651
+ if decode_times and 'units' in attributes :
652
+ if 'since' in attributes ['units' ]:
653
+ # datetime
590
654
units = pop_to (attributes , encoding , 'units' )
591
655
calendar = pop_to (attributes , encoding , 'calendar' )
592
656
data = DecodedCFDatetimeArray (data , units , calendar )
657
+ elif attributes ['units' ] in TIME_UNITS :
658
+ # timedelta
659
+ units = pop_to (attributes , encoding , 'units' )
660
+ data = DecodedCFTimedeltaArray (data , units )
593
661
594
662
return Variable (dimensions , indexing .LazilyIndexedArray (data ),
595
663
attributes , encoding = encoding )
0 commit comments