1
1
import functools
2
2
import warnings
3
3
from collections import OrderedDict
4
+ from itertools import izip
4
5
5
6
import numpy as np
6
7
7
8
import conventions
8
9
import dataset
9
10
import dataset_array
11
+ import groupby
10
12
import ops
11
13
import utils
12
14
from common import AbstractArray
@@ -350,6 +352,9 @@ def _collapse(self, f, dim, **kwargs):
350
352
+ ': ' + f .__name__ )
351
353
return new_var
352
354
355
def groupby(self, group_name, group_array, squeeze=True):
    """Return a ``groupby.GroupBy`` object built around this variable.

    This is a thin convenience wrapper: `group_name` and `group_array`
    are forwarded positionally and `squeeze` as a keyword, all unchanged,
    to ``groupby.GroupBy``. Their meaning is defined by that class.
    """
    # Inside the function body `groupby` is the module imported at file
    # level; the method's own name is not in scope here.
    grouped = groupby.GroupBy(self, group_name, group_array,
                              squeeze=squeeze)
    return grouped
357
+
353
358
def aggregate (self , func , new_dim_name , group_by , ** kwargs ):
354
359
"""Aggregate this variable by applying `func` to grouped elements
355
360
@@ -396,7 +401,7 @@ def aggregate(self, func, new_dim_name, group_by, **kwargs):
396
401
397
402
@classmethod
def from_stack(cls, variables, dimension='stacked_dimension',
               stacked_indexers=None, length=None, template=None):
    """Stack variables along a new or existing dimension to form a new
    variable

    Parameters
    ----------
    variables : iterable of variables
        Arrays to stack together. Each variable is expected to have
        matching dimensions and shape except for along the stacked
        dimension. The iterable is only materialized into a list when
        `length` or `stacked_indexers` has to be derived from it.
    dimension : str or DatasetArray, optional
        Name of the dimension to stack along. This can either be a new
        dimension name, in which case it is added along axis=0, or an
        existing dimension name, in which case the location of the
        dimension is unchanged. Where to insert the new dimension is
        determined by the first variable. If a non-string object is
        passed, the single entry of its ``dimensions`` is used as the
        name and its ``size`` replaces `length`.
    stacked_indexers : iterable of indexers, optional
        One indexer per variable, applied along the stacked axis to
        select where that variable's data is written. If omitted,
        consecutive slices are generated from each variable's extent
        along `dimension` (1 for variables that do not have it).
    length : int, optional
        Length of the new dimension. This is used to allocate the new
        data array for the stacked variable before iterating over all
        the variables. If omitted, it is the sum of the variables'
        extents along `dimension`.
    template : object with an ``attributes`` mapping, optional
        If given, its attributes are passed to the constructor of the
        result and the per-variable checks (transpose to a common
        dimension order, dimension consistency, removal of incompatible
        attributes) are skipped.

    Returns
    -------
    stacked : instance of `cls`
        Stacked variable formed by stacking all the supplied variables
        along the new dimension.

    Raises
    ------
    ValueError
        If the indexers are generated here and the summed extents do
        not equal `length`, or if (without a template) a variable that
        lacks `dimension` does not have exactly the remaining
        dimensions of the stacked variable, in the same order.
    """
    if not isinstance(dimension, basestring):
        # a dimension object carries both the name and the final length
        length = dimension.size
        dimension, = dimension.dimensions

    if length is None or stacked_indexers is None:
        # so much for lazy evaluation! we need to look at all the
        # variables to figure out the indexers and/or dimensions of the
        # stacked variable
        variables = list(variables)
        steps = [var.shape[var.dimensions.index(dimension)]
                 if dimension in var.dimensions else 1
                 for var in variables]
        if length is None:
            length = sum(steps)
        if stacked_indexers is None:
            stacked_indexers = []
            i = 0
            for step in steps:
                stacked_indexers.append(slice(i, i + step))
                i += step
            if i != length:
                raise ValueError('actual length of stacked variables '
                                 'along %s is %r but expected length was '
                                 '%s' % (dimension, i, length))

    # initialize the stacked variable with empty data
    first_var, variables = groupby.peek_at(variables)
    if dimension in first_var.dimensions:
        axis = first_var.dimensions.index(dimension)
        shape = tuple(length if n == axis else s
                      for n, s in enumerate(first_var.shape))
        dims = first_var.dimensions
    else:
        axis = 0
        shape = (length,) + first_var.shape
        dims = (dimension,) + first_var.dimensions
    attr = OrderedDict() if template is None else template.attributes

    stacked = cls(dims, np.empty(shape, dtype=first_var.dtype), attr)
    stacked.attributes.update(first_var.attributes)

    # required dimensions (including order) for any variable that does
    # not itself have the stacked dimension
    alt_dims = tuple(d for d in dims if d != dimension)

    # copy in the data from the variables
    # NOTE(review): izip stops at the shorter of the two iterables, so a
    # mismatch between the number of variables and indexers is not
    # detected here.
    for var, indexer in izip(variables, stacked_indexers):
        if template is None:
            # do sanity checks if we don't have a template
            if dimension in var.dimensions:
                # transpose verifies that the dimensions are equivalent
                if var.dimensions != stacked.dimensions:
                    var = var.transpose(*stacked.dimensions)
            elif var.dimensions != alt_dims:
                raise ValueError('inconsistent dimensions')
            utils.remove_incompatible_items(stacked.attributes,
                                            var.attributes)

        key = tuple(indexer if n == axis else slice(None)
                    for n in range(stacked.ndim))

        data = var.data
        # A scalar indexer removes the stacked axis from the target, so
        # the data already lines up. Any other indexer (slice, sequence,
        # array) keeps the axis, and a variable without the stacked
        # dimension is then one axis short. Relying on broadcasting only
        # works when the stacked axis is axis 0; elsewhere the trailing
        # axes are mis-aligned, so insert the missing axis explicitly.
        scalar_index = (not isinstance(indexer, slice)
                        and np.ndim(indexer) == 0)
        if dimension not in var.dimensions and not scalar_index:
            data = np.expand_dims(data, axis)
        stacked.data[key] = data

    return stacked
485
493
486
- def apply (self , func , * args , ** kwargs ):
487
- """Apply `func` with *args and **kwargs to this variable's data and
488
- return the result as a new variable with the same dimensions
489
- """
490
- data = np .asarray (func (self .data , * args , ** kwargs ))
491
- return type (self )(self .dimensions , data , self .attributes )
492
-
493
494
def __array_wrap__(self, result):
    """Wrap `result` in a new object of this instance's own type.

    The new object is constructed from this instance's ``dimensions``,
    the given `result` as data, and this instance's ``attributes``.
    """
    # use the runtime type so that subclasses get instances of themselves
    wrapper_type = type(self)
    dimensions, attributes = self.dimensions, self.attributes
    return wrapper_type(dimensions, result, attributes)
495
496
0 commit comments