@@ -88,6 +88,28 @@ def __repr__(self):
88
88
list (self .input_core_dims ),
89
89
list (self .output_core_dims )))
90
90
91
def __str__(self):
    """Render the signature as ``(in dims),...->(out dims),...``.

    Each argument's core dimensions are comma-joined inside parentheses;
    the parenthesized groups are comma-joined per side, and the input and
    output sides are separated by ``->``. Dimension names are joined
    directly, so they must be strings.
    """
    def _format_side(groups):
        # One parenthesized, comma-separated group per argument.
        rendered = []
        for core_dims in groups:
            rendered.append('({})'.format(','.join(core_dims)))
        return ','.join(rendered)

    inputs_text = _format_side(self.input_core_dims)
    outputs_text = _format_side(self.output_core_dims)
    return '{}->{}'.format(inputs_text, outputs_text)
97
+
98
def to_gufunc_string(self):
    """Create an equivalent signature string for a NumPy gufunc.

    Unlike __str__, handles dimensions that don't map to Python
    identifiers: every core dimension is replaced by a placeholder name
    ``dim0``, ``dim1``, ... before the signature is rendered.

    Placeholders are numbered in sorted order of the dimension names when
    the names can be compared with each other. If they cannot (for
    example a mix of strings and integers on Python 3), they are numbered
    in order of first appearance, inputs before outputs.

    Returns
    -------
    str
        ``str()`` of a new signature object of the same type as ``self``,
        built from the renamed input and output core dimensions.
    """
    try:
        ordered_dims = sorted(self.all_core_dims)
    except TypeError:
        # Dimension names only need to be hashable, so they may not be
        # mutually orderable. Any consistent numbering yields an
        # equivalent signature, so fall back to first-appearance order.
        ordered_dims = []
        seen = set()
        for core_dims in (list(self.input_core_dims) +
                          list(self.output_core_dims)):
            for dim in core_dims:
                if dim not in seen:
                    seen.add(dim)
                    ordered_dims.append(dim)

    dims_map = {dim: 'dim%d' % n for n, dim in enumerate(ordered_dims)}
    input_core_dims = [[dims_map[dim] for dim in core_dims]
                       for core_dims in self.input_core_dims]
    output_core_dims = [[dims_map[dim] for dim in core_dims]
                        for core_dims in self.output_core_dims]
    # Build a signature of the same type so its __str__ does the rendering.
    alt_signature = type(self)(input_core_dims, output_core_dims)
    return str(alt_signature)
112
+
91
113
92
114
def result_name (objects ):
93
115
# type: List[object] -> Any
@@ -636,6 +658,7 @@ def apply_ufunc(func, *args, **kwargs):
636
658
input_core_dims : Optional[Sequence[Sequence]] = None,
637
659
output_core_dims : Optional[Sequence[Sequence]] = ((),),
638
660
exclude_dims : Collection = frozenset(),
661
+ vectorize : bool = False,
639
662
join : str = 'exact',
640
663
dataset_join : str = 'exact',
641
664
dataset_fill_value : Any = _NO_FILL_VALUE,
@@ -659,8 +682,9 @@ def apply_ufunc(func, *args, **kwargs):
659
682
(``.data``) that returns an array or tuple of arrays. If multiple
660
683
arguments with non-matching dimensions are supplied, this function is
661
684
expected to vectorize (broadcast) over axes of positional arguments in
662
- the style of NumPy universal functions [1]_. If this function returns
663
- multiple outputs, you most set ``output_core_dims`` as well.
685
+ the style of NumPy universal functions [1]_ (if this is not the case,
686
+ set ``vectorize=True``). If this function returns multiple outputs, you
687
+ must set ``output_core_dims`` as well.
664
688
*args : Dataset, DataArray, GroupBy, Variable, numpy/dask arrays or scalars
665
689
Mix of labeled and/or unlabeled arrays to which to apply the function.
666
690
input_core_dims : Sequence[Sequence], optional
@@ -689,6 +713,12 @@ def apply_ufunc(func, *args, **kwargs):
689
713
broadcasting entirely. Any input coordinates along these dimensions
690
714
will be dropped. Each excluded dimension must also appear in
691
715
``input_core_dims`` for at least one argument.
716
+ vectorize : bool, optional
717
+ If True, then assume ``func`` only takes arrays defined over core
718
+ dimensions as input and vectorize it automatically with
719
+ :py:func:`numpy.vectorize`. This option exists for convenience, but is
720
+ almost always slower than supplying a pre-vectorized function.
721
+ Using this option requires NumPy version 1.12 or newer.
692
722
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
693
723
Method for joining the indexes of the passed objects along each
694
724
dimension, and the variables of Dataset objects with mismatched
@@ -779,15 +809,31 @@ def stack(objects, dim, new_coord):
779
809
result[dim] = new_coord
780
810
return result
781
811
812
+ If your function is not vectorized but can be applied only to core
813
+ dimensions, you can use ``vectorize=True`` to turn it into a vectorized
814
+ function. This wraps :py:func:`numpy.vectorize`, so the operation isn't
815
+ terribly fast. Here we'll use it to calculate the distance between
816
+ empirical samples from two probability distributions, using a scipy
817
+ function that needs to be applied to vectors::
818
+
819
+ import scipy.stats
820
+
821
+ def earth_mover_distance(first_samples,
822
+ second_samples,
823
+ dim='ensemble'):
824
+ return apply_ufunc(scipy.stats.wasserstein_distance,
825
+ first_samples, second_samples,
826
+ input_core_dims=[[dim], [dim]],
827
+ vectorize=True)
828
+
782
829
Most of NumPy's builtin functions already broadcast their inputs
783
830
appropriately for use in `apply_ufunc`. You may find helper functions such as
784
- numpy.broadcast_arrays or numpy.vectorize helpful in writing your function.
785
- `apply_ufunc` also works well with numba's vectorize and guvectorize.
831
+ numpy.broadcast_arrays helpful in writing your function. `apply_ufunc` also
832
+ works well with numba's vectorize and guvectorize.
786
833
787
834
See also
788
835
--------
789
836
numpy.broadcast_arrays
790
- numpy.vectorize
791
837
numba.vectorize
792
838
numba.guvectorize
793
839
@@ -802,6 +848,7 @@ def stack(objects, dim, new_coord):
802
848
803
849
input_core_dims = kwargs .pop ('input_core_dims' , None )
804
850
output_core_dims = kwargs .pop ('output_core_dims' , ((),))
851
+ vectorize = kwargs .pop ('vectorize' , False )
805
852
join = kwargs .pop ('join' , 'exact' )
806
853
dataset_join = kwargs .pop ('dataset_join' , 'exact' )
807
854
keep_attrs = kwargs .pop ('keep_attrs' , False )
@@ -827,6 +874,12 @@ def stack(objects, dim, new_coord):
827
874
if kwargs_ :
828
875
func = functools .partial (func , ** kwargs_ )
829
876
877
+ if vectorize :
878
+ func = np .vectorize (func ,
879
+ otypes = output_dtypes ,
880
+ signature = signature .to_gufunc_string (),
881
+ excluded = set (kwargs ))
882
+
830
883
variables_ufunc = functools .partial (apply_variable_ufunc , func ,
831
884
signature = signature ,
832
885
exclude_dims = exclude_dims ,
0 commit comments