diff --git a/xarray/core/common.py b/xarray/core/common.py
index 0acff90c149..2a5650011b3 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -341,14 +341,16 @@ def groupby(self, group, squeeze=True):
         """
         if isinstance(group, basestring):
             group = self[group]
+        elif isinstance(group, (list, tuple)):
+            group = [self[g] if isinstance(g, basestring) else g for g in group]
         return self.groupby_cls(self, group, squeeze=squeeze)
 
     def groupby_bins(self, group, bins, right=True, labels=None, precision=3,
                      include_lowest=False, squeeze=True):
         """Returns a GroupBy object for performing grouped operations.
 
-        Rather than using all unique values of `group`, the values are discretized
-        first by applying `pandas.cut` [1]_ to `group`.
+        Rather than using all unique values of `group`, the values are
+        discretized first by applying `pandas.cut` [1]_ to `group`.
 
         Parameters
         ----------
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 868961ee653..ee30802c510 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -286,9 +286,11 @@ def _to_dataset_whole(self, name=None, shallow_copy=True):
         if name is None:
             raise ValueError('unable to convert unnamed DataArray to a '
                              'Dataset without providing an explicit name')
-        if name in self.coords:
+        if (name in self.coords and
+                not self.variable.identical(self._coords[name])):
             raise ValueError('cannot create a Dataset from a DataArray with '
-                             'the same name as one of its coordinates')
+                             'the same name as one of its coordinates '
+                             'unless they are identical')
         # use private APIs here for speed: this is called by _to_temp_dataset(),
         # which is used in the guts of a lot of operations (e.g., reindex)
         variables = self._coords.copy()
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index 3d5d61c747e..f0f77836c21 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -4,11 +4,13 @@
 
 from . import nputils
 from . import ops
+from .alignment import broadcast
 from .combine import concat
 from .common import (
     ImplementsArrayReduce, ImplementsDatasetReduce, _maybe_promote,
 )
-from .pycompat import zip
+from .merge import merge
+from .pycompat import zip, OrderedDict
 from .utils import peek_at, maybe_wrap_array, safe_cast_to_index
 from .variable import as_variable, Variable, Coordinate
 
@@ -19,22 +21,28 @@ def unique_value_groups(ar):
     Parameters
     ----------
     ar : array-like
-        Input array. This will be flattened if it is not already 1-D.
+        One dimensional array-like.
 
     Returns
     -------
-    values : np.ndarray
-        Sorted, unique values as returned by `np.unique`.
+    values : pd.Index
+        Sorted, unique values as returned by `pd.factorize`.
     indices : list of lists of int
         Each element provides the integer indices in `ar` with values given by
         the corresponding value in `unique_values`.
     """
-    inverse, values = pd.factorize(ar, sort=True)
+    index = safe_cast_to_index(ar)
+    inverse, values = pd.factorize(index, sort=True)
     groups = [[] for _ in range(len(values))]
     for n, g in enumerate(inverse):
         if g >= 0:
             # pandas uses -1 to mark NaN, but doesn't include them in values
             groups[g].append(n)
+
+    if isinstance(values, pd.MultiIndex):
+        # restore level names
+        values = values.set_names(index.names)
+
     return values, groups
 
 
@@ -114,6 +122,11 @@ def _inverse_permutation_indices(positions):
     return indices
 
 
+def _is_monotonic_unique(group):  # monotonic labels without duplicates
+    as_index = safe_cast_to_index(group)
+    return as_index.is_monotonic and as_index.is_unique
+
+
 class GroupBy(object):
     """A object that implements the split-apply-combine pattern.
 
@@ -131,7 +144,7 @@ class GroupBy(object):
     DataArray.groupby
     """
     def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
-                    cut_kwargs={}):
+                 cut_kwargs={}):
         """Create a GroupBy object
 
         Parameters
@@ -152,44 +165,104 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
         cut_kwargs : dict, optional
             Extra keyword arguments to pass to `pandas.cut`
         """
-        from .dataset import as_dataset
+        from .dataset import Dataset
         from .dataarray import DataArray
 
-        if getattr(group, 'name', None) is None:
-            raise ValueError('`group` must have a name')
-        self._stacked_dim = None
-        if group.ndim != 1:
+        def check_valid_group(group_obj):
+            if not isinstance(group_obj, (DataArray, Variable)):
+                raise TypeError('`group` must be a DataArray, Variable or list '
+                                'of DataArrays and/or Variables')
+            if getattr(group_obj, 'name', None) is None:
+                raise ValueError('each item in `group` must have a name')
+
+        if grouper is not None and bins is not None:
+            raise TypeError("Can't specify both `grouper` and `bins`.")
+
+        if isinstance(group, (list, tuple)):
+            if not group:
+                raise ValueError('must supply at least one item to groupby')
+            for g in group:
+                check_valid_group(g)
+            group_names = [g.name for g in group]
+            # we merge multiple groupby variables into Dataset, so they can be
+            # stacked if they use multiple dimensions
+            group = merge(group)
+        else:
+            check_valid_group(group)
+            group_names = []
+
+        orig_dims = []
+        stacked_dim_name = None
+        if len(group.dims) > 1:
             # try to stack the dims of the group into a single dim
             # TODO: figure out how to exclude dimensions from the stacking
             #       (e.g. group over space dims but leave time dim intact)
-            orig_dims = group.dims
+            orig_dims = tuple(group.dims)
             stacked_dim_name = 'stacked_' + '_'.join(orig_dims)
+
             # the copy is necessary here, otherwise read only array raises error
             # in pandas: https://github.com/pydata/pandas/issues/12813
             group = group.stack(**{stacked_dim_name: orig_dims}).copy()
             obj = obj.stack(**{stacked_dim_name: orig_dims})
-            self._stacked_dim = stacked_dim_name
-            self._unstacked_dims = orig_dims
-        if not hasattr(group, 'dims'):
-            raise ValueError("`group` must have a 'dims' attribute")
-        group_dim, = group.dims
 
-        try:
-            expected_size = obj.dims[group_dim]
-        except TypeError:
-            expected_size = obj.shape[obj.get_axis_num(group_dim)]
+        grouped_dim_name = None
+
+        if isinstance(group, Dataset):
+            # list or tuple input is now a 1-dimensional Dataset
+
+            unstacked_group_names = [g for g in group_names
+                                     if g not in orig_dims]
+            stacked_group_names = [g for g in group_names if g in orig_dims]
+
+            levels = []
+            labels = []
+            names = []
+            if unstacked_group_names:
+                if len(unstacked_group_names) == 1:
+                    # MultiIndex.from_arrays returns a normal Index when passed
+                    # a single argument, so we use factorize instead.
+                    unstacked_name, = unstacked_group_names
+                    label, level = pd.factorize(
+                        group[unstacked_name].to_index())
+                    levels.append(level)
+                    labels.append(label)
+                    names.append(unstacked_name)
+                else:
+                    index = pd.MultiIndex.from_arrays(
+                        [group[name].to_index()
+                         for name in unstacked_group_names],
+                        names=unstacked_group_names)
+                    levels.extend(index.levels)
+                    labels.extend(index.labels)
+                    names.extend(index.names)
+
+            if stacked_group_names:
+                index = group.coords[stacked_dim_name].to_index()
+                for level, label, name in zip(
+                        index.levels, index.labels, index.names):
+                    if name in stacked_group_names:
+                        levels.append(level)
+                        labels.append(label)
+                        names.append(name)
+
+            group_index = pd.MultiIndex(levels, labels, names=names)
+            grouped_dim_name = 'grouped_' + '_'.join(group_names)
+            group = DataArray(group_index, group.coords,
+                              dims=list(group.dims), name=grouped_dim_name)
+
+        group_dim, = group.dims
+        expected_size = obj.coords[group_dim].size
         if group.size != expected_size:
             raise ValueError('the group variable\'s length does not '
                              'match the length of this variable along its '
                              'dimension')
         full_index = None
 
-        if grouper is not None and bins is not None:
-            raise TypeError("Can't specify both `grouper` and `bins`.")
         if bins is not None:
             binned = pd.cut(group.values, bins, **cut_kwargs)
             new_dim_name = group.name + '_bins'
             group = DataArray(binned, group.coords, name=new_dim_name)
+
         if grouper is not None:
             index = safe_cast_to_index(group)
             if not index.is_monotonic:
@@ -205,13 +278,10 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
             group_indices = ([slice(i, j) for i, j in zip(sbins[:-1], sbins[1:])] +
                              [slice(sbins[-1], None)])
             unique_coord = Coordinate(group.name, first_items.index)
-        elif group.name in obj.dims and bins is None:
-            # assume that group already has sorted, unique values
-            # (if using bins, the group will have the same name as a dimension
-            # but different values)
-            if group.dims != (group.name,):
-                raise ValueError('`group` is required to be a coordinate if '
-                                 '`group.name` is a dimension in `obj`')
+        elif group.name in obj.dims and _is_monotonic_unique(group):
+            # TODO(shoyer): Figure out how to handle cases where group is a
+            # dimension coordinate, but not monotonic unique. How should we
+            # handle squeeze?
             group_indices = np.arange(group.size)
             if not squeeze:
                 # group_indices = group_indices.reshape(-1, 1)
@@ -230,6 +300,8 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
         self.unique_coord = unique_coord
         self._groups = None
         self._full_index = full_index
+        self._stacked_dim = stacked_dim_name
+        self._grouped_dim = grouped_dim_name
 
     @property
     def groups(self):
@@ -296,7 +368,7 @@ def _maybe_restore_empty_groups(self, combined):
         """Our index contained empty groups (e.g., from a resampling). If we
         reduced on that dimension, we want to restore the full index.
         """
-        if (self._full_index is not None and self.group.name in combined.dims):
+        if self._full_index is not None and self.group.name in combined.dims:
             indexers = {self.group.name: self._full_index}
             combined = combined.reindex(**indexers)
         return combined
@@ -304,8 +376,12 @@ def _maybe_restore_empty_groups(self, combined):
     def _maybe_unstack_array(self, arr):
         """This gets called if we are applying on an array with a
         multidimensional group."""
-        if self._stacked_dim is not None and self._stacked_dim in arr.dims:
-            arr = arr.unstack(self._stacked_dim)
+        if self._stacked_dim is not None:
+            # prefer the stacked dim; otherwise fall back to the grouped dim
+            for dim in (self._stacked_dim, self._grouped_dim):
+                if dim is not None and dim in arr.dims:
+                    arr = arr.unstack(dim)
+                    break
         return arr
 
     def fillna(self, value):
@@ -426,12 +502,6 @@ def lookup_order(dimension):
         new_order = sorted(stacked.dims, key=lookup_order)
         return stacked.transpose(*new_order)
 
-    def _restore_multiindex(self, combined):
-        if self._stacked_dim is not None and self._stacked_dim in combined.dims:
-            stacked_dim = self.group[self._stacked_dim]
-            combined[self._stacked_dim] = stacked_dim
-        return combined
-
     def apply(self, func, shortcut=False, **kwargs):
         """Apply a function over each array in the group and concatenate them
         together into a new array.
@@ -490,7 +560,6 @@ def _concat(self, applied, shortcut=False):
             combined = _maybe_reorder(combined, concat_dim, positions)
         if isinstance(combined, type(self.obj)):
             combined = self._restore_dim_order(combined)
-            combined = self._restore_multiindex(combined)
         return combined
 
     def reduce(self, func, dim=None, axis=None, keep_attrs=False,
diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py
index e873280721b..837116d443f 100644
--- a/xarray/test/test_dataarray.py
+++ b/xarray/test/test_dataarray.py
@@ -1131,270 +1131,6 @@ def test_fillna(self):
         actual = a.groupby('b').fillna(DataArray([0, 2], dims='b'))
         self.assertDataArrayIdentical(expected, actual)
 
-    def test_groupby_iter(self):
-        for ((act_x, act_dv), (exp_x, exp_ds)) in \
-                zip(self.dv.groupby('y'), self.ds.groupby('y')):
-            self.assertEqual(exp_x, act_x)
-            self.assertDataArrayIdentical(exp_ds['foo'], act_dv)
-        for ((_, exp_dv), act_dv) in zip(self.dv.groupby('x'), self.dv):
-            self.assertDataArrayIdentical(exp_dv, act_dv)
-
-    def make_groupby_example_array(self):
-        da = self.dv.copy()
-        da.coords['abc'] = ('y', np.array(['a'] * 9 + ['c'] + ['b'] * 10))
-        da.coords['y'] = 20 + 100 * da['y']
-        return da
-
-    def test_groupby_properties(self):
-        grouped = self.make_groupby_example_array().groupby('abc')
-        expected_unique = Variable('abc', ['a', 'b', 'c'])
-        self.assertVariableEqual(expected_unique, grouped.unique_coord)
-        self.assertEqual(3, len(grouped))
-
-    def test_groupby_apply_identity(self):
-        expected = self.make_groupby_example_array()
-        idx = expected.coords['y']
-
-        def identity(x):
-            return x
-
-        for g in ['x', 'y', 'abc', idx]:
-            for shortcut in [False, True]:
-                for squeeze in [False, True]:
-                    grouped = expected.groupby(g, squeeze=squeeze)
-                    actual = grouped.apply(identity, shortcut=shortcut)
-                    self.assertDataArrayIdentical(expected, actual)
-
-    def test_groupby_sum(self):
-        array = self.make_groupby_example_array()
-        grouped = array.groupby('abc')
-
-        expected_sum_all = Dataset(
-            {'foo': Variable(['abc'], np.array([self.x[:, :9].sum(),
-                                                self.x[:, 10:].sum(),
-                                                self.x[:, 9:10].sum()]).T),
-             'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
-        self.assertDataArrayAllClose(expected_sum_all, grouped.reduce(np.sum))
-        self.assertDataArrayAllClose(expected_sum_all, grouped.sum())
-
-        expected = DataArray([array['y'].values[idx].sum() for idx
-                              in [slice(9), slice(10, None), slice(9, 10)]],
-                             [['a', 'b', 'c']], ['abc'])
-        actual = array['y'].groupby('abc').apply(np.sum)
-        self.assertDataArrayAllClose(expected, actual)
-        actual = array['y'].groupby('abc').sum()
-        self.assertDataArrayAllClose(expected, actual)
-
-        expected_sum_axis1 = Dataset(
-            {'foo': (['x', 'abc'], np.array([self.x[:, :9].sum(1),
-                                             self.x[:, 10:].sum(1),
-                                             self.x[:, 9:10].sum(1)]).T),
-             'x': self.ds['x'],
-             'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
-        self.assertDataArrayAllClose(expected_sum_axis1,
-                                     grouped.reduce(np.sum, 'y'))
-        self.assertDataArrayAllClose(expected_sum_axis1, grouped.sum('y'))
-
-    def test_groupby_count(self):
-        array = DataArray([0, 0, np.nan, np.nan, 0, 0],
-                          coords={'cat': ('x', ['a', 'b', 'b', 'c', 'c', 'c'])},
-                          dims='x')
-        actual = array.groupby('cat').count()
-        expected = DataArray([1, 1, 2], coords=[('cat', ['a', 'b', 'c'])])
-        self.assertDataArrayIdentical(actual, expected)
-
-    @unittest.skip('needs to be fixed for shortcut=False, keep_attrs=False')
-    def test_groupby_reduce_attrs(self):
-        array = self.make_groupby_example_array()
-        array.attrs['foo'] = 'bar'
-
-        for shortcut in [True, False]:
-            for keep_attrs in [True, False]:
-                print('shortcut=%s, keep_attrs=%s' % (shortcut, keep_attrs))
-                actual = array.groupby('abc').reduce(
-                    np.mean, keep_attrs=keep_attrs, shortcut=shortcut)
-                expected = array.groupby('abc').mean()
-                if keep_attrs:
-                    expected.attrs['foo'] = 'bar'
-                self.assertDataArrayIdentical(expected, actual)
-
-    def test_groupby_apply_center(self):
-        def center(x):
-            return x - np.mean(x)
-
-        array = self.make_groupby_example_array()
-        grouped = array.groupby('abc')
-
-        expected_ds = array.to_dataset()
-        exp_data = np.hstack([center(self.x[:, :9]),
-                              center(self.x[:, 9:10]),
-                              center(self.x[:, 10:])])
-        expected_ds['foo'] = (['x', 'y'], exp_data)
-        expected_centered = expected_ds['foo']
-        self.assertDataArrayAllClose(expected_centered, grouped.apply(center))
-
-    def test_groupby_apply_ndarray(self):
-        # regression test for #326
-        array = self.make_groupby_example_array()
-        grouped = array.groupby('abc')
-        actual = grouped.apply(np.asarray)
-        self.assertDataArrayEqual(array, actual)
-
-    def test_groupby_apply_changes_metadata(self):
-        def change_metadata(x):
-            x.coords['x'] = x.coords['x'] * 2
-            x.attrs['fruit'] = 'lemon'
-            return x
-
-        array = self.make_groupby_example_array()
-        grouped = array.groupby('abc')
-        actual = grouped.apply(change_metadata)
-        expected = array.copy()
-        expected = change_metadata(expected)
-        self.assertDataArrayEqual(expected, actual)
-
-    def test_groupby_math(self):
-        array = self.make_groupby_example_array()
-        for squeeze in [True, False]:
-            grouped = array.groupby('x', squeeze=squeeze)
-
-            expected = array + array.coords['x']
-            actual = grouped + array.coords['x']
-            self.assertDataArrayIdentical(expected, actual)
-
-            actual = array.coords['x'] + grouped
-            self.assertDataArrayIdentical(expected, actual)
-
-            ds = array.coords['x'].to_dataset('X')
-            expected = array + ds
-            actual = grouped + ds
-            self.assertDatasetIdentical(expected, actual)
-
-            actual = ds + grouped
-            self.assertDatasetIdentical(expected, actual)
-
-        grouped = array.groupby('abc')
-        expected_agg = (grouped.mean() - np.arange(3)).rename(None)
-        actual = grouped - DataArray(range(3), [('abc', ['a', 'b', 'c'])])
-        actual_agg = actual.groupby('abc').mean()
-        self.assertDataArrayAllClose(expected_agg, actual_agg)
-
-        with self.assertRaisesRegexp(TypeError, 'only support binary ops'):
-            grouped + 1
-        with self.assertRaisesRegexp(TypeError, 'only support binary ops'):
-            grouped + grouped
-        with self.assertRaisesRegexp(TypeError, 'in-place operations'):
-            array += grouped
-
-    def test_groupby_math_not_aligned(self):
-        array = DataArray(range(4), {'b': ('x', [0, 0, 1, 1])}, dims='x')
-        other = DataArray([10], dims='b')
-        actual = array.groupby('b') + other
-        expected = DataArray([10, 11, np.nan, np.nan], array.coords)
-        self.assertDataArrayIdentical(expected, actual)
-
-        other = DataArray([10], coords={'c': 123}, dims='b')
-        actual = array.groupby('b') + other
-        expected.coords['c'] = (['x'], [123] * 2 + [np.nan] * 2)
-        self.assertDataArrayIdentical(expected, actual)
-
-        other = Dataset({'a': ('b', [10])})
-        actual = array.groupby('b') + other
-        expected = Dataset({'a': ('x', [10, 11, np.nan, np.nan])},
-                           array.coords)
-        self.assertDatasetIdentical(expected, actual)
-
-    def test_groupby_restore_dim_order(self):
-        array = DataArray(np.random.randn(5, 3),
-                          coords={'a': ('x', range(5)), 'b': ('y', range(3))},
-                          dims=['x', 'y'])
-        for by, expected_dims in [('x', ('x', 'y')),
-                                  ('y', ('x', 'y')),
-                                  ('a', ('a', 'y')),
-                                  ('b', ('x', 'b'))]:
-            result = array.groupby(by).apply(lambda x: x.squeeze())
-            self.assertEqual(result.dims, expected_dims)
-
-    def test_groupby_first_and_last(self):
-        array = DataArray([1, 2, 3, 4, 5], dims='x')
-        by = DataArray(['a'] * 2 + ['b'] * 3, dims='x', name='ab')
-
-        expected = DataArray([1, 3], [('ab', ['a', 'b'])])
-        actual = array.groupby(by).first()
-        self.assertDataArrayIdentical(expected, actual)
-
-        expected = DataArray([2, 5], [('ab', ['a', 'b'])])
-        actual = array.groupby(by).last()
-        self.assertDataArrayIdentical(expected, actual)
-
-        array = DataArray(np.random.randn(5, 3), dims=['x', 'y'])
-        expected = DataArray(array[[0, 2]], {'ab': ['a', 'b']}, ['ab', 'y'])
-        actual = array.groupby(by).first()
-        self.assertDataArrayIdentical(expected, actual)
-
-        actual = array.groupby('x').first()
-        expected = array  # should be a no-op
-        self.assertDataArrayIdentical(expected, actual)
-
-    def make_groupby_multidim_example_array(self):
-        return DataArray([[[0,1],[2,3]],[[5,10],[15,20]]],
-                        coords={'lon': (['ny', 'nx'], [[30., 40.], [40., 50.]] ),
-                                'lat': (['ny', 'nx'], [[10., 10.], [20., 20.]] ),},
-                        dims=['time', 'ny', 'nx'])
-
-    def test_groupby_multidim(self):
-        array = self.make_groupby_multidim_example_array()
-        for dim, expected_sum in [
-                ('lon', DataArray([5, 28, 23], coords={'lon': [30., 40., 50.]})),
-                ('lat', DataArray([16, 40], coords={'lat': [10., 20.]}))]:
-            actual_sum = array.groupby(dim).sum()
-            self.assertDataArrayIdentical(expected_sum, actual_sum)
-
-    def test_groupby_multidim_apply(self):
-        array = self.make_groupby_multidim_example_array()
-        actual = array.groupby('lon').apply(
-                lambda x : x - x.mean(), shortcut=False)
-        expected = DataArray([[[-2.5, -6.], [-5., -8.5]],
-                              [[ 2.5,  3.], [ 8.,  8.5]]],
-                    coords=array.coords, dims=array.dims)
-        self.assertDataArrayIdentical(expected, actual)
-
-    def test_groupby_bins(self):
-        array = DataArray(np.arange(4), dims='dim_0')
-        # the first value should not be part of any group ("right" binning)
-        array[0] = 99
-        # bins follow conventions for pandas.cut
-        # http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
-        bins = [0,1.5,5]
-        bin_coords = ['(0, 1.5]', '(1.5, 5]']
-        expected = DataArray([1,5], dims='dim_0_bins',
-                        coords={'dim_0_bins': bin_coords})
-        # the problem with this is that it overwrites the dimensions of array!
-        #actual = array.groupby('dim_0', bins=bins).sum()
-        actual = array.groupby_bins('dim_0', bins).apply(
-                                    lambda x : x.sum(), shortcut=False)
-        self.assertDataArrayIdentical(expected, actual)
-        # make sure original array dims are unchanged
-        # (would fail with shortcut=True above)
-        self.assertEqual(len(array.dim_0), 4)
-
-    def test_groupby_bins_multidim(self):
-        array = self.make_groupby_multidim_example_array()
-        bins = [0,15,20]
-        bin_coords = ['(0, 15]', '(15, 20]']
-        expected = DataArray([16, 40], dims='lat_bins',
-                                coords={'lat_bins': bin_coords})
-        actual = array.groupby_bins('lat', bins).apply(
-                                    lambda x : x.sum(), shortcut=False)
-        self.assertDataArrayIdentical(expected, actual)
-        # modify the array coordinates to be non-monotonic after unstacking
-        array['lat'].data = np.array([[10., 20.], [20., 10.]])
-        expected = DataArray([28, 28], dims='lat_bins',
-                    coords={'lat_bins': bin_coords})
-        actual = array.groupby_bins('lat', bins).apply(
-                                    lambda x : x.sum(), shortcut=False)
-        self.assertDataArrayIdentical(expected, actual)
-
     def make_rolling_example_array(self):
         times = pd.date_range('2000-01-01', freq='1D', periods=21)
         values = np.random.random((21, 4))
diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py
index 7636b67178a..7059fa8ff17 100644
--- a/xarray/test/test_dataset.py
+++ b/xarray/test/test_dataset.py
@@ -1499,141 +1499,6 @@ def get_args(v):
         with self.assertRaisesRegexp(ValueError, 'cannot select a dimension'):
             data.squeeze('y')
 
-    def test_groupby(self):
-        data = Dataset({'z': (['x', 'y'], np.random.randn(3, 5))},
-                       {'x': ('x', list('abc')),
-                        'c': ('x', [0, 1, 0])})
-        groupby = data.groupby('x')
-        self.assertEqual(len(groupby), 3)
-        expected_groups = {'a': 0, 'b': 1, 'c': 2}
-        self.assertEqual(groupby.groups, expected_groups)
-        expected_items = [('a', data.isel(x=0)),
-                          ('b', data.isel(x=1)),
-                          ('c', data.isel(x=2))]
-        for actual, expected in zip(groupby, expected_items):
-            self.assertEqual(actual[0], expected[0])
-            self.assertDatasetEqual(actual[1], expected[1])
-
-        identity = lambda x: x
-        for k in ['x', 'c', 'y']:
-            actual = data.groupby(k, squeeze=False).apply(identity)
-            self.assertDatasetEqual(data, actual)
-
-    def test_groupby_returns_new_type(self):
-        data = Dataset({'z': (['x', 'y'], np.random.randn(3, 5))})
-
-        actual = data.groupby('x').apply(lambda ds: ds['z'])
-        expected = data['z']
-        self.assertDataArrayIdentical(expected, actual)
-
-        actual = data['z'].groupby('x').apply(lambda x: x.to_dataset())
-        expected = data
-        self.assertDatasetIdentical(expected, actual)
-
-    def test_groupby_iter(self):
-        data = create_test_data()
-        for n, (t, sub) in enumerate(list(data.groupby('dim1'))[:3]):
-            self.assertEqual(data['dim1'][n], t)
-            self.assertVariableEqual(data['var1'][n], sub['var1'])
-            self.assertVariableEqual(data['var2'][n], sub['var2'])
-            self.assertVariableEqual(data['var3'][:, n], sub['var3'])
-
-    def test_groupby_errors(self):
-        data = create_test_data()
-        with self.assertRaisesRegexp(ValueError, 'must have a name'):
-            data.groupby(np.arange(10))
-        with self.assertRaisesRegexp(ValueError, 'length does not match'):
-            data.groupby(data['dim1'][:3])
-        with self.assertRaisesRegexp(ValueError, "must have a 'dims'"):
-            data.groupby(data.coords['dim1'].to_index())
-
-    def test_groupby_reduce(self):
-        data = Dataset({'xy': (['x', 'y'], np.random.randn(3, 4)),
-                        'xonly': ('x', np.random.randn(3)),
-                        'yonly': ('y', np.random.randn(4)),
-                        'letters': ('y', ['a', 'a', 'b', 'b'])})
-
-        expected = data.mean('y')
-        expected['yonly'] = expected['yonly'].variable.expand_dims({'x': 3})
-        actual = data.groupby('x').mean()
-        self.assertDatasetAllClose(expected, actual)
-
-        actual = data.groupby('x').mean('y')
-        self.assertDatasetAllClose(expected, actual)
-
-        letters = data['letters']
-        expected = Dataset({'xy': data['xy'].groupby(letters).mean(),
-                            'xonly': (data['xonly'].mean().variable
-                                      .expand_dims({'letters': 2})),
-                            'yonly': data['yonly'].groupby(letters).mean()})
-        actual = data.groupby('letters').mean()
-        self.assertDatasetAllClose(expected, actual)
-
-    def test_groupby_math(self):
-        reorder_dims = lambda x: x.transpose('dim1', 'dim2', 'dim3', 'time')
-
-        ds = create_test_data()
-        for squeeze in [True, False]:
-            grouped = ds.groupby('dim1', squeeze=squeeze)
-
-            expected = reorder_dims(ds + ds.coords['dim1'])
-            actual = grouped + ds.coords['dim1']
-            self.assertDatasetIdentical(expected, reorder_dims(actual))
-
-            actual = ds.coords['dim1'] + grouped
-            self.assertDatasetIdentical(expected, reorder_dims(actual))
-
-            ds2 = 2 * ds
-            expected = reorder_dims(ds + ds2)
-            actual = grouped + ds2
-            self.assertDatasetIdentical(expected, reorder_dims(actual))
-
-            actual = ds2 + grouped
-            self.assertDatasetIdentical(expected, reorder_dims(actual))
-
-        grouped = ds.groupby('numbers')
-        zeros = DataArray([0, 0, 0, 0], [('numbers', range(4))])
-        expected = ((ds + Variable('dim3', np.zeros(10)))
-                    .transpose('dim3', 'dim1', 'dim2', 'time'))
-        actual = grouped + zeros
-        self.assertDatasetEqual(expected, actual)
-
-        actual = zeros + grouped
-        self.assertDatasetEqual(expected, actual)
-
-        with self.assertRaisesRegexp(ValueError, 'dimensions .* do not exist'):
-            grouped + ds
-        with self.assertRaisesRegexp(ValueError, 'dimensions .* do not exist'):
-            ds + grouped
-        with self.assertRaisesRegexp(TypeError, 'only support binary ops'):
-            grouped + 1
-        with self.assertRaisesRegexp(TypeError, 'only support binary ops'):
-            grouped + grouped
-        with self.assertRaisesRegexp(TypeError, 'in-place operations'):
-            ds += grouped
-
-        ds = Dataset({'x': ('time', np.arange(100)),
-                      'time': pd.date_range('2000-01-01', periods=100)})
-        with self.assertRaisesRegexp(ValueError, 'incompat.* grouped binary'):
-            ds + ds.groupby('time.month')
-
-    def test_groupby_math_virtual(self):
-        ds = Dataset({'x': ('t', [1, 2, 3])},
-                     {'t': pd.date_range('20100101', periods=3)})
-        grouped = ds.groupby('t.day')
-        actual = grouped - grouped.mean()
-        expected = Dataset({'x': ('t', [0, 0, 0])},
-                           ds[['t', 't.day']])
-        self.assertDatasetIdentical(actual, expected)
-
-    def test_groupby_nan(self):
-        # nan should be excluded from groupby
-        ds = Dataset({'foo': ('x', [1, 2, 3, 4])},
-                     {'bar': ('x', [1, 1, 2, np.nan])})
-        actual = ds.groupby('bar').mean()
-        expected = Dataset({'foo': ('bar', [1.5, 3]), 'bar': [1, 2]})
-        self.assertDatasetIdentical(actual, expected)
-
     def test_resample_and_first(self):
         times = pd.date_range('2000-01-01', freq='6H', periods=10)
         ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)),
diff --git a/xarray/test/test_groupby.py b/xarray/test/test_groupby.py
index c10e574888c..5b8467cb73a 100644
--- a/xarray/test/test_groupby.py
+++ b/xarray/test/test_groupby.py
@@ -1,8 +1,12 @@
 import numpy as np
+import pandas as pd
+import pytest
+
 import xarray as xr
 from xarray.core.groupby import _consolidate_slices
 
-import pytest
+from . import TestCase, unittest
+from .test_dataset import create_test_data
 
 
 def test_consolidate_slices():
@@ -19,6 +23,420 @@ def test_consolidate_slices():
         _consolidate_slices([slice(3), 4])
 
 
+class TestDatasetGroupBy(TestCase):
+
+    def test_groupby(self):
+        data = xr.Dataset({'z': (['x', 'y'], np.random.randn(3, 5))},
+                          {'x': ('x', list('abc')),
+                           'c': ('x', [0, 1, 0])})
+        groupby = data.groupby('x')
+        self.assertEqual(len(groupby), 3)
+        expected_groups = {'a': 0, 'b': 1, 'c': 2}
+        self.assertEqual(groupby.groups, expected_groups)
+        expected_items = [('a', data.isel(x=0)),
+                          ('b', data.isel(x=1)),
+                          ('c', data.isel(x=2))]
+        for actual, expected in zip(groupby, expected_items):
+            self.assertEqual(actual[0], expected[0])
+            self.assertDatasetEqual(actual[1], expected[1])
+
+        identity = lambda x: x
+        for k in ['x', 'c', 'y']:
+            actual = data.groupby(k, squeeze=False).apply(identity)
+            self.assertDatasetEqual(data, actual)
+
+    def test_groupby_returns_new_type(self):
+        data = xr.Dataset({'z': (['x', 'y'], np.random.randn(3, 5))})
+
+        actual = data.groupby('x').apply(lambda ds: ds['z'])
+        expected = data['z']
+        self.assertDataArrayIdentical(expected, actual)
+
+        actual = data['z'].groupby('x').apply(lambda x: x.to_dataset())
+        expected = data
+        self.assertDatasetIdentical(expected, actual)
+
+    def test_groupby_iter(self):
+        data = create_test_data()
+        for n, (t, sub) in enumerate(list(data.groupby('dim1'))[:3]):
+            self.assertEqual(data['dim1'][n], t)
+            self.assertVariableEqual(data['var1'][n], sub['var1'])
+            self.assertVariableEqual(data['var2'][n], sub['var2'])
+            self.assertVariableEqual(data['var3'][:, n], sub['var3'])
+
+    def test_groupby_errors(self):
+        data = create_test_data()
+        with self.assertRaisesRegexp(TypeError, 'must be'):
+            data.groupby(np.arange(10))
+        with self.assertRaisesRegexp(ValueError, 'must have a name'):
+            data.groupby(xr.DataArray(range(10), dims='foo'))
+        with self.assertRaisesRegexp(ValueError, 'length does not match'):
+            data.groupby(data['dim1'][:3])
+        with self.assertRaisesRegexp(TypeError, 'must be'):
+            data.groupby(data.coords['dim1'].to_index())
+
+    def test_groupby_reduce(self):
+        data = xr.Dataset({'xy': (['x', 'y'], np.random.randn(3, 4)),
+                           'xonly': ('x', np.random.randn(3)),
+                           'yonly': ('y', np.random.randn(4)),
+                           'letters': ('y', ['a', 'a', 'b', 'b'])})
+
+        expected = data.mean('y')
+        expected['yonly'] = expected['yonly'].variable.expand_dims({'x': 3})
+        actual = data.groupby('x').mean()
+        self.assertDatasetAllClose(expected, actual)
+
+        actual = data.groupby('x').mean('y')
+        self.assertDatasetAllClose(expected, actual)
+
+        letters = data['letters']
+        expected = xr.Dataset({'xy': data['xy'].groupby(letters).mean(),
+                               'xonly': (data['xonly'].mean().variable
+                                         .expand_dims({'letters': 2})),
+                               'yonly': data['yonly'].groupby(letters).mean()})
+        actual = data.groupby('letters').mean()
+        self.assertDatasetAllClose(expected, actual)
+
+    def test_groupby_math(self):
+        reorder_dims = lambda x: x.transpose('dim1', 'dim2', 'dim3', 'time')
+
+        ds = create_test_data()
+        for squeeze in [True, False]:
+            grouped = ds.groupby('dim1', squeeze=squeeze)
+
+            expected = reorder_dims(ds + ds.coords['dim1'])
+            actual = grouped + ds.coords['dim1']
+            self.assertDatasetIdentical(expected, reorder_dims(actual))
+
+            actual = ds.coords['dim1'] + grouped
+            self.assertDatasetIdentical(expected, reorder_dims(actual))
+
+            ds2 = 2 * ds
+            expected = reorder_dims(ds + ds2)
+            actual = grouped + ds2
+            self.assertDatasetIdentical(expected, reorder_dims(actual))
+
+            actual = ds2 + grouped
+            self.assertDatasetIdentical(expected, reorder_dims(actual))
+
+        grouped = ds.groupby('numbers')
+        zeros = xr.DataArray([0, 0, 0, 0], [('numbers', range(4))])
+        expected = ((ds + xr.Variable('dim3', np.zeros(10)))
+                    .transpose('dim3', 'dim1', 'dim2', 'time'))
+        actual = grouped + zeros
+        self.assertDatasetEqual(expected, actual)
+
+        actual = zeros + grouped
+        self.assertDatasetEqual(expected, actual)
+
+        with self.assertRaisesRegexp(ValueError, 'dimensions .* do not exist'):
+            grouped + ds
+        with self.assertRaisesRegexp(ValueError, 'dimensions .* do not exist'):
+            ds + grouped
+        with self.assertRaisesRegexp(TypeError, 'only support binary ops'):
+            grouped + 1
+        with self.assertRaisesRegexp(TypeError, 'only support binary ops'):
+            grouped + grouped
+        with self.assertRaisesRegexp(TypeError, 'in-place operations'):
+            ds += grouped
+
+        ds = xr.Dataset({'x': ('time', np.arange(100)),
+                         'time': pd.date_range('2000-01-01', periods=100)})
+        with self.assertRaisesRegexp(ValueError, 'incompat.* grouped binary'):
+            ds + ds.groupby('time.month')
+
+    def test_groupby_math_virtual(self):
+        ds = xr.Dataset({'x': ('t', [1, 2, 3])},
+                        {'t': pd.date_range('20100101', periods=3)})
+        grouped = ds.groupby('t.day')
+        actual = grouped - grouped.mean()
+        expected = xr.Dataset({'x': ('t', [0, 0, 0])},
+                              ds[['t', 't.day']])
+        self.assertDatasetIdentical(actual, expected)
+
+    def test_groupby_nan(self):
+        # nan should be excluded from groupby
+        ds = xr.Dataset({'foo': ('x', [1, 2, 3, 4])},
+                        {'bar': ('x', [1, 1, 2, np.nan])})
+        actual = ds.groupby('bar').mean()
+        expected = xr.Dataset({'foo': ('bar', [1.5, 3]), 'bar': [1, 2]})
+        self.assertDatasetIdentical(actual, expected)
+
+
+class TestDataArrayGroupBy(TestCase):
+
+    def make_groupby_example_array(self):
+        da = xr.DataArray(np.random.RandomState(0).rand(10, 20),
+                          {'abc':  ('y', ['a'] * 9 + ['c'] + ['b'] * 10),
+                           'y': 20 + 100 * np.arange(20)},
+                          ('x', 'y'),
+                          name='foo',
+                          attrs={'attr1': 'value1', 'attr2': 2929})
+        return da
+
+    def test_groupby_iter(self):
+        array = self.make_groupby_example_array()
+        ds = array.to_dataset()
+        for ((act_x, act_dv), (exp_x, exp_ds)) in \
+                zip(array.groupby('y'), ds.groupby('y')):
+            self.assertEqual(exp_x, act_x)
+            self.assertDataArrayIdentical(exp_ds['foo'], act_dv)
+        for ((_, exp_dv), act_dv) in zip(array.groupby('x'), array):
+            self.assertDataArrayIdentical(exp_dv, act_dv)
+
+    def test_groupby_properties(self):
+        grouped = self.make_groupby_example_array().groupby('abc')
+        expected_unique = xr.Variable('abc', ['a', 'b', 'c'])
+        self.assertVariableEqual(expected_unique, grouped.unique_coord)
+        self.assertEqual(3, len(grouped))
+
+    def test_groupby_apply_identity(self):
+        expected = self.make_groupby_example_array()
+        idx = expected.coords['y']
+
+        def identity(x):
+            return x
+
+        for g in ['x', 'y', 'abc', idx]:
+            for shortcut in [False, True]:
+                for squeeze in [False, True]:
+                    grouped = expected.groupby(g, squeeze=squeeze)
+                    actual = grouped.apply(identity, shortcut=shortcut)
+                    self.assertDataArrayIdentical(expected, actual)
+
+    def test_groupby_sum(self):
+        array = self.make_groupby_example_array()
+        grouped = array.groupby('abc')
+
+        expected_sum_all = xr.Dataset(
+            {'foo': (['abc'], np.array([array.values[:, :9].sum(),
+                                        array.values[:, 10:].sum(),
+                                        array.values[:, 9:10].sum()]).T),
+             'abc': (['abc'], np.array(['a', 'b', 'c']))})['foo']
+        self.assertDataArrayAllClose(expected_sum_all, grouped.reduce(np.sum))
+        self.assertDataArrayAllClose(expected_sum_all, grouped.sum())
+
+        expected = xr.DataArray([array['y'].values[idx].sum() for idx
+                                 in [slice(9), slice(10, None), slice(9, 10)]],
+                                [['a', 'b', 'c']], ['abc'])
+        actual = array['y'].groupby('abc').apply(np.sum)
+        self.assertDataArrayAllClose(expected, actual)
+        actual = array['y'].groupby('abc').sum()
+        self.assertDataArrayAllClose(expected, actual)
+
+        expected_sum_axis1 = xr.Dataset(
+            {'foo': (['x', 'abc'], np.array([array.values[:, :9].sum(1),
+                                             array.values[:, 10:].sum(1),
+                                             array.values[:, 9:10].sum(1)]).T),
+             'x': array['x'],
+             'abc': (['abc'], np.array(['a', 'b', 'c']))})['foo']
+        self.assertDataArrayAllClose(expected_sum_axis1,
+                                     grouped.reduce(np.sum, 'y'))
+        self.assertDataArrayAllClose(expected_sum_axis1, grouped.sum('y'))
+
+    def test_groupby_count(self):
+        # count() should only tally the non-NaN entries of each group
+        array = xr.DataArray([0, 0, np.nan, np.nan, 0, 0], dims='x',
+                             coords={'cat': ('x', list('abbccc'))})
+        actual = array.groupby('cat').count()
+        expected = xr.DataArray([1, 1, 2], coords=[('cat', ['a', 'b', 'c'])])
+        self.assertDataArrayIdentical(actual, expected)
+
+    @unittest.skip('needs to be fixed for shortcut=False, keep_attrs=False')
+    def test_groupby_reduce_attrs(self):
+        array = self.make_groupby_example_array()
+        array.attrs['foo'] = 'bar'
+
+        for shortcut in [True, False]:
+            for keep_attrs in [True, False]:
+                print('shortcut=%s, keep_attrs=%s' % (shortcut, keep_attrs))
+                actual = array.groupby('abc').reduce(
+                    np.mean, keep_attrs=keep_attrs, shortcut=shortcut)
+                expected = array.groupby('abc').mean()
+                if keep_attrs:
+                    expected.attrs['foo'] = 'bar'
+                self.assertDataArrayIdentical(expected, actual)
+
+    def test_groupby_apply_center(self):
+        def center(x):
+            return x - np.mean(x)
+
+        array = self.make_groupby_example_array()
+        grouped = array.groupby('abc')
+
+        expected_ds = array.to_dataset()
+        exp_data = np.hstack([center(array.values[:, :9]),
+                              center(array.values[:, 9:10]),
+                              center(array.values[:, 10:])])
+        expected_ds['foo'] = (['x', 'y'], exp_data)
+        expected_centered = expected_ds['foo']
+        self.assertDataArrayAllClose(expected_centered, grouped.apply(center))
+
+    def test_groupby_apply_ndarray(self):
+        # regression test for #326
+        array = self.make_groupby_example_array()
+        grouped = array.groupby('abc')
+        actual = grouped.apply(np.asarray)
+        self.assertDataArrayEqual(array, actual)
+
+    def test_groupby_apply_changes_metadata(self):
+        def change_metadata(x):
+            x.coords['x'] = x.coords['x'] * 2
+            x.attrs['fruit'] = 'lemon'
+            return x
+
+        array = self.make_groupby_example_array()
+        grouped = array.groupby('abc')
+        actual = grouped.apply(change_metadata)
+        expected = array.copy()
+        expected = change_metadata(expected)
+        self.assertDataArrayEqual(expected, actual)
+
+    def test_groupby_math(self):
+        array = self.make_groupby_example_array()
+        for squeeze in [True, False]:
+            grouped = array.groupby('x', squeeze=squeeze)
+
+            expected = array + array.coords['x']
+            actual = grouped + array.coords['x']
+            self.assertDataArrayIdentical(expected, actual)
+
+            actual = array.coords['x'] + grouped
+            self.assertDataArrayIdentical(expected, actual)
+
+            ds = array.coords['x'].to_dataset('X')
+            expected = array + ds
+            actual = grouped + ds
+            self.assertDatasetIdentical(expected, actual)
+
+            actual = ds + grouped
+            self.assertDatasetIdentical(expected, actual)
+
+        grouped = array.groupby('abc')
+        expected_agg = (grouped.mean() - np.arange(3)).rename(None)
+        actual = grouped - xr.DataArray(range(3), [('abc', ['a', 'b', 'c'])])
+        actual_agg = actual.groupby('abc').mean()
+        self.assertDataArrayAllClose(expected_agg, actual_agg)
+
+        with self.assertRaisesRegexp(TypeError, 'only support binary ops'):
+            grouped + 1
+        with self.assertRaisesRegexp(TypeError, 'only support binary ops'):
+            grouped + grouped
+        with self.assertRaisesRegexp(TypeError, 'in-place operations'):
+            array += grouped
+
+    def test_groupby_math_not_aligned(self):
+        # `other` has one entry along 'b'; the second group should give NaN
+        array = xr.DataArray(range(4), {'b': ('x', [0, 0, 1, 1])}, dims='x')
+        other = xr.DataArray([10], dims='b')
+        actual = array.groupby('b') + other
+        expected = xr.DataArray([10, 11, np.nan, np.nan], array.coords)
+        self.assertDataArrayIdentical(expected, actual)
+
+        other = xr.DataArray([10], coords={'c': 123}, dims='b')
+        actual = array.groupby('b') + other
+        expected.coords['c'] = (['x'], [123] * 2 + [np.nan] * 2)
+        self.assertDataArrayIdentical(expected, actual)
+
+        actual = array.groupby('b') + xr.Dataset({'a': ('b', [10])})
+        expected = xr.Dataset({'a': ('x', [10, 11, np.nan, np.nan])},
+                              array.coords)
+        self.assertDatasetIdentical(expected, actual)
+
+    def test_groupby_restore_dim_order(self):
+        # applying over groups must not reorder the result's dimensions
+        coords = {'a': ('x', range(5)), 'b': ('y', range(3))}
+        array = xr.DataArray(np.random.randn(5, 3), coords, dims=['x', 'y'])
+        for by, expected_dims in [('x', ('x', 'y')),
+                                  ('y', ('x', 'y')),
+                                  ('a', ('a', 'y')),
+                                  ('b', ('x', 'b'))]:
+            result = array.groupby(by).apply(lambda x: x.squeeze())
+            self.assertEqual(result.dims, expected_dims)
+
+    def test_groupby_first_and_last(self):
+        array = xr.DataArray([1, 2, 3, 4, 5], dims='x')
+        by = xr.DataArray(['a'] * 2 + ['b'] * 3, dims='x', name='ab')
+
+        expected = xr.DataArray([1, 3], [('ab', ['a', 'b'])])
+        actual = array.groupby(by).first()
+        self.assertDataArrayIdentical(expected, actual)
+
+        expected = xr.DataArray([2, 5], [('ab', ['a', 'b'])])
+        actual = array.groupby(by).last()
+        self.assertDataArrayIdentical(expected, actual)
+
+        array = xr.DataArray(np.random.randn(5, 3), dims=['x', 'y'])
+        expected = xr.DataArray(array[[0, 2]], {'ab': ['a', 'b']}, ['ab', 'y'])
+        actual = array.groupby(by).first()
+        self.assertDataArrayIdentical(expected, actual)
+
+        actual = array.groupby('x').first()
+        expected = array  # should be a no-op
+        self.assertDataArrayIdentical(expected, actual)
+
+    def make_groupby_multidim_example_array(self):
+        """Return a (time, ny, nx) array with 2-D 'lon'/'lat' coordinates."""
+        coords = {'lon': (['ny', 'nx'], [[30., 40.], [40., 50.]]),
+                  'lat': (['ny', 'nx'], [[10., 10.], [20., 20.]])}
+        return xr.DataArray([[[0, 1], [2, 3]], [[5, 10], [15, 20]]],
+                            coords=coords, dims=['time', 'ny', 'nx'])
+
+    def test_groupby_multidim(self):
+        array = self.make_groupby_multidim_example_array()
+        for dim, expected_sum in [
+                ('lon', xr.DataArray([5, 28, 23],
+                                     coords={'lon': [30., 40., 50.]})),
+                ('lat', xr.DataArray([16, 40], coords={'lat': [10., 20.]}))]:
+            actual_sum = array.groupby(dim).sum()
+            self.assertDataArrayIdentical(expected_sum, actual_sum)
+
+    def test_groupby_multidim_apply(self):
+        array = self.make_groupby_multidim_example_array()
+        actual = array.groupby('lon').apply(
+            lambda x: x - x.mean(), shortcut=False)
+        expected = xr.DataArray([[[-2.5, -6.], [-5., -8.5]],
+                                 [[2.5,  3.], [8., 8.5]]],
+                                coords=array.coords, dims=array.dims)
+        self.assertDataArrayIdentical(expected, actual)
+
+    def test_groupby_bins(self):
+        array = xr.DataArray(np.arange(4), dims='dim_0')
+        # the first value should not be part of any group ("right" binning)
+        array[0] = 99
+        # bins follow conventions for pandas.cut
+        # http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
+        bins = [0, 1.5, 5]
+        bin_coords = ['(0, 1.5]', '(1.5, 5]']
+        expected = xr.DataArray([1, 5], dims='dim_0_bins',
+                                coords={'dim_0_bins': bin_coords})
+        # the problem with this is that it overwrites the dimensions of array!
+        #actual = array.groupby('dim_0', bins=bins).sum()
+        actual = array.groupby_bins('dim_0', bins).apply(
+            lambda x: x.sum(), shortcut=False)
+        self.assertDataArrayIdentical(expected, actual)
+        # make sure original array dims are unchanged
+        # (would fail with shortcut=True above)
+        self.assertEqual(len(array.dim_0), 4)
+
+    def test_groupby_bins_multidim(self):
+        array = self.make_groupby_multidim_example_array()
+        bins = [0, 15, 20]
+        bin_coords = ['(0, 15]', '(15, 20]']
+        expected = xr.DataArray([16, 40], dims='lat_bins',
+                                coords={'lat_bins': bin_coords})
+        actual = array.groupby_bins('lat', bins).apply(
+            lambda x: x.sum(), shortcut=False)
+        self.assertDataArrayIdentical(expected, actual)
+        # modify the array coordinates to be non-monotonic after unstacking
+        array['lat'].data = np.array([[10., 20.], [20., 10.]])
+        expected = xr.DataArray([28, 28], dims='lat_bins',
+                                coords={'lat_bins': bin_coords})
+        actual = array.groupby_bins('lat', bins).apply(
+            lambda x: x.sum(), shortcut=False)
+        self.assertDataArrayIdentical(expected, actual)
+
+
 def test_multi_index_groupby_apply():
     # regression test for GH873
     ds = xr.Dataset({'foo': (('x', 'y'), np.random.randn(3, 4))},
@@ -43,4 +461,20 @@ def test_multi_index_groupby_sum():
     assert expected.equals(actual)
 
 
-# TODO: move other groupby tests from test_dataset and test_dataarray over here
+class TestMultipleArgumentGroupby(TestCase):
+
+    def test_apply_identity(self):
+        ds = xr.Dataset({'foo': ('x', [1, 2, 3])}, {'y': ('x', list('abc'))})
+        roundtripped = ds.groupby(['x', 'y']).apply(lambda x: x)
+        pandas_roundtripped = xr.Dataset.from_dataframe(
+            ds.to_dataframe().reset_index().groupby(['x', 'y']).apply(lambda x: x))
+        self.assertDatasetIdentical(ds, roundtripped)
+        self.assertDatasetIdentical(ds, pandas_roundtripped)
+
+    def test_sum_identity(self):
+        ds = xr.Dataset({'foo': ('x', [1, 2, 3])}, {'y': ('x', list('abc'))})
+        roundtripped = ds.groupby(['x', 'y']).sum()
+        pandas_roundtripped = xr.Dataset.from_dataframe(
+            ds.to_dataframe().reset_index().groupby(['x', 'y']).sum())
+        self.assertDatasetIdentical(ds, roundtripped)
+        self.assertDatasetIdentical(ds, pandas_roundtripped)