Avoid computing dask variables on __repr__ and __getattr__ #1532

Merged (16 commits, Sep 21, 2017)
4 changes: 4 additions & 0 deletions doc/whats-new.rst
@@ -74,6 +74,10 @@ Bug fixes
``rtol`` arguments when called on ``DataArray`` objects.
By `Stephan Hoyer <https://github.com/shoyer>`_.

- Stop repr() and the Jupyter Notebook from automatically computing dask
variables (:issue:`1522`).
By `Guido Imperiale <https://github.com/crusaderky>`_.

.. _whats-new.0.9.6:

v0.9.6 (8 June 2017)
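For context, a minimal sketch of the new behaviour (assuming dask is installed; illustrative, not part of the diff):

import numpy as np
import xarray as xr

a = xr.DataArray(np.arange(6).reshape(2, 3), dims=('x', 'y')).chunk()
repr(a)  # prints dask.array<...> metadata; the values are not computed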
4 changes: 2 additions & 2 deletions xarray/core/dataarray.py
@@ -447,8 +447,8 @@ def _level_coords(self):
"""
level_coords = OrderedDict()
for cname, var in self._coords.items():
if var.ndim == 1:
level_names = var.to_index_variable().level_names
if var.ndim == 1 and isinstance(var, IndexVariable):
level_names = var.level_names
if level_names is not None:
dim, = var.dims
level_coords.update({lname: dim for lname in level_names})
16 changes: 8 additions & 8 deletions xarray/core/dataset.py
@@ -627,8 +627,8 @@ def _level_coords(self):
level_coords = OrderedDict()
for cname in self._coord_names:
var = self.variables[cname]
if var.ndim == 1:
level_names = var.to_index_variable().level_names
if var.ndim == 1 and isinstance(var, IndexVariable):
level_names = var.level_names
if level_names is not None:
dim, = var.dims
level_coords.update({lname: dim for lname in level_names})
@@ -1641,12 +1641,12 @@ def expand_dims(self, dim, axis=None):
for d in dim:
if d in self.dims:
raise ValueError(
'Dimension {dim} already exists.'.format(dim=d))
'Dimension {dim} already exists.'.format(dim=d))
if (d in self._variables and
not utils.is_scalar(self._variables[d])):
raise ValueError(
'{dim} already exists as coordinate or'
' variable name.'.format(dim=d))
'{dim} already exists as coordinate or'
' variable name.'.format(dim=d))

if len(dim) != len(set(dim)):
raise ValueError('dims should not contain duplicate values.')
@@ -1663,7 +1663,7 @@ def expand_dims(self, dim, axis=None):
raise IndexError(
'Axis {a} is out of bounds of the expanded'
' dimension size {dim}.'.format(
a=a, v=k, dim=result_ndim))
a=a, v=k, dim=result_ndim))

axis_pos = [a if a >= 0 else result_ndim + a
for a in axis]
@@ -2980,8 +2980,8 @@ def filter_by_attrs(self, **kwargs):
for var_name, variable in self.data_vars.items():
for attr_name, pattern in kwargs.items():
attr_value = variable.attrs.get(attr_name)
if ((callable(pattern) and pattern(attr_value))
or attr_value == pattern):
if ((callable(pattern) and pattern(attr_value)) or
attr_value == pattern):
selection.append(var_name)
return self[selection]

32 changes: 11 additions & 21 deletions xarray/core/formatting.py
@@ -196,8 +196,8 @@ def format_array_flat(items_ndarray, max_width):
return pprint_str


def _summarize_var_or_coord(name, var, col_width, show_values=True,
marker=' ', max_width=None):
def summarize_variable(name, var, col_width, show_values=True,
marker=' ', max_width=None):
if max_width is None:
max_width = OPTIONS['display_width']
first_col = pretty_print(u' %s %s ' % (marker, name), col_width)
@@ -222,38 +222,28 @@ def _summarize_coord_multiindex(coord, col_width, marker):
def _summarize_coord_levels(coord, col_width, marker=u'-'):
relevant_coord = coord[:30]
return u'\n'.join(
[_summarize_var_or_coord(lname,
relevant_coord.get_level_variable(lname),
col_width, marker=marker)
[summarize_variable(lname,
relevant_coord.get_level_variable(lname),
col_width, marker=marker)
for lname in coord.level_names])


def _not_remote(var):
"""Helper function to identify if array is positively identifiable as
coming from a remote source.
"""
source = var.encoding.get('source')
if source and source.startswith('http') and not var._in_memory:
return False
return True


def summarize_var(name, var, col_width):
show_values = _not_remote(var)
return _summarize_var_or_coord(name, var, col_width, show_values)
def summarize_datavar(name, var, col_width):
show_values = var._in_memory
Member (shoyer):

Our current heuristic uses the _not_remote() helper function, so it doesn't display arrays loaded over a network (via opendap), which can often be quite slow. But it does display a summary of values from netCDF files on disk, which I do think is generally helpful and for which I haven't noticed any performance issues.

Based on the current definition of _in_memory, we wouldn't display any of these arrays:

@property
def _in_memory(self):
    return (isinstance(self._data, (np.ndarray, PandasIndexAdapter)) or
            (isinstance(self._data, indexing.MemoryCachedArray) and
             isinstance(self._data.array, np.ndarray)))

So instead of using _in_memory, I would suggest something like _not_remote(var) and not isinstance(var._data, dask_array_type) as the condition for showing values.
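A minimal sketch of that condition (the _show_values helper name is hypothetical; dask_array_type is assumed to come from xarray.core.pycompat, as elsewhere in the codebase):

from xarray.core.pycompat import dask_array_type

def _show_values(var):
    # Preview values only when the data is positively local (not opendap)
    # and not backed by a lazy dask graph.
    return _not_remote(var) and not isinstance(var._data, dask_array_type)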

Contributor Author (crusaderky), Sep 2, 2017:

@shoyer loading a NetCDF variable from disk every time you do __repr__ is a terrible idea if that variable has been compressed without chunking. If the variable is a single block of 100MB of zlib-compressed data, you will have to read it and decompress it every time.
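For instance, a sketch of such a file (assuming netCDF4 is available; the path and sizes are hypothetical):

import numpy as np
from netCDF4 import Dataset

with Dataset('big.nc', 'w') as nc:  # hypothetical file
    nc.createDimension('x', 25000000)
    # zlib compression stored as a single ~100MB chunk: any read,
    # including one triggered by __repr__, decompresses the whole block.
    v = nc.createVariable('v', 'f4', ('x',), zlib=True,
                          chunksizes=(25000000,))
    v[:] = np.zeros(25000000, dtype='f4')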

Contributor Author (crusaderky):

@shoyer also, your netcdf array might be sitting on a network file system on the opposite side of a narrowband VPN.

Member (shoyer):

That's certainly possible, but in my experience very few people write 100MB chunks -- those are very large.

Let's summarize our options:

  1. Always show a preview of data from netCDF files with Dataset.__repr__
  2. Never show a preview for data if it isn't already in memory
  3. Show a preview depending on a global option (with default choice TBD).

Reasons to show data from disk in __repr__:

  • It's what we've always done.
  • "Most" of the time it's fast and convenient.
  • It provides a good experience for new users, who don't need to hunt for a separate preview() or load() command to see what's in a Dataset. You can simply print it at a console.

Reasons not to show data from disk in __repr__:

  • IO can be slow/expensive, especially if compression or networks are involved.
  • Heuristics to detect expensive IO are unreliable and somewhat distasteful.

Maybe we should solicit a few more opinions here before we change the default behavior?

Another possibility is to try loading data in a separate thread and timeout if it takes too long (say more than 0.5 seconds), but that might open up its own set of performance issues (it's not easy to kill a thread, short of terminating a process).
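For illustration, a rough sketch of that idea (hypothetical preview_with_timeout helper, using concurrent.futures):

import concurrent.futures

def preview_with_timeout(var, timeout=0.5):
    # Load the values in a worker thread and give up after `timeout`
    # seconds. The worker cannot be killed, so an expensive load keeps
    # running in the background, which is the issue mentioned above.
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = pool.submit(lambda: var.values)
    try:
        return future.result(timeout=timeout)
    except concurrent.futures.TimeoutError:
        return None  # caller falls back to a '...' placeholder
    finally:
        pool.shutdown(wait=False)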

Member:

I think my vote would be to only print a preview of data that is in memory. For my uses, I typically have fill values in the first 10-20 data points so the previous __repr__ didn't give me any information.

@pydata/xarray - anyone else have thoughts on this?

Member:

@shoyer - do we have results from your google poll on this issue yet?

Member (shoyer):

Sounds like I was wrong -- the consensus is pretty clear that we should go ahead with this

[Four screenshots of the poll results, Sep 20, 2017]

Member:

I'm not sure this sample size is going to give us statistically significant results, but I'm glad to see @delgadom and I are in agreement.

@crusaderky - are you up for implementing this?

Member:

I think the current implementation (in this PR) is actually already correct.

Contributor Author (crusaderky):

Yep - data is eagerly loaded from disk only for index coords on __init__ now.
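An illustrative sketch of that behaviour (hypothetical file path):

import xarray as xr

ds = xr.open_dataset('data.nc')  # hypothetical file
# Index coordinates are loaded eagerly on __init__, so repr(ds)
# previews them; data variables and non-index coordinates stay on
# disk and render as '...'.
repr(ds)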

return summarize_variable(name, var.variable, col_width, show_values)


def summarize_coord(name, var, col_width):
is_index = name in var.dims
show_values = is_index or _not_remote(var)
show_values = var._in_memory
marker = u'*' if is_index else u' '
if is_index:
coord = var.variable.to_index_variable()
if coord.level_names is not None:
return u'\n'.join(
[_summarize_coord_multiindex(coord, col_width, marker),
_summarize_coord_levels(coord, col_width)])
return _summarize_var_or_coord(name, var, col_width, show_values, marker)
return summarize_variable(name, var.variable, col_width, show_values, marker)


def summarize_attr(key, value, col_width=None):
@@ -307,7 +297,7 @@ def _mapping_repr(mapping, title, summarizer, col_width=None):


data_vars_repr = functools.partial(_mapping_repr, title=u'Data variables',
summarizer=summarize_var)
summarizer=summarize_datavar)


attrs_repr = functools.partial(_mapping_repr, title=u'Attributes',
4 changes: 2 additions & 2 deletions xarray/core/merge.py
@@ -113,7 +113,7 @@ def merge_variables(
list_of_variables_dicts, # type: List[Mapping[Any, Variable]]
priority_vars=None, # type: Optional[Mapping[Any, Variable]]
compat='minimal', # type: str
):
):
# type: (...) -> OrderedDict[Any, Variable]
"""Merge dicts of variables, while resolving conflicts appropriately.

@@ -180,7 +180,7 @@ def expand_variable_dicts(list_of_variable_dicts):
Parameters
----------
list_of_variable_dicts : list of dict or Dataset objects
The each value for the mappings must be of the following types:
Each value for the mappings must be of the following types:
- an xarray.Variable
- a tuple `(dims, data[, attrs[, encoding]])` that can be converted in
an xarray.Variable
4 changes: 3 additions & 1 deletion xarray/core/variable.py
@@ -284,7 +284,7 @@ def nbytes(self):

@property
def _in_memory(self):
return (isinstance(self._data, (np.ndarray, PandasIndexAdapter)) or
return (isinstance(self._data, (np.ndarray, np.number, PandasIndexAdapter)) or
(isinstance(self._data, indexing.MemoryCachedArray) and
isinstance(self._data.array, np.ndarray)))

@@ -1189,6 +1189,7 @@ def func(self, other):
return self
return func


ops.inject_all_ops_and_reduce_methods(Variable)


@@ -1353,6 +1354,7 @@ def name(self):
def name(self, value):
raise AttributeError('cannot modify name of IndexVariable in-place')


# for backwards compatibility
Coordinate = utils.alias(IndexVariable, 'Coordinate')

127 changes: 101 additions & 26 deletions xarray/tests/test_dask.py
@@ -1,7 +1,9 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pickle
from textwrap import dedent
import numpy as np
import pandas as pd

@@ -130,6 +132,25 @@ def test_binary_op(self):
self.assertLazyAndIdentical(u + u, v + v)
self.assertLazyAndIdentical(u[0] + u, v[0] + v)

def test_repr(self):
expected = dedent("""\
<xarray.Variable (x: 4, y: 6)>
dask.array<array, shape=(4, 6), dtype=float64, chunksize=(2, 2)>""")
self.assertEqual(expected, repr(self.lazy_var))

def test_pickle(self):
# Test that pickling/unpickling does not convert the dask
# backend to numpy
a1 = Variable(['x'], build_dask_array('x'))
a1.compute()
self.assertFalse(a1._in_memory)
self.assertEquals(kernel_call_count, 1)
a2 = pickle.loads(pickle.dumps(a1))
self.assertEquals(kernel_call_count, 1)
self.assertVariableIdentical(a1, a2)
self.assertFalse(a1._in_memory)
self.assertFalse(a2._in_memory)

def test_reduce(self):
u = self.eager_var
v = self.lazy_var
@@ -341,47 +362,98 @@ def test_dot(self):
lazy = self.lazy_array.dot(self.lazy_array[0])
self.assertLazyAndAllClose(eager, lazy)

def test_variable_pickle(self):
# Test that pickling/unpickling does not convert the dask
# backend to numpy
a1 = Variable(['x'], build_dask_array())
a1.compute()
self.assertFalse(a1._in_memory)
self.assertEquals(kernel_call_count, 1)
a2 = pickle.loads(pickle.dumps(a1))
self.assertEquals(kernel_call_count, 1)
self.assertVariableIdentical(a1, a2)
self.assertFalse(a1._in_memory)
self.assertFalse(a2._in_memory)
def test_dataarray_repr(self):
# Test that __repr__ does not convert the dask backend to numpy
# for either the data variable or the non-index coords
data = build_dask_array('data')
nonindex_coord = build_dask_array('coord')
a = DataArray(data, dims=['x'], coords={'y': ('x', nonindex_coord)})
expected = dedent("""\
<xarray.DataArray 'data' (x: 1)>
dask.array<data, shape=(1,), dtype=int64, chunksize=(1,)>
Coordinates:
y (x) int64 ...
Dimensions without coordinates: x""")
self.assertEqual(expected, repr(a))
self.assertEquals(kernel_call_count, 0)

def test_dataset_repr(self):
# Test that __repr__ does not convert the dask backend to numpy
# for either the data variables or the non-index coords
data = build_dask_array('data')
nonindex_coord = build_dask_array('coord')
ds = Dataset(data_vars={'a': ('x', data)}, coords={'y': ('x', nonindex_coord)})
expected = dedent("""\
<xarray.Dataset>
Dimensions: (x: 1)
Coordinates:
y (x) int64 ...
Dimensions without coordinates: x
Data variables:
a (x) int64 ...""")
Member (shoyer):

Something to consider: could we show an abbreviated version of the dask array repr instead of ...?

e.g., if the dask repr is dask.array<add, shape=(10,), dtype=float64, chunksize=(5,)>, maybe dask.array<add, chunksize=(5,)> or dask.array<add, shape=(10,), chunksize=(5,)>?
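One possible sketch of such an abbreviation (hypothetical short_dask_repr helper, not part of this PR):

def short_dask_repr(array):
    # Condense e.g. dask.array<add-1f2e..., shape=(10,), dtype=float64,
    # chunksize=(5,)> into dask.array<add, shape=(10,), chunksize=(5,)>.
    name = array.name.rsplit('-', 1)[0]  # drop the token hash, keep the op
    chunksize = tuple(chunks[0] for chunks in array.chunks)
    return 'dask.array<%s, shape=%s, chunksize=%s>' % (
        name, array.shape, chunksize)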

Contributor Author (crusaderky):

@shoyer fixed. Now it's the same as in Variable and in the DataArray data var.

self.assertEqual(expected, repr(ds))
self.assertEquals(kernel_call_count, 0)

def test_dataarray_pickle(self):
# Test that pickling/unpickling does not convert the dask
# backend to numpy
a1 = DataArray(build_dask_array())
# Test that pickling/unpickling does not convert the dask backend
# to numpy for either the data variable or the non-index coords
data = build_dask_array('data')
nonindex_coord = build_dask_array('coord')
a1 = DataArray(data, dims=['x'], coords={'y': ('x', nonindex_coord)})
a1.compute()
self.assertFalse(a1._in_memory)
self.assertEquals(kernel_call_count, 1)
self.assertFalse(a1.coords['y']._in_memory)
self.assertEquals(kernel_call_count, 2)
a2 = pickle.loads(pickle.dumps(a1))
self.assertEquals(kernel_call_count, 1)
self.assertEquals(kernel_call_count, 2)
self.assertDataArrayIdentical(a1, a2)
self.assertFalse(a1._in_memory)
self.assertFalse(a2._in_memory)
self.assertFalse(a1.coords['y']._in_memory)
self.assertFalse(a2.coords['y']._in_memory)

def test_dataset_pickle(self):
ds1 = Dataset({'a': DataArray(build_dask_array())})
# Test that pickling/unpickling does not convert the dask backend
# to numpy for either the data variables or the non-index coords
data = build_dask_array('data')
nonindex_coord = build_dask_array('coord')
ds1 = Dataset(data_vars={'a': ('x', data)}, coords={'y': ('x', nonindex_coord)})
ds1.compute()
self.assertFalse(ds1['a']._in_memory)
self.assertEquals(kernel_call_count, 1)
self.assertFalse(ds1['y']._in_memory)
self.assertEquals(kernel_call_count, 2)
ds2 = pickle.loads(pickle.dumps(ds1))
self.assertEquals(kernel_call_count, 1)
self.assertEquals(kernel_call_count, 2)
self.assertDatasetIdentical(ds1, ds2)
self.assertFalse(ds1['a']._in_memory)
self.assertFalse(ds2['a']._in_memory)
self.assertFalse(ds1['y']._in_memory)
self.assertFalse(ds2['y']._in_memory)

def test_dataarray_getattr(self):
# ipython/jupyter does a long list of getattr() calls when trying to
# represent an object. Make sure we're not accidentally computing dask variables.
data = build_dask_array('data')
nonindex_coord = build_dask_array('coord')
a = DataArray(data, dims=['x'], coords={'y': ('x', nonindex_coord)})
with suppress(AttributeError):
getattr(a, 'NOTEXIST')
self.assertEquals(kernel_call_count, 0)

def test_dataset_getattr(self):
# Test that a failed getattr() (as triggered by ipython/jupyter
# autocomplete) does not compute dask variables
data = build_dask_array('data')
nonindex_coord = build_dask_array('coord')
ds = Dataset(data_vars={'a': ('x', data)}, coords={'y': ('x', nonindex_coord)})
with suppress(AttributeError):
getattr(ds, 'NOTEXIST')
self.assertEquals(kernel_call_count, 0)

def test_values(self):
# Test that invoking the values property does not convert the dask
# backend to numpy
a = DataArray([1,2]).chunk()
a = DataArray([1, 2]).chunk()
self.assertFalse(a._in_memory)
self.assertEquals(a.values.tolist(), [1, 2])
self.assertFalse(a._in_memory)
@@ -395,17 +467,20 @@ def test_from_dask_variable(self):


kernel_call_count = 0


def kernel():
"""Dask kernel to test pickling/unpickling.
"""Dask kernel to test pickling/unpickling and __repr__.
Must be global to make it pickleable.
"""
global kernel_call_count
kernel_call_count += 1
return np.ones(1)
return np.ones(1, dtype=np.int64)


def build_dask_array():
def build_dask_array(name):
global kernel_call_count
kernel_call_count = 0
return dask.array.Array(
dask={('foo', 0): (kernel, )}, name='foo',
chunks=((1,),), dtype=int)
dask={(name, 0): (kernel, )}, name=name,
chunks=((1,),), dtype=np.int64)
2 changes: 2 additions & 0 deletions xarray/tests/test_dataarray.py
@@ -2596,6 +2596,7 @@ def da(request):
[0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7],
dims='time')


def test_rolling_iter(da):

rolling_obj = da.rolling(time=7)
@@ -2698,6 +2699,7 @@ def test_rolling_pandas_compat(da, center, window, min_periods):
np.testing.assert_allclose(s_rolling.index,
da_rolling['index'])


@pytest.mark.parametrize('da', (1, 2), indirect=True)
@pytest.mark.parametrize('center', (True, False))
@pytest.mark.parametrize('min_periods', (None, 1, 2, 3))