DOC: update the pd.DataFrame.memory_usage/empty docstring(Seoul) (#20102)

ohahohah · jorisvandenbossche · commit bf9e4f3b1aad · 2018-03-15T22:58:34.000+01:00
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2099,32 +2099,88 @@ def _sizeof_fmt(num, size_qualifier):
         fmt.buffer_put_lines(buf, lines)
 
     def memory_usage(self, index=True, deep=False):
-        """Memory usage of DataFrame columns.
+        """
+        Return the memory usage of each column in bytes.
+
+        The memory usage can optionally include the contribution of
+        the index and elements of `object` dtype.
+
+        This value is displayed in `DataFrame.info` by default. This can be
+        suppressed by setting ``pandas.options.display.memory_usage`` to False.
 
         Parameters
         ----------
-        index : bool
-            Specifies whether to include memory usage of DataFrame's
-            index in returned Series. If `index=True` (default is False)
-            the first index of the Series is `Index`.
-        deep : bool
-            Introspect the data deeply, interrogate
-            `object` dtypes for system-level memory consumption
+        index : bool, default True
+            Specifies whether to include the memory usage of the DataFrame's
+            index in returned Series. If ``index=True`` the memory usage of the
+            index is the first item in the output.
+        deep : bool, default False
+            If True, introspect the data deeply by interrogating
+            `object` dtypes for system-level memory consumption, and include
+            it in the returned values.
 
         Returns
         -------
         sizes : Series
-            A series with column names as index and memory usage of
-            columns with units of bytes.
-
-        Notes
-        -----
-        Memory usage does not include memory consumed by elements that
-        are not components of the array if deep=False
+            A Series whose index is the original column names and whose values
+            are the memory usage of each column in bytes.
 
         See Also
         --------
-        numpy.ndarray.nbytes
+        numpy.ndarray.nbytes : Total bytes consumed by the elements of an
+            ndarray.
+        Series.memory_usage : Bytes consumed by a Series.
+        pandas.Categorical : Memory-efficient array for string values with
+            many repeated values.
+        DataFrame.info : Concise summary of a DataFrame.
+
+        Examples
+        --------
+        >>> dtypes = ['int64', 'float64', 'complex128', 'object', 'bool']
+        >>> data = dict([(t, np.ones(shape=5000).astype(t))
+        ...              for t in dtypes])
+        >>> df = pd.DataFrame(data)
+        >>> df.head()
+           int64  float64  complex128 object  bool
+        0      1      1.0      (1+0j)      1  True
+        1      1      1.0      (1+0j)      1  True
+        2      1      1.0      (1+0j)      1  True
+        3      1      1.0      (1+0j)      1  True
+        4      1      1.0      (1+0j)      1  True
+
+        >>> df.memory_usage()
+        Index            80
+        int64         40000
+        float64       40000
+        complex128    80000
+        object        40000
+        bool           5000
+        dtype: int64
+
+        >>> df.memory_usage(index=False)
+        int64         40000
+        float64       40000
+        complex128    80000
+        object        40000
+        bool           5000
+        dtype: int64
+
+        The memory footprint of `object` dtype columns is ignored by default:
+
+        >>> df.memory_usage(deep=True)
+        Index             80
+        int64          40000
+        float64        40000
+        complex128     80000
+        object        160000
+        bool            5000
+        dtype: int64
+
+        Use a Categorical for efficient storage of an object-dtype column with
+        many repeated values.
+
+        >>> df['object'].astype('category').memory_usage(deep=True)
+        5168
         """
         result = Series([c.memory_usage(index=False, deep=deep)
                          for col, c in self.iteritems()], index=self.columns)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1489,12 +1489,20 @@ def __contains__(self, key):
 
     @property
     def empty(self):
-        """True if NDFrame is entirely empty [no items], meaning any of the
+        """
+        Indicator of whether the DataFrame is empty.
+
+        True if DataFrame is entirely empty (no items), meaning any of the
         axes are of length 0.
 
+        Returns
+        -------
+        bool
+            Return True if DataFrame is empty, otherwise return False.
+
         Notes
         -----
-        If NDFrame contains only NaNs, it is still not considered empty. See
+        If DataFrame contains only NaNs, it is still not considered empty. See
         the example below.
 
         Examples