diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a66d00fff9714..af3d5a0f93cce 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1943,32 +1943,88 @@ def _sizeof_fmt(num, size_qualifier): _put_lines(buf, lines) def memory_usage(self, index=True, deep=False): - """Memory usage of DataFrame columns. + """ + Return the memory usage of each column in bytes. + + The memory usage can optionally include the contribution of + the index and elements of `object` dtype. + + This value is displayed in `DataFrame.info` by default. This can be + suppressed by setting ``pandas.options.display.memory_usage`` to False. Parameters ---------- - index : bool - Specifies whether to include memory usage of DataFrame's - index in returned Series. If `index=True` (default is False) - the first index of the Series is `Index`. - deep : bool - Introspect the data deeply, interrogate - `object` dtypes for system-level memory consumption + index : bool, default True + Specifies whether to include the memory usage of the DataFrame's + index in returned Series. If ``index=True`` the memory usage of the + index is the first item in the output. + deep : bool, default False + If True, introspect the data deeply by interrogating + `object` dtypes for system-level memory consumption, and include + it in the returned values. Returns ------- sizes : Series - A series with column names as index and memory usage of - columns with units of bytes. - - Notes - ----- - Memory usage does not include memory consumed by elements that - are not components of the array if deep=False + A Series whose index is the original column names and whose values + are the memory usage of each column in bytes. See Also -------- - numpy.ndarray.nbytes + numpy.ndarray.nbytes : Total bytes consumed by the elements of an + ndarray. + Series.memory_usage : Bytes consumed by a Series. + pandas.Categorical : Memory-efficient array for string values with + many repeated values. 
+ DataFrame.info : Concise summary of a DataFrame. + + Examples + -------- + >>> dtypes = ['int64', 'float64', 'complex128', 'object', 'bool'] + >>> data = dict([(t, np.ones(shape=5000).astype(t)) + ... for t in dtypes]) + >>> df = pd.DataFrame(data) + >>> df.head() + int64 float64 complex128 object bool + 0 1 1.0 (1+0j) 1 True + 1 1 1.0 (1+0j) 1 True + 2 1 1.0 (1+0j) 1 True + 3 1 1.0 (1+0j) 1 True + 4 1 1.0 (1+0j) 1 True + + >>> df.memory_usage() + Index 80 + int64 40000 + float64 40000 + complex128 80000 + object 40000 + bool 5000 + dtype: int64 + + >>> df.memory_usage(index=False) + int64 40000 + float64 40000 + complex128 80000 + object 40000 + bool 5000 + dtype: int64 + + The memory footprint of `object` dtype columns is ignored by default: + + >>> df.memory_usage(deep=True) + Index 80 + int64 40000 + float64 40000 + complex128 80000 + object 160000 + bool 5000 + dtype: int64 + + Use a Categorical for efficient storage of an object-dtype column with + many repeated values. + + >>> df['object'].astype('category').memory_usage(deep=True) + 5168 """ result = Series([c.memory_usage(index=False, deep=deep) for col, c in self.iteritems()], index=self.columns) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a893b2ba1a189..4a2698290166f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1436,12 +1436,20 @@ def __contains__(self, key): @property def empty(self): - """True if NDFrame is entirely empty [no items], meaning any of the + """ + Indicator whether DataFrame is empty. + + True if DataFrame is entirely empty (no items), meaning any of the axes are of length 0. + Returns + ------- + bool + If DataFrame is empty, return True; otherwise return False. + Notes ----- - If NDFrame contains only NaNs, it is still not considered empty. See + If DataFrame contains only NaNs, it is still not considered empty. See the example below. Examples