@@ -2099,32 +2099,88 @@ def _sizeof_fmt(num, size_qualifier):
2099
2099
fmt .buffer_put_lines (buf , lines )
2100
2100
2101
2101
def memory_usage (self , index = True , deep = False ):
2102
- """Memory usage of DataFrame columns.
2102
+ """
2103
+ Return the memory usage of each column in bytes.
2104
+
2105
+ The memory usage can optionally include the contribution of
2106
+ the index and elements of `object` dtype.
2107
+
2108
+ This value is displayed in `DataFrame.info` by default. This can be
2109
+ suppressed by setting ``pandas.options.display.memory_usage`` to False.
2103
2110
2104
2111
Parameters
2105
2112
----------
2106
- index : bool
2107
- Specifies whether to include memory usage of DataFrame's
2108
- index in returned Series. If `index=True` (default is False)
2109
- the first index of the Series is `Index`.
2110
- deep : bool
2111
- Introspect the data deeply, interrogate
2112
- `object` dtypes for system-level memory consumption
2113
+ index : bool, default True
2114
+ Specifies whether to include the memory usage of the DataFrame's
2115
+ index in returned Series. If ``index=True`` the memory usage of the
2116
+ index is the first item in the output.
2117
+ deep : bool, default False
2118
+ If True, introspect the data deeply by interrogating
2119
+ `object` dtypes for system-level memory consumption, and include
2120
+ it in the returned values.
2113
2121
2114
2122
Returns
2115
2123
-------
2116
2124
sizes : Series
2117
- A series with column names as index and memory usage of
2118
- columns with units of bytes.
2119
-
2120
- Notes
2121
- -----
2122
- Memory usage does not include memory consumed by elements that
2123
- are not components of the array if deep=False
2125
+ A Series whose index is the original column names and whose values
2126
+ are the memory usage of each column in bytes.
2124
2127
2125
2128
See Also
2126
2129
--------
2127
- numpy.ndarray.nbytes
2130
+ numpy.ndarray.nbytes : Total bytes consumed by the elements of an
2131
+ ndarray.
2132
+ Series.memory_usage : Bytes consumed by a Series.
2133
+ pandas.Categorical : Memory-efficient array for string values with
2134
+ many repeated values.
2135
+ DataFrame.info : Concise summary of a DataFrame.
2136
+
2137
+ Examples
2138
+ --------
2139
+ >>> dtypes = ['int64', 'float64', 'complex128', 'object', 'bool']
2140
+ >>> data = dict([(t, np.ones(shape=5000).astype(t))
2141
+ ... for t in dtypes])
2142
+ >>> df = pd.DataFrame(data)
2143
+ >>> df.head()
2144
+ int64 float64 complex128 object bool
2145
+ 0 1 1.0 (1+0j) 1 True
2146
+ 1 1 1.0 (1+0j) 1 True
2147
+ 2 1 1.0 (1+0j) 1 True
2148
+ 3 1 1.0 (1+0j) 1 True
2149
+ 4 1 1.0 (1+0j) 1 True
2150
+
2151
+ >>> df.memory_usage()
2152
+ Index 80
2153
+ int64 40000
2154
+ float64 40000
2155
+ complex128 80000
2156
+ object 40000
2157
+ bool 5000
2158
+ dtype: int64
2159
+
2160
+ >>> df.memory_usage(index=False)
2161
+ int64 40000
2162
+ float64 40000
2163
+ complex128 80000
2164
+ object 40000
2165
+ bool 5000
2166
+ dtype: int64
2167
+
2168
+ The memory footprint of `object` dtype columns is ignored by default; pass ``deep=True`` to include it:
2169
+
2170
+ >>> df.memory_usage(deep=True)
2171
+ Index 80
2172
+ int64 40000
2173
+ float64 40000
2174
+ complex128 80000
2175
+ object 160000
2176
+ bool 5000
2177
+ dtype: int64
2178
+
2179
+ Use a Categorical for efficient storage of an object-dtype column with
2180
+ many repeated values.
2181
+
2182
+ >>> df['object'].astype('category').memory_usage(deep=True)
2183
+ 5168
2128
2184
"""
2129
2185
result = Series ([c .memory_usage (index = False , deep = deep )
2130
2186
for col , c in self .iteritems ()], index = self .columns )
0 commit comments