From ad7f06fc58be0d0c63be381b04d5fcd49ce447e5 Mon Sep 17 00:00:00 2001 From: "Eric O. LEBIGOT (EOL)" Date: Fri, 9 Mar 2018 22:22:12 +0100 Subject: [PATCH 1/6] Docstring for memory_usage now follows Pandas convention. --- pandas/core/series.py | 48 +++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 069f0372ab6e1..1d8c2f0fa1bdf 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2696,28 +2696,50 @@ def reindex_axis(self, labels, axis=0, **kwargs): return self.reindex(index=labels, **kwargs) def memory_usage(self, index=True, deep=False): - """Memory usage of the Series + """ + Return the memory usage of the Series. + + The memory usage can optionally include the contribution of + the index and of elements of `object` dtype. Parameters ---------- - index : bool - Specifies whether to include memory usage of Series index - deep : bool - Introspect the data deeply, interrogate - `object` dtypes for system-level memory consumption + index : bool, default True + Specifies whether to include memory usage of Series index. + deep : bool, default False + Specifies whether to introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption, and include + it in the returned value. Returns ------- - scalar bytes of memory consumed - - Notes - ----- - Memory usage does not include memory consumed by elements that - are not components of the array if deep=False + int + Bytes of memory consumed. See Also -------- - numpy.ndarray.nbytes + numpy.ndarray.nbytes : Total bytes consumed by the elements of the + array. 
+ + Examples + -------- + + >>> s = pd.Series(range(3)) + >>> s.memory_usage() + 104 + + Not including the index gives the size of the rest of the data: + + >>> s.memory_usage(index=False) + 24 + + The memory footprint of `object` values is ignored by default: + + >>> s = pd.Series(object()) + >>> s.memory_usage() + 88 + >>> s.memory_usage(deep=True) # Footprint of object() included + 104 """ v = super(Series, self).memory_usage(deep=deep) if index: From 601ffd76eca95f674c928fceb9d7af0d1d97c18f Mon Sep 17 00:00:00 2001 From: "Eric O. LEBIGOT (EOL)" Date: Fri, 9 Mar 2018 22:27:31 +0100 Subject: [PATCH 2/6] Clearer wording. --- pandas/core/series.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1d8c2f0fa1bdf..2125393d0b1ca 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2705,9 +2705,9 @@ def memory_usage(self, index=True, deep=False): Parameters ---------- index : bool, default True - Specifies whether to include memory usage of Series index. + Specifies whether to include the memory usage of the Series index. deep : bool, default False - Specifies whether to introspect the data deeply, interrogate + If True, introspect the data deeply by interrogating `object` dtypes for system-level memory consumption, and include it in the returned value. 
@@ -2728,7 +2728,8 @@ def memory_usage(self, index=True, deep=False): >>> s.memory_usage() 104 - Not including the index gives the size of the rest of the data: + Not including the index gives the size of the rest of the data, which + is necessarily smaller: >>> s.memory_usage(index=False) 24 @@ -2736,6 +2737,9 @@ def memory_usage(self, index=True, deep=False): The memory footprint of `object` values is ignored by default: >>> s = pd.Series(object()) + >>> s + 0 + dtype: object >>> s.memory_usage() 88 >>> s.memory_usage(deep=True) # Footprint of object() included From da08897e6aa34955d36fe7aa4e0f82f93443e87e Mon Sep 17 00:00:00 2001 From: "Eric O. LEBIGOT (EOL)" Date: Fri, 9 Mar 2018 22:28:14 +0100 Subject: [PATCH 3/6] Removed useless concrete address. --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2125393d0b1ca..be8d88b00122d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2738,7 +2738,7 @@ def memory_usage(self, index=True, deep=False): >>> s = pd.Series(object()) >>> s - 0 + 0 dtype: object >>> s.memory_usage() 88 From 6c0205dd61cc2052875dc979a0ad5c18861c6e17 Mon Sep 17 00:00:00 2001 From: "Eric O. LEBIGOT (EOL)" Date: Fri, 9 Mar 2018 22:30:44 +0100 Subject: [PATCH 4/6] Slightly more realistic example. 
--- pandas/core/series.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index be8d88b00122d..96605ed172337 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2736,14 +2736,15 @@ def memory_usage(self, index=True, deep=False): The memory footprint of `object` values is ignored by default: - >>> s = pd.Series(object()) + >>> class MyClass: pass + >>> s = pd.Series(MyClass()) >>> s - 0 <object object at ...> + 0 <__main__.MyClass object at ...> dtype: object >>> s.memory_usage() 88 - >>> s.memory_usage(deep=True) # Footprint of object() included - 104 + >>> s.memory_usage(deep=True) + 120 """ v = super(Series, self).memory_usage(deep=deep) if index: From 6626d67839b620200ba6ab45aba7f0a980104bc9 Mon Sep 17 00:00:00 2001 From: "Eric O. LEBIGOT (EOL)" Date: Sat, 10 Mar 2018 17:20:45 +0100 Subject: [PATCH 5/6] Added related function. 
--- pandas/core/series.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 393bb3ab027e9..99ae07f2d006d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2737,15 +2737,13 @@ def memory_usage(self, index=True, deep=False): The memory footprint of `object` values is ignored by default: - >>> class MyClass: pass - >>> s = pd.Series(MyClass()) - >>> s - 0 <__main__.MyClass object at ...> - dtype: object + >>> s = pd.Series(["a", "b"]) + >>> s.values + array(['a', 'b'], dtype=object) >>> s.memory_usage() - 88 + 96 >>> s.memory_usage(deep=True) - 120 + 212 """ v = super(Series, self).memory_usage(deep=deep) if index: