From 3474043a4a8511523e71479e8c3dee4d8b31b6e7 Mon Sep 17 00:00:00 2001 From: Manju080 Date: Thu, 6 Mar 2025 17:16:58 +0000 Subject: [PATCH 01/11] DOC: Update warning in Index.values docstring to clarify index modification issues (#60954) --- pandas/core/indexes/base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 852049804a4f5..3342e67f111ca 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4912,6 +4912,10 @@ def values(self) -> ArrayLike: :meth:`Index.to_numpy`, depending on whether you need a reference to the underlying data or a NumPy array. + Modifying 'Index.values' directly is not supported and can lead to memory + corruption or segmentation faults. This is because 'Index.values' provides + a direct reference to internal NumPy data. + Returns ------- array: numpy.ndarray or ExtensionArray From d070b066ffd6595816bee39eb5acffe147829067 Mon Sep 17 00:00:00 2001 From: Manju080 Date: Fri, 7 Mar 2025 18:34:26 +0000 Subject: [PATCH 02/11] DOC: Update warning in Index.values docstring to clarify index modification issues (#60954) with changes --- pandas/core/indexes/base.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3342e67f111ca..3ad788507dab8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4912,9 +4912,8 @@ def values(self) -> ArrayLike: :meth:`Index.to_numpy`, depending on whether you need a reference to the underlying data or a NumPy array. - Modifying 'Index.values' directly is not supported and can lead to memory - corruption or segmentation faults. This is because 'Index.values' provides - a direct reference to internal NumPy data. + .. versionchanged:: 3.0.0 + The returned array is read-only. Returns ------- From b00ba12da01c6f96541b6400f112fea9fb673e84 Mon Sep 17 00:00:00 2001 From: Manjunath L <84699147+Manju080@users.noreply.github.com> Date: Sat, 8 Mar 2025 10:19:51 +0530 Subject: [PATCH 03/11] Update pandas/core/indexes/base.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/core/indexes/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3ad788507dab8..fe89a524735ce 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4912,8 +4912,9 @@ def values(self) -> ArrayLike: :meth:`Index.to_numpy`, depending on whether you need a reference to the underlying data or a NumPy array. - .. versionchanged:: 3.0.0 - The returned array is read-only. + .. versionchanged:: 3.0.0 + + The returned array is read-only. Returns ------- From 390c8be44e5320bd4d761fde8c98fcd099fd05ff Mon Sep 17 00:00:00 2001 From: Manju080 Date: Mon, 10 Mar 2025 17:10:58 +0000 Subject: [PATCH 04/11] DOC : Fixing the whitespace which was causing error --- pandas/core/indexes/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fe89a524735ce..2993837e41edb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4912,9 +4912,9 @@ def values(self) -> ArrayLike: :meth:`Index.to_numpy`, depending on whether you need a reference to the underlying data or a NumPy array. - .. versionchanged:: 3.0.0 - - The returned array is read-only. + .. versionchanged:: 3.0.0 + + The returned array is read-only. Returns ------- From e58f383ad20a1501b200301262090e1e43c7b0ae Mon Sep 17 00:00:00 2001 From: Manju080 Date: Tue, 11 Mar 2025 14:43:27 +0000 Subject: [PATCH 05/11] Fixed docstring validation and formatting issues --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2993837e41edb..855425c3cb636 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4913,7 +4913,7 @@ def values(self) -> ArrayLike: a reference to the underlying data or a NumPy array. .. versionchanged:: 3.0.0 - + The returned array is read-only. Returns From a505d35d92bc0ce22367980fee12ccc2de3f0c24 Mon Sep 17 00:00:00 2001 From: Manju080 Date: Wed, 9 Apr 2025 18:52:15 +0000 Subject: [PATCH 06/11] BUG: Fix array creation for string dtype with inconsistent list lengths (#61155) --- pandas/core/arrays/string_.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index b3aa782341c77..d8823290a1473 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -639,7 +639,7 @@ def _from_sequence( dtype = StringDtype(storage="python") from pandas.core.arrays.masked import BaseMaskedArray - + na_value = dtype.na_value if isinstance(scalars, BaseMaskedArray): # avoid costly conversion to object dtype @@ -655,6 +655,8 @@ def _from_sequence( # zero_copy_only to True which caused problems see GH#52076 scalars = np.array(scalars) # convert non-na-likes to str, and nan-likes to StringDtype().na_value + if isinstance(scalars, list) and all(isinstance(x,list) for x in scalars): + scalars =[str(x) for x in scalars] result = lib.ensure_string_array(scalars, na_value=na_value, copy=copy) # Manually creating new array avoids the validation step in the __init__, so is From 6f5c4d473bf68d342d7fb323198354393ba4826b Mon Sep 17 00:00:00 2001 From: Manju080 Date: Wed, 9 Apr 2025 18:59:51 +0000 Subject: [PATCH 07/11] BUG: Fix array creation for string dtype with inconsistent list lengths (#61155) --- pandas/core/arrays/string_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index d8823290a1473..1622a392e284c 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -655,8 +655,8 @@ def _from_sequence( # zero_copy_only to True which caused problems see GH#52076 scalars = np.array(scalars) # convert non-na-likes to str, and nan-likes to StringDtype().na_value - if isinstance(scalars, list) and all(isinstance(x,list) for x in scalars): - scalars =[str(x) for x in scalars] + if isinstance(scalars, list) and all(isinstance(x, list) for x in scalars): + scalars = [str(x) for x in scalars] result = lib.ensure_string_array(scalars, na_value=na_value, copy=copy) # Manually creating new array avoids the validation step in the __init__, so is From fc4653df7f85f74f5513aca96806804e3c3daee6 Mon Sep 17 00:00:00 2001 From: Manju080 Date: Tue, 15 Apr 2025 16:30:17 +0000 Subject: [PATCH 08/11] BUG fix GH#61155 v2 --- pandas/_libs/lib.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index de7d9af731010..8bc696285c939 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -769,7 +769,10 @@ cpdef ndarray[object] ensure_string_array( return out arr = arr.to_numpy(dtype=object) elif not util.is_array(arr): - arr = np.array(arr, dtype="object") + # GH#61155: Guarantee a 1-d result when array is a list of lists + input_arr = arr + arr = np.empty(len(arr), dtype="object") + arr[:] = input_arr result = np.asarray(arr, dtype="object") From ae36cf7acb439b17e288664038c78d0374a8de9d Mon Sep 17 00:00:00 2001 From: Manju080 Date: Tue, 15 Apr 2025 17:02:14 +0000 Subject: [PATCH 09/11] BUG fix GH#61155 with test case for list of lists handling --- pandas/tests/arrays/test_string_array.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 pandas/tests/arrays/test_string_array.py diff --git a/pandas/tests/arrays/test_string_array.py b/pandas/tests/arrays/test_string_array.py new file mode 100644 index 0000000000000..4a4db327d232f --- /dev/null +++ b/pandas/tests/arrays/test_string_array.py @@ -0,0 +1,5 @@ +import pandas as pd +print(pd.array([list('test'), list('words')], dtype='string')) +print(pd.array([list('test'), list('word')], dtype='string')) + + From fb965e7ce0ccac3c03682c243fd28de33aac7bce Mon Sep 17 00:00:00 2001 From: Manju080 Date: Wed, 16 Apr 2025 17:48:45 +0000 Subject: [PATCH 10/11] Fix formatting in test_string_array.py (pre-commit autofix) --- pandas/tests/arrays/test_string_array.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/arrays/test_string_array.py b/pandas/tests/arrays/test_string_array.py index 4a4db327d232f..e5e856ec9e161 100644 --- a/pandas/tests/arrays/test_string_array.py +++ b/pandas/tests/arrays/test_string_array.py @@ -1,5 +1,3 @@ import pandas as pd print(pd.array([list('test'), list('words')], dtype='string')) -print(pd.array([list('test'), list('word')], dtype='string')) - - +print(pd.array([list('test'), list('word')], dtype='string')) \ No newline at end of file From d3bbeaffe3784535be7d3ebfbef10bf4b6986208 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 16 Apr 2025 17:58:10 +0000 Subject: [PATCH 11/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pandas/core/arrays/string_.py | 2 +- pandas/tests/arrays/test_string_array.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 1622a392e284c..b5bca7675c350 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -639,7 +639,7 @@ def _from_sequence( dtype = StringDtype(storage="python") from pandas.core.arrays.masked import BaseMaskedArray - + na_value = dtype.na_value if isinstance(scalars, BaseMaskedArray): # avoid costly conversion to object dtype diff --git a/pandas/tests/arrays/test_string_array.py b/pandas/tests/arrays/test_string_array.py index e5e856ec9e161..66f432a73d79d 100644 --- a/pandas/tests/arrays/test_string_array.py +++ b/pandas/tests/arrays/test_string_array.py @@ -1,3 +1,4 @@ import pandas as pd -print(pd.array([list('test'), list('words')], dtype='string')) -print(pd.array([list('test'), list('word')], dtype='string')) \ No newline at end of file + +print(pd.array([list("test"), list("words")], dtype="string")) +print(pd.array([list("test"), list("word")], dtype="string"))