diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 8c475791df64d..80d86805ded49 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -618,6 +618,7 @@ Indexing - Bug in :meth:`DataFrame.loc` when setting :class:`Series` with extension dtype into NumPy dtype (:issue:`55604`) - Bug in :meth:`Index.difference` not returning a unique set of values when ``other`` is empty or ``other`` is considered non-comparable (:issue:`55113`) - Bug in setting :class:`Categorical` values into a :class:`DataFrame` with numpy dtypes raising ``RecursionError`` (:issue:`52927`) +- Fixed bug when creating a new column with missing values by setting a single string value (:issue:`56204`) Missing ^^^^^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c233295b25700..4be7e17035128 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -68,6 +68,7 @@ from pandas.core.construction import ( array as pd_array, extract_array, + sanitize_array, ) from pandas.core.indexers import ( check_array_indexer, @@ -1876,7 +1877,13 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"): return self.obj[key] = empty_value - + elif not is_list_like(value): + # Find our empty_value dtype by constructing an array + # from our value and doing a .take on it + arr = sanitize_array(value, Index(range(1)), copy=False) + taker = -1 * np.ones(len(self.obj), dtype=np.intp) + empty_value = algos.take_nd(arr, taker) + self.obj[key] = empty_value else: # FIXME: GH#42099#issuecomment-864326014 self.obj[key] = infer_fill_value(value) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 4be5be77b015c..97e7ae15c6c63 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1935,6 +1935,26 @@ def test_adding_new_conditional_column() -> None: tm.assert_frame_equal(df, expected) +@pytest.mark.parametrize( + 
("dtype", "infer_string"), + [ + (object, False), + ("string[pyarrow_numpy]", True), + ], +) +def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None: + # https://github.com/pandas-dev/pandas/issues/56204 + pytest.importorskip("pyarrow") + + df = DataFrame({"a": [1, 2], "b": [3, 4]}) + with pd.option_context("future.infer_string", infer_string): + df.loc[df["a"] == 1, "c"] = "1" + expected = DataFrame({"a": [1, 2], "b": [3, 4], "c": ["1", float("nan")]}).astype( + {"a": "int64", "b": "int64", "c": dtype} + ) + tm.assert_frame_equal(df, expected) + + def test_add_new_column_infer_string(): # GH#55366 pytest.importorskip("pyarrow") diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py index 1e3c793c8449f..ce771280bc264 100644 --- a/pandas/tests/frame/indexing/test_set_value.py +++ b/pandas/tests/frame/indexing/test_set_value.py @@ -32,10 +32,7 @@ def test_set_value_resize(self, float_frame, using_infer_string): else: assert res["baz"].dtype == np.object_ res = float_frame.copy() - with tm.assert_produces_warning( - FutureWarning, match="Setting an item of incompatible dtype" - ): - res._set_value("foobar", "baz", True) + res._set_value("foobar", "baz", True) assert res["baz"].dtype == np.object_ res = float_frame.copy()