diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index b0f287cf0b9f6..5ae777ca68eba 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -24,6 +24,8 @@ Fixed Regressions - Fixed issue in ``DataFrame`` construction with passing a mixed list of mixed types could segfault. (:issue:`25075`) - Fixed regression in :meth:`DataFrame.apply` causing ``RecursionError`` when ``dict``-like classes were passed as argument. (:issue:`25196`) +- Fixed regression in :meth:`DataFrame.duplicated`, where an empty ``DataFrame`` did not return a boolean-dtyped ``Series``. (:issue:`25184`) + .. _whatsnew_0242.enhancements: Enhancements diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index fac42dbd9c7c8..1f4fb39f76c7c 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -69,6 +69,8 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- + Categorical ^^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index da638e24dfce5..f5535096c967d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4661,7 +4661,7 @@ def duplicated(self, subset=None, keep='first'): from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT if self.empty: - return Series() + return Series(dtype=bool) def f(vals): labels, shape = algorithms.factorize( diff --git a/pandas/tests/frame/test_duplicates.py b/pandas/tests/frame/test_duplicates.py index f61dbbdb989e4..3396670fb5879 100644 --- a/pandas/tests/frame/test_duplicates.py +++ b/pandas/tests/frame/test_duplicates.py @@ -182,6 +182,17 @@ def test_drop_duplicates(): assert df.duplicated(keep=keep).sum() == 0 +def test_duplicated_on_empty_frame(): + # GH 25184 + + df = DataFrame(columns=['a', 'b']) + dupes = df.duplicated('a') + + result = df[dupes] + expected = df.copy() + tm.assert_frame_equal(result, expected) + + def test_drop_duplicates_with_duplicate_column_names(): # GH17836 df = DataFrame([