diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index d44824183f05f..35c88d544c985 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -178,6 +178,7 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`)
 - Performance improvement in indexing operations for string dtypes (:issue:`56997`)
 - :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`?``)
+- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.bug_fixes:
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 3add32293ea06..730b84fdc6201 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -271,7 +271,7 @@ def axis(request):
     return request.param
 
 
-@pytest.fixture(params=[True, False, None])
+@pytest.fixture(params=[True, False])
 def observed(request):
     """
     Pass in the observed keyword to groupby for [True, False]
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 668c9863612d8..9768a932f73f6 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -596,7 +596,7 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
 
     @final
     def __len__(self) -> int:
-        return len(self.groups)
+        return self._grouper.ngroups
 
     @final
     def __repr__(self) -> str:
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 46a307e2f28d9..d02e22c29159f 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -149,6 +149,29 @@ def test_len_nan_group():
     assert len(df.groupby(["a", "b"])) == 0
 
 
+@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
+def test_len_categorical(dropna, observed, keys):
+    # GH#57595
+    df = DataFrame(
+        {
+            "a": Categorical([1, 1, 2, np.nan], categories=[1, 2, 3]),
+            "b": Categorical([1, 1, 2, np.nan], categories=[1, 2, 3]),
+            "c": 1,
+        }
+    )
+    gb = df.groupby(keys, observed=observed, dropna=dropna)
+    result = len(gb)
+    if observed and dropna:
+        expected = 2
+    elif observed and not dropna:
+        expected = 3
+    elif len(keys) == 1:
+        expected = 3 if dropna else 4
+    else:
+        expected = 9 if dropna else 16
+    assert result == expected, f"{result} vs {expected}"
+
+
 def test_basic_regression():
     # regression
     result = Series([1.0 * x for x in list(range(1, 10)) * 10])