ENH: Allow performance warnings to be disabled #56921

Merged 11 commits on Feb 23, 2024
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -33,6 +33,7 @@ Other enhancements
- :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`)
- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`)
- Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`)
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
-

.. ---------------------------------------------------------------------------
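
For readers skimming this page, a minimal sketch of how the new option is used from user code; these are the standard pandas options API calls, and only the placeholder body inside the context manager is illustrative:

import pandas as pd

pd.get_option("mode.performance_warnings")  # True by default

# Disable PerformanceWarning globally for the session ...
pd.set_option("mode.performance_warnings", False)

# ... or only within a block, restoring the previous value on exit.
with pd.option_context("mode.performance_warnings", False):
    pass  # code that would otherwise emit a PerformanceWarning
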
10 changes: 10 additions & 0 deletions pandas/conftest.py
@@ -1983,6 +1983,16 @@ def indexer_ial(request):
return request.param


@pytest.fixture(params=[True, False])
def performance_warning(request) -> Iterator[bool | type[Warning]]:
"""
Fixture to check if performance warnings are enabled. Either produces
``PerformanceWarning`` if they are enabled, otherwise ``False``.
"""
with pd.option_context("mode.performance_warnings", request.param):
yield pd.errors.PerformanceWarning if request.param else False


@pytest.fixture
def using_infer_string() -> bool:
"""
11 changes: 7 additions & 4 deletions pandas/core/arrays/arrow/_arrow_utils.py
@@ -5,6 +5,8 @@
import numpy as np
import pyarrow

from pandas._config.config import _get_option

from pandas.errors import PerformanceWarning
from pandas.util._exceptions import find_stack_level

@@ -14,10 +16,11 @@ def fallback_performancewarning(version: str | None = None) -> None:
Raise a PerformanceWarning for falling back to ExtensionArray's
non-pyarrow method
"""
msg = "Falling back on a non-pyarrow code path which may decrease performance."
if version is not None:
msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning."
warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
if _get_option("performance_warnings"):
msg = "Falling back on a non-pyarrow code path which may decrease performance."
if version is not None:
msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning."
warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
@mroeschke (Member) commented on Feb 8, 2024:

Going forward how can we ensure that PerformanceWarnings are not issued without checking the option?

@rhshadrach (Member, Author) commented on Feb 8, 2024:

We currently fail if a warning is emitted in a test outside of tm.assert_produces_warning.

Member:

True, as long as we have a test for it :)



def pyarrow_array_to_numpy_and_mask(
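
For context on the review discussion above: escalating warnings to errors is what makes a stray, un-gated PerformanceWarning fail a test. A minimal sketch using only the standard library warnings filter; the test suite's actual filter configuration is not shown in this diff:

import warnings

from pandas.errors import PerformanceWarning

# Turn any PerformanceWarning into an exception so un-gated warnings surface as failures.
warnings.simplefilter("error", PerformanceWarning)
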
15 changes: 9 additions & 6 deletions pandas/core/arrays/datetimelike.py
@@ -20,6 +20,8 @@

import numpy as np

from pandas._config.config import _get_option

from pandas._libs import (
algos,
lib,
@@ -1332,12 +1334,13 @@ def _addsub_object_array(self, other: npt.NDArray[np.object_], op) -> np.ndarray
# If both 1D then broadcasting is unambiguous
return op(self, other[0])

warnings.warn(
"Adding/subtracting object-dtype array to "
f"{type(self).__name__} not vectorized.",
PerformanceWarning,
stacklevel=find_stack_level(),
)
if _get_option("performance_warnings"):
warnings.warn(
"Adding/subtracting object-dtype array to "
f"{type(self).__name__} not vectorized.",
PerformanceWarning,
stacklevel=find_stack_level(),
)

# Caller is responsible for broadcasting if necessary
assert self.shape == other.shape, (self.shape, other.shape)
14 changes: 9 additions & 5 deletions pandas/core/arrays/datetimes.py
@@ -15,6 +15,8 @@

import numpy as np

from pandas._config.config import _get_option

from pandas._libs import (
lib,
tslib,
@@ -818,11 +820,13 @@ def _add_offset(self, offset: BaseOffset) -> Self:
# "dtype[Any] | type[Any] | _SupportsDType[dtype[Any]]"
res_values = res_values.view(values.dtype) # type: ignore[arg-type]
except NotImplementedError:
warnings.warn(
"Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
PerformanceWarning,
stacklevel=find_stack_level(),
)
if _get_option("performance_warnings"):
warnings.warn(
"Non-vectorized DateOffset being applied to Series or "
"DatetimeIndex.",
PerformanceWarning,
stacklevel=find_stack_level(),
)
res_values = self.astype("O") + offset
# TODO(GH#55564): as_unit will be unnecessary
result = type(self)._from_sequence(res_values).as_unit(self.unit)
7 changes: 5 additions & 2 deletions pandas/core/arrays/sparse/array.py
@@ -18,6 +18,8 @@

import numpy as np

from pandas._config.config import _get_option

from pandas._libs import lib
import pandas._libs.sparse as splib
from pandas._libs.sparse import (
@@ -1154,8 +1156,9 @@ def searchsorted(
side: Literal["left", "right"] = "left",
sorter: NumpySorter | None = None,
) -> npt.NDArray[np.intp] | np.intp:
msg = "searchsorted requires high memory usage."
warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
if _get_option("performance_warnings"):
msg = "searchsorted requires high memory usage."
warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
v = np.asarray(v)
return np.asarray(self, dtype=self.dtype.subtype).searchsorted(v, side, sorter)

8 changes: 6 additions & 2 deletions pandas/core/arrays/string_arrow.py
@@ -12,6 +12,8 @@

import numpy as np

from pandas._config.config import _get_option

from pandas._libs import (
lib,
missing as libmissing,
@@ -343,7 +345,8 @@ def _str_contains(
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
):
if flags:
fallback_performancewarning()
if _get_option("mode.performance_warnings"):
fallback_performancewarning()
return super()._str_contains(pat, case, flags, na, regex)

if regex:
@@ -403,7 +406,8 @@ def _str_replace(
regex: bool = True,
):
if isinstance(pat, re.Pattern) or callable(repl) or not case or flags:
fallback_performancewarning()
if _get_option("mode.performance_warnings"):
fallback_performancewarning()
return super()._str_replace(pat, repl, n, case, flags, regex)

func = pc.replace_substring_regex if regex else pc.replace_substring
8 changes: 7 additions & 1 deletion pandas/core/computation/align.py
@@ -15,6 +15,8 @@

import numpy as np

from pandas._config.config import _get_option

from pandas.errors import PerformanceWarning
from pandas.util._exceptions import find_stack_level

@@ -124,7 +126,11 @@ def _align_core(terms):
reindexer_size = len(reindexer)

ordm = np.log10(max(1, abs(reindexer_size - term_axis_size)))
if ordm >= 1 and reindexer_size >= 10000:
if (
_get_option("performance_warnings")
and ordm >= 1
and reindexer_size >= 10000
):
w = (
f"Alignment difference on axis {axis} is larger "
f"than an order of magnitude on term {terms[i].name!r}, "
13 changes: 13 additions & 0 deletions pandas/core/config_init.py
@@ -444,6 +444,19 @@ def is_terminal() -> bool:
validator=is_one_of_factory([None, "warn", "raise"]),
)

performance_warnings = """
: boolean
Whether to show or hide PerformanceWarnings.
"""

with cf.config_prefix("mode"):
cf.register_option(
"performance_warnings",
True,
performance_warnings,
validator=is_bool,
)


string_storage_doc = """
: string
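
With the registration above, the new option behaves like any other pandas option; a small sketch of how the is_bool validator guards it (assuming the standard options API; the exact error message may differ):

import pandas as pd

pd.get_option("mode.performance_warnings")        # True by default
pd.set_option("mode.performance_warnings", "no")  # raises ValueError: rejected by the is_bool validator
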
6 changes: 5 additions & 1 deletion pandas/core/dtypes/dtypes.py
@@ -21,6 +21,8 @@
import numpy as np
import pytz

from pandas._config.config import _get_option

from pandas._libs import (
lib,
missing as libmissing,
@@ -2028,7 +2030,9 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:

# np.nan isn't a singleton, so we may end up with multiple
# NaNs here, so we ignore the all NA case too.
if not (len(set(fill_values)) == 1 or isna(fill_values).all()):
if _get_option("performance_warnings") and (
not (len(set(fill_values)) == 1 or isna(fill_values).all())
):
warnings.warn(
"Concatenating sparse arrays with multiple fill "
f"values: '{fill_values}'. Picking the first and "
14 changes: 8 additions & 6 deletions pandas/core/indexes/multi.py
@@ -21,6 +21,7 @@
import numpy as np

from pandas._config import get_option
from pandas._config.config import _get_option

from pandas._libs import (
algos as libalgos,
@@ -2356,7 +2357,7 @@ def drop( # type: ignore[override]
step = loc.step if loc.step is not None else 1
inds.extend(range(loc.start, loc.stop, step))
elif com.is_bool_indexer(loc):
if self._lexsort_depth == 0:
if _get_option("performance_warnings") and self._lexsort_depth == 0:
warnings.warn(
"dropping on a non-lexsorted multi-index "
"without a level parameter may impact performance.",
@@ -3018,11 +3019,12 @@ def _maybe_to_slice(loc):
if not follow_key:
return slice(start, stop)

warnings.warn(
"indexing past lexsort depth may impact performance.",
PerformanceWarning,
stacklevel=find_stack_level(),
)
if _get_option("performance_warnings"):
warnings.warn(
"indexing past lexsort depth may impact performance.",
PerformanceWarning,
stacklevel=find_stack_level(),
)

loc = np.arange(start, stop, dtype=np.intp)

7 changes: 6 additions & 1 deletion pandas/core/internals/managers.py
@@ -18,6 +18,8 @@

import numpy as np

from pandas._config.config import _get_option

from pandas._libs import (
algos as libalgos,
internals as libinternals,
@@ -1526,7 +1528,10 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None:

self._known_consolidated = False

if sum(not block.is_extension for block in self.blocks) > 100:
if (
_get_option("performance_warnings")
and sum(not block.is_extension for block in self.blocks) > 100
):
warnings.warn(
"DataFrame is highly fragmented. This is usually the result "
"of calling `frame.insert` many times, which has poor performance. "
4 changes: 3 additions & 1 deletion pandas/core/reshape/reshape.py
@@ -10,6 +10,8 @@

import numpy as np

from pandas._config.config import _get_option

import pandas._libs.reshape as libreshape
from pandas.errors import PerformanceWarning
from pandas.util._decorators import cache_readonly
@@ -144,7 +146,7 @@ def __init__(
num_cells = num_rows * num_columns

# GH 26314: Previous ValueError raised was too restrictive for many users.
if num_cells > np.iinfo(np.int32).max:
if _get_option("performance_warnings") and num_cells > np.iinfo(np.int32).max:
warnings.warn(
f"The following operation may generate {num_cells} cells "
f"in the resulting pandas object.",
3 changes: 2 additions & 1 deletion pandas/io/pytables.py
@@ -32,6 +32,7 @@
get_option,
using_pyarrow_string_dtype,
)
from pandas._config.config import _get_option

from pandas._libs import (
lib,
@@ -3143,7 +3144,7 @@ def write_array(
pass
elif inferred_type == "string":
pass
else:
elif _get_option("performance_warnings"):
ws = performance_doc % (inferred_type, key, items)
warnings.warn(ws, PerformanceWarning, stacklevel=find_stack_level())
