
ENH: Allow performance warnings to be disabled #56921


Merged (11 commits) on Feb 23, 2024
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -30,6 +30,7 @@ Other enhancements
^^^^^^^^^^^^^^^^^^
- :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
-

.. ---------------------------------------------------------------------------
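A minimal usage sketch of the option described in the whatsnew entry above (the operations inside the block are placeholders):

```python
import pandas as pd

# Disable all pandas PerformanceWarnings for the rest of the session
pd.set_option("mode.performance_warnings", False)

# Or limit the change to a single block of code
with pd.option_context("mode.performance_warnings", False):
    ...  # operations that would otherwise emit a PerformanceWarning

# Restore the default (warnings enabled)
pd.reset_option("mode.performance_warnings")
```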
10 changes: 10 additions & 0 deletions pandas/conftest.py
@@ -1966,6 +1966,16 @@ def using_copy_on_write() -> bool:
return True


@pytest.fixture(params=[True, False])
def performance_warning(request) -> Iterator[bool | type[Warning]]:
"""
Fixture to check if performance warnings are enabled. Produces
``PerformanceWarning`` if they are enabled, otherwise ``False``.
"""
with pd.option_context("mode.performance_warnings", request.param):
yield pd.errors.PerformanceWarning if request.param else False


@pytest.fixture
def using_infer_string() -> bool:
"""
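To illustrate how the ``performance_warning`` fixture above is meant to be consumed, a hypothetical test (the test name and the sparse operation are chosen for illustration, not taken from this PR); ``tm.assert_produces_warning`` accepts either a warning class or ``False``, the latter asserting that no warning is emitted:

```python
import pandas as pd
import pandas._testing as tm


def test_searchsorted_performance_warning(performance_warning):
    # performance_warning is PerformanceWarning when the option is enabled
    # and False when it is disabled, so the same test covers both cases.
    arr = pd.arrays.SparseArray([1, 2, 3])
    with tm.assert_produces_warning(performance_warning):
        arr.searchsorted(2)
```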
11 changes: 7 additions & 4 deletions pandas/core/arrays/arrow/_arrow_utils.py
@@ -5,6 +5,8 @@
import numpy as np
import pyarrow

from pandas._config.config import _get_option

from pandas.errors import PerformanceWarning
from pandas.util._exceptions import find_stack_level

@@ -14,10 +16,11 @@ def fallback_performancewarning(version: str | None = None) -> None:
Raise a PerformanceWarning for falling back to ExtensionArray's
non-pyarrow method
"""
msg = "Falling back on a non-pyarrow code path which may decrease performance."
if version is not None:
msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning."
warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
if _get_option("performance_warnings"):
msg = "Falling back on a non-pyarrow code path which may decrease performance."
if version is not None:
msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning."
warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
Review thread on this hunk:

@mroeschke (Member) commented on Feb 8, 2024:
Going forward how can we ensure that PerformanceWarnings are not issued without checking the option?

@rhshadrach (Member, Author) commented on Feb 8, 2024:
We currently fail if a warning is emitted in a test outside of tm.assert_produces_warning.

Reply (Member):
True, as long as we have a test for it :)
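The failure mode rhshadrach describes relies on the test suite escalating unexpected warnings to errors. A generic sketch of that mechanism (illustrative only, not pandas' actual test configuration):

```python
import warnings

from pandas.errors import PerformanceWarning

# Turn any PerformanceWarning that escapes tm.assert_produces_warning into an
# error so the offending test fails loudly. In a test suite the same idea is
# usually expressed declaratively via pytest's ``filterwarnings`` ini option,
# e.g. "error::pandas.errors.PerformanceWarning".
warnings.simplefilter("error", PerformanceWarning)
```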



def pyarrow_array_to_numpy_and_mask(
15 changes: 9 additions & 6 deletions pandas/core/arrays/datetimelike.py
@@ -20,6 +20,8 @@

import numpy as np

from pandas._config.config import _get_option

from pandas._libs import (
algos,
lib,
@@ -1331,12 +1333,13 @@ def _addsub_object_array(self, other: npt.NDArray[np.object_], op) -> np.ndarray
# If both 1D then broadcasting is unambiguous
return op(self, other[0])

warnings.warn(
"Adding/subtracting object-dtype array to "
f"{type(self).__name__} not vectorized.",
PerformanceWarning,
stacklevel=find_stack_level(),
)
if _get_option("performance_warnings"):
warnings.warn(
"Adding/subtracting object-dtype array to "
f"{type(self).__name__} not vectorized.",
PerformanceWarning,
stacklevel=find_stack_level(),
)

# Caller is responsible for broadcasting if necessary
assert self.shape == other.shape, (self.shape, other.shape)
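As an illustration of the path gated above (the trigger shown is an assumption, not part of the diff): adding an object-dtype array of offsets to a DatetimeIndex falls back to an element-wise loop, and the resulting warning now respects the option:

```python
import numpy as np
import pandas as pd

dti = pd.to_datetime(["2024-01-31", "2024-02-29"])
offsets = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(1)], dtype=object)

with pd.option_context("mode.performance_warnings", False):
    shifted = dti + offsets  # element-wise loop, but no PerformanceWarning
```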
14 changes: 9 additions & 5 deletions pandas/core/arrays/datetimes.py
@@ -15,6 +15,8 @@

import numpy as np

from pandas._config.config import _get_option

from pandas._libs import (
lib,
tslib,
@@ -819,11 +821,13 @@ def _add_offset(self, offset: BaseOffset) -> Self:
# "dtype[Any] | type[Any] | _SupportsDType[dtype[Any]]"
res_values = res_values.view(values.dtype) # type: ignore[arg-type]
except NotImplementedError:
warnings.warn(
"Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
PerformanceWarning,
stacklevel=find_stack_level(),
)
if _get_option("performance_warnings"):
warnings.warn(
"Non-vectorized DateOffset being applied to Series or "
"DatetimeIndex.",
PerformanceWarning,
stacklevel=find_stack_level(),
)
res_values = self.astype("O") + offset
# TODO(GH#55564): as_unit will be unnecessary
result = type(self)._from_sequence(res_values).as_unit(self.unit)
7 changes: 5 additions & 2 deletions pandas/core/arrays/sparse/array.py
@@ -18,6 +18,8 @@

import numpy as np

from pandas._config.config import _get_option

from pandas._libs import lib
import pandas._libs.sparse as splib
from pandas._libs.sparse import (
@@ -1154,8 +1156,9 @@ def searchsorted(
side: Literal["left", "right"] = "left",
sorter: NumpySorter | None = None,
) -> npt.NDArray[np.intp] | np.intp:
msg = "searchsorted requires high memory usage."
warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
if _get_option("performance_warnings"):
msg = "searchsorted requires high memory usage."
warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
v = np.asarray(v)
return np.asarray(self, dtype=self.dtype.subtype).searchsorted(v, side, sorter)

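A user-level sketch of the gate above (assuming ``SparseArray.searchsorted`` is the only warning source in the block):

```python
import warnings

import pandas as pd

arr = pd.arrays.SparseArray([1, 2, 3])

with pd.option_context("mode.performance_warnings", False):
    with warnings.catch_warnings():
        warnings.simplefilter("error")  # any escaped warning would raise here
        arr.searchsorted(2)  # no PerformanceWarning with the option disabled
```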
8 changes: 6 additions & 2 deletions pandas/core/arrays/string_arrow.py
@@ -13,6 +13,8 @@

import numpy as np

from pandas._config.config import _get_option

from pandas._libs import (
lib,
missing as libmissing,
@@ -354,7 +356,8 @@ def _str_contains(
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
):
if flags:
fallback_performancewarning()
if _get_option("mode.performance_warnings"):
fallback_performancewarning()
return super()._str_contains(pat, case, flags, na, regex)

if regex:
@@ -414,7 +417,8 @@ def _str_replace(
regex: bool = True,
):
if isinstance(pat, re.Pattern) or callable(repl) or not case or flags:
fallback_performancewarning()
if _get_option("mode.performance_warnings"):
fallback_performancewarning()
return super()._str_replace(pat, repl, n, case, flags, regex)

func = pc.replace_substring_regex if regex else pc.replace_substring
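For illustration (requires pyarrow; the dtype string and pattern are assumptions): passing regex ``flags`` forces the non-pyarrow fallback, whose warning is now gated on the option:

```python
import re

import pandas as pd

ser = pd.Series(["Apple", "banana"], dtype="string[pyarrow]")

with pd.option_context("mode.performance_warnings", False):
    # flags forces the slower non-pyarrow code path; the fallback warning stays silent
    mask = ser.str.contains("apple", flags=re.IGNORECASE)
```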
8 changes: 7 additions & 1 deletion pandas/core/computation/align.py
@@ -15,6 +15,8 @@

import numpy as np

from pandas._config.config import _get_option

from pandas.errors import PerformanceWarning
from pandas.util._exceptions import find_stack_level

@@ -124,7 +126,11 @@ def _align_core(terms):
reindexer_size = len(reindexer)

ordm = np.log10(max(1, abs(reindexer_size - term_axis_size)))
if ordm >= 1 and reindexer_size >= 10000:
if (
_get_option("performance_warnings")
and ordm >= 1
and reindexer_size >= 10000
):
w = (
f"Alignment difference on axis {axis} is larger "
f"than an order of magnitude on term {terms[i].name!r}, "
13 changes: 13 additions & 0 deletions pandas/core/config_init.py
@@ -473,6 +473,19 @@ def use_inf_as_na_cb(key) -> None:
validator=is_one_of_factory([None, "warn", "raise"]),
)

performance_warnings = """
: boolean
Whether to show or hide PerformanceWarnings.
"""

with cf.config_prefix("mode"):
cf.register_option(
"performance_warnings",
True,
performance_warnings,
validator=is_bool,
)


string_storage_doc = """
: string
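A short sketch of how the registered option behaves through the public API (the exact validator error message is not reproduced here):

```python
import pandas as pd

pd.get_option("mode.performance_warnings")        # True by default
pd.set_option("mode.performance_warnings", False)
pd.reset_option("mode.performance_warnings")

# The is_bool validator rejects non-boolean values
try:
    pd.set_option("mode.performance_warnings", "no")
except ValueError:
    pass
```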
6 changes: 5 additions & 1 deletion pandas/core/dtypes/dtypes.py
@@ -21,6 +21,8 @@
import numpy as np
import pytz

from pandas._config.config import _get_option

from pandas._libs import (
lib,
missing as libmissing,
@@ -2028,7 +2030,9 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:

# np.nan isn't a singleton, so we may end up with multiple
# NaNs here, so we ignore the all NA case too.
if not (len(set(fill_values)) == 1 or isna(fill_values).all()):
if _get_option("performance_warnings") and (
not (len(set(fill_values)) == 1 or isna(fill_values).all())
):
warnings.warn(
"Concatenating sparse arrays with multiple fill "
f"values: '{fill_values}'. Picking the first and "
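To illustrate the branch above (fill values chosen so that they differ and are not all NA):

```python
import pandas as pd

s1 = pd.Series(pd.arrays.SparseArray([0, 1, 0], fill_value=0))
s2 = pd.Series(pd.arrays.SparseArray([1, 1, 2], fill_value=1))

with pd.option_context("mode.performance_warnings", False):
    # Different fill values would normally warn that the first one is picked
    combined = pd.concat([s1, s2])
```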
14 changes: 8 additions & 6 deletions pandas/core/indexes/multi.py
@@ -21,6 +21,7 @@
import numpy as np

from pandas._config import get_option
from pandas._config.config import _get_option

from pandas._libs import (
algos as libalgos,
@@ -2430,7 +2431,7 @@ def drop( # type: ignore[override]
step = loc.step if loc.step is not None else 1
inds.extend(range(loc.start, loc.stop, step))
elif com.is_bool_indexer(loc):
if self._lexsort_depth == 0:
if _get_option("performance_warnings") and self._lexsort_depth == 0:
warnings.warn(
"dropping on a non-lexsorted multi-index "
"without a level parameter may impact performance.",
@@ -3088,11 +3089,12 @@ def _maybe_to_slice(loc):
if not follow_key:
return slice(start, stop)

warnings.warn(
"indexing past lexsort depth may impact performance.",
PerformanceWarning,
stacklevel=find_stack_level(),
)
if _get_option("performance_warnings"):
warnings.warn(
"indexing past lexsort depth may impact performance.",
PerformanceWarning,
stacklevel=find_stack_level(),
)

loc = np.arange(start, stop, dtype=np.intp)

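For illustration (an assumed way of reaching the lexsort-depth path, not taken from the PR): looking up a tuple key on a MultiIndex that is not lexsorted normally warns about lexsort depth, and that warning now honours the option:

```python
import pandas as pd

mi = pd.MultiIndex.from_tuples([("b", 2), ("a", 1), ("b", 1)])
ser = pd.Series([10, 20, 30], index=mi)

with pd.option_context("mode.performance_warnings", False):
    value = ser.loc[("b", 1)]  # non-lexsorted lookup, warning suppressed
```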
6 changes: 5 additions & 1 deletion pandas/core/internals/managers.py
@@ -19,6 +19,7 @@
import numpy as np

from pandas._config import using_copy_on_write
from pandas._config.config import _get_option

from pandas._libs import (
algos as libalgos,
@@ -1567,7 +1568,10 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None:

self._known_consolidated = False

if sum(not block.is_extension for block in self.blocks) > 100:
if (
_get_option("performance_warnings")
and sum(not block.is_extension for block in self.blocks) > 100
):
warnings.warn(
"DataFrame is highly fragmented. This is usually the result "
"of calling `frame.insert` many times, which has poor performance. "
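A sketch of the fragmentation scenario gated above (column names and loop count are illustrative): repeatedly assigning new columns inserts new blocks, and past 100 blocks the warning would normally fire:

```python
import pandas as pd

df = pd.DataFrame({"c0": range(3)})

with pd.option_context("mode.performance_warnings", False):
    for i in range(1, 150):
        df[f"c{i}"] = i  # each new column goes through BlockManager.insert
```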
4 changes: 3 additions & 1 deletion pandas/core/reshape/reshape.py
@@ -10,6 +10,8 @@

import numpy as np

from pandas._config.config import _get_option

import pandas._libs.reshape as libreshape
from pandas.errors import PerformanceWarning
from pandas.util._decorators import cache_readonly
@@ -144,7 +146,7 @@ def __init__(
num_cells = num_rows * num_columns

# GH 26314: Previous ValueError raised was too restrictive for many users.
if num_cells > np.iinfo(np.int32).max:
if _get_option("performance_warnings") and num_cells > np.iinfo(np.int32).max:
warnings.warn(
f"The following operation may generate {num_cells} cells "
f"in the resulting pandas object.",
3 changes: 2 additions & 1 deletion pandas/io/pytables.py
@@ -33,6 +33,7 @@
using_copy_on_write,
using_pyarrow_string_dtype,
)
from pandas._config.config import _get_option

from pandas._libs import (
lib,
@@ -3158,7 +3159,7 @@ def write_array(
pass
elif inferred_type == "string":
pass
else:
elif _get_option("performance_warnings"):
ws = performance_doc % (inferred_type, key, items)
warnings.warn(ws, PerformanceWarning, stacklevel=find_stack_level())

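For illustration (requires the optional PyTables dependency; the file name is arbitrary): storing a mixed-object column in fixed format normally emits the pickling PerformanceWarning, which is now gated on the option:

```python
import pandas as pd

df = pd.DataFrame({"mixed": [1, "a", 2.5]})  # object dtype, not a plain string column

with pd.option_context("mode.performance_warnings", False):
    df.to_hdf("example.h5", key="df")  # object column is pickled without warning
```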