|
3 | 3 | from unittest import TestCase
|
4 | 4 | from unittest.mock import ANY, MagicMock, patch
|
5 | 5 |
|
| 6 | +import time_machine |
6 | 7 | from django.conf import settings
|
7 | 8 | from redis.client import Pipeline
|
8 | 9 |
|
@@ -320,6 +321,172 @@ def test_fixes_mismatched_state_durations(self, mock_logger: MagicMock):
|
320 | 321 | assert breaker.recovery_duration == 500
|
321 | 322 |
|
322 | 323 |
|
| 324 | +@freeze_time() |
| 325 | +class RecordErrorTest(TestCase): |
| 326 | + def setUp(self) -> None: |
| 327 | + self.config = DEFAULT_CONFIG |
| 328 | + self.breaker = MockCircuitBreaker("dogs_are_great", self.config) |
| 329 | + |
| 330 | + # Clear all existing keys from redis |
| 331 | + self.breaker.redis_pipeline.flushall() |
| 332 | + self.breaker.redis_pipeline.execute() |
| 333 | + |
| 334 | + def test_increments_error_count(self): |
| 335 | + config = self.config |
| 336 | + breaker = self.breaker |
| 337 | + |
| 338 | + # The breaker starts with a clean slate |
| 339 | + assert breaker._get_remaining_error_quota() == config["error_limit"] |
| 340 | + |
| 341 | + breaker.record_error() |
| 342 | + |
| 343 | + # The error has been tallied |
| 344 | + assert breaker._get_remaining_error_quota() == config["error_limit"] - 1 |
| 345 | + |
| 346 | + def test_no_error_recorded_in_broken_state(self): |
| 347 | + breaker = self.breaker |
| 348 | + |
| 349 | + breaker._set_breaker_state(CircuitBreakerState.BROKEN) |
| 350 | + breaker._add_quota_usage(breaker.primary_quota, breaker.error_limit) |
| 351 | + |
| 352 | + # Because we're in the BROKEN state, we start with the main quota maxed out and the |
| 353 | + # RECOVERY quota yet to be used |
| 354 | + assert breaker._get_remaining_error_quota(breaker.primary_quota) == 0 |
| 355 | + assert ( |
| 356 | + breaker._get_remaining_error_quota(breaker.recovery_quota) |
| 357 | + == breaker.recovery_error_limit |
| 358 | + ) |
| 359 | + |
| 360 | + breaker.record_error() |
| 361 | + |
| 362 | + # Neither quota is incremented |
| 363 | + assert breaker._get_remaining_error_quota(breaker.primary_quota) == 0 |
| 364 | + assert ( |
| 365 | + breaker._get_remaining_error_quota(breaker.recovery_quota) |
| 366 | + == breaker.recovery_error_limit |
| 367 | + ) |
| 368 | + |
| 369 | + @patch("sentry.utils.circuit_breaker2.logger") |
| 370 | + def test_logs_a_warning_in_broken_state(self, mock_logger: MagicMock): |
| 371 | + breaker = self.breaker |
| 372 | + |
| 373 | + seconds_ellapsed_since_circuit_break = 2 |
| 374 | + breaker._set_breaker_state( |
| 375 | + CircuitBreakerState.BROKEN, |
| 376 | + seconds_left=breaker.broken_state_duration - seconds_ellapsed_since_circuit_break, |
| 377 | + ) |
| 378 | + |
| 379 | + breaker.record_error() |
| 380 | + |
| 381 | + # No log - we just switched into BROKEN state, and even though we're not supposed to land in |
| 382 | + # the `record_error` method in that state, there's a small buffer to account for race |
| 383 | + # conditions |
| 384 | + assert mock_logger.warning.call_count == 0 |
| 385 | + |
| 386 | + seconds_ellapsed_since_circuit_break = 20 |
| 387 | + breaker._set_breaker_state( |
| 388 | + CircuitBreakerState.BROKEN, |
| 389 | + seconds_left=breaker.broken_state_duration - seconds_ellapsed_since_circuit_break, |
| 390 | + ) |
| 391 | + |
| 392 | + breaker.record_error() |
| 393 | + |
| 394 | + # Now we do log a warning, because at this point we can no longer blame a race condition - |
| 395 | + # it's been too long since the circuit broke |
| 396 | + mock_logger.warning.assert_called_with( |
| 397 | + "Attempt to record circuit breaker error while circuit is in BROKEN state", |
| 398 | + extra={"key": "dogs_are_great", "time_in_state": 20}, |
| 399 | + ) |
| 400 | + |
| 401 | + @patch("sentry.utils.circuit_breaker2.logger") |
| 402 | + def test_handles_hitting_max_errors_in_non_broken_state(self, mock_logger: MagicMock): |
| 403 | + config = self.config |
| 404 | + breaker = self.breaker |
| 405 | + now = int(time.time()) |
| 406 | + |
| 407 | + for state, quota, limit in [ |
| 408 | + (CircuitBreakerState.OK, breaker.primary_quota, breaker.error_limit), |
| 409 | + (CircuitBreakerState.RECOVERY, breaker.recovery_quota, breaker.recovery_error_limit), |
| 410 | + ]: |
| 411 | + |
| 412 | + breaker._set_breaker_state(state) |
| 413 | + breaker._add_quota_usage(quota, limit - 1) |
| 414 | + assert breaker._get_remaining_error_quota(quota) == 1 |
| 415 | + assert breaker._get_controlling_quota() == quota |
| 416 | + |
| 417 | + breaker.record_error() |
| 418 | + |
| 419 | + # Hitting the limit puts us into the BROKEN state |
| 420 | + assert breaker._get_remaining_error_quota(quota) == 0 |
| 421 | + assert breaker._get_controlling_quota() is None |
| 422 | + assert breaker._get_state_and_remaining_time() == ( |
| 423 | + CircuitBreakerState.BROKEN, |
| 424 | + breaker.broken_state_duration, |
| 425 | + ) |
| 426 | + mock_logger.warning.assert_called_with( |
| 427 | + "Circuit breaker '%s' error limit hit", |
| 428 | + "dogs_are_great", |
| 429 | + extra={ |
| 430 | + "current_state": state, |
| 431 | + "error_limit": limit, |
| 432 | + "error_limit_window": config["error_limit_window"], |
| 433 | + }, |
| 434 | + ) |
| 435 | + |
| 436 | + # Now jump to one second after the BROKEN state has expired to see that we're in |
| 437 | + # RECOVERY |
| 438 | + with time_machine.travel(now + breaker.broken_state_duration + 1, tick=False): |
| 439 | + assert breaker._get_controlling_quota() is breaker.recovery_quota |
| 440 | + assert breaker._get_state_and_remaining_time() == ( |
| 441 | + CircuitBreakerState.RECOVERY, |
| 442 | + breaker.recovery_duration - 1, |
| 443 | + ) |
| 444 | + |
| 445 | + @patch("sentry.utils.circuit_breaker2.logger") |
| 446 | + def test_stays_in_current_state_if_redis_call_changing_state_fails( |
| 447 | + self, mock_logger: MagicMock |
| 448 | + ): |
| 449 | + breaker = self.breaker |
| 450 | + |
| 451 | + for current_state, quota, limit, seconds_left in [ |
| 452 | + # The case where the current state is the BROKEN state isn't included here because the |
| 453 | + # switch from BROKEN state to RECOVERY state happens passively (by `broken_state_key` |
| 454 | + # expiring), rather than through an active call to redis |
| 455 | + ( |
| 456 | + CircuitBreakerState.OK, |
| 457 | + breaker.primary_quota, |
| 458 | + breaker.error_limit, |
| 459 | + None, |
| 460 | + ), |
| 461 | + ( |
| 462 | + CircuitBreakerState.RECOVERY, |
| 463 | + breaker.recovery_quota, |
| 464 | + breaker.recovery_error_limit, |
| 465 | + 1231, |
| 466 | + ), |
| 467 | + ]: |
| 468 | + |
| 469 | + breaker._set_breaker_state(current_state, seconds_left) |
| 470 | + breaker._add_quota_usage(quota, limit - 1) |
| 471 | + assert breaker._get_remaining_error_quota(quota) == 1 |
| 472 | + assert breaker._get_controlling_quota() == quota |
| 473 | + |
| 474 | + with patch( |
| 475 | + "sentry.utils.circuit_breaker2.CircuitBreaker._set_in_redis", side_effect=Exception |
| 476 | + ): |
| 477 | + breaker.record_error() |
| 478 | + |
| 479 | + # We've recorded the error, but the state hasn't changed |
| 480 | + assert breaker._get_remaining_error_quota(quota) == 0 |
| 481 | + assert breaker._get_controlling_quota() == quota |
| 482 | + assert breaker._get_state_and_remaining_time() == (current_state, seconds_left) |
| 483 | + mock_logger.exception.assert_called_with( |
| 484 | + "Couldn't set state-change keys in redis for circuit breaker '%s'", |
| 485 | + breaker.key, |
| 486 | + extra={"current_state": current_state}, |
| 487 | + ) |
| 488 | + |
| 489 | + |
323 | 490 | @freeze_time()
|
324 | 491 | class ShouldAllowRequestTest(TestCase):
|
325 | 492 | def setUp(self) -> None:
|
|
0 commit comments