Skip to content

Commit 9b1c396

Browse files
committed
healthcheck: fix grpc inline removal crashes (#749)
Signed-off-by: Matt Klein <[email protected]> Signed-off-by: Pradeep Rao <[email protected]>
1 parent cb4ef0b commit 9b1c396

File tree

2 files changed

+83
-7
lines changed

2 files changed

+83
-7
lines changed

source/common/upstream/health_checker_impl.cc

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -815,10 +815,17 @@ void GrpcHealthCheckerImpl::GrpcActiveHealthCheckSession::onGoAway(
815815
// Even if we have active health check probe, fail it on GOAWAY and schedule new one.
816816
if (request_encoder_) {
817817
handleFailure(envoy::data::core::v3::NETWORK);
818-
expect_reset_ = true;
819-
request_encoder_->getStream().resetStream(Http::StreamResetReason::LocalReset);
818+
// request_encoder_ can already be destroyed if the host was removed during the failure callback
819+
// above.
820+
if (request_encoder_ != nullptr) {
821+
expect_reset_ = true;
822+
request_encoder_->getStream().resetStream(Http::StreamResetReason::LocalReset);
823+
}
824+
}
825+
// client_ can already be destroyed if the host was removed during the failure callback above.
826+
if (client_ != nullptr) {
827+
client_->close();
820828
}
821-
client_->close();
822829
}
823830

824831
bool GrpcHealthCheckerImpl::GrpcActiveHealthCheckSession::isHealthCheckSucceeded(
@@ -852,12 +859,17 @@ void GrpcHealthCheckerImpl::GrpcActiveHealthCheckSession::onRpcComplete(
852859
if (end_stream) {
853860
resetState();
854861
} else {
855-
// resetState() will be called by onResetStream().
856-
expect_reset_ = true;
857-
request_encoder_->getStream().resetStream(Http::StreamResetReason::LocalReset);
862+
// request_encoder_ can already be destroyed if the host was removed during the failure callback
863+
// above.
864+
if (request_encoder_ != nullptr) {
865+
// resetState() will be called by onResetStream().
866+
expect_reset_ = true;
867+
request_encoder_->getStream().resetStream(Http::StreamResetReason::LocalReset);
868+
}
858869
}
859870

860-
if (!parent_.reuse_connection_ || goaway) {
871+
// client_ can already be destroyed if the host was removed during the failure callback above.
872+
if (client_ != nullptr && (!parent_.reuse_connection_ || goaway)) {
861873
client_->close();
862874
}
863875
}

test/common/upstream/health_checker_impl_test.cc

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4737,6 +4737,70 @@ TEST_F(GrpcHealthCheckerImplTest, SuccessStartFailedFailFirst) {
47374737
expectHostHealthy(true);
47384738
}
47394739

4740+
// Verify functionality when a host is removed inline with a failure via RPC that was proceeded
4741+
// by a GOAWAY.
4742+
TEST_F(GrpcHealthCheckerImplTest, GrpcHealthFailViaRpcRemoveHostInCallback) {
4743+
setupHC();
4744+
cluster_->prioritySet().getMockHostSet(0)->hosts_ = {
4745+
makeTestHost(cluster_->info_, "tcp://127.0.0.1:80", simTime())};
4746+
4747+
expectSessionCreate();
4748+
expectHealthcheckStart(0);
4749+
EXPECT_CALL(event_logger_, logUnhealthy(_, _, _, true));
4750+
health_checker_->start();
4751+
4752+
EXPECT_CALL(*this, onHostStatus(_, HealthTransition::Changed))
4753+
.WillOnce(Invoke([&](HostSharedPtr host, HealthTransition) {
4754+
cluster_->prioritySet().getMockHostSet(0)->hosts_ = {};
4755+
cluster_->prioritySet().runUpdateCallbacks(0, {}, {host});
4756+
}));
4757+
EXPECT_CALL(event_logger_, logEjectUnhealthy(_, _, _));
4758+
test_sessions_[0]->codec_client_->raiseGoAway(Http::GoAwayErrorCode::NoError);
4759+
respondServiceStatus(0, grpc::health::v1::HealthCheckResponse::NOT_SERVING);
4760+
}
4761+
4762+
// Verify functionality when a host is removed inline with a failure via an error GOAWAY.
4763+
TEST_F(GrpcHealthCheckerImplTest, GrpcHealthFailViaGoawayRemoveHostInCallback) {
4764+
setupHCWithUnhealthyThreshold(/*threshold=*/1);
4765+
cluster_->prioritySet().getMockHostSet(0)->hosts_ = {
4766+
makeTestHost(cluster_->info_, "tcp://127.0.0.1:80", simTime())};
4767+
4768+
expectSessionCreate();
4769+
expectHealthcheckStart(0);
4770+
EXPECT_CALL(event_logger_, logUnhealthy(_, _, _, true));
4771+
health_checker_->start();
4772+
4773+
EXPECT_CALL(*this, onHostStatus(_, HealthTransition::Changed))
4774+
.WillOnce(Invoke([&](HostSharedPtr host, HealthTransition) {
4775+
cluster_->prioritySet().getMockHostSet(0)->hosts_ = {};
4776+
cluster_->prioritySet().runUpdateCallbacks(0, {}, {host});
4777+
}));
4778+
EXPECT_CALL(event_logger_, logEjectUnhealthy(_, _, _));
4779+
test_sessions_[0]->codec_client_->raiseGoAway(Http::GoAwayErrorCode::Other);
4780+
}
4781+
4782+
// Verify functionality when a host is removed inline with by a bad RPC response.
4783+
TEST_F(GrpcHealthCheckerImplTest, GrpcHealthFailViaBadResponseRemoveHostInCallback) {
4784+
setupHCWithUnhealthyThreshold(/*threshold=*/1);
4785+
cluster_->prioritySet().getMockHostSet(0)->hosts_ = {
4786+
makeTestHost(cluster_->info_, "tcp://127.0.0.1:80", simTime())};
4787+
4788+
expectSessionCreate();
4789+
expectHealthcheckStart(0);
4790+
EXPECT_CALL(event_logger_, logUnhealthy(_, _, _, true));
4791+
health_checker_->start();
4792+
4793+
EXPECT_CALL(*this, onHostStatus(_, HealthTransition::Changed))
4794+
.WillOnce(Invoke([&](HostSharedPtr host, HealthTransition) {
4795+
cluster_->prioritySet().getMockHostSet(0)->hosts_ = {};
4796+
cluster_->prioritySet().runUpdateCallbacks(0, {}, {host});
4797+
}));
4798+
EXPECT_CALL(event_logger_, logEjectUnhealthy(_, _, _));
4799+
std::unique_ptr<Http::TestResponseHeaderMapImpl> response_headers(
4800+
new Http::TestResponseHeaderMapImpl{{":status", "500"}});
4801+
test_sessions_[0]->stream_response_callbacks_->decodeHeaders(std::move(response_headers), false);
4802+
}
4803+
47404804
// Test host recovery after explicit check failure requires several successful checks.
47414805
TEST_F(GrpcHealthCheckerImplTest, GrpcHealthFail) {
47424806
setupHC();

0 commit comments

Comments
 (0)