Skip to content

Commit cdd6040

Browse files
authored
Fix logic issues during graceful shutdown (#171)
* Fix logic issues during graceful shutdown # Motivation This should fix the remaining issues raised in #166. The problem here was that if a service finished/threw out of order then we were wrongly treating this as if the service that we are currently shutting down finished. # Modification This PR ensures that we use the same `services` array during the graceful shutdown to nil out services that have finished. This way we correctly keep track of any service that finished. Additionally, there was a separate bug where we started to shutdown the next service to early if another service threw and had the termination behaviour of `shutdownGracefully`. # Result No more incorrect shutdown orderings. * Fix one more race condition
1 parent b21c43a commit cdd6040

File tree

2 files changed

+336
-29
lines changed

2 files changed

+336
-29
lines changed

Sources/ServiceLifecycle/ServiceGroup.swift

Lines changed: 57 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ public actor ServiceGroup: Sendable {
327327
services[index] = nil
328328
do {
329329
try await self.shutdownGracefully(
330-
services: services,
330+
services: &services,
331331
cancellationTimeoutTask: &cancellationTimeoutTask,
332332
group: &group,
333333
gracefulShutdownManagers: gracefulShutdownManagers
@@ -380,7 +380,7 @@ public actor ServiceGroup: Sendable {
380380

381381
do {
382382
try await self.shutdownGracefully(
383-
services: services,
383+
services: &services,
384384
cancellationTimeoutTask: &cancellationTimeoutTask,
385385
group: &group,
386386
gracefulShutdownManagers: gracefulShutdownManagers
@@ -421,7 +421,7 @@ public actor ServiceGroup: Sendable {
421421
)
422422
do {
423423
try await self.shutdownGracefully(
424-
services: services,
424+
services: &services,
425425
cancellationTimeoutTask: &cancellationTimeoutTask,
426426
group: &group,
427427
gracefulShutdownManagers: gracefulShutdownManagers
@@ -448,7 +448,7 @@ public actor ServiceGroup: Sendable {
448448

449449
do {
450450
try await self.shutdownGracefully(
451-
services: services,
451+
services: &services,
452452
cancellationTimeoutTask: &cancellationTimeoutTask,
453453
group: &group,
454454
gracefulShutdownManagers: gracefulShutdownManagers
@@ -489,7 +489,7 @@ public actor ServiceGroup: Sendable {
489489
}
490490

491491
private func shutdownGracefully(
492-
services: [ServiceGroupConfiguration.ServiceConfiguration?],
492+
services: inout [ServiceGroupConfiguration.ServiceConfiguration?],
493493
cancellationTimeoutTask: inout Task<Void, Never>?,
494494
group: inout ThrowingTaskGroup<ChildTaskResult, Error>,
495495
gracefulShutdownManagers: [GracefulShutdownManager]
@@ -519,7 +519,7 @@ public actor ServiceGroup: Sendable {
519519
self.logger.debug(
520520
"Service already finished. Skipping shutdown"
521521
)
522-
continue
522+
continue gracefulShutdownLoop
523523
}
524524
self.logger.debug(
525525
"Triggering graceful shutdown for service",
@@ -533,6 +533,7 @@ public actor ServiceGroup: Sendable {
533533
while let result = try await group.next() {
534534
switch result {
535535
case .serviceFinished(let service, let index):
536+
services[index] = nil
536537
if group.isCancelled {
537538
// The group is cancelled and we expect all services to finish
538539
continue gracefulShutdownLoop
@@ -561,7 +562,8 @@ public actor ServiceGroup: Sendable {
561562
throw ServiceGroupError.serviceFinishedUnexpectedly()
562563
}
563564

564-
case .serviceThrew(let service, _, let serviceError):
565+
case .serviceThrew(let service, let index, let serviceError):
566+
services[index] = nil
565567
switch service.failureTerminationBehavior.behavior {
566568
case .cancelGroup:
567569
self.logger.debug(
@@ -575,32 +577,58 @@ public actor ServiceGroup: Sendable {
575577
throw serviceError
576578

577579
case .gracefullyShutdownGroup:
578-
self.logger.debug(
579-
"Service threw error during graceful shutdown.",
580-
metadata: [
581-
self.loggingConfiguration.keys.serviceKey: "\(service.service)",
582-
self.loggingConfiguration.keys.errorKey: "\(serviceError)",
583-
]
584-
)
585-
586580
if error == nil {
587581
error = serviceError
588582
}
589583

590-
// We can continue shutting down the next service now
591-
continue gracefulShutdownLoop
584+
if index == gracefulShutdownIndex {
585+
// The service that we were shutting down right now threw. Since it's failure
586+
// behaviour is to shutdown the group we can continue
587+
self.logger.debug(
588+
"The service that we were shutting down threw. Continuing with the next one.",
589+
metadata: [
590+
self.loggingConfiguration.keys.serviceKey: "\(service.service)",
591+
self.loggingConfiguration.keys.errorKey: "\(serviceError)",
592+
]
593+
)
594+
continue gracefulShutdownLoop
595+
} else {
596+
// Another service threw while we were waiting for a shutdown
597+
// We have to continue the iterating the task group's result
598+
self.logger.debug(
599+
"Another service than the service that we were shutting down threw. Continuing with the next one.",
600+
metadata: [
601+
self.loggingConfiguration.keys.serviceKey: "\(service.service)",
602+
self.loggingConfiguration.keys.errorKey: "\(serviceError)",
603+
]
604+
)
605+
break
606+
}
592607

593608
case .ignore:
594-
self.logger.debug(
595-
"Service threw error during graceful shutdown.",
596-
metadata: [
597-
self.loggingConfiguration.keys.serviceKey: "\(service.service)",
598-
self.loggingConfiguration.keys.errorKey: "\(serviceError)",
599-
]
600-
)
601-
602-
// We can continue shutting down the next service now
603-
continue gracefulShutdownLoop
609+
if index == gracefulShutdownIndex {
610+
// The service that we were shutting down right now threw. Since it's failure
611+
// behaviour is to shutdown the group we can continue
612+
self.logger.debug(
613+
"The service that we were shutting down threw. Continuing with the next one.",
614+
metadata: [
615+
self.loggingConfiguration.keys.serviceKey: "\(service.service)",
616+
self.loggingConfiguration.keys.errorKey: "\(serviceError)",
617+
]
618+
)
619+
continue gracefulShutdownLoop
620+
} else {
621+
// Another service threw while we were waiting for a shutdown
622+
// We have to continue the iterating the task group's result
623+
self.logger.debug(
624+
"Another service than the service that we were shutting down threw. Continuing with the next one.",
625+
metadata: [
626+
self.loggingConfiguration.keys.serviceKey: "\(service.service)",
627+
self.loggingConfiguration.keys.errorKey: "\(serviceError)",
628+
]
629+
)
630+
break
631+
}
604632
}
605633

606634
case .signalCaught(let signal):
@@ -635,7 +663,8 @@ public actor ServiceGroup: Sendable {
635663

636664
case .signalSequenceFinished, .gracefulShutdownCaught, .gracefulShutdownFinished:
637665
// We just have to tolerate this since signals and parent graceful shutdowns downs can race.
638-
// We are going to continue the
666+
// We are going to continue the result loop since we have to wait for our service
667+
// to finish.
639668
break
640669
}
641670
}

0 commit comments

Comments
 (0)