Skip to content

Commit fe47a72

Browse files
committed
Fix logic issues during graceful shutdown
# Motivation This should fix the remaining issues raised in #166. The problem here was that if a service finished/threw out of order then we were wrongly treating this as if the service that we are currently shutting down finished. # Modification This PR ensures that we use the same `services` array during the graceful shutdown to nil out services that have finished. This way we correctly keep track of any service that finished. Additionally, there was a separate bug where we started to shutdown the next service to early if another service threw and had the termination behaviour of `shutdownGracefully`. # Result No more incorrect shutdown orderings.
1 parent b21c43a commit fe47a72

File tree

2 files changed

+296
-11
lines changed

2 files changed

+296
-11
lines changed

Sources/ServiceLifecycle/ServiceGroup.swift

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ public actor ServiceGroup: Sendable {
327327
services[index] = nil
328328
do {
329329
try await self.shutdownGracefully(
330-
services: services,
330+
services: &services,
331331
cancellationTimeoutTask: &cancellationTimeoutTask,
332332
group: &group,
333333
gracefulShutdownManagers: gracefulShutdownManagers
@@ -380,7 +380,7 @@ public actor ServiceGroup: Sendable {
380380

381381
do {
382382
try await self.shutdownGracefully(
383-
services: services,
383+
services: &services,
384384
cancellationTimeoutTask: &cancellationTimeoutTask,
385385
group: &group,
386386
gracefulShutdownManagers: gracefulShutdownManagers
@@ -421,7 +421,7 @@ public actor ServiceGroup: Sendable {
421421
)
422422
do {
423423
try await self.shutdownGracefully(
424-
services: services,
424+
services: &services,
425425
cancellationTimeoutTask: &cancellationTimeoutTask,
426426
group: &group,
427427
gracefulShutdownManagers: gracefulShutdownManagers
@@ -448,7 +448,7 @@ public actor ServiceGroup: Sendable {
448448

449449
do {
450450
try await self.shutdownGracefully(
451-
services: services,
451+
services: &services,
452452
cancellationTimeoutTask: &cancellationTimeoutTask,
453453
group: &group,
454454
gracefulShutdownManagers: gracefulShutdownManagers
@@ -489,7 +489,7 @@ public actor ServiceGroup: Sendable {
489489
}
490490

491491
private func shutdownGracefully(
492-
services: [ServiceGroupConfiguration.ServiceConfiguration?],
492+
services: inout [ServiceGroupConfiguration.ServiceConfiguration?],
493493
cancellationTimeoutTask: inout Task<Void, Never>?,
494494
group: inout ThrowingTaskGroup<ChildTaskResult, Error>,
495495
gracefulShutdownManagers: [GracefulShutdownManager]
@@ -519,7 +519,7 @@ public actor ServiceGroup: Sendable {
519519
self.logger.debug(
520520
"Service already finished. Skipping shutdown"
521521
)
522-
continue
522+
continue gracefulShutdownLoop
523523
}
524524
self.logger.debug(
525525
"Triggering graceful shutdown for service",
@@ -533,6 +533,7 @@ public actor ServiceGroup: Sendable {
533533
while let result = try await group.next() {
534534
switch result {
535535
case .serviceFinished(let service, let index):
536+
services[index] = nil
536537
if group.isCancelled {
537538
// The group is cancelled and we expect all services to finish
538539
continue gracefulShutdownLoop
@@ -561,7 +562,8 @@ public actor ServiceGroup: Sendable {
561562
throw ServiceGroupError.serviceFinishedUnexpectedly()
562563
}
563564

564-
case .serviceThrew(let service, _, let serviceError):
565+
case .serviceThrew(let service, let index, let serviceError):
566+
services[index] = nil
565567
switch service.failureTerminationBehavior.behavior {
566568
case .cancelGroup:
567569
self.logger.debug(
@@ -587,8 +589,15 @@ public actor ServiceGroup: Sendable {
587589
error = serviceError
588590
}
589591

590-
// We can continue shutting down the next service now
591-
continue gracefulShutdownLoop
592+
if index == gracefulShutdownIndex {
593+
// The service that we were shutting down right now threw. Since it's failure
594+
// behaviour is to shutdown the group we can continue
595+
continue gracefulShutdownLoop
596+
} else {
597+
// Another service threw while we were waiting for a shutdown
598+
// We have to continue the iterating the task group's result
599+
break
600+
}
592601

593602
case .ignore:
594603
self.logger.debug(
@@ -635,7 +644,8 @@ public actor ServiceGroup: Sendable {
635644

636645
case .signalSequenceFinished, .gracefulShutdownCaught, .gracefulShutdownFinished:
637646
// We just have to tolerate this since signals and parent graceful shutdowns downs can race.
638-
// We are going to continue the
647+
// We are going to continue the result loop since we have to wait for our service
648+
// to finish.
639649
break
640650
}
641651
}

Tests/ServiceLifecycleTests/ServiceGroupTests.swift

Lines changed: 276 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -943,7 +943,8 @@ final class ServiceGroupTests: XCTestCase {
943943
// Let's throw from the second service
944944
await service2.resumeRunContinuation(with: .failure(ExampleError()))
945945

946-
// The first service should still be running
946+
// The first service should still be running but seeing a graceful shutdown signal firsts
947+
await XCTAsyncAssertEqual(await eventIterator1.next(), .shutdownGracefully)
947948
service1.sendPing()
948949
await XCTAsyncAssertEqual(await eventIterator1.next(), .runPing)
949950

@@ -1194,6 +1195,280 @@ final class ServiceGroupTests: XCTestCase {
11941195
}
11951196
}
11961197

1198+
func testTriggerGracefulShutdown_serviceThrows_inOrder_gracefullyShutdownGroup() async throws {
1199+
let service1 = MockService(description: "Service1")
1200+
let service2 = MockService(description: "Service2")
1201+
let service3 = MockService(description: "Service3")
1202+
let serviceGroup = self.makeServiceGroup(
1203+
services: [.init(service: service1, failureTerminationBehavior: .gracefullyShutdownGroup),
1204+
.init(service: service2, failureTerminationBehavior: .gracefullyShutdownGroup),
1205+
.init(service: service3, failureTerminationBehavior: .gracefullyShutdownGroup)]
1206+
)
1207+
1208+
do {
1209+
try await withThrowingTaskGroup(of: Void.self) { group in
1210+
group.addTask {
1211+
try await serviceGroup.run()
1212+
}
1213+
1214+
var eventIterator1 = service1.events.makeAsyncIterator()
1215+
await XCTAsyncAssertEqual(await eventIterator1.next(), .run)
1216+
1217+
var eventIterator2 = service2.events.makeAsyncIterator()
1218+
await XCTAsyncAssertEqual(await eventIterator2.next(), .run)
1219+
1220+
var eventIterator3 = service3.events.makeAsyncIterator()
1221+
await XCTAsyncAssertEqual(await eventIterator3.next(), .run)
1222+
1223+
await serviceGroup.triggerGracefulShutdown()
1224+
1225+
// The last service should receive the shutdown signal first
1226+
await XCTAsyncAssertEqual(await eventIterator3.next(), .shutdownGracefully)
1227+
1228+
// Waiting to see that all three are still running
1229+
service1.sendPing()
1230+
service2.sendPing()
1231+
service3.sendPing()
1232+
await XCTAsyncAssertEqual(await eventIterator1.next(), .runPing)
1233+
await XCTAsyncAssertEqual(await eventIterator2.next(), .runPing)
1234+
await XCTAsyncAssertEqual(await eventIterator3.next(), .runPing)
1235+
1236+
// Let's exit from the last service
1237+
await service3.resumeRunContinuation(with: .success(()))
1238+
1239+
// The middle service should now receive the signal
1240+
await XCTAsyncAssertEqual(await eventIterator2.next(), .shutdownGracefully)
1241+
1242+
// Waiting to see that the two remaining are still running
1243+
service1.sendPing()
1244+
service2.sendPing()
1245+
await XCTAsyncAssertEqual(await eventIterator1.next(), .runPing)
1246+
await XCTAsyncAssertEqual(await eventIterator2.next(), .runPing)
1247+
1248+
// Let's exit from the second service
1249+
await service2.resumeRunContinuation(with: .failure(ExampleError()))
1250+
1251+
// The final service should now receive the signal
1252+
await XCTAsyncAssertEqual(await eventIterator1.next(), .shutdownGracefully)
1253+
1254+
// Waiting to see that the one remaining are still running
1255+
service1.sendPing()
1256+
await XCTAsyncAssertEqual(await eventIterator1.next(), .runPing)
1257+
1258+
// Let's exit from the first service
1259+
await service1.resumeRunContinuation(with: .success(()))
1260+
1261+
try await group.waitForAll()
1262+
}
1263+
1264+
XCTFail("Expected error not thrown")
1265+
} catch is ExampleError {
1266+
// expected error
1267+
}
1268+
}
1269+
1270+
func testTriggerGracefulShutdown_serviceThrows_inOrder_ignore() async throws {
1271+
let service1 = MockService(description: "Service1")
1272+
let service2 = MockService(description: "Service2")
1273+
let service3 = MockService(description: "Service3")
1274+
let serviceGroup = self.makeServiceGroup(
1275+
services: [.init(service: service1, failureTerminationBehavior: .ignore),
1276+
.init(service: service2, failureTerminationBehavior: .ignore),
1277+
.init(service: service3, failureTerminationBehavior: .ignore)]
1278+
)
1279+
1280+
try await withThrowingTaskGroup(of: Void.self) { group in
1281+
group.addTask {
1282+
try await serviceGroup.run()
1283+
}
1284+
1285+
var eventIterator1 = service1.events.makeAsyncIterator()
1286+
await XCTAsyncAssertEqual(await eventIterator1.next(), .run)
1287+
1288+
var eventIterator2 = service2.events.makeAsyncIterator()
1289+
await XCTAsyncAssertEqual(await eventIterator2.next(), .run)
1290+
1291+
var eventIterator3 = service3.events.makeAsyncIterator()
1292+
await XCTAsyncAssertEqual(await eventIterator3.next(), .run)
1293+
1294+
await serviceGroup.triggerGracefulShutdown()
1295+
1296+
// The last service should receive the shutdown signal first
1297+
await XCTAsyncAssertEqual(await eventIterator3.next(), .shutdownGracefully)
1298+
1299+
// Waiting to see that all three are still running
1300+
service1.sendPing()
1301+
service2.sendPing()
1302+
service3.sendPing()
1303+
await XCTAsyncAssertEqual(await eventIterator1.next(), .runPing)
1304+
await XCTAsyncAssertEqual(await eventIterator2.next(), .runPing)
1305+
await XCTAsyncAssertEqual(await eventIterator3.next(), .runPing)
1306+
1307+
// Let's exit from the last service
1308+
await service3.resumeRunContinuation(with: .success(()))
1309+
1310+
// The middle service should now receive the signal
1311+
await XCTAsyncAssertEqual(await eventIterator2.next(), .shutdownGracefully)
1312+
1313+
// Waiting to see that the two remaining are still running
1314+
service1.sendPing()
1315+
service2.sendPing()
1316+
await XCTAsyncAssertEqual(await eventIterator1.next(), .runPing)
1317+
await XCTAsyncAssertEqual(await eventIterator2.next(), .runPing)
1318+
1319+
// Let's exit from the second service
1320+
await service2.resumeRunContinuation(with: .failure(ExampleError()))
1321+
1322+
// The final service should now receive the signal
1323+
await XCTAsyncAssertEqual(await eventIterator1.next(), .shutdownGracefully)
1324+
1325+
// Waiting to see that the one remaining are still running
1326+
service1.sendPing()
1327+
await XCTAsyncAssertEqual(await eventIterator1.next(), .runPing)
1328+
1329+
// Let's exit from the first service
1330+
await service1.resumeRunContinuation(with: .success(()))
1331+
1332+
try await group.waitForAll()
1333+
}
1334+
}
1335+
1336+
func testTriggerGracefulShutdown_serviceThrows_outOfOrder_gracefullyShutdownGroup() async throws {
1337+
let service1 = MockService(description: "Service1")
1338+
let service2 = MockService(description: "Service2")
1339+
let service3 = MockService(description: "Service3")
1340+
let serviceGroup = self.makeServiceGroup(
1341+
services: [.init(service: service1, failureTerminationBehavior: .gracefullyShutdownGroup),
1342+
.init(service: service2, failureTerminationBehavior: .gracefullyShutdownGroup),
1343+
.init(service: service3, failureTerminationBehavior: .gracefullyShutdownGroup)]
1344+
)
1345+
1346+
do {
1347+
try await withThrowingTaskGroup(of: Void.self) { group in
1348+
group.addTask {
1349+
try await serviceGroup.run()
1350+
}
1351+
1352+
var eventIterator1 = service1.events.makeAsyncIterator()
1353+
await XCTAsyncAssertEqual(await eventIterator1.next(), .run)
1354+
1355+
var eventIterator2 = service2.events.makeAsyncIterator()
1356+
await XCTAsyncAssertEqual(await eventIterator2.next(), .run)
1357+
1358+
var eventIterator3 = service3.events.makeAsyncIterator()
1359+
await XCTAsyncAssertEqual(await eventIterator3.next(), .run)
1360+
1361+
await serviceGroup.triggerGracefulShutdown()
1362+
1363+
// The last service should receive the shutdown signal first
1364+
await XCTAsyncAssertEqual(await eventIterator3.next(), .shutdownGracefully)
1365+
1366+
// Waiting to see that all three are still running
1367+
service1.sendPing()
1368+
service2.sendPing()
1369+
service3.sendPing()
1370+
await XCTAsyncAssertEqual(await eventIterator1.next(), .runPing)
1371+
await XCTAsyncAssertEqual(await eventIterator2.next(), .runPing)
1372+
await XCTAsyncAssertEqual(await eventIterator3.next(), .runPing)
1373+
1374+
// Let's exit from the last service
1375+
await service3.resumeRunContinuation(with: .success(()))
1376+
1377+
// The middle service should now receive the signal
1378+
await XCTAsyncAssertEqual(await eventIterator2.next(), .shutdownGracefully)
1379+
1380+
// Waiting to see that the two remaining are still running
1381+
service1.sendPing()
1382+
service2.sendPing()
1383+
await XCTAsyncAssertEqual(await eventIterator1.next(), .runPing)
1384+
await XCTAsyncAssertEqual(await eventIterator2.next(), .runPing)
1385+
1386+
// Let's exit from the first service (even though the second service
1387+
// is gracefully shutting down)
1388+
await service1.resumeRunContinuation(with: .failure(ExampleError()))
1389+
1390+
// Waiting to see that the one remaining are still running
1391+
service2.sendPing()
1392+
await XCTAsyncAssertEqual(await eventIterator2.next(), .runPing)
1393+
1394+
// Let's exit from the second service
1395+
await service2.resumeRunContinuation(with: .success(()))
1396+
1397+
// The first service shutdown will be skipped
1398+
try await group.waitForAll()
1399+
}
1400+
1401+
XCTFail("Expected error not thrown")
1402+
} catch is ExampleError {
1403+
// expected error
1404+
}
1405+
}
1406+
1407+
func testTriggerGracefulShutdown_serviceThrows_outOfOrder_ignore() async throws {
1408+
let service1 = MockService(description: "Service1")
1409+
let service2 = MockService(description: "Service2")
1410+
let service3 = MockService(description: "Service3")
1411+
let serviceGroup = self.makeServiceGroup(
1412+
services: [.init(service: service1, failureTerminationBehavior: .ignore),
1413+
.init(service: service2, failureTerminationBehavior: .ignore),
1414+
.init(service: service3, failureTerminationBehavior: .ignore)]
1415+
)
1416+
1417+
try await withThrowingTaskGroup(of: Void.self) { group in
1418+
group.addTask {
1419+
try await serviceGroup.run()
1420+
}
1421+
1422+
var eventIterator1 = service1.events.makeAsyncIterator()
1423+
await XCTAsyncAssertEqual(await eventIterator1.next(), .run)
1424+
1425+
var eventIterator2 = service2.events.makeAsyncIterator()
1426+
await XCTAsyncAssertEqual(await eventIterator2.next(), .run)
1427+
1428+
var eventIterator3 = service3.events.makeAsyncIterator()
1429+
await XCTAsyncAssertEqual(await eventIterator3.next(), .run)
1430+
1431+
await serviceGroup.triggerGracefulShutdown()
1432+
1433+
// The last service should receive the shutdown signal first
1434+
await XCTAsyncAssertEqual(await eventIterator3.next(), .shutdownGracefully)
1435+
1436+
// Waiting to see that all three are still running
1437+
service1.sendPing()
1438+
service2.sendPing()
1439+
service3.sendPing()
1440+
await XCTAsyncAssertEqual(await eventIterator1.next(), .runPing)
1441+
await XCTAsyncAssertEqual(await eventIterator2.next(), .runPing)
1442+
await XCTAsyncAssertEqual(await eventIterator3.next(), .runPing)
1443+
1444+
// Let's exit from the last service
1445+
await service3.resumeRunContinuation(with: .success(()))
1446+
1447+
// The middle service should now receive the signal
1448+
await XCTAsyncAssertEqual(await eventIterator2.next(), .shutdownGracefully)
1449+
1450+
// Waiting to see that the two remaining are still running
1451+
service1.sendPing()
1452+
service2.sendPing()
1453+
await XCTAsyncAssertEqual(await eventIterator1.next(), .runPing)
1454+
await XCTAsyncAssertEqual(await eventIterator2.next(), .runPing)
1455+
1456+
// Let's exit from the first service (even though the second service
1457+
// is gracefully shutting down)
1458+
await service1.resumeRunContinuation(with: .failure(ExampleError()))
1459+
1460+
// Waiting to see that the one remaining are still running
1461+
service2.sendPing()
1462+
await XCTAsyncAssertEqual(await eventIterator2.next(), .runPing)
1463+
1464+
// Let's exit from the second service
1465+
await service2.resumeRunContinuation(with: .success(()))
1466+
1467+
// The first service shutdown will be skipped
1468+
try await group.waitForAll()
1469+
}
1470+
}
1471+
11971472
// MARK: - Helpers
11981473

11991474
private func makeServiceGroup(

0 commit comments

Comments
 (0)