Skip to content
This repository was archived by the owner on Jan 16, 2025. It is now read-only.

Commit 3658d6a

Browse files
author
Király Ádám
authored
feat: Add multi-runner process support for runner scale down. (#1859)
* Implement multi runner process support for scale down. * Fix format and lint issues. * Minor fixes.
1 parent ae71c2b commit 3658d6a

File tree

2 files changed

+117
-39
lines changed

2 files changed

+117
-39
lines changed

modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts

+50-17
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ const mockOctokit = {
1818
listSelfHostedRunnersForOrg: jest.fn(),
1919
deleteSelfHostedRunnerFromOrg: jest.fn(),
2020
deleteSelfHostedRunnerFromRepo: jest.fn(),
21+
getSelfHostedRunnerForOrg: jest.fn(),
22+
getSelfHostedRunnerForRepo: jest.fn(),
2123
},
2224
paginate: jest.fn(),
2325
};
@@ -144,10 +146,14 @@ const DEFAULT_RUNNERS_ORIGINAL = [
144146
repo: `${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`,
145147
},
146148
{
147-
instanceId: 'i-busy-112',
148-
launchTime: moment(new Date())
149-
.subtract(minimumRunningTimeInMinutes + 27, 'minutes')
150-
.toDate(),
149+
instanceId: 'i-running-112',
150+
launchTime: moment(new Date()).subtract(25, 'minutes').toDate(),
151+
type: 'Repo',
152+
owner: `doe/another-repo`,
153+
},
154+
{
155+
instanceId: 'i-running-113',
156+
launchTime: moment(new Date()).subtract(25, 'minutes').toDate(),
151157
type: 'Org',
152158
owner: TEST_DATA.repositoryOwner,
153159
},
@@ -157,37 +163,42 @@ const DEFAULT_REGISTERED_RUNNERS = [
157163
{
158164
id: 101,
159165
name: 'i-idle-101',
160-
busy: false,
161166
},
162167
{
163168
id: 102,
164169
name: 'i-idle-102',
165-
busy: false,
166170
},
167171
{
168172
id: 103,
169173
name: 'i-oldest-idle-103',
170-
busy: false,
171174
},
172175
{
173176
id: 104,
174177
name: 'i-oldest-idle-104',
175-
busy: false,
176178
},
177179
{
178180
id: 105,
179181
name: 'i-running-105',
180-
busy: false,
181182
},
182183
{
183184
id: 106,
184185
name: 'i-running-106',
185-
busy: false,
186186
},
187187
{
188-
id: 112,
189-
name: 'i-busy-112',
190-
busy: true,
188+
id: 1121,
189+
name: 'i-running-112-1',
190+
},
191+
{
192+
id: 1122,
193+
name: 'i-running-112-2',
194+
},
195+
{
196+
id: 1131,
197+
name: 'i-running-113-1',
198+
},
199+
{
200+
id: 1132,
201+
name: 'i-running-113-2',
191202
},
192203
];
193204

@@ -235,6 +246,29 @@ describe('scaleDown', () => {
235246
}
236247
});
237248

249+
mockOctokit.actions.getSelfHostedRunnerForRepo.mockImplementation((repo) => {
250+
if (repo.runner_id === 1121) {
251+
return {
252+
data: { busy: true },
253+
};
254+
} else {
255+
return {
256+
data: { busy: false },
257+
};
258+
}
259+
});
260+
mockOctokit.actions.getSelfHostedRunnerForOrg.mockImplementation((repo) => {
261+
if (repo.runner_id === 1131) {
262+
return {
263+
data: { busy: true },
264+
};
265+
} else {
266+
return {
267+
data: { busy: false },
268+
};
269+
}
270+
});
271+
238272
const mockTerminateRunners = mocked(terminateRunner);
239273
mockTerminateRunners.mockImplementation(async () => {
240274
return;
@@ -279,8 +313,7 @@ describe('scaleDown', () => {
279313
);
280314

281315
RUNNERS_ALL_REMOVED = DEFAULT_RUNNERS_ORG.filter(
282-
(r) =>
283-
!r.instanceId.includes('running') && !r.instanceId.includes('registered') && !r.instanceId.includes('busy'),
316+
(r) => !r.instanceId.includes('running') && !r.instanceId.includes('registered'),
284317
);
285318
DEFAULT_RUNNERS_ORPHANED = DEFAULT_RUNNERS_ORIGINAL.filter(
286319
(r) => r.instanceId.includes('orphan') && !r.instanceId.includes('not-registered'),
@@ -349,7 +382,7 @@ describe('scaleDown', () => {
349382
beforeEach(() => {
350383
process.env.SCALE_DOWN_CONFIG = JSON.stringify([
351384
{
352-
idleCount: 2,
385+
idleCount: 3,
353386
cron: '* * * * * *',
354387
timeZone: 'Europe/Amsterdam',
355388
},
@@ -479,7 +512,7 @@ describe('scaleDown', () => {
479512
beforeEach(() => {
480513
process.env.SCALE_DOWN_CONFIG = JSON.stringify([
481514
{
482-
idleCount: 2,
515+
idleCount: 3,
483516
cron: '* * * * * *',
484517
timeZone: 'Europe/Amsterdam',
485518
},

modules/runners/lambdas/runners/src/scale-runners/scale-down.ts

+67-22
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,27 @@ async function getOrCreateOctokit(runner: RunnerInfo): Promise<Octokit> {
4747
return octokit;
4848
}
4949

50+
/**
 * Fetches one registered GitHub self-hosted runner and reports its `busy` flag.
 *
 * The lookup is org-scoped when `ec2runner.type` is `'Org'`; otherwise it is
 * repo-scoped, with `ec2runner.owner` split on `/` into owner and repository name.
 * The result is logged at info level before being returned.
 *
 * @param client - Octokit client used to query the GitHub Actions API.
 * @param ec2runner - EC2 runner whose `type`, `owner` and `instanceId` drive the lookup and the log line.
 * @param runnerId - GitHub runner ID to look up.
 * @returns The `busy` value from the GitHub response.
 */
async function getGitHubRunnerBusyState(client: Octokit, ec2runner: RunnerInfo, runnerId: number): Promise<boolean> {
  let busy: boolean;

  if (ec2runner.type === 'Org') {
    const response = await client.actions.getSelfHostedRunnerForOrg({
      runner_id: runnerId,
      org: ec2runner.owner,
    });
    busy = response.data.busy;
  } else {
    // For repo-level runners `owner` holds "<owner>/<repo>".
    const ownerParts = ec2runner.owner.split('/');
    const response = await client.actions.getSelfHostedRunnerForRepo({
      runner_id: runnerId,
      owner: ownerParts[0],
      repo: ownerParts[1],
    });
    busy = response.data.busy;
  }

  logger.info(
    `Runner '${ec2runner.instanceId}' - GitHub Runner ID '${runnerId}' - Busy: ${busy}`,
    LogFields.print(),
  );

  return busy;
}
70+
5071
async function listGitHubRunners(runner: RunnerInfo): Promise<GhRunners> {
5172
const key = runner.owner as string;
5273
const cachedRunners = githubCache.runners.get(key);
@@ -86,29 +107,48 @@ function bootTimeExceeded(ec2Runner: RunnerInfo): boolean {
86107
return launchTimePlusBootTime < moment(new Date()).utc();
87108
}
88109

89-
async function removeRunner(ec2runner: RunnerInfo, ghRunnerId: number): Promise<void> {
110+
async function removeRunner(ec2runner: RunnerInfo, ghRunnerIds: number[]): Promise<void> {
90111
const githubAppClient = await getOrCreateOctokit(ec2runner);
91112
try {
92-
const result =
93-
ec2runner.type === 'Org'
94-
? await githubAppClient.actions.deleteSelfHostedRunnerFromOrg({
95-
runner_id: ghRunnerId,
96-
org: ec2runner.owner,
97-
})
98-
: await githubAppClient.actions.deleteSelfHostedRunnerFromRepo({
99-
runner_id: ghRunnerId,
100-
owner: ec2runner.owner.split('/')[0],
101-
repo: ec2runner.owner.split('/')[1],
102-
});
103-
104-
if (result.status == 204) {
105-
await terminateRunner(ec2runner.instanceId);
113+
const states = await Promise.all(
114+
ghRunnerIds.map(async (ghRunnerId) => {
115+
// Get busy state instead of using the output of listGitHubRunners(...) to minimize the race condition.
116+
return await getGitHubRunnerBusyState(githubAppClient, ec2runner, ghRunnerId);
117+
}),
118+
);
119+
120+
if (states.every((busy) => busy === false)) {
121+
const statuses = await Promise.all(
122+
ghRunnerIds.map(async (ghRunnerId) => {
123+
return (
124+
ec2runner.type === 'Org'
125+
? await githubAppClient.actions.deleteSelfHostedRunnerFromOrg({
126+
runner_id: ghRunnerId,
127+
org: ec2runner.owner,
128+
})
129+
: await githubAppClient.actions.deleteSelfHostedRunnerFromRepo({
130+
runner_id: ghRunnerId,
131+
owner: ec2runner.owner.split('/')[0],
132+
repo: ec2runner.owner.split('/')[1],
133+
})
134+
).status;
135+
}),
136+
);
137+
138+
if (statuses.every((status) => status == 204)) {
139+
await terminateRunner(ec2runner.instanceId);
140+
logger.info(
141+
`AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`,
142+
LogFields.print(),
143+
);
144+
} else {
145+
logger.error(`Failed to de-register GitHub runner: ${statuses}`, LogFields.print());
146+
}
147+
} else {
106148
logger.info(
107-
`AWS runner instance '${ec2runner.instanceId}' is terminated and GitHub runner is de-registered.`,
149+
`Runner '${ec2runner.instanceId}' cannot be de-registered, because it is still busy.`,
108150
LogFields.print(),
109151
);
110-
} else {
111-
logger.error(`Failed to de-register GitHub runner: ${result.status}`, LogFields.print());
112152
}
113153
} catch (e) {
114154
logger.error(`Runner '${ec2runner.instanceId}' cannot be de-registered. Error: ${e}`, LogFields.print());
@@ -130,15 +170,20 @@ async function evaluateAndRemoveRunners(
130170
);
131171
for (const ec2Runner of ec2RunnersFiltered) {
132172
const ghRunners = await listGitHubRunners(ec2Runner);
133-
const ghRunner = ghRunners.find((runner) => runner.name === ec2Runner.instanceId);
134-
if (ghRunner) {
135-
if (!ghRunner.busy && runnerMinimumTimeExceeded(ec2Runner)) {
173+
const ghRunnersFiltered = ghRunners.filter((runner: { name: string }) =>
174+
runner.name.startsWith(ec2Runner.instanceId),
175+
);
176+
if (ghRunnersFiltered.length) {
177+
if (runnerMinimumTimeExceeded(ec2Runner)) {
136178
if (idleCounter > 0) {
137179
idleCounter--;
138180
logger.info(`Runner '${ec2Runner.instanceId}' will be kept idle.`, LogFields.print());
139181
} else {
140182
logger.info(`Runner '${ec2Runner.instanceId}' will be terminated.`, LogFields.print());
141-
await removeRunner(ec2Runner, ghRunner.id);
183+
await removeRunner(
184+
ec2Runner,
185+
ghRunnersFiltered.map((runner: { id: number }) => runner.id),
186+
);
142187
}
143188
}
144189
} else {

0 commit comments

Comments
 (0)