From 1cba4efe1193c0b45bef1cb9a04d5e2fc95c8735 Mon Sep 17 00:00:00 2001 From: Nathaniel McAuliffe Date: Wed, 18 Aug 2021 10:58:11 -0400 Subject: [PATCH 1/4] Terminate legacy runners --- .../runners/src/scale-runners/runners.ts | 15 +++- .../src/scale-runners/scale-down.test.ts | 72 ++++++++++++++----- .../runners/src/scale-runners/scale-down.ts | 25 ++++++- 3 files changed, 89 insertions(+), 23 deletions(-) diff --git a/modules/runners/lambdas/runners/src/scale-runners/runners.ts b/modules/runners/lambdas/runners/src/scale-runners/runners.ts index f8ee13bef2..202d72246d 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/runners.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/runners.ts @@ -1,5 +1,14 @@ import { EC2, SSM } from 'aws-sdk'; +export interface RunnerList { + instanceId: string; + launchTime?: Date; + owner?: string; + type?: string; + Repo?: string; + Org?: string; +} + export interface RunnerInfo { instanceId: string; launchTime?: Date; @@ -20,7 +29,7 @@ export interface RunnerInputParameters { runnerOwner: string; } -export async function listEC2Runners(filters: ListRunnerFilters | undefined = undefined): Promise { +export async function listEC2Runners(filters: ListRunnerFilters | undefined = undefined): Promise { const ec2 = new EC2(); const ec2Filters = [ { Name: 'tag:Application', Values: ['github-action-runner'] }, @@ -36,7 +45,7 @@ export async function listEC2Runners(filters: ListRunnerFilters | undefined = un } } const runningInstances = await ec2.describeInstances({ Filters: ec2Filters }).promise(); - const runners: RunnerInfo[] = []; + const runners: RunnerList[] = []; if (runningInstances.Reservations) { for (const r of runningInstances.Reservations) { if (r.Instances) { @@ -46,6 +55,8 @@ export async function listEC2Runners(filters: ListRunnerFilters | undefined = un launchTime: i.LaunchTime, owner: i.Tags?.find((e) => e.Key === 'Owner')?.Value as string, type: i.Tags?.find((e) => e.Key === 'Type')?.Value as string, + Repo: i.Tags?.find((e) => e.Key === 'Repo')?.Value as string, + Org: i.Tags?.find((e) => e.Key === 'Org')?.Value as string, }); } } diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts index e934d6e1ce..5cf04b3918 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts @@ -1,6 +1,6 @@ import moment from 'moment'; import { mocked } from 'ts-jest/utils'; -import { listEC2Runners, terminateRunner, RunnerInfo } from './runners'; +import { listEC2Runners, terminateRunner, RunnerInfo, RunnerList } from './runners'; import { scaleDown } from './scale-down'; import * as ghAuth from './gh-auth'; import nock from 'nock'; @@ -48,7 +48,7 @@ const TEST_DATA: TestData = { repositoryOwner: 'Codertocat', }; -let DEFAULT_RUNNERS: RunnerInfo[]; +let DEFAULT_RUNNERS: RunnerList[]; let RUNNERS_ALL_REMOVED: RunnerInfo[]; let DEFAULT_RUNNERS_REPO_TO_BE_REMOVED: RunnerInfo[]; let RUNNERS_ORG_TO_BE_REMOVED_WITH_AUTO_SCALING_CONFIG: RunnerInfo[]; @@ -57,6 +57,9 @@ let RUNNERS_ORG_WITH_AUTO_SCALING_CONFIG: RunnerInfo[]; let DEFAULT_RUNNERS_REPO: RunnerInfo[]; let DEFAULT_RUNNERS_ORG: RunnerInfo[]; let DEFAULT_RUNNERS_ORG_TO_BE_REMOVED: RunnerInfo[]; +let DEFAULT_RUNNERS_ORPHANED: RunnerInfo[]; +let DEFAULT_REPO_RUNNERS_ORPHANED: RunnerInfo[]; +let DEFAULT_ORG_RUNNERS_ORPHANED: RunnerInfo[]; const DEFAULT_RUNNERS_ORIGINAL = [ { instanceId: 'i-idle-101', @@ -126,6 +129,13 @@ const DEFAULT_RUNNERS_ORIGINAL = [ type: 'Org', owner: TEST_DATA.repositoryOwner, }, + { + instanceId: 'i-legacy-110', + launchTime: moment(new Date()) + .subtract(minimumRunningTimeInMinutes + 5, 'minutes') + .toDate(), + Repo: `${TEST_DATA.repositoryOwner}/${TEST_DATA.repositoryName}`, + }, ]; const DEFAULT_REGISTERED_RUNNERS = [ @@ -215,8 +225,8 @@ describe('scaleDown', () => { }); mockCreateClient.mockResolvedValue(new mocktokit()); DEFAULT_RUNNERS = JSON.parse(JSON.stringify(DEFAULT_RUNNERS_ORIGINAL)); - DEFAULT_RUNNERS_REPO = DEFAULT_RUNNERS.filter((r) => r.type === 'Repo'); - DEFAULT_RUNNERS_ORG = DEFAULT_RUNNERS.filter((r) => r.type === 'Org'); + DEFAULT_RUNNERS_REPO = DEFAULT_RUNNERS.filter((r) => r.type === 'Repo') as RunnerInfo[]; + DEFAULT_RUNNERS_ORG = DEFAULT_RUNNERS.filter((r) => r.type === 'Org') as RunnerInfo[]; DEFAULT_RUNNERS_REPO_TO_BE_REMOVED = DEFAULT_RUNNERS_REPO.filter( (r) => r.instanceId.includes('idle') || r.instanceId.includes('orphan'), ); @@ -239,6 +249,15 @@ describe('scaleDown', () => { RUNNERS_ALL_REMOVED = DEFAULT_RUNNERS_ORG.filter( (r) => !r.instanceId.includes('running') && !r.instanceId.includes('registered'), ); + DEFAULT_RUNNERS_ORPHANED = DEFAULT_RUNNERS_ORIGINAL.filter( + (r) => r.instanceId.includes('orphan') && !r.instanceId.includes('not-registered'), + ) as RunnerInfo[]; + DEFAULT_REPO_RUNNERS_ORPHANED = DEFAULT_RUNNERS_REPO.filter( + (r) => r.instanceId.includes('orphan') && !r.instanceId.includes('not-registered'), + ); + DEFAULT_ORG_RUNNERS_ORPHANED = DEFAULT_RUNNERS_ORG.filter( + (r) => r.instanceId.includes('orphan') && !r.instanceId.includes('not-registered'), + ); }); describe('github.com', () => { @@ -258,8 +277,7 @@ describe('scaleDown', () => { }); }); - it('Terminates 3 of 5 runners owned by repos.', async () => { - // This will not terminate the orphan runners that have not yet reached their minimum running time. + it('Terminates 3 of 5 runners owned by repos and all orphaned', async () => { mockListRunners.mockResolvedValue(DEFAULT_RUNNERS_REPO); await scaleDown(); expect(listEC2Runners).toBeCalledWith({ @@ -268,14 +286,16 @@ describe('scaleDown', () => { expect(mockOctokit.apps.getRepoInstallation).toBeCalled(); - expect(terminateRunner).toBeCalledTimes(3); + expect(terminateRunner).toBeCalledTimes(4); for (const toTerminate of DEFAULT_RUNNERS_REPO_TO_BE_REMOVED) { expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } + for (const toTerminate of DEFAULT_REPO_RUNNERS_ORPHANED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); + } }); - it('Terminates 2 of 3 runners owned by orgs.', async () => { - // This will not terminate the orphan runners that have not yet reached their minimum running time. + it('Terminates 2 of 3 runners owned by orgs and all orphaned', async () => { mockListRunners.mockResolvedValue(DEFAULT_RUNNERS_ORG); await scaleDown(); expect(listEC2Runners).toBeCalledWith({ @@ -283,10 +303,13 @@ describe('scaleDown', () => { }); expect(mockOctokit.apps.getOrgInstallation).toBeCalled(); - expect(terminateRunner).toBeCalledTimes(2); + expect(terminateRunner).toBeCalledTimes(3); for (const toTerminate of DEFAULT_RUNNERS_ORG_TO_BE_REMOVED) { expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } + for (const toTerminate of DEFAULT_ORG_RUNNERS_ORPHANED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); + } }); describe('With idle config', () => { @@ -329,7 +352,7 @@ describe('scaleDown', () => { }); }); - it('Terminates 6 runners amongst all owners', async () => { + it('Terminates 6 runners amongst all owners and all orphaned', async () => { mockListRunners.mockResolvedValue(DEFAULT_RUNNERS); await scaleDown(); @@ -339,10 +362,13 @@ describe('scaleDown', () => { expect(mockOctokit.apps.getRepoInstallation).toBeCalledTimes(2); expect(mockOctokit.apps.getOrgInstallation).toBeCalledTimes(1); - expect(terminateRunner).toBeCalledTimes(5); + expect(terminateRunner).toBeCalledTimes(8); for (const toTerminate of RUNNERS_ALL_REMOVED) { expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } + for (const toTerminate of DEFAULT_RUNNERS_ORPHANED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); + } }); }); @@ -366,8 +392,7 @@ describe('scaleDown', () => { }); }); - it('Terminates 3 of 5 runners owned by repos.', async () => { - // This will not terminate the orphan runners that have not yet reached their minimum running time. + it('Terminates 3 of 5 runners owned by repos and all orphaned', async () => { mockListRunners.mockResolvedValue(DEFAULT_RUNNERS_REPO); await scaleDown(); expect(listEC2Runners).toBeCalledWith({ @@ -375,13 +400,16 @@ describe('scaleDown', () => { }); expect(mockOctokit.apps.getRepoInstallation).toBeCalled(); - expect(terminateRunner).toBeCalledTimes(3); + expect(terminateRunner).toBeCalledTimes(4); for (const toTerminate of DEFAULT_RUNNERS_REPO_TO_BE_REMOVED) { expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } + for (const toTerminate of DEFAULT_REPO_RUNNERS_ORPHANED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); + } }); - it('Terminates 2 of 3 runners owned by orgs.', async () => { + it('Terminates 2 of 3 runners owned by orgs and all orphaned', async () => { // This will not terminate the orphan runners that have not yet reached their minimum running time. mockListRunners.mockResolvedValue(DEFAULT_RUNNERS_ORG); await scaleDown(); @@ -390,10 +418,13 @@ describe('scaleDown', () => { }); expect(mockOctokit.apps.getOrgInstallation).toBeCalled(); - expect(terminateRunner).toBeCalledTimes(2); + expect(terminateRunner).toBeCalledTimes(3); for (const toTerminate of DEFAULT_RUNNERS_ORG_TO_BE_REMOVED) { expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } + for (const toTerminate of DEFAULT_ORG_RUNNERS_ORPHANED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); + } }); describe('With idle config', () => { @@ -436,7 +467,7 @@ describe('scaleDown', () => { }); }); - it('Terminates 6 runners amongst all owners', async () => { + it('Terminates 6 runners amongst all owners and all orphaned', async () => { mockListRunners.mockResolvedValue(DEFAULT_RUNNERS); await scaleDown(); @@ -446,10 +477,13 @@ describe('scaleDown', () => { expect(mockOctokit.apps.getRepoInstallation).toBeCalledTimes(2); expect(mockOctokit.apps.getOrgInstallation).toBeCalledTimes(1); - expect(terminateRunner).toBeCalledTimes(5); + expect(terminateRunner).toBeCalledTimes(8); for (const toTerminate of RUNNERS_ALL_REMOVED) { expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); } + for (const toTerminate of DEFAULT_RUNNERS_ORPHANED) { + expect(terminateRunner).toHaveBeenCalledWith(toTerminate.instanceId); + } }); }); }); diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index 8dd6d65e2f..f8a26f982f 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -1,6 +1,6 @@ import { Octokit } from '@octokit/rest'; import moment from 'moment'; -import { listEC2Runners, RunnerInfo, terminateRunner } from './runners'; +import { listEC2Runners, RunnerInfo, RunnerList, terminateRunner } from './runners'; import { getIdleRunnerCount, ScalingDownConfig } from './scale-down-config'; import { createOctoClient, createGithubAppAuth, createGithubInstallationAuth } from './gh-auth'; import { githubCache, GhRunners } from './cache'; @@ -116,6 +116,8 @@ async function evaluateAndRemoveRunners( if (idleCounter > 0) { idleCounter--; console.debug(`Runner '${ec2runner.instanceId}' will kept idle.`); + const instanceIndex = ec2Runners.findIndex((runner) => runner.instanceId === ec2runner.instanceId); + ec2Runners.splice(instanceIndex, 1); } else { await removeRunner(ec2runner, ghRunner.id); const instanceIndex = ec2Runners.findIndex((runner) => runner.instanceId === ec2runner.instanceId); @@ -130,6 +132,12 @@ async function evaluateAndRemoveRunners( } } } + if (!(ec2Runners.length === 0)) { + console.info(`${ec2Runners.length} runners itentified as orphans.`); + ec2Runners.forEach((runner) => { + terminateOrphan(runner.instanceId); + }); + } } async function terminateOrphan(instanceId: string): Promise { @@ -154,6 +162,14 @@ async function listAndSortRunners(environment: string) { }); } +function filterLegacyRunners(ec2runners: RunnerList[]): RunnerList[] { + return ec2runners.filter((ec2Runner) => ec2Runner.Repo || ec2Runner.Org) as RunnerList[]; +} + +function filterNewRunners(ec2runners: RunnerList[]): RunnerInfo[] { + return ec2runners.filter((ec2Runner) => ec2Runner.type) as RunnerInfo[]; +} + export async function scaleDown(): Promise { const scaleDownConfigs = JSON.parse(process.env.SCALE_DOWN_CONFIG) as [ScalingDownConfig]; const environment = process.env.ENVIRONMENT; @@ -166,6 +182,11 @@ export async function scaleDown(): Promise { console.debug(`No active runners found for environment: '${environment}'`); return; } + const legacyRunners = filterLegacyRunners(ec2Runners); + const newRunners = filterNewRunners(ec2Runners); - await evaluateAndRemoveRunners(ec2Runners, scaleDownConfigs, minimumRunningTimeInMinutes); + await evaluateAndRemoveRunners(newRunners, scaleDownConfigs, minimumRunningTimeInMinutes); + legacyRunners.forEach((runner) => { + terminateOrphan(runner.instanceId); + }); } From 04775a594fa1e1045bc0337dd7b1fec7972b47de Mon Sep 17 00:00:00 2001 From: Nathaniel McAuliffe Date: Fri, 20 Aug 2021 09:55:56 -0400 Subject: [PATCH 2/4] Update modules/runners/lambdas/runners/src/scale-runners/scale-down.ts Co-authored-by: Gertjan Maas --- modules/runners/lambdas/runners/src/scale-runners/scale-down.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index f8a26f982f..00066c7ef4 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -133,7 +133,7 @@ async function evaluateAndRemoveRunners( } } if (!(ec2Runners.length === 0)) { - console.info(`${ec2Runners.length} runners itentified as orphans.`); + console.info(`${ec2Runners.length} runners identified as orphans.`); ec2Runners.forEach((runner) => { terminateOrphan(runner.instanceId); }); From 70c9735c79afe087638c49fdb5c3e298f7ef9f98 Mon Sep 17 00:00:00 2001 From: Nathaniel McAuliffe Date: Fri, 20 Aug 2021 10:08:58 -0400 Subject: [PATCH 3/4] Move find index to new function --- .../runners/src/scale-runners/scale-down.ts | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index f8a26f982f..af7c315b03 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -97,6 +97,10 @@ async function removeRunner(ec2runner: RunnerInfo, ghRunnerId: number): Promise< } } +function getIndex(ec2Runners: RunnerInfo[], ec2Runner: RunnerInfo): number { + return ec2Runners.findIndex((runner) => runner.instanceId === ec2Runner.instanceId); +} + async function evaluateAndRemoveRunners( ec2Runners: RunnerInfo[], scaleDownConfigs: ScalingDownConfig[], @@ -107,27 +111,24 @@ async function evaluateAndRemoveRunners( for (const ownerTag of ownerTags) { const ec2RunnersFiltered = ec2Runners.filter((runner) => runner.owner === ownerTag); - for (const ec2runner of ec2RunnersFiltered) { + for (const ec2Runner of ec2RunnersFiltered) { // TODO: This is a bug. Orphaned runners should be terminated no matter how long they have been running. - if (runnerMinimumTimeExceeded(ec2runner, minimumRunningTimeInMinutes)) { - const ghRunners = await listGitHubRunners(ec2runner); - const ghRunner = ghRunners.find((runner) => runner.name === ec2runner.instanceId); + if (runnerMinimumTimeExceeded(ec2Runner, minimumRunningTimeInMinutes)) { + const ghRunners = await listGitHubRunners(ec2Runner); + const ghRunner = ghRunners.find((runner) => runner.name === ec2Runner.instanceId); if (ghRunner) { if (idleCounter > 0) { idleCounter--; - console.debug(`Runner '${ec2runner.instanceId}' will kept idle.`); - const instanceIndex = ec2Runners.findIndex((runner) => runner.instanceId === ec2runner.instanceId); - ec2Runners.splice(instanceIndex, 1); + console.debug(`Runner '${ec2Runner.instanceId}' will kept idle.`); + ec2Runners.splice(getIndex(ec2Runners, ec2Runner), 1); } else { - await removeRunner(ec2runner, ghRunner.id); - const instanceIndex = ec2Runners.findIndex((runner) => runner.instanceId === ec2runner.instanceId); - ec2Runners.splice(instanceIndex, 1); + await removeRunner(ec2Runner, ghRunner.id); + ec2Runners.splice(getIndex(ec2Runners, ec2Runner), 1); } } else { - console.debug(`Runner '${ec2runner.instanceId}' is orphaned and will be removed.`); - terminateOrphan(ec2runner.instanceId); - const instanceIndex = ec2Runners.findIndex((runner) => runner.instanceId === ec2runner.instanceId); - ec2Runners.splice(instanceIndex, 1); + console.debug(`Runner '${ec2Runner.instanceId}' is orphaned and will be removed.`); + terminateOrphan(ec2Runner.instanceId); + ec2Runners.splice(getIndex(ec2Runners, ec2Runner), 1); } } } From b54117f475096967f073857006634d82542a288b Mon Sep 17 00:00:00 2001 From: Nathaniel McAuliffe Date: Fri, 20 Aug 2021 10:20:03 -0400 Subject: [PATCH 4/4] Removing old comment --- modules/runners/lambdas/runners/src/scale-runners/scale-down.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts index 8dd18cec95..bb4e11035c 100644 --- a/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts +++ b/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts @@ -112,7 +112,6 @@ async function evaluateAndRemoveRunners( for (const ownerTag of ownerTags) { const ec2RunnersFiltered = ec2Runners.filter((runner) => runner.owner === ownerTag); for (const ec2Runner of ec2RunnersFiltered) { - // TODO: This is a bug. Orphaned runners should be terminated no matter how long they have been running. if (runnerMinimumTimeExceeded(ec2Runner, minimumRunningTimeInMinutes)) { const ghRunners = await listGitHubRunners(ec2Runner); const ghRunner = ghRunners.find((runner) => runner.name === ec2Runner.instanceId);