Skip to content

Commit d17a388

Browse files
committed
feat: add lambda function to cleanup org runners
Add a Lambda function to clean up offline runners in a GitHub organization. Normally runners are cleaned up automatically, but when using ephemeral runners on spot instances, the call to the GitHub API that removes the runner may never happen, leaving the runner in the list in an offline state. This Lambda function is triggered by a CloudWatch event and removes any organization runners that are offline and whose labels match the configuration.
1 parent 0999ea5 commit d17a388

File tree

10 files changed

+627
-1
lines changed

10 files changed

+627
-1
lines changed

lambdas/.vscode/settings.json

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"jest.jestCommandLine": "yarn run test --",
3+
}

lambdas/functions/control-plane/src/lambda.test.ts

+34-1
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,23 @@
11
import { captureLambdaHandler, logger } from '@aws-github-runner/aws-powertools-util';
22
import { Context, SQSEvent, SQSRecord } from 'aws-lambda';
33

4-
import { addMiddleware, adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper, jobRetryCheck } from './lambda';
4+
import {
5+
addMiddleware,
6+
adjustPool,
7+
scaleDownHandler,
8+
scaleUpHandler,
9+
ssmHousekeeper,
10+
jobRetryCheck,
11+
cleanupOrgRunnersHandler,
12+
} from './lambda';
513
import { adjust } from './pool/pool';
614
import ScaleError from './scale-runners/ScaleError';
715
import { scaleDown } from './scale-runners/scale-down';
816
import { ActionRequestMessage, scaleUp } from './scale-runners/scale-up';
917
import { cleanSSMTokens } from './scale-runners/ssm-housekeeper';
1018
import { checkAndRetryJob } from './scale-runners/job-retry';
1119
import { describe, it, expect, vi, MockedFunction } from 'vitest';
20+
import { cleanupOrgRunners } from './scale-runners/cleanup-org-runners';
1221

1322
const body: ActionRequestMessage = {
1423
eventType: 'workflow_job',
@@ -66,6 +75,7 @@ vi.mock('./scale-runners/scale-down');
6675
vi.mock('./scale-runners/scale-up');
6776
vi.mock('./scale-runners/ssm-housekeeper');
6877
vi.mock('./scale-runners/job-retry');
78+
vi.mock('./scale-runners/cleanup-org-runners');
6979
vi.mock('@aws-github-runner/aws-powertools-util');
7080
vi.mock('@aws-github-runner/aws-ssm-util');
7181

@@ -226,3 +236,26 @@ describe('Test job retry check wrapper', () => {
226236
expect(logSpyWarn).toHaveBeenCalledWith(`Error processing job retry: ${error.message}`, { error });
227237
});
228238
});
239+
240+
// Tests for the cleanupOrgRunnersHandler wrapper: the handler must always
// resolve, whether the underlying cleanup succeeds or rejects, and must log
// the error in the failure case.
describe('Test cleanupOrgRunnersHandler lambda wrapper', () => {
  it('Cleanup without error should resolve.', async () => {
    // This file imports its test helpers from vitest, so the mock helper is
    // `vi.mocked`; a bare `mocked` is not in scope here.
    const mock = vi.mocked(cleanupOrgRunners);
    mock.mockImplementation(() => Promise.resolve());

    await expect(cleanupOrgRunnersHandler({}, context)).resolves.not.toThrow();
  });

  it('Cleanup with error should resolve and log error.', async () => {
    // `vi.spyOn` rather than `jest.spyOn`: no `jest` object is imported in this file.
    const logSpyError = vi.spyOn(logger, 'error');

    const mock = vi.mocked(cleanupOrgRunners);
    const error = new Error('Error cleaning up org runners.');
    mock.mockRejectedValue(error);

    // The handler swallows the rejection and reports it through the logger.
    await expect(cleanupOrgRunnersHandler({}, context)).resolves.not.toThrow();
    expect(logSpyError).toHaveBeenCalledWith(expect.stringContaining(error.message), expect.anything());
  });
});

lambdas/functions/control-plane/src/lambda.ts

+14
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { scaleDown } from './scale-runners/scale-down';
99
import { scaleUp } from './scale-runners/scale-up';
1010
import { SSMCleanupOptions, cleanSSMTokens } from './scale-runners/ssm-housekeeper';
1111
import { checkAndRetryJob } from './scale-runners/job-retry';
12+
import { cleanupOrgRunners } from './scale-runners/cleanup-org-runners';
1213

1314
export async function scaleUpHandler(event: SQSEvent, context: Context): Promise<void> {
1415
setContext(context, 'lambda.ts');
@@ -64,6 +65,8 @@ export const addMiddleware = () => {
6465
middy(scaleDownHandler).use(handler);
6566
middy(adjustPool).use(handler);
6667
middy(ssmHousekeeper).use(handler);
68+
middy(jobRetryCheck).use(handler);
69+
middy(cleanupOrgRunnersHandler).use(handler);
6770
};
6871
addMiddleware();
6972

@@ -91,3 +94,14 @@ export async function jobRetryCheck(event: SQSEvent, context: Context): Promise<
9194
}
9295
return Promise.resolve();
9396
}
97+
98+
/**
 * Lambda entry point that runs the organization runner cleanup.
 *
 * Registers the invocation context with the logger, logs the incoming event
 * when event logging is enabled, and then awaits `cleanupOrgRunners()`.
 * Any failure raised by the cleanup is logged at error level and swallowed,
 * so this handler itself does not reject because of a cleanup failure.
 *
 * @param event - The triggering event; it is only logged, never inspected.
 * @param context - The Lambda invocation context, passed to `setContext`.
 * @returns A promise that resolves once the cleanup attempt has finished.
 */
export async function cleanupOrgRunnersHandler(event: unknown, context: Context): Promise<void> {
  setContext(context, 'lambda.ts');
  logger.logEventIfEnabled(event);

  try {
    await cleanupOrgRunners();
  } catch (e: unknown) {
    // A rejection is not guaranteed to be an Error instance; normalise it so
    // the log message is never the literal string "undefined".
    const error = e instanceof Error ? e : new Error(String(e));
    logger.error(`${error.message}`, { error });
  }
}

0 commit comments

Comments
 (0)