Skip to content

feat(runners): add support for looking up pre-built runner AMI ID from an SSM parameter at instance launch time #2520

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Oct 31, 2022
Merged
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,8 @@ export interface GithubWorkflowEvent {
This extendible format allows to add more fields to be added if needed.
You can configure the queue by setting properties to `workflow_job_events_queue_config`

NOTE: By default, a runner AMI update requires a re-apply of this terraform config (the runner AMI ID is looked up by a terraform data source). To avoid this, you can use `ami_id_ssm_parameter_name` to have the scale-up lambda dynamically lookup the runner AMI ID from an SSM parameter at instance launch time. Said SSM parameter is managed outside of this module (e.g. by a runner AMI build workflow).

## Examples

Examples are located in the [examples](./examples) directory. The following examples are provided:
Expand Down Expand Up @@ -419,6 +421,7 @@ We welcome any improvement to the standard module to make the default as secure
|------|-------------|------|---------|:--------:|
| <a name="input_ami_filter"></a> [ami\_filter](#input\_ami\_filter) | List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used. | `map(list(string))` | `null` | no |
| <a name="input_ami_owners"></a> [ami\_owners](#input\_ami\_owners) | The list of owners used to select the AMI of action runner instances. | `list(string)` | <pre>[<br> "amazon"<br>]</pre> | no |
| <a name="input_ami_id_ssm_parameter_name"></a> [ami\_id\_ssm\_parameter\_name](#input\_ami\_id\_ssm\_parameter\_name) | Optional SSM parameter that contains the runner AMI ID to launch instances from. Overrides `ami_filter`. The parameter value is managed outside of this module (e.g. in a runner AMI build workflow). This allows for AMI updates without having to re-apply this terraform config. | `string` | `null` | no |
| <a name="input_aws_partition"></a> [aws\_partition](#input\_aws\_partition) | (optiona) partition in the arn namespace to use if not 'aws' | `string` | `"aws"` | no |
| <a name="input_aws_region"></a> [aws\_region](#input\_aws\_region) | AWS region. | `string` | n/a | yes |
| <a name="input_block_device_mappings"></a> [block\_device\_mappings](#input\_block\_device\_mappings) | The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`. | <pre>list(object({<br> delete_on_termination = bool<br> device_name = string<br> encrypted = bool<br> iops = number<br> kms_key_id = string<br> snapshot_id = string<br> throughput = number<br> volume_size = number<br> volume_type = string<br> }))</pre> | <pre>[<br> {<br> "delete_on_termination": true,<br> "device_name": "/dev/xvda",<br> "encrypted": true,<br> "iops": null,<br> "kms_key_id": null,<br> "snapshot_id": null,<br> "throughput": null,<br> "volume_size": 30,<br> "volume_type": "gp3"<br> }<br>]</pre> | no |
Expand Down
4 changes: 4 additions & 0 deletions examples/prebuilt/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ module "runners" {
ami_filter = { name = [var.ami_name_filter] }
ami_owners = [data.aws_caller_identity.current.account_id]

# Look up runner AMI ID from an AWS SSM parameter (overrides ami_filter at instance launch time)
# NOTE: the parameter must be managed outside of this module (e.g. in a runner AMI build workflow)
# ami_id_ssm_parameter_name = "my-runner-ami-id"

# disable binary syncer since github agent is already installed in the AMI.
enable_runner_binaries_syncer = false

Expand Down
7 changes: 4 additions & 3 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,10 @@ module "runners" {
instance_max_spot_price = var.instance_max_spot_price
block_device_mappings = var.block_device_mappings

runner_architecture = var.runner_architecture
ami_filter = var.ami_filter
ami_owners = var.ami_owners
runner_architecture = var.runner_architecture
ami_filter = var.ami_filter
ami_owners = var.ami_owners
ami_id_ssm_parameter_name = var.ami_id_ssm_parameter_name

sqs_build_queue = aws_sqs_queue.queued_builds
github_app_parameters = local.github_app_parameters
Expand Down
58 changes: 56 additions & 2 deletions modules/runners/lambdas/runners/src/aws/runners.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import ScaleError from './../scale-runners/ScaleError';
import { RunnerInfo, RunnerInputParameters, createRunner, listEC2Runners, terminateRunner } from './runners';

const mockEC2 = { describeInstances: jest.fn(), createFleet: jest.fn(), terminateInstances: jest.fn() };
const mockSSM = { putParameter: jest.fn() };
const mockSSM = { putParameter: jest.fn(), getParameter: jest.fn() };
jest.mock('aws-sdk', () => ({
EC2: jest.fn().mockImplementation(() => mockEC2),
SSM: jest.fn().mockImplementation(() => mockSSM),
Expand Down Expand Up @@ -170,6 +170,8 @@ describe('terminate runner', () => {
describe('create runner', () => {
const mockCreateFleet = { promise: jest.fn() };
const mockPutParameter = { promise: jest.fn() };
const mockGetParameter = { promise: jest.fn() };

const defaultRunnerConfig: RunnerConfig = {
allocationStrategy: 'capacity-optimized',
capacityType: 'spot',
Expand All @@ -191,6 +193,8 @@ describe('create runner', () => {
Instances: [{ InstanceIds: ['i-1234'] }],
});
mockSSM.putParameter.mockImplementation(() => mockPutParameter);

mockSSM.getParameter.mockImplementation(() => mockGetParameter);
});

it('calls create fleet of 1 instance with the correct config for repo', async () => {
Expand Down Expand Up @@ -259,6 +263,21 @@ describe('create runner', () => {
await expect(createRunner(createRunnerConfig(defaultRunnerConfig))).rejects.toThrowError(Error);
expect(mockSSM.putParameter).not.toBeCalled();
});

it('uses ami id from ssm parameter when ami id ssm param is specified', async () => {
const paramValue: AWS.SSM.GetParameterResult = {
Parameter: {
Value: 'ami-123',
},
};
mockGetParameter.promise.mockReturnValue(paramValue);
await createRunner(createRunnerConfig({ ...defaultRunnerConfig, amiIdSsmParameterName: 'my-ami-id-param' }));
const expectedRequest = expectedCreateFleetRequest({ ...defaultExpectedFleetRequestValues, imageId: 'ami-123' });
expect(mockEC2.createFleet).toBeCalledWith(expectedRequest);
expect(mockSSM.getParameter).toBeCalledWith({
Name: 'my-ami-id-param',
});
});
});

describe('create runner with errors', () => {
Expand All @@ -279,6 +298,10 @@ describe('create runner with errors', () => {
const mockPutParameter = { promise: jest.fn() };

mockSSM.putParameter.mockImplementation(() => mockPutParameter);

const mockGetParameter = { promise: jest.fn() };

mockSSM.getParameter.mockImplementation(() => mockGetParameter);
});

it('test ScaleError with one error.', async () => {
Expand Down Expand Up @@ -326,6 +349,22 @@ describe('create runner with errors', () => {
expect(mockEC2.createFleet).toBeCalledWith(expectedCreateFleetRequest(defaultExpectedFleetRequestValues));
expect(mockSSM.putParameter).not.toBeCalled();
});

it('test error in ami id lookup from ssm parameter', async () => {
mockSSM.getParameter.mockImplementation(() => {
return {
promise: jest.fn().mockImplementation(() => {
throw Error('Wow, such transient');
}),
};
});

await expect(
createRunner(createRunnerConfig({ ...defaultRunnerConfig, amiIdSsmParameterName: 'my-ami-id-param' })),
).rejects.toBeInstanceOf(Error);
expect(mockEC2.createFleet).not.toBeCalled();
expect(mockSSM.putParameter).not.toBeCalled();
});
});

function createFleetMockWithErrors(errors: string[], instances?: string[]) {
Expand Down Expand Up @@ -354,6 +393,7 @@ interface RunnerConfig {
capacityType: EC2.DefaultTargetCapacityType;
allocationStrategy: EC2.AllocationStrategy;
maxSpotPrice?: string;
amiIdSsmParameterName?: string;
}

function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters {
Expand All @@ -370,6 +410,7 @@ function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters {
instanceAllocationStrategy: runnerConfig.allocationStrategy,
},
subnets: ['subnet-123', 'subnet-456'],
amiIdSsmParameterName: runnerConfig.amiIdSsmParameterName,
};
}

Expand All @@ -379,10 +420,11 @@ interface ExpectedFleetRequestValues {
allocationStrategy: EC2.AllocationStrategy;
maxSpotPrice?: string;
totalTargetCapacity: number;
imageId?: string;
}

function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues): AWS.EC2.CreateFleetRequest {
return {
const request: AWS.EC2.CreateFleetRequest = {
LaunchTemplateConfigs: [
{
LaunchTemplateSpecification: {
Expand Down Expand Up @@ -429,4 +471,16 @@ function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues):
},
Type: 'instant',
};

if (expectedValues.imageId) {
for (const config of request.LaunchTemplateConfigs) {
if (config.Overrides) {
for (const override of config.Overrides) {
override.ImageId = expectedValues.imageId;
}
}
}
}

return request;
}
42 changes: 37 additions & 5 deletions modules/runners/lambdas/runners/src/aws/runners.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export interface RunnerInputParameters {
instanceAllocationStrategy: EC2.SpotAllocationStrategy;
};
numberOfRunners?: number;
amiIdSsmParameterName?: string;
}

export async function listEC2Runners(filters: ListRunnerFilters | undefined = undefined): Promise<RunnerList[]> {
Expand Down Expand Up @@ -107,17 +108,27 @@ export async function terminateRunner(instanceId: string): Promise<void> {
logger.info(`Runner ${instanceId} has been terminated.`, LogFields.print());
}

function generateFleeOverrides(
function generateFleetOverrides(
subnetIds: string[],
instancesTypes: string[],
amiId?: string,
): EC2.FleetLaunchTemplateOverridesListRequest {
type Override = {
SubnetId: string;
InstanceType: string;
ImageId?: string;
};
const result: EC2.FleetLaunchTemplateOverridesListRequest = [];
subnetIds.forEach((s) => {
instancesTypes.forEach((i) => {
result.push({
const item: Override = {
SubnetId: s,
InstanceType: i,
});
};
if (amiId) {
item.ImageId = amiId;
}
result.push(item);
});
});
return result;
Expand All @@ -127,6 +138,27 @@ export async function createRunner(runnerParameters: RunnerInputParameters): Pro
logger.debug('Runner configuration: ' + JSON.stringify(runnerParameters), LogFields.print());

const ec2 = new EC2();
const ssm = new SSM();

let amiIdOverride = undefined;

if (runnerParameters.amiIdSsmParameterName) {
logger.debug(`Looking up runner AMI ID from SSM parameter: ${runnerParameters.amiIdSsmParameterName}`);
try {
const result: AWS.SSM.GetParameterResult = await ssm
.getParameter({ Name: runnerParameters.amiIdSsmParameterName })
.promise();
amiIdOverride = result.Parameter?.Value;
} catch (e) {
logger.error(
`Failed to lookup runner AMI ID from SSM parameter: ${runnerParameters.amiIdSsmParameterName}. ` +
'Please ensure that the given parameter exists on this region and contains a valid runner AMI ID',
e,
);
throw e;
}
}

const numberOfRunners = runnerParameters.numberOfRunners ? runnerParameters.numberOfRunners : 1;

let fleet: AWS.EC2.CreateFleetResult;
Expand All @@ -140,9 +172,10 @@ export async function createRunner(runnerParameters: RunnerInputParameters): Pro
LaunchTemplateName: runnerParameters.launchTemplateName,
Version: '$Default',
},
Overrides: generateFleeOverrides(
Overrides: generateFleetOverrides(
runnerParameters.subnets,
runnerParameters.ec2instanceCriteria.instanceTypes,
amiIdOverride,
),
},
],
Expand Down Expand Up @@ -202,7 +235,6 @@ export async function createRunner(runnerParameters: RunnerInputParameters): Pro

logger.info('Created instance(s): ', instances.join(','), LogFields.print());

const ssm = new SSM();
for (const instance of instances) {
await ssm
.putParameter({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,12 @@ describe('scaleUp with GHES', () => {
];
expect(createRunner).toBeCalledWith(expectedRunnerParams);
});

it('creates a runner with ami id override from ssm parameter', async () => {
process.env.AMI_ID_SSM_PARAMETER_NAME = 'my-ami-id-param';
await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);
expect(createRunner).toBeCalledWith({ ...expectedRunnerParams, amiIdSsmParameterName: 'my-ami-id-param' });
});
});

describe('on repo level', () => {
Expand Down
3 changes: 3 additions & 0 deletions modules/runners/lambdas/runners/src/scale-runners/scale-up.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ interface CreateEC2RunnerConfig {
launchTemplateName: string;
ec2instanceCriteria: RunnerInputParameters['ec2instanceCriteria'];
numberOfRunners?: number;
amiIdSsmParameterName?: string;
}

function generateRunnerServiceConfig(githubRunnerConfig: CreateGitHubRunnerConfig, token: string) {
Expand Down Expand Up @@ -159,6 +160,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
const instanceMaxSpotPrice = process.env.INSTANCE_MAX_SPOT_PRICE;
const instanceAllocationStrategy = process.env.INSTANCE_ALLOCATION_STRATEGY || 'lowest-price'; // same as AWS default
const enableJobQueuedCheck = yn(process.env.ENABLE_JOB_QUEUED_CHECK, { default: true });
const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME;

if (ephemeralEnabled && payload.eventType !== 'workflow_job') {
logger.warn(
Expand Down Expand Up @@ -222,6 +224,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
environment,
launchTemplateName,
subnets,
amiIdSsmParameterName,
},
githubInstallationClient,
);
Expand Down
23 changes: 23 additions & 0 deletions modules/runners/scale-up.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ resource "aws_lambda_function" "scale_up" {
RUNNER_GROUP_NAME = var.runner_group_name
RUNNERS_MAXIMUM_COUNT = var.runners_maximum_count
SUBNET_IDS = join(",", var.subnet_ids)
AMI_ID_SSM_PARAMETER_NAME = var.ami_id_ssm_parameter_name
}
}

Expand Down Expand Up @@ -110,3 +111,25 @@ resource "aws_iam_role_policy_attachment" "scale_up_vpc_execution_role" {
role = aws_iam_role.scale_up.name
policy_arn = "arn:${var.aws_partition}:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole"
}

resource "aws_iam_role_policy" "ami_id_ssm_parameter_read" {
count = var.ami_id_ssm_parameter_name != null ? 1 : 0
name = "${var.prefix}-ami-id-ssm-parameter-read"
role = aws_iam_role.scale_up.name
policy = <<-JSON
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"ssm:GetParameter"
],
"Resource": [
"arn:aws:ssm:${var.aws_region}:${data.aws_caller_identity.current.account_id}:parameter/${trimprefix(var.ami_id_ssm_parameter_name, "/")}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the partition might need to be parameterized here, so that it can run in the various govcloud implementations
(I believe it's already part of some of the options passed in to various modules, maybe not this one)

]
}
]
}
JSON
}
6 changes: 6 additions & 0 deletions modules/runners/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,12 @@ variable "ami_owners" {
default = ["amazon"]
}

variable "ami_id_ssm_parameter_name" {
description = "Externally managed SSM parameter (of data type aws:ec2:image) that contains the AMI ID to launch runner instances from. Overrides ami_filter"
type = string
default = null
}

variable "enabled_userdata" {
description = "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI"
type = bool
Expand Down
8 changes: 8 additions & 0 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -302,11 +302,19 @@ variable "ami_filter" {
type = map(list(string))
default = null
}

variable "ami_owners" {
description = "The list of owners used to select the AMI of action runner instances."
type = list(string)
default = ["amazon"]
}

variable "ami_id_ssm_parameter_name" {
description = "Externally managed SSM parameter (of data type aws:ec2:image) that contains the AMI ID to launch runner instances from. Overrides ami_filter"
type = string
default = null
}

variable "lambda_s3_bucket" {
description = "S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly."
default = null
Expand Down