Skip to content

Commit 8aaad0c

Browse files
feat: add docker autoscaler executor (#1118)
## Description Provides a new executor using the new GitLab autoscaler executor. I've been using the fleeting plugin for AWS only. Prerequisite: Docker must already be installed on the AMI used by worker machines (the Docker autoscaler does not install it, unlike the Docker machine). Additionally, the user used to connect to the workers must also be added to the Docker group. Related to issue #624 ## Verification Built an AMI with Docker based on Amazon Linux 2023. Set up the new executor according to the example. Works! --------- Co-authored-by: Matthias Kay <[email protected]>
1 parent 8e92bba commit 8aaad0c

30 files changed

+1002
-410
lines changed

.cspell.json

+8
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,22 @@
1010
"awscli",
1111
"boto",
1212
"botocore",
13+
"buildx",
1314
"certdir",
1415
"checkmarx",
1516
"concat",
17+
"containerd",
1618
"devskim",
1719
"dind",
20+
"dpkg",
1821
"endfor",
1922
"filesha",
2023
"formatlist",
2124
"glrt",
2225
"glrunners",
2326
"hmarr",
2427
"instancelifecycle",
28+
"keyrings",
2529
"kics",
2630
"joho",
2731
"jsonencode",
@@ -57,22 +61,26 @@
5761
"tonumber",
5862
"trimprefix",
5963
"trivy",
64+
"usermod",
6065
"userns",
6166
"xanzy",
6267
"xvda"
6368
],
6469
"words": [
6570
"aquasecurity",
6671
"automerge",
72+
"autoscaler",
6773
"backports",
6874
"blockquotes",
75+
"bluegreen",
6976
"codeowners",
7077
"cpu",
7178
"cpus",
7279
"cpuset",
7380
"gitter",
7481
"Niek",
7582
"oxsecurity",
83+
"rebalance",
7684
"signoff",
7785
"typecheck",
7886
"userdata",

.github/workflows/ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ jobs:
3939
[
4040
"runner-default",
4141
"runner-docker",
42-
"runner-pre-registered",
42+
"runner-fleeting-plugin",
4343
"runner-public",
4444
"runner-certificates",
4545
]

data.tf

+16
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,19 @@ data "aws_ami" "docker-machine" {
4141

4242
owners = var.runner_worker_docker_machine_ami_owners
4343
}
44+
45+
data "aws_ami" "docker-autoscaler" {
46+
count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0
47+
48+
most_recent = "true"
49+
50+
dynamic "filter" {
51+
for_each = var.runner_worker_docker_autoscaler_ami_filter
52+
content {
53+
name = filter.key
54+
values = filter.value
55+
}
56+
}
57+
58+
owners = var.runner_worker_docker_autoscaler_ami_owners
59+
}

docker_autoscaler.tf

+198
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
#
2+
# This file is responsible for creating the resources needed to run the docker autoscaler plugin from GitLab. It replaces the
3+
# outdated docker+machine driver. The docker+machine driver is a legacy driver that is no longer maintained by GitLab.
4+
#
5+
6+
resource "aws_security_group" "docker_autoscaler" {
7+
count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0
8+
9+
description = "Docker autoscaler security group"
10+
vpc_id = var.vpc_id
11+
name = "${local.name_sg}-docker-autoscaler"
12+
13+
tags = merge(
14+
local.tags,
15+
{
16+
"Name" = format("%s", local.name_sg)
17+
},
18+
)
19+
}
20+
21+
resource "aws_security_group_rule" "autoscaler_egress" {
22+
count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0
23+
24+
description = "All egress traffic docker autoscaler"
25+
type = "egress"
26+
from_port = 0
27+
to_port = 0
28+
protocol = "-1"
29+
cidr_blocks = ["0.0.0.0/0"]
30+
security_group_id = join("", aws_security_group.docker_autoscaler[*].id)
31+
}
32+
33+
resource "aws_security_group_rule" "autoscaler_ingress" {
34+
count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0
35+
36+
description = "All ingress traffic from runner security group"
37+
type = "ingress"
38+
from_port = 0
39+
to_port = 0
40+
protocol = "-1"
41+
source_security_group_id = aws_security_group.runner.id
42+
security_group_id = join("", aws_security_group.docker_autoscaler[*].id)
43+
}
44+
45+
resource "aws_security_group_rule" "extra_autoscaler_ingress" {
46+
count = var.runner_worker.type == "docker-autoscaler" ? length(var.runner_worker_docker_autoscaler_asg.sg_ingresses) : 0
47+
48+
description = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].description
49+
type = "ingress"
50+
from_port = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].from_port
51+
to_port = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].to_port
52+
protocol = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].protocol
53+
cidr_blocks = var.runner_worker_docker_autoscaler_asg.sg_ingresses[count.index].cidr_blocks
54+
security_group_id = join("", aws_security_group.docker_autoscaler[*].id)
55+
}
56+
57+
####################################
58+
###### Launch template Workers #####
59+
####################################
60+
resource "aws_launch_template" "this" {
61+
count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0
62+
63+
name = "${local.name_runner_agent_instance}-worker-launch-template"
64+
user_data = base64gzip(var.runner_worker_docker_autoscaler_instance.start_script)
65+
image_id = data.aws_ami.docker-autoscaler[0].id
66+
instance_type = var.runner_worker_docker_autoscaler_asg.types[0]
67+
key_name = aws_key_pair.autoscaler[0].key_name
68+
ebs_optimized = var.runner_worker_docker_autoscaler_instance.ebs_optimized
69+
70+
monitoring {
71+
enabled = var.runner_worker_docker_autoscaler_instance.monitoring
72+
}
73+
74+
iam_instance_profile {
75+
name = aws_iam_instance_profile.docker_autoscaler[0].name
76+
}
77+
78+
network_interfaces {
79+
security_groups = [aws_security_group.docker_autoscaler[0].id]
80+
associate_public_ip_address = !var.runner_worker_docker_autoscaler_instance.private_address_only
81+
}
82+
83+
block_device_mappings {
84+
device_name = var.runner_worker_docker_autoscaler_instance.root_device_name
85+
86+
ebs {
87+
volume_size = var.runner_worker_docker_autoscaler_instance.root_size
88+
volume_type = var.runner_worker_docker_autoscaler_instance.volume_type
89+
iops = contains(["gp3", "io1", "io2"], var.runner_worker_docker_autoscaler_instance.volume_type) ? var.runner_worker_docker_autoscaler_instance.volume_iops : null
90+
throughput = var.runner_worker_docker_autoscaler_instance.volume_type == "gp3" ? var.runner_worker_docker_autoscaler_instance.volume_throughput : null
91+
}
92+
}
93+
94+
tag_specifications {
95+
resource_type = "instance"
96+
tags = local.tags
97+
}
98+
99+
tag_specifications {
100+
resource_type = "volume"
101+
tags = local.tags
102+
}
103+
104+
tags = local.tags
105+
106+
metadata_options {
107+
http_tokens = var.runner_worker_docker_autoscaler_instance.http_tokens
108+
http_put_response_hop_limit = var.runner_worker_docker_autoscaler_instance.http_put_response_hop_limit
109+
instance_metadata_tags = "enabled"
110+
}
111+
112+
lifecycle {
113+
create_before_destroy = true
114+
}
115+
}
116+
117+
#########################################
118+
# Autoscaling group with launch template
119+
#########################################
120+
# false positive, tags are created with "dynamic" block
121+
# kics-scan ignore-line
122+
resource "aws_autoscaling_group" "autoscaler" {
123+
count = var.runner_worker.type == "docker-autoscaler" ? 1 : 0
124+
125+
name = "${local.name_runner_agent_instance}-asg"
126+
capacity_rebalance = false
127+
protect_from_scale_in = true
128+
129+
dynamic "launch_template" {
130+
for_each = var.runner_worker_docker_autoscaler_asg.enable_mixed_instances_policy ? [] : [1]
131+
content {
132+
id = aws_launch_template.this[0].id
133+
version = aws_launch_template.this[0].latest_version
134+
}
135+
}
136+
137+
dynamic "mixed_instances_policy" {
138+
for_each = var.runner_worker_docker_autoscaler_asg.enable_mixed_instances_policy ? [1] : []
139+
140+
content {
141+
instances_distribution {
142+
on_demand_base_capacity = var.runner_worker_docker_autoscaler_asg.on_demand_base_capacity
143+
on_demand_percentage_above_base_capacity = var.runner_worker_docker_autoscaler_asg.on_demand_percentage_above_base_capacity
144+
spot_allocation_strategy = var.runner_worker_docker_autoscaler_asg.spot_allocation_strategy
145+
spot_instance_pools = var.runner_worker_docker_autoscaler_asg.spot_instance_pools
146+
}
147+
launch_template {
148+
launch_template_specification {
149+
launch_template_id = aws_launch_template.this[0].id
150+
version = aws_launch_template.this[0].latest_version
151+
}
152+
dynamic "override" {
153+
for_each = var.runner_worker_docker_autoscaler_asg.types
154+
content {
155+
instance_type = override.value
156+
}
157+
}
158+
}
159+
}
160+
}
161+
162+
dynamic "instance_refresh" {
163+
for_each = var.runner_worker_docker_autoscaler_asg.upgrade_strategy == "rolling" ? [1] : []
164+
content {
165+
strategy = "Rolling"
166+
preferences {
167+
min_healthy_percentage = var.runner_worker_docker_autoscaler_asg.instance_refresh_min_healthy_percentage
168+
}
169+
triggers = var.runner_worker_docker_autoscaler_asg.instance_refresh_triggers
170+
}
171+
}
172+
173+
vpc_zone_identifier = var.runner_worker_docker_autoscaler_asg.subnet_ids
174+
max_size = var.runner_worker.max_jobs
175+
min_size = 0
176+
desired_capacity = 0 # managed by the fleeting plugin
177+
health_check_grace_period = var.runner_worker_docker_autoscaler_asg.health_check_grace_period
178+
health_check_type = var.runner_worker_docker_autoscaler_asg.health_check_type
179+
force_delete = true
180+
181+
dynamic "tag" {
182+
for_each = local.tags
183+
content {
184+
key = tag.key
185+
value = tag.value
186+
propagate_at_launch = true
187+
}
188+
}
189+
190+
lifecycle {
191+
# do not change these values as we would immediately scale up/down, which is not wanted
192+
ignore_changes = [
193+
desired_capacity,
194+
min_size,
195+
max_size
196+
]
197+
}
198+
}

docs/usage.md

+73
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,79 @@ module "runner" {
144144
}
145145
```
146146

147+
### Scenario: Use of Docker autoscaler
148+
149+
Docker Machine is no longer maintained by Docker, so GitLab recently developed the Docker Autoscaler executor to replace it (still in beta). This module provides an option to try it out.
150+
151+
Tested with amazon-linux-2-x86 as the runner manager and ubuntu-server-22-lts-x86 as the runner worker. The following commands were run on top of the original runner worker AMI so that the Docker autoscaler works correctly:
152+
153+
```bash
154+
# Install docker
155+
# Add Docker's official GPG key:
156+
apt-get update
157+
apt-get install -y ca-certificates curl
158+
install -m 0755 -d /etc/apt/keyrings
159+
curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
160+
chmod a+r /etc/apt/keyrings/docker.asc
161+
162+
# Add the repository to Apt sources:
163+
echo \
164+
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
165+
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
166+
tee /etc/apt/sources.list.d/docker.list > /dev/null
167+
apt-get update
168+
169+
apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
170+
usermod -aG docker ubuntu
171+
```
172+
173+
#### Configuration example
174+
175+
```hcl
176+
module "runner" {
177+
# https://registry.terraform.io/modules/cattle-ops/gitlab-runner/aws/
178+
source = "cattle-ops/gitlab-runner/aws"
179+
180+
vpc_id = module.vpc.vpc_id
181+
subnet_id = element(module.vpc.private_subnets, 0)
182+
183+
runner_gitlab = {
184+
tag_list = "runner_worker"
185+
type = "instance"
186+
url = "https://gitlab.com"
187+
188+
preregistered_runner_token_ssm_parameter_name = "my-gitlab-runner-token-ssm-parameter-name"
189+
}
190+
191+
runner_manager = {
192+
maximum_concurrent_jobs = 5
193+
}
194+
195+
runner_worker = {
196+
max_jobs = 5
197+
request_concurrency = 5
198+
type = "docker-autoscaler"
199+
}
200+
201+
runner_worker_docker_autoscaler_asg = {
202+
on_demand_percentage_above_base_capacity = 0
203+
enable_mixed_instances_policy = true
204+
idle_time = 600
205+
subnet_ids = module.vpc.private_subnets
206+
types = ["t3a.medium", "t3.medium"]
207+
volume_type = "gp3"
208+
private_address_only = true
209+
ebs_optimized = true
210+
root_size = 40
211+
}
212+
213+
runner_worker_docker_autoscaler = {
214+
connector_config_user = "ubuntu"
215+
}
216+
217+
}
218+
```
219+
147220
## Examples
148221

149222
A few [examples](https://github.com/cattle-ops/terraform-aws-gitlab-runner/tree/main/examples/) are provided. Use the

examples/README.md

+4-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
Please see the README in each example for more details:
44

5+
- [runner-certificates](runner-certificates/) Example showing how to add custom TLS certificates to the runner
56
- [runner-default](runner-default/) The default setup, private subnet, auto register, runner on spot instances.
6-
- [runner-public](runner-public/) Runner in a public subnet, auto register, runner on spot instances.
77
- [runner-docker](runner-docker/) Runners run on the same instance as the agent.
8-
- [runner-pre-registered](runner-pre-registered) Runner needs to be preregistered, old setup DEPRECATED.
8+
- [runner-fleeting-plugin](runner-fleeting-plugin/) Runners using the AWS fleeting plugin
9+
- [runner-public](runner-public/) Runner in a public subnet, auto register, runner on spot instances.
10+

examples/runner-certificates/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Create a PEM-encoded `.crt` file containing the public certificate of your Gitla
3333
```hcl
3434
module {
3535
# ...
36-
# Public cert of my companys gitlab instance
36+
# Public cert of my company's gitlab instance
3737
runner_gitlab = {
3838
certificate = file("${path.module}/my_gitlab_instance_cert.crt")
3939
}

examples/runner-default/generated/.gitkeep

Whitespace-only changes.

0 commit comments

Comments
 (0)