1
1
"""
2
- AWS Lambda function to terminate orphaned GitLab runners.
2
+ AWS Lambda function to terminate orphaned GitLab runners and remove unused resources.
3
3
4
- This checks for running GitLab runner instances and terminates them,
5
- intended to be triggered by an ASG life cycle hook at instance termination.
4
+ - This checks for running GitLab runner instances and terminates them, intended to be triggered by an ASG life cycle hook at
5
+ instance termination.
6
+ - Removes unused SSH key pairs (named `runner-*`) that belong to this module's executor instances
6
7
7
- https://github.com/npalm/terraform-aws-gitlab-runner/issues/317 has some
8
- discussion about this scenario.
8
+ https://github.com/npalm/terraform-aws-gitlab-runner/issues/317 has some discussion about this scenario.
9
9
10
10
This is rudimentary and doesn't check if a build runner has a current job.
11
11
"""
12
12
import boto3
13
+ import botocore
13
14
import json
15
+ import os
14
16
15
- def ec2_list (client , ** args ):
16
17
18
+ def ec2_list (client , ** args ):
17
19
print (json .dumps ({
18
20
"Level" : "info" ,
19
21
"Message" : f"Searching for children of GitLab runner instance { args ['parent' ]} "
20
22
}))
21
23
22
24
ec2_instances = client .describe_instances (Filters = [
23
- {
24
- "Name" : "instance-state-name" ,
25
- "Values" : ['running' , 'pending' ],
26
- },
27
- {
28
- "Name" : "tag:gitlab-runner-parent-id" ,
29
- "Values" : ["*" ]
30
- }
31
- ]).get ("Reservations" )
25
+ {
26
+ "Name" : "instance-state-name" ,
27
+ "Values" : ['running' , 'pending' ],
28
+ },
29
+ {
30
+ "Name" : "tag:gitlab-runner-parent-id" ,
31
+ "Values" : ["*" ]
32
+ }
33
+ ]).get ("Reservations" )
32
34
33
35
_terminate_list = []
34
36
for _instances in ec2_instances :
@@ -87,34 +89,92 @@ def ec2_list(client, **args):
87
89
88
90
return _terminate_list
89
91
92
+
93
def remove_unused_ssh_key_pairs(client, executor_name_part):
    """Delete ``runner-*`` EC2 key pairs that no pending/running instance uses.

    Only key pairs whose name contains ``executor_name_part`` are deleted, so
    that keys created by other deployments in the same account/region are
    left alone. Failures to delete an individual key pair are logged and do
    not stop the clean-up of the remaining ones.

    Args:
        client: EC2 client offering ``get_paginator``, ``describe_key_pairs``
            and ``delete_key_pair``.
        executor_name_part: Substring identifying the key pairs owned by this
            module. If empty, nothing is deleted: an empty string is
            contained in every key name and would match foreign keys too.

    Returns:
        None. Progress and errors are reported as JSON lines on stdout.
    """
    if not executor_name_part:
        print(json.dumps({
            "Level": "warning",
            "Message": "No executor name given, skipping removal of unused SSH key pairs"
        }))
        return

    print(json.dumps({
        "Level": "info",
        "Message": f"Removing unused SSH key pairs for agent {executor_name_part}"
    }))

    # Build the set of SSH keys to keep: every key referenced by an instance
    # that is still starting or running.
    paginator = client.get_paginator('describe_instances')
    reservations = paginator.paginate(Filters=[
        {
            "Name": "key-name",
            "Values": ['runner-*'],
        },
        {
            "Name": "instance-state-name",
            "Values": ['pending', 'running'],
        },
    ]).build_full_result().get("Reservations") or []

    used_key_pairs = {
        instance['KeyName']
        for reservation in reservations
        for instance in reservation["Instances"]
        if 'KeyName' in instance
    }

    all_key_pairs = client.describe_key_pairs(Filters=[
        {
            "Name": "key-name",
            "Values": ['runner-*'],
        },
    ])

    for key_pair in all_key_pairs['KeyPairs']:
        key_name = key_pair['KeyName']

        if key_name in used_key_pairs:
            continue

        # Make sure to delete only those keys which belong to our module.
        # Unfortunately there are no tags set on the keys and GitLab runner
        # is not able to set them, so the name is the only criterion.
        if executor_name_part not in key_name:
            continue

        try:
            client.delete_key_pair(KeyName=key_name)
        except botocore.exceptions.ClientError as error:
            print(json.dumps({
                "Level": "error",
                "Message": f"Unable to delete key pair: {key_name}",
                "Exception": str(error)
            }))
        else:
            print(json.dumps({
                "Level": "info",
                "Message": f"Key pair deleted: {key_name}"
            }))
145
+
146
+
90
147
def handler(event, context):
    """Lambda entry point: terminate orphaned executors and clean up SSH keys.

    For an ``autoscaling:EC2_INSTANCE_TERMINATING`` lifecycle event, the
    instances returned by ``ec2_list`` for the terminating instance are
    terminated, then unused SSH key pairs are removed. Any other lifecycle
    transition is ignored.

    Args:
        event: Event dict; ``event['detail']['LifecycleTransition']``,
            ``event['detail']['EC2InstanceId']`` and ``event['region']``
            are read.
        context: Lambda context object; not used.

    Returns:
        A short status string.

    Raises:
        KeyError: If the event lacks one of the keys above or the
            ``NAME_EXECUTOR_INSTANCE`` environment variable is not set.
    """
    event_detail = event['detail']
    transition = event_detail['LifecycleTransition']

    if transition != "autoscaling:EC2_INSTANCE_TERMINATING":
        # Return instead of calling exit(): raising SystemExit inside the
        # handler makes the invocation fail although there is nothing to do.
        message = f"Ignoring lifecycle transition {transition}"
        print(json.dumps({
            "Level": "info",
            "Message": message
        }))
        return message

    client = boto3.client("ec2", region_name=event['region'])

    # find the executors connected to this agent and terminate them as well
    _terminate_list = ec2_list(client=client, parent=event_detail['EC2InstanceId'])
    if _terminate_list:
        print(json.dumps({
            "Level": "info",
            "Message": f"Terminating instances {', '.join(_terminate_list)}"
        }))
        try:
            client.terminate_instances(InstanceIds=_terminate_list, DryRun=False)
        except Exception as e:
            # Best effort: log the failure and still run the key clean-up.
            print(json.dumps({
                "Level": "exception",
                "Exception": str(e)
            }))
    else:
        print(json.dumps({
            "Level": "info",
            "Message": "No instances to terminate."
        }))

    remove_unused_ssh_key_pairs(client=client, executor_name_part=os.environ['NAME_EXECUTOR_INSTANCE'])

    return "Housekeeping done"
177
+
118
178
119
179
if __name__ == "__main__":
    # Script entry point: invoke the handler directly with no event and no
    # context.
    # NOTE(review): handler subscripts `event`, so calling it with None
    # fails; supply a real event when running this locally.
    handler(event=None, context=None)
0 commit comments