|
15 | 15 | import os
|
16 | 16 |
|
17 | 17 |
|
| 18 | + |
18 | 19 | def ec2_list(client, **args):
|
19 | 20 | print(json.dumps({
|
20 | 21 | "Level": "info",
|
@@ -90,6 +91,63 @@ def ec2_list(client, **args):
|
90 | 91 | return _terminate_list
|
91 | 92 |
|
92 | 93 |
|
def cancel_active_spot_requests(ec2_client, executor_name_part):
    """Cancel the open and active spot requests that belong to an environment.

    Pages through ``describe_spot_instance_requests`` (restricted to requests
    in state ``active`` or ``open`` whose launch key name matches
    ``runner-*``), collects the ids of every request whose key name contains
    ``executor_name_part`` and cancels them with a single
    ``cancel_spot_instance_requests`` call.

    Progress is reported as JSON lines on stdout. A failure of the cancel
    call is logged and swallowed (best effort); errors raised while listing
    the requests propagate to the caller.

    Args:
        ec2_client: Object exposing ``describe_spot_instance_requests`` and
            ``cancel_spot_instance_requests``; the handler in this file
            passes a boto3 EC2 client.
        executor_name_part: Substring that must occur in a request's launch
            key name for the request to be cancelled.

    Returns:
        None.
    """
    print(json.dumps({
        "Level": "info",
        "Message": f"Removing open spot requests for environment {executor_name_part}"
    }))

    spot_requests_to_cancel = []

    # NextToken is only added once the service has actually returned one, so
    # the first request does not carry an empty dummy token.
    request_kwargs = {
        "Filters": [
            {
                "Name": "state",
                "Values": ['active', 'open']
            },
            {
                "Name": "launch.key-name",
                "Values": ["runner-*"]
            }
        ],
        "MaxResults": 1000,
    }

    while True:
        response = ec2_client.describe_spot_instance_requests(**request_kwargs)

        for spot_request in response.get("SpotInstanceRequests", []):
            # Skip entries without a key name instead of failing with KeyError.
            key_name = (spot_request.get("LaunchSpecification") or {}).get("KeyName")

            if key_name and executor_name_part in key_name:
                spot_requests_to_cancel.append(spot_request["SpotInstanceRequestId"])

                print(json.dumps({
                    "Level": "info",
                    "Message": f"Identified spot request {spot_request['SpotInstanceRequestId']}"
                }))

        next_token = response.get("NextToken")
        if not next_token:
            break

        request_kwargs["NextToken"] = next_token

    if not spot_requests_to_cancel:
        print(json.dumps({
            "Level": "info",
            "Message": "No spot requests to cancel"
        }))
        return

    try:
        ec2_client.cancel_spot_instance_requests(SpotInstanceRequestIds=spot_requests_to_cancel)
    except Exception as e:
        # Deliberately best effort: the caller continues with its remaining
        # clean-up steps even when the bulk cancel fails.
        print(json.dumps({
            "Level": "exception",
            "Message": "Bulk cancelling spot requests failed",
            "Exception": str(e)
        }))
    else:
        print(json.dumps({
            "Level": "info",
            "Message": "Spot requests deleted"
        }))
| 150 | + |
93 | 151 | def remove_unused_ssh_key_pairs(client, executor_name_part):
|
94 | 152 | print(json.dumps({
|
95 | 153 | "Level": "info",
|
@@ -147,19 +205,31 @@ def remove_unused_ssh_key_pairs(client, executor_name_part):
|
147 | 205 | def handler(event, context):
|
148 | 206 | response = []
|
149 | 207 | event_detail = event['detail']
|
150 |
| - client = boto3.client("ec2", region_name=event['region']) |
| 208 | + |
151 | 209 | if event_detail['LifecycleTransition'] != "autoscaling:EC2_INSTANCE_TERMINATING":
|
152 | 210 | exit()
|
153 | 211 |
|
| 212 | + client = boto3.client("ec2", region_name=event['region']) |
| 213 | + |
| 214 | + # make sure that no new instances are created |
| 215 | + cancel_active_spot_requests(ec2_client=client, executor_name_part=os.environ['NAME_EXECUTOR_INSTANCE']) |
| 216 | + |
154 | 217 | # find the executors connected to this agent and terminate them as well
|
155 | 218 | _terminate_list = ec2_list(client=client, parent=event_detail['EC2InstanceId'])
|
| 219 | + |
156 | 220 | if len(_terminate_list) > 0:
|
157 | 221 | print(json.dumps({
|
158 | 222 | "Level": "info",
|
159 | 223 | "Message": f"Terminating instances {', '.join(_terminate_list)}"
|
160 | 224 | }))
|
161 | 225 | try:
|
162 | 226 | client.terminate_instances(InstanceIds=_terminate_list, DryRun=False)
|
| 227 | + |
| 228 | + print(json.dumps({ |
| 229 | + "Level": "info", |
| 230 | + "Message": "Instances terminated" |
| 231 | + })) |
| 232 | + |
163 | 233 | except Exception as e:
|
164 | 234 | print(json.dumps({
|
165 | 235 | "Level": "exception",
|
|
0 commit comments