From fa9eaa3346993d0bad55af2fd6cf783130d804ff Mon Sep 17 00:00:00 2001
From: pete911
Date: Fri, 19 May 2023 20:54:11 +0100
Subject: [PATCH 1/2] add imds asg target lifecycle hook
---
README.md | 2 +-
cmd/node-termination-handler.go | 6 +
.../templates/daemonset.linux.yaml | 2 +
.../templates/daemonset.windows.yaml | 2 +
.../aws-node-termination-handler/values.yaml | 3 +
pkg/config/config.go | 4 +
pkg/config/config_test.go | 7 +
pkg/ec2metadata/ec2metadata.go | 24 ++++
pkg/ec2metadata/ec2metadata_test.go | 85 ++++++++++++
.../asglifecycle/asg-lifecycle-monitor.go | 102 ++++++++++++++
.../asg-lifecycle-monitor_internal_test.go | 99 ++++++++++++++
.../asg-lifecycle-monitor_test.go | 125 ++++++++++++++++++
12 files changed, 460 insertions(+), 1 deletion(-)
create mode 100644 pkg/monitor/asglifecycle/asg-lifecycle-monitor.go
create mode 100644 pkg/monitor/asglifecycle/asg-lifecycle-monitor_internal_test.go
create mode 100644 pkg/monitor/asglifecycle/asg-lifecycle-monitor_test.go
diff --git a/README.md b/README.md
index 0c3437b2..6807611f 100644
--- a/README.md
+++ b/README.md
@@ -75,8 +75,8 @@ Must be deployed as a Kubernetes **Deployment**. Also requires some **additional
| Spot Instance Termination Notifications (ITN) | ✅ | ✅ |
| Scheduled Events | ✅ | ✅ |
| Instance Rebalance Recommendation | ✅ | ✅ |
+| ASG Termination Lifecycle Hooks | ✅ | ✅ |
| AZ Rebalance Recommendation | ❌ | ✅ |
-| ASG Termination Lifecycle Hooks | ❌ | ✅ |
| Instance State Change Events | ❌ | ✅ |
diff --git a/cmd/node-termination-handler.go b/cmd/node-termination-handler.go
index 9f7dcaf1..97f33870 100644
--- a/cmd/node-termination-handler.go
+++ b/cmd/node-termination-handler.go
@@ -16,6 +16,7 @@ package main
import (
"context"
"fmt"
+ "github.com/aws/aws-node-termination-handler/pkg/monitor/asglifecycle"
"os"
"os/signal"
"strings"
@@ -50,6 +51,7 @@ import (
const (
scheduledMaintenance = "Scheduled Maintenance"
spotITN = "Spot ITN"
+ asgLifecycle = "ASG Lifecycle"
rebalanceRecommendation = "Rebalance Recommendation"
sqsEvents = "SQS Event"
timeFormat = "2006/01/02 15:04:05"
@@ -172,6 +174,10 @@ func main() {
imdsSpotMonitor := spotitn.NewSpotInterruptionMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName)
monitoringFns[spotITN] = imdsSpotMonitor
}
+ if nthConfig.EnableASGLifecycleDraining {
+ asgLifecycleMonitor := asglifecycle.NewASGLifecycleMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName)
+ monitoringFns[asgLifecycle] = asgLifecycleMonitor
+ }
if nthConfig.EnableScheduledEventDraining {
imdsScheduledEventMonitor := scheduledevent.NewScheduledEventMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName)
monitoringFns[scheduledMaintenance] = imdsScheduledEventMonitor
diff --git a/config/helm/aws-node-termination-handler/templates/daemonset.linux.yaml b/config/helm/aws-node-termination-handler/templates/daemonset.linux.yaml
index 95e4b50f..be6385de 100644
--- a/config/helm/aws-node-termination-handler/templates/daemonset.linux.yaml
+++ b/config/helm/aws-node-termination-handler/templates/daemonset.linux.yaml
@@ -143,6 +143,8 @@ spec:
{{- end }}
- name: ENABLE_SPOT_INTERRUPTION_DRAINING
value: {{ .Values.enableSpotInterruptionDraining | quote }}
+ - name: ENABLE_ASG_LIFECYCLE_DRAINING
+ value: {{ .Values.enableASGLifecycleDraining | quote }}
- name: ENABLE_SCHEDULED_EVENT_DRAINING
value: {{ .Values.enableScheduledEventDraining | quote }}
- name: ENABLE_REBALANCE_MONITORING
diff --git a/config/helm/aws-node-termination-handler/templates/daemonset.windows.yaml b/config/helm/aws-node-termination-handler/templates/daemonset.windows.yaml
index 8a9db7bf..95af69d1 100644
--- a/config/helm/aws-node-termination-handler/templates/daemonset.windows.yaml
+++ b/config/helm/aws-node-termination-handler/templates/daemonset.windows.yaml
@@ -143,6 +143,8 @@ spec:
{{- end }}
- name: ENABLE_SPOT_INTERRUPTION_DRAINING
value: {{ .Values.enableSpotInterruptionDraining | quote }}
+ - name: ENABLE_ASG_LIFECYCLE_DRAINING
+ value: {{ .Values.enableASGLifecycleDraining | quote }}
- name: ENABLE_SCHEDULED_EVENT_DRAINING
value: {{ .Values.enableScheduledEventDraining | quote }}
- name: ENABLE_REBALANCE_MONITORING
diff --git a/config/helm/aws-node-termination-handler/values.yaml b/config/helm/aws-node-termination-handler/values.yaml
index 3a4d5db4..864c5f96 100644
--- a/config/helm/aws-node-termination-handler/values.yaml
+++ b/config/helm/aws-node-termination-handler/values.yaml
@@ -268,6 +268,9 @@ metadataTries: 3
# enableSpotInterruptionDraining If false, do not drain nodes when the spot interruption termination notice is received. Only used in IMDS mode.
enableSpotInterruptionDraining: true
+# enableASGLifecycleDraining If false, do not drain nodes when ASG target lifecycle state Terminated is received. Only used in IMDS mode.
+enableASGLifecycleDraining: true
+
# enableScheduledEventDraining If false, do not drain nodes before the maintenance window starts for an EC2 instance scheduled event. Only used in IMDS mode.
enableScheduledEventDraining: true
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 4eabaf00..3fd79c25 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -53,6 +53,8 @@ const (
enableScheduledEventDrainingDefault = true
enableSpotInterruptionDrainingConfigKey = "ENABLE_SPOT_INTERRUPTION_DRAINING"
enableSpotInterruptionDrainingDefault = true
+ enableASGLifecycleDrainingConfigKey = "ENABLE_ASG_LIFECYCLE_DRAINING"
+ enableASGLifecycleDrainingDefault = true
enableSQSTerminationDrainingConfigKey = "ENABLE_SQS_TERMINATION_DRAINING"
enableSQSTerminationDrainingDefault = false
enableRebalanceMonitoringConfigKey = "ENABLE_REBALANCE_MONITORING"
@@ -131,6 +133,7 @@ type Config struct {
WebhookProxy string
EnableScheduledEventDraining bool
EnableSpotInterruptionDraining bool
+ EnableASGLifecycleDraining bool
EnableSQSTerminationDraining bool
EnableRebalanceMonitoring bool
EnableRebalanceDraining bool
@@ -193,6 +196,7 @@ func ParseCliArgs() (config Config, err error) {
flag.StringVar(&config.WebhookTemplateFile, "webhook-template-file", getEnv(webhookTemplateFileConfigKey, ""), "If specified, replaces the default webhook message template with content from template file.")
flag.BoolVar(&config.EnableScheduledEventDraining, "enable-scheduled-event-draining", getBoolEnv(enableScheduledEventDrainingConfigKey, enableScheduledEventDrainingDefault), "If true, drain nodes before the maintenance window starts for an EC2 instance scheduled event")
flag.BoolVar(&config.EnableSpotInterruptionDraining, "enable-spot-interruption-draining", getBoolEnv(enableSpotInterruptionDrainingConfigKey, enableSpotInterruptionDrainingDefault), "If true, drain nodes when the spot interruption termination notice is received")
+ flag.BoolVar(&config.EnableASGLifecycleDraining, "enable-asg-lifecycle-draining", getBoolEnv(enableASGLifecycleDrainingConfigKey, enableASGLifecycleDrainingDefault), "If true, drain nodes when the ASG target lifecyle state is Terminated is received")
flag.BoolVar(&config.EnableSQSTerminationDraining, "enable-sqs-termination-draining", getBoolEnv(enableSQSTerminationDrainingConfigKey, enableSQSTerminationDrainingDefault), "If true, drain nodes when an SQS termination event is received")
flag.BoolVar(&config.EnableRebalanceMonitoring, "enable-rebalance-monitoring", getBoolEnv(enableRebalanceMonitoringConfigKey, enableRebalanceMonitoringDefault), "If true, cordon nodes when the rebalance recommendation notice is received. If you'd like to drain the node in addition to cordoning, then also set \"enableRebalanceDraining\".")
flag.BoolVar(&config.EnableRebalanceDraining, "enable-rebalance-draining", getBoolEnv(enableRebalanceDrainingConfigKey, enableRebalanceDrainingDefault), "If true, drain nodes when the rebalance recommendation notice is received")
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index e1f7a1ba..4b051b12 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -36,6 +36,7 @@ func TestParseCliArgsEnvSuccess(t *testing.T) {
t.Setenv("DRY_RUN", "true")
t.Setenv("ENABLE_SCHEDULED_EVENT_DRAINING", "true")
t.Setenv("ENABLE_SPOT_INTERRUPTION_DRAINING", "false")
+ t.Setenv("ENABLE_ASG_LIFECYCLE_DRAINING", "false")
t.Setenv("ENABLE_SQS_TERMINATION_DRAINING", "false")
t.Setenv("ENABLE_REBALANCE_MONITORING", "true")
t.Setenv("ENABLE_REBALANCE_DRAINING", "true")
@@ -61,6 +62,7 @@ func TestParseCliArgsEnvSuccess(t *testing.T) {
h.Equals(t, true, nthConfig.DryRun)
h.Equals(t, true, nthConfig.EnableScheduledEventDraining)
h.Equals(t, false, nthConfig.EnableSpotInterruptionDraining)
+ h.Equals(t, false, nthConfig.EnableASGLifecycleDraining)
h.Equals(t, false, nthConfig.EnableSQSTerminationDraining)
h.Equals(t, true, nthConfig.EnableRebalanceMonitoring)
h.Equals(t, true, nthConfig.EnableRebalanceDraining)
@@ -96,6 +98,7 @@ func TestParseCliArgsSuccess(t *testing.T) {
"--dry-run=true",
"--enable-scheduled-event-draining=true",
"--enable-spot-interruption-draining=false",
+ "--enable-asg-lifecycle-draining=false",
"--enable-sqs-termination-draining=false",
"--enable-rebalance-monitoring=true",
"--enable-rebalance-draining=true",
@@ -121,6 +124,7 @@ func TestParseCliArgsSuccess(t *testing.T) {
h.Equals(t, true, nthConfig.DryRun)
h.Equals(t, true, nthConfig.EnableScheduledEventDraining)
h.Equals(t, false, nthConfig.EnableSpotInterruptionDraining)
+ h.Equals(t, false, nthConfig.EnableASGLifecycleDraining)
h.Equals(t, false, nthConfig.EnableSQSTerminationDraining)
h.Equals(t, true, nthConfig.EnableRebalanceMonitoring)
h.Equals(t, true, nthConfig.EnableRebalanceDraining)
@@ -151,6 +155,7 @@ func TestParseCliArgsOverrides(t *testing.T) {
t.Setenv("DRY_RUN", "false")
t.Setenv("ENABLE_SCHEDULED_EVENT_DRAINING", "false")
t.Setenv("ENABLE_SPOT_INTERRUPTION_DRAINING", "true")
+ t.Setenv("ENABLE_ASG_LIFECYCLE_DRAINING", "true")
t.Setenv("ENABLE_SQS_TERMINATION_DRAINING", "false")
t.Setenv("ENABLE_REBALANCE_MONITORING", "true")
t.Setenv("ENABLE_REBALANCE_DRAINING", "true")
@@ -174,6 +179,7 @@ func TestParseCliArgsOverrides(t *testing.T) {
"--dry-run=true",
"--enable-scheduled-event-draining=true",
"--enable-spot-interruption-draining=false",
+ "--enable-asg-lifecycle-draining=false",
"--enable-sqs-termination-draining=true",
"--enable-rebalance-monitoring=false",
"--enable-rebalance-draining=false",
@@ -201,6 +207,7 @@ func TestParseCliArgsOverrides(t *testing.T) {
h.Equals(t, true, nthConfig.DryRun)
h.Equals(t, true, nthConfig.EnableScheduledEventDraining)
h.Equals(t, false, nthConfig.EnableSpotInterruptionDraining)
+ h.Equals(t, false, nthConfig.EnableASGLifecycleDraining)
h.Equals(t, true, nthConfig.EnableSQSTerminationDraining)
h.Equals(t, false, nthConfig.EnableRebalanceMonitoring)
h.Equals(t, false, nthConfig.EnableRebalanceDraining)
diff --git a/pkg/ec2metadata/ec2metadata.go b/pkg/ec2metadata/ec2metadata.go
index f7f5ade8..7449d641 100644
--- a/pkg/ec2metadata/ec2metadata.go
+++ b/pkg/ec2metadata/ec2metadata.go
@@ -30,6 +30,8 @@ import (
const (
// SpotInstanceActionPath is the context path to spot/instance-action within IMDS
SpotInstanceActionPath = "/latest/meta-data/spot/instance-action"
+ // ASGTargetLifecycleStatePath path to autoscaling target lifecycle state within IMDS
+ ASGTargetLifecycleStatePath = "/latest/meta-data/autoscaling/target-lifecycle-state"
// ScheduledEventPath is the context path to events/maintenance/scheduled within IMDS
ScheduledEventPath = "/latest/meta-data/events/maintenance/scheduled"
// RebalanceRecommendationPath is the context path to events/recommendations/rebalance within IMDS
@@ -193,6 +195,28 @@ func (e *Service) GetRebalanceRecommendationEvent() (rebalanceRec *RebalanceReco
return rebalanceRec, nil
}
+// GetASGTargetLifecycleState retrieves ASG target lifecycle state from imds. State can be empty string
+// if the lifecycle hook is not present on the ASG
+func (e *Service) GetASGTargetLifecycleState() (state string, err error) {
+ resp, err := e.Request(ASGTargetLifecycleStatePath)
+ // 404s are normal when querying for the 'autoscaling/target-lifecycle-state' path and there is no lifecycle hook
+ if resp != nil && resp.StatusCode == 404 {
+ return "", nil
+ } else if resp != nil && (resp.StatusCode < 200 || resp.StatusCode >= 300) {
+ return "", fmt.Errorf("Metadata request received http status code: %d", resp.StatusCode)
+ }
+ if err != nil {
+ return "", fmt.Errorf("Unable to parse metadata response: %w", err)
+ }
+ defer resp.Body.Close()
+
+ body, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return "", fmt.Errorf("Unable to parse http response. Status code: %d. %w", resp.StatusCode, err)
+ }
+ return string(body), nil
+}
+
// GetMetadataInfo generic function for retrieving ec2 metadata
func (e *Service) GetMetadataInfo(path string) (info string, err error) {
metadataInfo := ""
diff --git a/pkg/ec2metadata/ec2metadata_test.go b/pkg/ec2metadata/ec2metadata_test.go
index 6fe58301..4a698c22 100644
--- a/pkg/ec2metadata/ec2metadata_test.go
+++ b/pkg/ec2metadata/ec2metadata_test.go
@@ -504,6 +504,91 @@ func TestGetRebalanceRecommendationEventRequestFailure(t *testing.T) {
h.Assert(t, err != nil, "error expected because no server should be running")
}
+func TestGetASGTargetLifecycleStateSuccess(t *testing.T) {
+ requestPath := "/latest/meta-data/autoscaling/target-lifecycle-state"
+
+ server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+ rw.Header().Add("X-aws-ec2-metadata-token-ttl-seconds", "100")
+ if req.URL.String() == "/latest/api/token" {
+ rw.WriteHeader(200)
+ _, err := rw.Write([]byte(`token`))
+ h.Ok(t, err)
+ return
+ }
+ h.Equals(t, req.Header.Get("X-aws-ec2-metadata-token"), "token")
+ h.Equals(t, req.URL.String(), requestPath)
+ _, err := rw.Write([]byte("InService"))
+ h.Ok(t, err)
+ }))
+ defer server.Close()
+
+ expectedState := "InService"
+
+ // Use URL from our local test server
+ imds := ec2metadata.New(server.URL, 1)
+
+ state, err := imds.GetASGTargetLifecycleState()
+ h.Ok(t, err)
+ h.Equals(t, expectedState, state)
+}
+
+func TestGetASGTargetLifecycleState404Success(t *testing.T) {
+ requestPath := "/latest/meta-data/autoscaling/target-lifecycle-state"
+
+ server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+ rw.Header().Add("X-aws-ec2-metadata-token-ttl-seconds", "100")
+ if req.URL.String() == "/latest/api/token" {
+ rw.WriteHeader(200)
+ _, err := rw.Write([]byte(`token`))
+ h.Ok(t, err)
+ return
+ }
+ h.Equals(t, req.Header.Get("X-aws-ec2-metadata-token"), "token")
+ h.Equals(t, req.URL.String(), requestPath)
+ rw.WriteHeader(404)
+ }))
+ defer server.Close()
+
+ // Use URL from our local test server
+ imds := ec2metadata.New(server.URL, 1)
+
+ state, err := imds.GetASGTargetLifecycleState()
+ h.Ok(t, err)
+ h.Assert(t, state == "", "ASG target lifecycle state should be empty")
+}
+
+func TestGetASGTargetLifecycleState500Failure(t *testing.T) {
+ requestPath := "/latest/meta-data/autoscaling/target-lifecycle-state"
+
+ server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+ rw.Header().Add("X-aws-ec2-metadata-token-ttl-seconds", "100")
+ if req.URL.String() == "/latest/api/token" {
+ rw.WriteHeader(200)
+ _, err := rw.Write([]byte(`token`))
+ h.Ok(t, err)
+ return
+ }
+ h.Equals(t, req.Header.Get("X-aws-ec2-metadata-token"), "token")
+ h.Equals(t, req.URL.String(), requestPath)
+ rw.WriteHeader(500)
+ }))
+ defer server.Close()
+
+ // Use URL from our local test server
+ imds := ec2metadata.New(server.URL, 1)
+
+ _, err := imds.GetASGTargetLifecycleState()
+ h.Assert(t, err != nil, "error expected on non-200 or non-404 status code")
+}
+
+func TestGetASGTargetLifecycleStateRequestFailure(t *testing.T) {
+ // Use URL from our local test server
+ imds := ec2metadata.New("/some-path-that-will-error", 1)
+
+ _, err := imds.GetASGTargetLifecycleState()
+ h.Assert(t, err != nil, "error expected because no server should be running")
+}
+
func TestGetMetadataServiceRequest404(t *testing.T) {
var requestPath string = "/latest/meta-data/instance-type"
diff --git a/pkg/monitor/asglifecycle/asg-lifecycle-monitor.go b/pkg/monitor/asglifecycle/asg-lifecycle-monitor.go
new file mode 100644
index 00000000..53ed5b1e
--- /dev/null
+++ b/pkg/monitor/asglifecycle/asg-lifecycle-monitor.go
@@ -0,0 +1,102 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"). You may
+// not use this file except in compliance with the License. A copy of the
+// License is located at
+//
+// http://aws.amazon.com/apache2.0/
+//
+// or in the "license" file accompanying this file. This file is distributed
+// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+// express or implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+package asglifecycle
+
+import (
+ "crypto/sha256"
+ "fmt"
+ "github.com/aws/aws-node-termination-handler/pkg/ec2metadata"
+ "github.com/aws/aws-node-termination-handler/pkg/monitor"
+ "github.com/aws/aws-node-termination-handler/pkg/node"
+ "time"
+)
+
+// ASGLifecycleMonitorKind is a const to define this monitor kind
+const ASGLifecycleMonitorKind = "ASG_LIFECYCLE_MONITOR"
+
+// ASGLifecycleMonitor is a struct definition which facilitates monitoring of ASG target lifecycle state from IMDS
+type ASGLifecycleMonitor struct {
+ IMDS *ec2metadata.Service
+ InterruptionChan chan<- monitor.InterruptionEvent
+ CancelChan chan<- monitor.InterruptionEvent
+ NodeName string
+}
+
+// NewASGLifecycleMonitor creates an instance of a ASG lifecycle IMDS monitor
+func NewASGLifecycleMonitor(imds *ec2metadata.Service, interruptionChan chan<- monitor.InterruptionEvent, cancelChan chan<- monitor.InterruptionEvent, nodeName string) ASGLifecycleMonitor {
+ return ASGLifecycleMonitor{
+ IMDS: imds,
+ InterruptionChan: interruptionChan,
+ CancelChan: cancelChan,
+ NodeName: nodeName,
+ }
+}
+
+// Monitor continuously monitors metadata for ASG target lifecycle state and sends interruption events to the passed in channel
+func (m ASGLifecycleMonitor) Monitor() error {
+ interruptionEvent, err := m.checkForASGTargetLifecycleStateNotice()
+ if err != nil {
+ return err
+ }
+ if interruptionEvent != nil && interruptionEvent.Kind == monitor.ASGLifecycleKind {
+ m.InterruptionChan <- *interruptionEvent
+ }
+ return nil
+}
+
+// Kind denotes the kind of monitor
+func (m ASGLifecycleMonitor) Kind() string {
+ return ASGLifecycleMonitorKind
+}
+
+// checkForSpotInterruptionNotice Checks EC2 instance metadata for a spot interruption termination notice
+func (m ASGLifecycleMonitor) checkForASGTargetLifecycleStateNotice() (*monitor.InterruptionEvent, error) {
+ state, err := m.IMDS.GetASGTargetLifecycleState()
+ if err != nil {
+ return nil, fmt.Errorf("There was a problem checking for ASG target lifecycle state: %w", err)
+ }
+ if state != "Terminated" {
+ // if the state is not "Terminated", we can skip. State can also be empty (no hook configured).
+ return nil, nil
+ }
+
+ nodeName := m.NodeName
+ // there is no time in the response, we just set time to the latest check
+ interruptionTime := time.Now()
+
+ // There's no EventID returned, so we'll create it using a hash to prevent duplicates.
+ hash := sha256.New()
+ if _, err = hash.Write([]byte(fmt.Sprintf("%s:%s", state, interruptionTime))); err != nil {
+ return nil, fmt.Errorf("There was a problem creating an event ID from the event: %w", err)
+ }
+
+ return &monitor.InterruptionEvent{
+ EventID: fmt.Sprintf("spot-itn-%x", hash.Sum(nil)),
+ Kind: monitor.ASGLifecycleKind,
+ Monitor: ASGLifecycleMonitorKind,
+ StartTime: interruptionTime,
+ NodeName: nodeName,
+ Description: "AST tareget lifecycle state received. Instance will be \n",
+ PreDrainTask: setInterruptionTaint,
+ }, nil
+}
+
+func setInterruptionTaint(interruptionEvent monitor.InterruptionEvent, n node.Node) error {
+ err := n.TaintASGLifecycleTermination(interruptionEvent.NodeName, interruptionEvent.EventID)
+ if err != nil {
+ return fmt.Errorf("Unable to taint node with taint %s:%s: %w", node.ASGLifecycleTerminationTaint, interruptionEvent.EventID, err)
+ }
+
+ return nil
+}
diff --git a/pkg/monitor/asglifecycle/asg-lifecycle-monitor_internal_test.go b/pkg/monitor/asglifecycle/asg-lifecycle-monitor_internal_test.go
new file mode 100644
index 00000000..8532c16a
--- /dev/null
+++ b/pkg/monitor/asglifecycle/asg-lifecycle-monitor_internal_test.go
@@ -0,0 +1,99 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"). You may
+// not use this file except in compliance with the License. A copy of the
+// License is located at
+//
+// http://aws.amazon.com/apache2.0/
+//
+// or in the "license" file accompanying this file. This file is distributed
+// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+// express or implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+package asglifecycle
+
+import (
+ "context"
+ "testing"
+ "time"
+
+ "github.com/rs/zerolog/log"
+
+ "github.com/aws/aws-node-termination-handler/pkg/config"
+ "github.com/aws/aws-node-termination-handler/pkg/monitor"
+ "github.com/aws/aws-node-termination-handler/pkg/node"
+ h "github.com/aws/aws-node-termination-handler/pkg/test"
+ "github.com/aws/aws-node-termination-handler/pkg/uptime"
+ v1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/client-go/kubernetes/fake"
+ "k8s.io/kubectl/pkg/drain"
+)
+
+const nodeName = "NAME"
+
+func getDrainHelper(client *fake.Clientset) *drain.Helper {
+ return &drain.Helper{
+ Client: client,
+ Force: true,
+ GracePeriodSeconds: -1,
+ IgnoreAllDaemonSets: true,
+ DeleteEmptyDirData: true,
+ Timeout: time.Duration(120) * time.Second,
+ Out: log.Logger,
+ ErrOut: log.Logger,
+ }
+}
+
+func TestSetInterruptionTaint(t *testing.T) {
+ drainEvent := monitor.InterruptionEvent{
+ EventID: "some-id-that-is-very-long-for-some-reason-and-is-definitely-over-63-characters",
+ }
+ nthConfig := config.Config{
+ DryRun: true,
+ NodeName: nodeName,
+ }
+
+ client := fake.NewSimpleClientset()
+ _, err := client.CoreV1().Nodes().Create(context.Background(), &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}, metav1.CreateOptions{})
+ h.Ok(t, err)
+
+ tNode, err := node.NewWithValues(nthConfig, getDrainHelper(client), uptime.Uptime)
+ h.Ok(t, err)
+
+ err = setInterruptionTaint(drainEvent, *tNode)
+
+ h.Ok(t, err)
+}
+
+func TestInterruptionTaintAlreadyPresent(t *testing.T) {
+ drainEvent := monitor.InterruptionEvent{
+ EventID: "some-id-that-is-very-long-for-some-reason-and-is-definitely-over-63-characters",
+ }
+ nthConfig := config.Config{
+ DryRun: false,
+ NodeName: nodeName,
+ }
+
+ client := fake.NewSimpleClientset()
+ newNode := &v1.Node{
+ ObjectMeta: metav1.ObjectMeta{Name: nodeName},
+ Spec: v1.NodeSpec{Taints: []v1.Taint{{
+ Key: node.RebalanceRecommendationTaint,
+ Value: drainEvent.EventID[:63],
+ Effect: v1.TaintEffectNoSchedule,
+ },
+ }},
+ }
+
+ _, err := client.CoreV1().Nodes().Create(context.Background(), newNode, metav1.CreateOptions{})
+ h.Ok(t, err)
+
+ tNode, err := node.NewWithValues(nthConfig, getDrainHelper(client), uptime.Uptime)
+ h.Ok(t, err)
+
+ err = setInterruptionTaint(drainEvent, *tNode)
+
+ h.Ok(t, err)
+}
diff --git a/pkg/monitor/asglifecycle/asg-lifecycle-monitor_test.go b/pkg/monitor/asglifecycle/asg-lifecycle-monitor_test.go
new file mode 100644
index 00000000..cf6ab6ac
--- /dev/null
+++ b/pkg/monitor/asglifecycle/asg-lifecycle-monitor_test.go
@@ -0,0 +1,125 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"). You may
+// not use this file except in compliance with the License. A copy of the
+// License is located at
+//
+// http://aws.amazon.com/apache2.0/
+//
+// or in the "license" file accompanying this file. This file is distributed
+// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+// express or implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+package asglifecycle_test
+
+import (
+ "github.com/aws/aws-node-termination-handler/pkg/monitor/asglifecycle"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+
+ "github.com/aws/aws-node-termination-handler/pkg/ec2metadata"
+ "github.com/aws/aws-node-termination-handler/pkg/monitor"
+ h "github.com/aws/aws-node-termination-handler/pkg/test"
+)
+
+const (
+ expFormattedTime = "2020-10-26 15:15:15 +0000 UTC"
+ imdsV2TokenPath = "/latest/api/token"
+ nodeName = "test-node"
+)
+
+var asgTargetLifecycleStateResponse = []byte("InService")
+
+func TestMonitor_Success(t *testing.T) {
+ requestPath := ec2metadata.ASGTargetLifecycleStatePath
+
+ server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+ if imdsV2TokenPath == req.URL.String() {
+ rw.WriteHeader(403)
+ return
+ }
+ h.Equals(t, req.URL.String(), requestPath)
+ _, err := rw.Write(asgTargetLifecycleStateResponse)
+ h.Ok(t, err)
+ }))
+ defer server.Close()
+
+ drainChan := make(chan monitor.InterruptionEvent)
+ cancelChan := make(chan monitor.InterruptionEvent)
+ imds := ec2metadata.New(server.URL, 1)
+
+ go func() {
+ result := <-drainChan
+ h.Equals(t, monitor.ASGLifecycleKind, result.Kind)
+ h.Equals(t, asglifecycle.ASGLifecycleMonitorKind, result.Monitor)
+ h.Equals(t, expFormattedTime, result.StartTime.String())
+ }()
+
+ asgLifecycleMonitor := asglifecycle.NewASGLifecycleMonitor(imds, drainChan, cancelChan, nodeName)
+ err := asgLifecycleMonitor.Monitor()
+ h.Ok(t, err)
+}
+
+func TestMonitor_MetadataParseFailure(t *testing.T) {
+ server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+ if imdsV2TokenPath == req.URL.String() {
+ rw.WriteHeader(403)
+ return
+ }
+ }))
+ defer server.Close()
+
+ drainChan := make(chan monitor.InterruptionEvent)
+ cancelChan := make(chan monitor.InterruptionEvent)
+ imds := ec2metadata.New(server.URL, 1)
+
+ asgLifecycleMonitor := asglifecycle.NewASGLifecycleMonitor(imds, drainChan, cancelChan, nodeName)
+ err := asgLifecycleMonitor.Monitor()
+ h.Ok(t, err)
+}
+
+func TestMonitor_404Response(t *testing.T) {
+ requestPath := ec2metadata.ASGTargetLifecycleStatePath
+
+ server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+ if imdsV2TokenPath == req.URL.String() {
+ rw.WriteHeader(403)
+ return
+ }
+ h.Equals(t, req.URL.String(), requestPath)
+ http.Error(rw, "error", http.StatusNotFound)
+ }))
+ defer server.Close()
+
+ drainChan := make(chan monitor.InterruptionEvent)
+ cancelChan := make(chan monitor.InterruptionEvent)
+ imds := ec2metadata.New(server.URL, 1)
+
+ asgLifecycleMonitor := asglifecycle.NewASGLifecycleMonitor(imds, drainChan, cancelChan, nodeName)
+ err := asgLifecycleMonitor.Monitor()
+ h.Ok(t, err)
+}
+
+func TestMonitor_500Response(t *testing.T) {
+ requestPath := ec2metadata.ASGTargetLifecycleStatePath
+
+ server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+ if imdsV2TokenPath == req.URL.String() {
+ rw.WriteHeader(403)
+ return
+ }
+ h.Equals(t, req.URL.String(), requestPath)
+ http.Error(rw, "error", http.StatusInternalServerError)
+ }))
+ defer server.Close()
+
+ drainChan := make(chan monitor.InterruptionEvent)
+ cancelChan := make(chan monitor.InterruptionEvent)
+ imds := ec2metadata.New(server.URL, 1)
+
+ asgLifecycleMonitor := asglifecycle.NewASGLifecycleMonitor(imds, drainChan, cancelChan, nodeName)
+ err := asgLifecycleMonitor.Monitor()
+ h.Assert(t, err != nil, "Failed to return error when 500 response")
+}
From 2ff59b9eeb0bb1c16f4e6ed3b516b54ac45de514 Mon Sep 17 00:00:00 2001
From: pete911
Date: Tue, 23 May 2023 08:20:00 +0100
Subject: [PATCH 2/2] update comment on target lifecycle 404 response
---
pkg/ec2metadata/ec2metadata.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pkg/ec2metadata/ec2metadata.go b/pkg/ec2metadata/ec2metadata.go
index 7449d641..f64d0cde 100644
--- a/pkg/ec2metadata/ec2metadata.go
+++ b/pkg/ec2metadata/ec2metadata.go
@@ -199,7 +199,7 @@ func (e *Service) GetRebalanceRecommendationEvent() (rebalanceRec *RebalanceReco
// if the lifecycle hook is not present on the ASG
func (e *Service) GetASGTargetLifecycleState() (state string, err error) {
resp, err := e.Request(ASGTargetLifecycleStatePath)
- // 404s are normal when querying for the 'autoscaling/target-lifecycle-state' path and there is no lifecycle hook
+ // 404s should not happen, but there can be a case if the instance is brand new and the field is not populated yet
if resp != nil && resp.StatusCode == 404 {
return "", nil
} else if resp != nil && (resp.StatusCode < 200 || resp.StatusCode >= 300) {