From fa9eaa3346993d0bad55af2fd6cf783130d804ff Mon Sep 17 00:00:00 2001 From: pete911 Date: Fri, 19 May 2023 20:54:11 +0100 Subject: [PATCH 1/2] add imds asg target lifecycle hook --- README.md | 2 +- cmd/node-termination-handler.go | 6 + .../templates/daemonset.linux.yaml | 2 + .../templates/daemonset.windows.yaml | 2 + .../aws-node-termination-handler/values.yaml | 3 + pkg/config/config.go | 4 + pkg/config/config_test.go | 7 + pkg/ec2metadata/ec2metadata.go | 24 ++++ pkg/ec2metadata/ec2metadata_test.go | 85 ++++++++++++ .../asglifecycle/asg-lifecycle-monitor.go | 102 ++++++++++++++ .../asg-lifecycle-monitor_internal_test.go | 99 ++++++++++++++ .../asg-lifecycle-monitor_test.go | 125 ++++++++++++++++++ 12 files changed, 460 insertions(+), 1 deletion(-) create mode 100644 pkg/monitor/asglifecycle/asg-lifecycle-monitor.go create mode 100644 pkg/monitor/asglifecycle/asg-lifecycle-monitor_internal_test.go create mode 100644 pkg/monitor/asglifecycle/asg-lifecycle-monitor_test.go diff --git a/README.md b/README.md index 0c3437b2..6807611f 100644 --- a/README.md +++ b/README.md @@ -75,8 +75,8 @@ Must be deployed as a Kubernetes **Deployment**. Also requires some **additional | Spot Instance Termination Notifications (ITN) | ✅ | ✅ | | Scheduled Events | ✅ | ✅ | | Instance Rebalance Recommendation | ✅ | ✅ | +| ASG Termination Lifecycle Hooks | ✅ | ✅ | | AZ Rebalance Recommendation | ❌ | ✅ | -| ASG Termination Lifecycle Hooks | ❌ | ✅ | | Instance State Change Events | ❌ | ✅ | diff --git a/cmd/node-termination-handler.go b/cmd/node-termination-handler.go index 9f7dcaf1..97f33870 100644 --- a/cmd/node-termination-handler.go +++ b/cmd/node-termination-handler.go @@ -16,6 +16,7 @@ package main import ( "context" "fmt" + "github.com/aws/aws-node-termination-handler/pkg/monitor/asglifecycle" "os" "os/signal" "strings" @@ -50,6 +51,7 @@ import ( const ( scheduledMaintenance = "Scheduled Maintenance" spotITN = "Spot ITN" + asgLifecycle = "ASG Lifecycle" rebalanceRecommendation = "Rebalance Recommendation" sqsEvents = "SQS Event" timeFormat = "2006/01/02 15:04:05" @@ -172,6 +174,10 @@ func main() { imdsSpotMonitor := spotitn.NewSpotInterruptionMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName) monitoringFns[spotITN] = imdsSpotMonitor } + if nthConfig.EnableASGLifecycleDraining { + asgLifecycleMonitor := asglifecycle.NewASGLifecycleMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName) + monitoringFns[asgLifecycle] = asgLifecycleMonitor + } if nthConfig.EnableScheduledEventDraining { imdsScheduledEventMonitor := scheduledevent.NewScheduledEventMonitor(imds, interruptionChan, cancelChan, nthConfig.NodeName) monitoringFns[scheduledMaintenance] = imdsScheduledEventMonitor diff --git a/config/helm/aws-node-termination-handler/templates/daemonset.linux.yaml b/config/helm/aws-node-termination-handler/templates/daemonset.linux.yaml index 95e4b50f..be6385de 100644 --- a/config/helm/aws-node-termination-handler/templates/daemonset.linux.yaml +++ b/config/helm/aws-node-termination-handler/templates/daemonset.linux.yaml @@ -143,6 +143,8 @@ spec: {{- end }} - name: ENABLE_SPOT_INTERRUPTION_DRAINING value: {{ .Values.enableSpotInterruptionDraining | quote }} + - name: ENABLE_ASG_LIFECYCLE_DRAINING + value: {{ .Values.enableASGLifecycleDraining | quote }} - name: ENABLE_SCHEDULED_EVENT_DRAINING value: {{ .Values.enableScheduledEventDraining | quote }} - name: ENABLE_REBALANCE_MONITORING diff --git a/config/helm/aws-node-termination-handler/templates/daemonset.windows.yaml b/config/helm/aws-node-termination-handler/templates/daemonset.windows.yaml index 8a9db7bf..95af69d1 100644 --- a/config/helm/aws-node-termination-handler/templates/daemonset.windows.yaml +++ b/config/helm/aws-node-termination-handler/templates/daemonset.windows.yaml @@ -143,6 +143,8 @@ spec: {{- end }} - name: ENABLE_SPOT_INTERRUPTION_DRAINING value: {{ .Values.enableSpotInterruptionDraining | quote }} + - name: ENABLE_ASG_LIFECYCLE_DRAINING + value: {{ .Values.enableASGLifecycleDraining | quote }} - name: ENABLE_SCHEDULED_EVENT_DRAINING value: {{ .Values.enableScheduledEventDraining | quote }} - name: ENABLE_REBALANCE_MONITORING diff --git a/config/helm/aws-node-termination-handler/values.yaml b/config/helm/aws-node-termination-handler/values.yaml index 3a4d5db4..864c5f96 100644 --- a/config/helm/aws-node-termination-handler/values.yaml +++ b/config/helm/aws-node-termination-handler/values.yaml @@ -268,6 +268,9 @@ metadataTries: 3 # enableSpotInterruptionDraining If false, do not drain nodes when the spot interruption termination notice is received. Only used in IMDS mode. enableSpotInterruptionDraining: true +# enableASGLifecycleDraining If false, do not drain nodes when ASG target lifecycle state Terminated is received. Only used in IMDS mode. +enableASGLifecycleDraining: true + # enableScheduledEventDraining If false, do not drain nodes before the maintenance window starts for an EC2 instance scheduled event. Only used in IMDS mode. enableScheduledEventDraining: true diff --git a/pkg/config/config.go b/pkg/config/config.go index 4eabaf00..3fd79c25 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -53,6 +53,8 @@ const ( enableScheduledEventDrainingDefault = true enableSpotInterruptionDrainingConfigKey = "ENABLE_SPOT_INTERRUPTION_DRAINING" enableSpotInterruptionDrainingDefault = true + enableASGLifecycleDrainingConfigKey = "ENABLE_ASG_LIFECYCLE_DRAINING" + enableASGLifecycleDrainingDefault = true enableSQSTerminationDrainingConfigKey = "ENABLE_SQS_TERMINATION_DRAINING" enableSQSTerminationDrainingDefault = false enableRebalanceMonitoringConfigKey = "ENABLE_REBALANCE_MONITORING" @@ -131,6 +133,7 @@ type Config struct { WebhookProxy string EnableScheduledEventDraining bool EnableSpotInterruptionDraining bool + EnableASGLifecycleDraining bool EnableSQSTerminationDraining bool EnableRebalanceMonitoring bool EnableRebalanceDraining bool @@ -193,6 +196,7 @@ func ParseCliArgs() (config Config, err error) { flag.StringVar(&config.WebhookTemplateFile, "webhook-template-file", getEnv(webhookTemplateFileConfigKey, ""), "If specified, replaces the default webhook message template with content from template file.") flag.BoolVar(&config.EnableScheduledEventDraining, "enable-scheduled-event-draining", getBoolEnv(enableScheduledEventDrainingConfigKey, enableScheduledEventDrainingDefault), "If true, drain nodes before the maintenance window starts for an EC2 instance scheduled event") flag.BoolVar(&config.EnableSpotInterruptionDraining, "enable-spot-interruption-draining", getBoolEnv(enableSpotInterruptionDrainingConfigKey, enableSpotInterruptionDrainingDefault), "If true, drain nodes when the spot interruption termination notice is received") + flag.BoolVar(&config.EnableASGLifecycleDraining, "enable-asg-lifecycle-draining", getBoolEnv(enableASGLifecycleDrainingConfigKey, enableASGLifecycleDrainingDefault), "If true, drain nodes when the ASG target lifecyle state is Terminated is received") flag.BoolVar(&config.EnableSQSTerminationDraining, "enable-sqs-termination-draining", getBoolEnv(enableSQSTerminationDrainingConfigKey, enableSQSTerminationDrainingDefault), "If true, drain nodes when an SQS termination event is received") flag.BoolVar(&config.EnableRebalanceMonitoring, "enable-rebalance-monitoring", getBoolEnv(enableRebalanceMonitoringConfigKey, enableRebalanceMonitoringDefault), "If true, cordon nodes when the rebalance recommendation notice is received. If you'd like to drain the node in addition to cordoning, then also set \"enableRebalanceDraining\".") flag.BoolVar(&config.EnableRebalanceDraining, "enable-rebalance-draining", getBoolEnv(enableRebalanceDrainingConfigKey, enableRebalanceDrainingDefault), "If true, drain nodes when the rebalance recommendation notice is received") diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index e1f7a1ba..4b051b12 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -36,6 +36,7 @@ func TestParseCliArgsEnvSuccess(t *testing.T) { t.Setenv("DRY_RUN", "true") t.Setenv("ENABLE_SCHEDULED_EVENT_DRAINING", "true") t.Setenv("ENABLE_SPOT_INTERRUPTION_DRAINING", "false") + t.Setenv("ENABLE_ASG_LIFECYCLE_DRAINING", "false") t.Setenv("ENABLE_SQS_TERMINATION_DRAINING", "false") t.Setenv("ENABLE_REBALANCE_MONITORING", "true") t.Setenv("ENABLE_REBALANCE_DRAINING", "true") @@ -61,6 +62,7 @@ func TestParseCliArgsEnvSuccess(t *testing.T) { h.Equals(t, true, nthConfig.DryRun) h.Equals(t, true, nthConfig.EnableScheduledEventDraining) h.Equals(t, false, nthConfig.EnableSpotInterruptionDraining) + h.Equals(t, false, nthConfig.EnableASGLifecycleDraining) h.Equals(t, false, nthConfig.EnableSQSTerminationDraining) h.Equals(t, true, nthConfig.EnableRebalanceMonitoring) h.Equals(t, true, nthConfig.EnableRebalanceDraining) @@ -96,6 +98,7 @@ func TestParseCliArgsSuccess(t *testing.T) { "--dry-run=true", "--enable-scheduled-event-draining=true", "--enable-spot-interruption-draining=false", + "--enable-asg-lifecycle-draining=false", "--enable-sqs-termination-draining=false", "--enable-rebalance-monitoring=true", "--enable-rebalance-draining=true", @@ -121,6 +124,7 @@ func TestParseCliArgsSuccess(t *testing.T) { h.Equals(t, true, nthConfig.DryRun) h.Equals(t, true, nthConfig.EnableScheduledEventDraining) h.Equals(t, false, nthConfig.EnableSpotInterruptionDraining) + h.Equals(t, false, nthConfig.EnableASGLifecycleDraining) h.Equals(t, false, nthConfig.EnableSQSTerminationDraining) h.Equals(t, true, nthConfig.EnableRebalanceMonitoring) h.Equals(t, true, nthConfig.EnableRebalanceDraining) @@ -151,6 +155,7 @@ func TestParseCliArgsOverrides(t *testing.T) { t.Setenv("DRY_RUN", "false") t.Setenv("ENABLE_SCHEDULED_EVENT_DRAINING", "false") t.Setenv("ENABLE_SPOT_INTERRUPTION_DRAINING", "true") + t.Setenv("ENABLE_ASG_LIFECYCLE_DRAINING", "true") t.Setenv("ENABLE_SQS_TERMINATION_DRAINING", "false") t.Setenv("ENABLE_REBALANCE_MONITORING", "true") t.Setenv("ENABLE_REBALANCE_DRAINING", "true") @@ -174,6 +179,7 @@ func TestParseCliArgsOverrides(t *testing.T) { "--dry-run=true", "--enable-scheduled-event-draining=true", "--enable-spot-interruption-draining=false", + "--enable-asg-lifecycle-draining=false", "--enable-sqs-termination-draining=true", "--enable-rebalance-monitoring=false", "--enable-rebalance-draining=false", @@ -201,6 +207,7 @@ func TestParseCliArgsOverrides(t *testing.T) { h.Equals(t, true, nthConfig.DryRun) h.Equals(t, true, nthConfig.EnableScheduledEventDraining) h.Equals(t, false, nthConfig.EnableSpotInterruptionDraining) + h.Equals(t, false, nthConfig.EnableASGLifecycleDraining) h.Equals(t, true, nthConfig.EnableSQSTerminationDraining) h.Equals(t, false, nthConfig.EnableRebalanceMonitoring) h.Equals(t, false, nthConfig.EnableRebalanceDraining) diff --git a/pkg/ec2metadata/ec2metadata.go b/pkg/ec2metadata/ec2metadata.go index f7f5ade8..7449d641 100644 --- a/pkg/ec2metadata/ec2metadata.go +++ b/pkg/ec2metadata/ec2metadata.go @@ -30,6 +30,8 @@ import ( const ( // SpotInstanceActionPath is the context path to spot/instance-action within IMDS SpotInstanceActionPath = "/latest/meta-data/spot/instance-action" + // ASGTargetLifecycleStatePath path to autoscaling target lifecycle state within IMDS + ASGTargetLifecycleStatePath = "/latest/meta-data/autoscaling/target-lifecycle-state" // ScheduledEventPath is the context path to events/maintenance/scheduled within IMDS ScheduledEventPath = "/latest/meta-data/events/maintenance/scheduled" // RebalanceRecommendationPath is the context path to events/recommendations/rebalance within IMDS @@ -193,6 +195,28 @@ func (e *Service) GetRebalanceRecommendationEvent() (rebalanceRec *RebalanceReco return rebalanceRec, nil } +// GetASGTargetLifecycleState retrieves ASG target lifecycle state from imds. State can be empty string +// if the lifecycle hook is not present on the ASG +func (e *Service) GetASGTargetLifecycleState() (state string, err error) { + resp, err := e.Request(ASGTargetLifecycleStatePath) + // 404s are normal when querying for the 'autoscaling/target-lifecycle-state' path and there is no lifecycle hook + if resp != nil && resp.StatusCode == 404 { + return "", nil + } else if resp != nil && (resp.StatusCode < 200 || resp.StatusCode >= 300) { + return "", fmt.Errorf("Metadata request received http status code: %d", resp.StatusCode) + } + if err != nil { + return "", fmt.Errorf("Unable to parse metadata response: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("Unable to parse http response. Status code: %d. %w", resp.StatusCode, err) + } + return string(body), nil +} + // GetMetadataInfo generic function for retrieving ec2 metadata func (e *Service) GetMetadataInfo(path string) (info string, err error) { metadataInfo := "" diff --git a/pkg/ec2metadata/ec2metadata_test.go b/pkg/ec2metadata/ec2metadata_test.go index 6fe58301..4a698c22 100644 --- a/pkg/ec2metadata/ec2metadata_test.go +++ b/pkg/ec2metadata/ec2metadata_test.go @@ -504,6 +504,91 @@ func TestGetRebalanceRecommendationEventRequestFailure(t *testing.T) { h.Assert(t, err != nil, "error expected because no server should be running") } +func TestGetASGTargetLifecycleStateSuccess(t *testing.T) { + requestPath := "/latest/meta-data/autoscaling/target-lifecycle-state" + + server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + rw.Header().Add("X-aws-ec2-metadata-token-ttl-seconds", "100") + if req.URL.String() == "/latest/api/token" { + rw.WriteHeader(200) + _, err := rw.Write([]byte(`token`)) + h.Ok(t, err) + return + } + h.Equals(t, req.Header.Get("X-aws-ec2-metadata-token"), "token") + h.Equals(t, req.URL.String(), requestPath) + _, err := rw.Write([]byte("InService")) + h.Ok(t, err) + })) + defer server.Close() + + expectedState := "InService" + + // Use URL from our local test server + imds := ec2metadata.New(server.URL, 1) + + state, err := imds.GetASGTargetLifecycleState() + h.Ok(t, err) + h.Equals(t, expectedState, state) +} + +func TestGetASGTargetLifecycleState404Success(t *testing.T) { + requestPath := "/latest/meta-data/autoscaling/target-lifecycle-state" + + server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + rw.Header().Add("X-aws-ec2-metadata-token-ttl-seconds", "100") + if req.URL.String() == "/latest/api/token" { + rw.WriteHeader(200) + _, err := rw.Write([]byte(`token`)) + h.Ok(t, err) + return + } + h.Equals(t, req.Header.Get("X-aws-ec2-metadata-token"), "token") + h.Equals(t, req.URL.String(), requestPath) + rw.WriteHeader(404) + })) + defer server.Close() + + // Use URL from our local test server + imds := ec2metadata.New(server.URL, 1) + + state, err := imds.GetASGTargetLifecycleState() + h.Ok(t, err) + h.Assert(t, state == "", "ASG target lifecycle state should be empty") +} + +func TestGetASGTargetLifecycleState500Failure(t *testing.T) { + requestPath := "/latest/meta-data/autoscaling/target-lifecycle-state" + + server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + rw.Header().Add("X-aws-ec2-metadata-token-ttl-seconds", "100") + if req.URL.String() == "/latest/api/token" { + rw.WriteHeader(200) + _, err := rw.Write([]byte(`token`)) + h.Ok(t, err) + return + } + h.Equals(t, req.Header.Get("X-aws-ec2-metadata-token"), "token") + h.Equals(t, req.URL.String(), requestPath) + rw.WriteHeader(500) + })) + defer server.Close() + + // Use URL from our local test server + imds := ec2metadata.New(server.URL, 1) + + _, err := imds.GetASGTargetLifecycleState() + h.Assert(t, err != nil, "error expected on non-200 or non-404 status code") +} + +func TestGetASGTargetLifecycleStateRequestFailure(t *testing.T) { + // Use URL from our local test server + imds := ec2metadata.New("/some-path-that-will-error", 1) + + _, err := imds.GetASGTargetLifecycleState() + h.Assert(t, err != nil, "error expected because no server should be running") +} + func TestGetMetadataServiceRequest404(t *testing.T) { var requestPath string = "/latest/meta-data/instance-type" diff --git a/pkg/monitor/asglifecycle/asg-lifecycle-monitor.go b/pkg/monitor/asglifecycle/asg-lifecycle-monitor.go new file mode 100644 index 00000000..53ed5b1e --- /dev/null +++ b/pkg/monitor/asglifecycle/asg-lifecycle-monitor.go @@ -0,0 +1,102 @@ +// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package asglifecycle + +import ( + "crypto/sha256" + "fmt" + "github.com/aws/aws-node-termination-handler/pkg/ec2metadata" + "github.com/aws/aws-node-termination-handler/pkg/monitor" + "github.com/aws/aws-node-termination-handler/pkg/node" + "time" +) + +// ASGLifecycleMonitorKind is a const to define this monitor kind +const ASGLifecycleMonitorKind = "ASG_LIFECYCLE_MONITOR" + +// ASGLifecycleMonitor is a struct definition which facilitates monitoring of ASG target lifecycle state from IMDS +type ASGLifecycleMonitor struct { + IMDS *ec2metadata.Service + InterruptionChan chan<- monitor.InterruptionEvent + CancelChan chan<- monitor.InterruptionEvent + NodeName string +} + +// NewASGLifecycleMonitor creates an instance of a ASG lifecycle IMDS monitor +func NewASGLifecycleMonitor(imds *ec2metadata.Service, interruptionChan chan<- monitor.InterruptionEvent, cancelChan chan<- monitor.InterruptionEvent, nodeName string) ASGLifecycleMonitor { + return ASGLifecycleMonitor{ + IMDS: imds, + InterruptionChan: interruptionChan, + CancelChan: cancelChan, + NodeName: nodeName, + } +} + +// Monitor continuously monitors metadata for ASG target lifecycle state and sends interruption events to the passed in channel +func (m ASGLifecycleMonitor) Monitor() error { + interruptionEvent, err := m.checkForASGTargetLifecycleStateNotice() + if err != nil { + return err + } + if interruptionEvent != nil && interruptionEvent.Kind == monitor.ASGLifecycleKind { + m.InterruptionChan <- *interruptionEvent + } + return nil +} + +// Kind denotes the kind of monitor +func (m ASGLifecycleMonitor) Kind() string { + return ASGLifecycleMonitorKind +} + +// checkForSpotInterruptionNotice Checks EC2 instance metadata for a spot interruption termination notice +func (m ASGLifecycleMonitor) checkForASGTargetLifecycleStateNotice() (*monitor.InterruptionEvent, error) { + state, err := m.IMDS.GetASGTargetLifecycleState() + if err != nil { + return nil, fmt.Errorf("There was a problem checking for ASG target lifecycle state: %w", err) + } + if state != "Terminated" { + // if the state is not "Terminated", we can skip. State can also be empty (no hook configured). + return nil, nil + } + + nodeName := m.NodeName + // there is no time in the response, we just set time to the latest check + interruptionTime := time.Now() + + // There's no EventID returned, so we'll create it using a hash to prevent duplicates. + hash := sha256.New() + if _, err = hash.Write([]byte(fmt.Sprintf("%s:%s", state, interruptionTime))); err != nil { + return nil, fmt.Errorf("There was a problem creating an event ID from the event: %w", err) + } + + return &monitor.InterruptionEvent{ + EventID: fmt.Sprintf("spot-itn-%x", hash.Sum(nil)), + Kind: monitor.ASGLifecycleKind, + Monitor: ASGLifecycleMonitorKind, + StartTime: interruptionTime, + NodeName: nodeName, + Description: "AST tareget lifecycle state received. Instance will be \n", + PreDrainTask: setInterruptionTaint, + }, nil +} + +func setInterruptionTaint(interruptionEvent monitor.InterruptionEvent, n node.Node) error { + err := n.TaintASGLifecycleTermination(interruptionEvent.NodeName, interruptionEvent.EventID) + if err != nil { + return fmt.Errorf("Unable to taint node with taint %s:%s: %w", node.ASGLifecycleTerminationTaint, interruptionEvent.EventID, err) + } + + return nil +} diff --git a/pkg/monitor/asglifecycle/asg-lifecycle-monitor_internal_test.go b/pkg/monitor/asglifecycle/asg-lifecycle-monitor_internal_test.go new file mode 100644 index 00000000..8532c16a --- /dev/null +++ b/pkg/monitor/asglifecycle/asg-lifecycle-monitor_internal_test.go @@ -0,0 +1,99 @@ +// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package asglifecycle + +import ( + "context" + "testing" + "time" + + "github.com/rs/zerolog/log" + + "github.com/aws/aws-node-termination-handler/pkg/config" + "github.com/aws/aws-node-termination-handler/pkg/monitor" + "github.com/aws/aws-node-termination-handler/pkg/node" + h "github.com/aws/aws-node-termination-handler/pkg/test" + "github.com/aws/aws-node-termination-handler/pkg/uptime" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/kubectl/pkg/drain" +) + +const nodeName = "NAME" + +func getDrainHelper(client *fake.Clientset) *drain.Helper { + return &drain.Helper{ + Client: client, + Force: true, + GracePeriodSeconds: -1, + IgnoreAllDaemonSets: true, + DeleteEmptyDirData: true, + Timeout: time.Duration(120) * time.Second, + Out: log.Logger, + ErrOut: log.Logger, + } +} + +func TestSetInterruptionTaint(t *testing.T) { + drainEvent := monitor.InterruptionEvent{ + EventID: "some-id-that-is-very-long-for-some-reason-and-is-definitely-over-63-characters", + } + nthConfig := config.Config{ + DryRun: true, + NodeName: nodeName, + } + + client := fake.NewSimpleClientset() + _, err := client.CoreV1().Nodes().Create(context.Background(), &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}, metav1.CreateOptions{}) + h.Ok(t, err) + + tNode, err := node.NewWithValues(nthConfig, getDrainHelper(client), uptime.Uptime) + h.Ok(t, err) + + err = setInterruptionTaint(drainEvent, *tNode) + + h.Ok(t, err) +} + +func TestInterruptionTaintAlreadyPresent(t *testing.T) { + drainEvent := monitor.InterruptionEvent{ + EventID: "some-id-that-is-very-long-for-some-reason-and-is-definitely-over-63-characters", + } + nthConfig := config.Config{ + DryRun: false, + NodeName: nodeName, + } + + client := fake.NewSimpleClientset() + newNode := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: nodeName}, + Spec: v1.NodeSpec{Taints: []v1.Taint{{ + Key: node.RebalanceRecommendationTaint, + Value: drainEvent.EventID[:63], + Effect: v1.TaintEffectNoSchedule, + }, + }}, + } + + _, err := client.CoreV1().Nodes().Create(context.Background(), newNode, metav1.CreateOptions{}) + h.Ok(t, err) + + tNode, err := node.NewWithValues(nthConfig, getDrainHelper(client), uptime.Uptime) + h.Ok(t, err) + + err = setInterruptionTaint(drainEvent, *tNode) + + h.Ok(t, err) +} diff --git a/pkg/monitor/asglifecycle/asg-lifecycle-monitor_test.go b/pkg/monitor/asglifecycle/asg-lifecycle-monitor_test.go new file mode 100644 index 00000000..cf6ab6ac --- /dev/null +++ b/pkg/monitor/asglifecycle/asg-lifecycle-monitor_test.go @@ -0,0 +1,125 @@ +// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package asglifecycle_test + +import ( + "github.com/aws/aws-node-termination-handler/pkg/monitor/asglifecycle" + "net/http" + "net/http/httptest" + "testing" + + "github.com/aws/aws-node-termination-handler/pkg/ec2metadata" + "github.com/aws/aws-node-termination-handler/pkg/monitor" + h "github.com/aws/aws-node-termination-handler/pkg/test" +) + +const ( + expFormattedTime = "2020-10-26 15:15:15 +0000 UTC" + imdsV2TokenPath = "/latest/api/token" + nodeName = "test-node" +) + +var asgTargetLifecycleStateResponse = []byte("InService") + +func TestMonitor_Success(t *testing.T) { + requestPath := ec2metadata.ASGTargetLifecycleStatePath + + server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + if imdsV2TokenPath == req.URL.String() { + rw.WriteHeader(403) + return + } + h.Equals(t, req.URL.String(), requestPath) + _, err := rw.Write(asgTargetLifecycleStateResponse) + h.Ok(t, err) + })) + defer server.Close() + + drainChan := make(chan monitor.InterruptionEvent) + cancelChan := make(chan monitor.InterruptionEvent) + imds := ec2metadata.New(server.URL, 1) + + go func() { + result := <-drainChan + h.Equals(t, monitor.ASGLifecycleKind, result.Kind) + h.Equals(t, asglifecycle.ASGLifecycleMonitorKind, result.Monitor) + h.Equals(t, expFormattedTime, result.StartTime.String()) + }() + + asgLifecycleMonitor := asglifecycle.NewASGLifecycleMonitor(imds, drainChan, cancelChan, nodeName) + err := asgLifecycleMonitor.Monitor() + h.Ok(t, err) +} + +func TestMonitor_MetadataParseFailure(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + if imdsV2TokenPath == req.URL.String() { + rw.WriteHeader(403) + return + } + })) + defer server.Close() + + drainChan := make(chan monitor.InterruptionEvent) + cancelChan := make(chan monitor.InterruptionEvent) + imds := ec2metadata.New(server.URL, 1) + + asgLifecycleMonitor := asglifecycle.NewASGLifecycleMonitor(imds, drainChan, cancelChan, nodeName) + err := asgLifecycleMonitor.Monitor() + h.Ok(t, err) +} + +func TestMonitor_404Response(t *testing.T) { + requestPath := ec2metadata.ASGTargetLifecycleStatePath + + server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + if imdsV2TokenPath == req.URL.String() { + rw.WriteHeader(403) + return + } + h.Equals(t, req.URL.String(), requestPath) + http.Error(rw, "error", http.StatusNotFound) + })) + defer server.Close() + + drainChan := make(chan monitor.InterruptionEvent) + cancelChan := make(chan monitor.InterruptionEvent) + imds := ec2metadata.New(server.URL, 1) + + asgLifecycleMonitor := asglifecycle.NewASGLifecycleMonitor(imds, drainChan, cancelChan, nodeName) + err := asgLifecycleMonitor.Monitor() + h.Ok(t, err) +} + +func TestMonitor_500Response(t *testing.T) { + requestPath := ec2metadata.ASGTargetLifecycleStatePath + + server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + if imdsV2TokenPath == req.URL.String() { + rw.WriteHeader(403) + return + } + h.Equals(t, req.URL.String(), requestPath) + http.Error(rw, "error", http.StatusInternalServerError) + })) + defer server.Close() + + drainChan := make(chan monitor.InterruptionEvent) + cancelChan := make(chan monitor.InterruptionEvent) + imds := ec2metadata.New(server.URL, 1) + + asgLifecycleMonitor := asglifecycle.NewASGLifecycleMonitor(imds, drainChan, cancelChan, nodeName) + err := asgLifecycleMonitor.Monitor() + h.Assert(t, err != nil, "Failed to return error when 500 response") +} From 2ff59b9eeb0bb1c16f4e6ed3b516b54ac45de514 Mon Sep 17 00:00:00 2001 From: pete911 Date: Tue, 23 May 2023 08:20:00 +0100 Subject: [PATCH 2/2] update comment on target lifecycle 404 response --- pkg/ec2metadata/ec2metadata.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ec2metadata/ec2metadata.go b/pkg/ec2metadata/ec2metadata.go index 7449d641..f64d0cde 100644 --- a/pkg/ec2metadata/ec2metadata.go +++ b/pkg/ec2metadata/ec2metadata.go @@ -199,7 +199,7 @@ func (e *Service) GetRebalanceRecommendationEvent() (rebalanceRec *RebalanceReco // if the lifecycle hook is not present on the ASG func (e *Service) GetASGTargetLifecycleState() (state string, err error) { resp, err := e.Request(ASGTargetLifecycleStatePath) - // 404s are normal when querying for the 'autoscaling/target-lifecycle-state' path and there is no lifecycle hook + // 404s should not happen, but there can be a case if the instance is brand new and the field is not populated yet if resp != nil && resp.StatusCode == 404 { return "", nil } else if resp != nil && (resp.StatusCode < 200 || resp.StatusCode >= 300) {