Skip to content

Commit 3f3ce56

Browse files
authored
Merge pull request #5039 from mtulio/OCPBUGS-36293-fix-byoip-eip
🐛 ec2/byoip: fix EIP leak when creating machine
2 parents 686a3f5 + 4626a6a commit 3f3ce56

File tree

8 files changed

+123
-57
lines changed

8 files changed

+123
-57
lines changed

controllers/awsmachine_controller.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -533,17 +533,20 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *
533533
}
534534

535535
// BYO Public IPv4 Pool feature: allocates and associates an EIP to machine when PublicIP and
536-
// cluster-wide Public IPv4 Pool configuration are set. The EIP must be associated after the instance
537-
// is created and transictioned from Pending state.
538-
// In the regular flow, if the instance have already a public IPv4 address (EIP) associated it will
539-
// be released when a new is assigned, the createInstance() prevents that behavior by enforcing
540-
// to not launch an instance with EIP, allowing ReconcileElasticIPFromPublicPool assigning
541-
// a BYOIP without duplication.
536+
// cluster-wide config Public IPv4 Pool configuration are set. The custom EIP is associated
537+
// after the instance is created and transictioned to Running state.
538+
// The CreateInstance() is enforcing to not assign public IP address when PublicIP is set with
539+
// BYOIpv4 Pool, preventing a duplicated EIP creation.
542540
if pool := machineScope.GetElasticIPPool(); pool != nil {
543-
if err := ec2svc.ReconcileElasticIPFromPublicPool(pool, instance); err != nil {
544-
machineScope.Error(err, "failed to associate elastic IP address")
541+
requeue, err := ec2svc.ReconcileElasticIPFromPublicPool(pool, instance)
542+
if err != nil {
543+
machineScope.Error(err, "Failed to reconcile BYO Public IPv4")
545544
return ctrl.Result{}, err
546545
}
546+
if requeue {
547+
machineScope.Debug("Found instance in pending state while reconciling publicIpv4Pool, requeue", "instance", instance.ID)
548+
return ctrl.Result{RequeueAfter: DefaultReconcilerRequeue}, nil
549+
}
547550
}
548551

549552
if feature.Gates.Enabled(feature.EventBridgeInstanceState) {

controllers/awsmachine_controller_unit_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1023,7 +1023,7 @@ func TestAWSMachineReconciler(t *testing.T) {
10231023
PublicIpv4Pool: aws.String("ipv4pool-ec2-0123456789abcdef0"),
10241024
PublicIpv4PoolFallBackOrder: ptr.To(infrav1.PublicIpv4PoolFallbackOrderAmazonPool),
10251025
}
1026-
ec2Svc.EXPECT().ReconcileElasticIPFromPublicPool(gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
1026+
ec2Svc.EXPECT().ReconcileElasticIPFromPublicPool(gomock.Any(), gomock.Any()).Return(false, nil).AnyTimes()
10271027

10281028
_, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs, cs)
10291029
g.Expect(err).To(BeNil())

pkg/cloud/services/ec2/eip.go

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66

77
"github.com/aws/aws-sdk-go/aws"
88
"github.com/aws/aws-sdk-go/service/ec2"
9+
"k8s.io/utils/ptr"
910

1011
infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
1112
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record"
@@ -16,17 +17,51 @@ func getElasticIPRoleName(instanceID string) string {
1617
}
1718

1819
// ReconcileElasticIPFromPublicPool reconciles the elastic IP from a custom Public IPv4 Pool.
19-
func (s *Service) ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) error {
20-
// TODO: check if the instance is in the state allowing EIP association.
21-
// Expected instance states: pending or running
22-
// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-lifecycle.html
20+
func (s *Service) ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) (bool, error) {
21+
shouldRequeue := true
22+
// Should not happen
23+
if pool == nil {
24+
return shouldRequeue, fmt.Errorf("unexpected behavior, pool must be set when reconcile ElasticIPPool")
25+
}
26+
iip := ptr.Deref(instance.PublicIP, "")
27+
s.scope.Debug("Reconciling machine with custom Public IPv4 Pool", "instance-id", instance.ID, "instance-state", instance.State, "instance-public-ip", iip, "publicIpv4PoolID", pool.PublicIpv4Pool)
28+
29+
// Requeue when the instance is not ready to be associated.
30+
if instance.State != infrav1.InstanceStateRunning {
31+
s.scope.Debug("Unable to reconcile Elastic IP Pool for instance", "instance-id", instance.ID, "instance-state", instance.State)
32+
return shouldRequeue, nil
33+
}
34+
35+
// All done, must reconcile only when the instance is in running state.
36+
shouldRequeue = false
37+
38+
// Prevent running association every reconciliation when it is already done.
39+
addrs, err := s.netService.GetAddresses(getElasticIPRoleName(instance.ID))
40+
if err != nil {
41+
s.scope.Error(err, "error checking if addresses exists for Elastic IP Pool to machine", "eip-role", getElasticIPRoleName(instance.ID))
42+
return shouldRequeue, err
43+
}
44+
if len(addrs.Addresses) > 0 {
45+
if len(addrs.Addresses) != 1 {
46+
return shouldRequeue, fmt.Errorf("unexpected number of EIPs allocated to the role. expected 1, got %d", len(addrs.Addresses))
47+
}
48+
addr := addrs.Addresses[0]
49+
// address is already associated.
50+
if addr.AssociationId != nil && addr.InstanceId != nil && *addr.InstanceId == instance.ID {
51+
s.scope.Debug("Machine is already associated with an Elastic IP with custom Public IPv4 pool", "eip", addr.AllocationId, "eip-address", addr.PublicIp, "eip-associationID", addr.AssociationId, "eip-instance", addr.InstanceId)
52+
return shouldRequeue, nil
53+
}
54+
}
55+
56+
// Get existing, or allocate an EIP, then Associate to the machine.
57+
// Should requeue if any error is returned in the process.
2358
if err := s.getAndAssociateAddressesToInstance(pool, getElasticIPRoleName(instance.ID), instance.ID); err != nil {
24-
return fmt.Errorf("failed to reconcile Elastic IP: %w", err)
59+
return true, fmt.Errorf("failed to reconcile Elastic IP: %w", err)
2560
}
26-
return nil
61+
return shouldRequeue, nil
2762
}
2863

29-
// ReleaseElasticIP releases a specific elastic IP based on the instance role.
64+
// ReleaseElasticIP releases a specific Elastic IP based on the instance role.
3065
func (s *Service) ReleaseElasticIP(instanceID string) error {
3166
return s.netService.ReleaseAddressByRole(getElasticIPRoleName(instanceID))
3267
}

pkg/cloud/services/ec2/instances.go

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -572,21 +572,13 @@ func (s *Service) runInstance(role string, i *infrav1.Instance) (*infrav1.Instan
572572

573573
input.NetworkInterfaces = netInterfaces
574574
} else {
575-
if ptr.Deref(i.PublicIPOnLaunch, false) {
576-
input.NetworkInterfaces = []*ec2.InstanceNetworkInterfaceSpecification{
577-
{
578-
DeviceIndex: aws.Int64(0),
579-
SubnetId: aws.String(i.SubnetID),
580-
Groups: aws.StringSlice(i.SecurityGroupIDs),
581-
AssociatePublicIpAddress: i.PublicIPOnLaunch,
582-
},
583-
}
584-
} else {
585-
input.SubnetId = aws.String(i.SubnetID)
586-
587-
if len(i.SecurityGroupIDs) > 0 {
588-
input.SecurityGroupIds = aws.StringSlice(i.SecurityGroupIDs)
589-
}
575+
input.NetworkInterfaces = []*ec2.InstanceNetworkInterfaceSpecification{
576+
{
577+
DeviceIndex: aws.Int64(0),
578+
SubnetId: aws.String(i.SubnetID),
579+
Groups: aws.StringSlice(i.SecurityGroupIDs),
580+
AssociatePublicIpAddress: i.PublicIPOnLaunch,
581+
},
590582
}
591583
}
592584

pkg/cloud/services/ec2/instances_test.go

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -672,11 +672,16 @@ func TestCreateInstance(t *testing.T) {
672672
}, nil)
673673
m.
674674
RunInstancesWithContext(context.TODO(), &ec2.RunInstancesInput{
675-
ImageId: aws.String("abc"),
676-
InstanceType: aws.String("m5.2xlarge"),
677-
KeyName: aws.String("default"),
678-
SecurityGroupIds: aws.StringSlice([]string{"2", "3"}),
679-
SubnetId: aws.String("subnet-3"),
675+
ImageId: aws.String("abc"),
676+
InstanceType: aws.String("m5.2xlarge"),
677+
KeyName: aws.String("default"),
678+
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
679+
{
680+
DeviceIndex: aws.Int64(0),
681+
SubnetId: aws.String("subnet-3"),
682+
Groups: aws.StringSlice([]string{"2", "3"}),
683+
},
684+
},
680685
TagSpecifications: []*ec2.TagSpecification{
681686
{
682687
ResourceType: aws.String("instance"),
@@ -920,11 +925,16 @@ func TestCreateInstance(t *testing.T) {
920925
}, nil)
921926
m.
922927
RunInstancesWithContext(context.TODO(), &ec2.RunInstancesInput{
923-
ImageId: aws.String("abc"),
924-
InstanceType: aws.String("m5.2xlarge"),
925-
KeyName: aws.String("default"),
926-
SecurityGroupIds: aws.StringSlice([]string{"4", "3"}),
927-
SubnetId: aws.String("subnet-5"),
928+
ImageId: aws.String("abc"),
929+
InstanceType: aws.String("m5.2xlarge"),
930+
KeyName: aws.String("default"),
931+
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
932+
{
933+
DeviceIndex: aws.Int64(0),
934+
SubnetId: aws.String("subnet-5"),
935+
Groups: aws.StringSlice([]string{"4", "3"}),
936+
},
937+
},
928938
TagSpecifications: []*ec2.TagSpecification{
929939
{
930940
ResourceType: aws.String("instance"),
@@ -3346,8 +3356,13 @@ func TestCreateInstance(t *testing.T) {
33463356
Placement: &ec2.Placement{
33473357
Tenancy: &tenancy,
33483358
},
3349-
SecurityGroupIds: []*string{aws.String("2"), aws.String("3")},
3350-
SubnetId: aws.String("subnet-1"),
3359+
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
3360+
{
3361+
DeviceIndex: aws.Int64(0),
3362+
SubnetId: aws.String("subnet-1"),
3363+
Groups: []*string{aws.String("2"), aws.String("3")},
3364+
},
3365+
},
33513366
TagSpecifications: []*ec2.TagSpecification{
33523367
{
33533368
ResourceType: aws.String("instance"),
@@ -3555,8 +3570,13 @@ func TestCreateInstance(t *testing.T) {
35553570
Placement: &ec2.Placement{
35563571
GroupName: aws.String("placement-group1"),
35573572
},
3558-
SecurityGroupIds: []*string{aws.String("2"), aws.String("3")},
3559-
SubnetId: aws.String("subnet-1"),
3573+
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
3574+
{
3575+
DeviceIndex: aws.Int64(0),
3576+
SubnetId: aws.String("subnet-1"),
3577+
Groups: []*string{aws.String("2"), aws.String("3")},
3578+
},
3579+
},
35603580
TagSpecifications: []*ec2.TagSpecification{
35613581
{
35623582
ResourceType: aws.String("instance"),
@@ -3785,8 +3805,13 @@ func TestCreateInstance(t *testing.T) {
37853805
Tenancy: &tenancy,
37863806
GroupName: aws.String("placement-group1"),
37873807
},
3788-
SecurityGroupIds: []*string{aws.String("2"), aws.String("3")},
3789-
SubnetId: aws.String("subnet-1"),
3808+
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
3809+
{
3810+
DeviceIndex: aws.Int64(0),
3811+
SubnetId: aws.String("subnet-1"),
3812+
Groups: []*string{aws.String("2"), aws.String("3")},
3813+
},
3814+
},
37903815
TagSpecifications: []*ec2.TagSpecification{
37913816
{
37923817
ResourceType: aws.String("instance"),
@@ -3976,8 +4001,13 @@ func TestCreateInstance(t *testing.T) {
39764001
GroupName: aws.String("placement-group1"),
39774002
PartitionNumber: aws.Int64(2),
39784003
},
3979-
SecurityGroupIds: []*string{aws.String("2"), aws.String("3")},
3980-
SubnetId: aws.String("subnet-1"),
4004+
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
4005+
{
4006+
DeviceIndex: aws.Int64(0),
4007+
SubnetId: aws.String("subnet-1"),
4008+
Groups: aws.StringSlice([]string{"2", "3"}),
4009+
},
4010+
},
39814011
TagSpecifications: []*ec2.TagSpecification{
39824012
{
39834013
ResourceType: aws.String("instance"),

pkg/cloud/services/interfaces.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ type EC2Interface interface {
8282
DeleteBastion() error
8383
ReconcileBastion() error
8484
// ReconcileElasticIPFromPublicPool reconciles the elastic IP from a custom Public IPv4 Pool.
85-
ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) error
85+
ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) (bool, error)
8686

8787
// ReleaseElasticIP reconciles the elastic IP from a custom Public IPv4 Pool.
8888
ReleaseElasticIP(instanceID string) error

pkg/cloud/services/mock_services/ec2_interface_mock.go

Lines changed: 4 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/cloud/services/network/eips.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -191,12 +191,17 @@ func (s *Service) GetOrAllocateAddresses(pool *infrav1.ElasticIPPool, num int, r
191191
return s.getOrAllocateAddresses(num, role, pool)
192192
}
193193

194+
// GetAddresses returns the address associated to a given role.
195+
func (s *Service) GetAddresses(role string) (*ec2.DescribeAddressesOutput, error) {
196+
return s.describeAddresses(role)
197+
}
198+
194199
// ReleaseAddressByRole releases EIP addresses filtering by tag CAPA provider role.
195200
func (s *Service) ReleaseAddressByRole(role string) error {
196-
clusterFilter := []*ec2.Filter{filter.EC2.Cluster(s.scope.Name())}
197-
clusterFilter = append(clusterFilter, filter.EC2.ProviderRole(role))
198-
199-
return s.releaseAddressesWithFilter(clusterFilter)
201+
return s.releaseAddressesWithFilter([]*ec2.Filter{
202+
filter.EC2.ClusterOwned(s.scope.Name()),
203+
filter.EC2.ProviderRole(role),
204+
})
200205
}
201206

202207
// setByoPublicIpv4 check if the config has Public IPv4 Pool defined, then

0 commit comments

Comments
 (0)