Skip to content

Commit 40a937e

Browse files
Merge pull request #20485 from danwinship/egress-ip-master-ha
HA for fully-automatic egress IPs
2 parents 79faa47 + 31c3218 commit 40a937e

File tree

3 files changed

+331
-35
lines changed

3 files changed

+331
-35
lines changed

pkg/network/common/egressip.go

+76-26
Original file line numberDiff line numberDiff line change
@@ -450,22 +450,32 @@ func (eit *EgressIPTracker) SetNodeOffline(nodeIP string, offline bool) {
450450
eit.egressIPChanged(eg)
451451
}
452452
}
453+
454+
if node.requestedCIDRs.Len() != 0 {
455+
eit.updateEgressCIDRs = true
456+
}
457+
453458
eit.syncEgressIPs()
454459
}
455460

461+
func (eit *EgressIPTracker) lookupNodeIP(ip string) string {
462+
eit.Lock()
463+
defer eit.Unlock()
464+
465+
if node := eit.nodesByNodeIP[ip]; node != nil {
466+
return node.sdnIP
467+
}
468+
return ip
469+
}
470+
456471
// Ping a node and return whether or not it is online. We do this by trying to open a TCP
457472
// connection to the "discard" service (port 9); if the node is offline, the attempt will
458473
// time out with no response (and we will return false). If the node is online then we
459474
// presumably will get a "connection refused" error; the code below assumes that anything
460475
// other than timing out indicates that the node is online.
461476
func (eit *EgressIPTracker) Ping(ip string, timeout time.Duration) bool {
462-
eit.Lock()
463-
defer eit.Unlock()
464-
465477
// If the caller used a public node IP, replace it with the SDN IP
466-
if node := eit.nodesByNodeIP[ip]; node != nil {
467-
ip = node.sdnIP
468-
}
478+
ip = eit.lookupNodeIP(ip)
469479

470480
conn, err := net.DialTimeout("tcp", ip+":9", timeout)
471481
if conn != nil {
@@ -485,10 +495,10 @@ func (eit *EgressIPTracker) findEgressIPAllocation(ip net.IP, allocation map[str
485495
otherNodes := false
486496

487497
for _, node := range eit.nodes {
488-
egressIPs, exists := allocation[node.nodeName]
489-
if !exists {
498+
if node.offline {
490499
continue
491500
}
501+
egressIPs := allocation[node.nodeName]
492502
for _, parsed := range node.parsedCIDRs {
493503
if parsed.Contains(ip) {
494504
if bestNode != "" {
@@ -506,24 +516,22 @@ func (eit *EgressIPTracker) findEgressIPAllocation(ip net.IP, allocation map[str
506516
return bestNode, otherNodes
507517
}
508518

509-
// ReallocateEgressIPs returns a map from Node name to array-of-Egress-IP. Unchanged nodes are not included.
510-
func (eit *EgressIPTracker) ReallocateEgressIPs() map[string][]string {
511-
eit.Lock()
512-
defer eit.Unlock()
519+
func (eit *EgressIPTracker) makeEmptyAllocation() (map[string][]string, map[string]bool) {
520+
return make(map[string][]string), make(map[string]bool)
521+
}
522+
523+
func (eit *EgressIPTracker) allocateExistingEgressIPs(allocation map[string][]string, alreadyAllocated map[string]bool) bool {
524+
removedEgressIPs := false
513525

514-
allocation := make(map[string][]string)
515-
changed := make(map[string]bool)
516-
alreadyAllocated := make(map[string]bool)
517526
for _, node := range eit.nodes {
518527
if len(node.parsedCIDRs) > 0 {
519528
allocation[node.nodeName] = make([]string, 0, node.requestedIPs.Len())
520529
}
521530
}
522531
// For each active egress IP, if it still fits within some egress CIDR on its node,
523-
// add it to that node's allocation. (Otherwise add the node to the "changed" map,
524-
// since we'll be removing this egress IP from it.)
532+
// add it to that node's allocation, unless that node is currently offline.
525533
for egressIP, eip := range eit.egressIPs {
526-
if eip.assignedNodeIP == "" {
534+
if eip.assignedNodeIP == "" || alreadyAllocated[egressIP] {
527535
continue
528536
}
529537
node := eip.nodes[0]
@@ -534,17 +542,21 @@ func (eit *EgressIPTracker) ReallocateEgressIPs() map[string][]string {
534542
break
535543
}
536544
}
537-
if found {
545+
if found && !node.offline {
538546
allocation[node.nodeName] = append(allocation[node.nodeName], egressIP)
539547
} else {
540-
changed[node.nodeName] = true
548+
removedEgressIPs = true
541549
}
542550
// (We set alreadyAllocated even if the egressIP will be removed from
543551
// its current node; we can't assign it to a new node until the next
544552
// reallocation.)
545553
alreadyAllocated[egressIP] = true
546554
}
547555

556+
return removedEgressIPs
557+
}
558+
559+
func (eit *EgressIPTracker) allocateNewEgressIPs(allocation map[string][]string, alreadyAllocated map[string]bool) {
548560
// Allocate pending egress IPs that can only go to a single node
549561
for egressIP, eip := range eit.egressIPs {
550562
if alreadyAllocated[egressIP] || len(eip.namespaces) == 0 {
@@ -553,7 +565,6 @@ func (eit *EgressIPTracker) ReallocateEgressIPs() map[string][]string {
553565
nodeName, otherNodes := eit.findEgressIPAllocation(eip.parsed, allocation)
554566
if nodeName != "" && !otherNodes {
555567
allocation[nodeName] = append(allocation[nodeName], egressIP)
556-
changed[nodeName] = true
557568
alreadyAllocated[egressIP] = true
558569
}
559570
}
@@ -565,15 +576,54 @@ func (eit *EgressIPTracker) ReallocateEgressIPs() map[string][]string {
565576
nodeName, _ := eit.findEgressIPAllocation(eip.parsed, allocation)
566577
if nodeName != "" {
567578
allocation[nodeName] = append(allocation[nodeName], egressIP)
568-
changed[nodeName] = true
569579
}
570580
}
581+
}
571582

572-
// Remove unchanged nodes from the return value
573-
for _, node := range eit.nodes {
574-
if !changed[node.nodeName] {
575-
delete(allocation, node.nodeName)
583+
// ReallocateEgressIPs returns a map from Node name to array-of-Egress-IP for all auto-allocated egress IPs.
584+
func (eit *EgressIPTracker) ReallocateEgressIPs() map[string][]string {
585+
eit.Lock()
586+
defer eit.Unlock()
587+
588+
allocation, alreadyAllocated := eit.makeEmptyAllocation()
589+
removedEgressIPs := eit.allocateExistingEgressIPs(allocation, alreadyAllocated)
590+
eit.allocateNewEgressIPs(allocation, alreadyAllocated)
591+
if removedEgressIPs {
592+
// Process the removals now; we'll get called again afterward and can
593+
// check for balance then.
594+
return allocation
595+
}
596+
597+
// Compare the allocation to what we would have gotten if we started from scratch,
598+
// to see if things have gotten too unbalanced. (In particular, if a node goes
599+
// offline, gets emptied, and then comes back online, we want to move a bunch of
600+
// egress IPs back onto that node.)
601+
fullReallocation, alreadyAllocated := eit.makeEmptyAllocation()
602+
eit.allocateNewEgressIPs(fullReallocation, alreadyAllocated)
603+
604+
emptyNodes := []string{}
605+
for nodeName, fullEgressIPs := range fullReallocation {
606+
incrementalEgressIPs := allocation[nodeName]
607+
if len(incrementalEgressIPs) < len(fullEgressIPs)/2 {
608+
emptyNodes = append(emptyNodes, nodeName)
609+
}
610+
}
611+
612+
if len(emptyNodes) > 0 {
613+
// Make a new incremental allocation, but skipping all of the egress IPs
614+
// that got assigned to the "empty" nodes in the full reallocation; this
615+
// will cause them to be dropped from their current nodes and then later
616+
// reassigned (to one of the "empty" nodes, for balance).
617+
allocation, alreadyAllocated = eit.makeEmptyAllocation()
618+
for _, nodeName := range emptyNodes {
619+
for _, egressIP := range fullReallocation[nodeName] {
620+
alreadyAllocated[egressIP] = true
621+
}
576622
}
623+
eit.allocateExistingEgressIPs(allocation, alreadyAllocated)
624+
eit.allocateNewEgressIPs(allocation, alreadyAllocated)
625+
eit.updateEgressCIDRs = true
577626
}
627+
578628
return allocation
579629
}

pkg/network/common/egressip_test.go

+157-7
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,20 @@ func (w *testEIPWatcher) assertNoChanges() error {
7373
return w.assertChanges()
7474
}
7575

76+
func (w *testEIPWatcher) flushChanges() {
77+
w.changes = []string{}
78+
}
79+
80+
func (w *testEIPWatcher) assertUpdateEgressCIDRsNotification() error {
81+
for _, change := range w.changes {
82+
if change == "update egress CIDRs" {
83+
w.flushChanges()
84+
return nil
85+
}
86+
}
87+
return fmt.Errorf("expected change \"update egress CIDRs\", got %#v", w.changes)
88+
}
89+
7690
func setupEgressIPTracker(t *testing.T) (*EgressIPTracker, *testEIPWatcher) {
7791
watcher := &testEIPWatcher{}
7892
return NewEgressIPTracker(watcher), watcher
@@ -864,9 +878,6 @@ func TestEgressCIDRAllocation(t *testing.T) {
864878
t.Fatalf("%v", err)
865879
}
866880
allocation = eit.ReallocateEgressIPs()
867-
if len(allocation) != 0 {
868-
t.Fatalf("Unexpected allocation: %#v", allocation)
869-
}
870881
updateAllocations(eit, allocation)
871882
err = w.assertNoChanges()
872883
if err != nil {
@@ -947,31 +958,41 @@ func TestEgressCIDRAllocation(t *testing.T) {
947958
t.Fatalf("%v", err)
948959
}
949960

950-
// Changing the EgressIPs of a namespace should drop the old allocation and create a new one
961+
// Changing or removing the EgressIPs of a namespace should drop the old allocation (and, when changed, create a new one)
951962
updateNetNamespaceEgress(eit, &networkapi.NetNamespace{
952963
NetID: 46,
953964
EgressIPs: []string{"172.17.0.202"}, // was 172.17.0.200
954965
})
966+
updateNetNamespaceEgress(eit, &networkapi.NetNamespace{
967+
NetID: 44,
968+
EgressIPs: []string{}, // was 172.17.1.1
969+
})
955970
err = w.assertChanges(
956971
"release 172.17.0.200 on 172.17.0.4",
957972
"namespace 46 dropped",
958973
"update egress CIDRs",
974+
"release 172.17.1.1 on 172.17.0.3",
975+
"namespace 44 normal",
976+
"update egress CIDRs",
959977
)
960978
if err != nil {
961979
t.Fatalf("%v", err)
962980
}
963981

964982
allocation = eit.ReallocateEgressIPs()
965-
for _, ip := range allocation["node-4"] {
966-
if ip == "172.17.0.200" {
967-
t.Fatalf("reallocation failed to drop unused egress IP 172.17.0.200: %#v", allocation)
983+
for _, nodeAllocation := range allocation {
984+
for _, ip := range nodeAllocation {
985+
if ip == "172.17.1.1" || ip == "172.17.0.200" {
986+
t.Fatalf("reallocation failed to drop unused egress IP %s: %#v", ip, allocation)
987+
}
968988
}
969989
}
970990
updateAllocations(eit, allocation)
971991
err = w.assertChanges(
972992
"claim 172.17.0.202 on 172.17.0.4 for namespace 46",
973993
"namespace 46 via 172.17.0.202 on 172.17.0.4",
974994
"update egress CIDRs",
995+
"update egress CIDRs",
975996
)
976997
if err != nil {
977998
t.Fatalf("%v", err)
@@ -1031,3 +1052,132 @@ func TestEgressNodeRenumbering(t *testing.T) {
10311052
t.Fatalf("%v", err)
10321053
}
10331054
}
1055+
1056+
func TestEgressCIDRAllocationOffline(t *testing.T) {
1057+
eit, w := setupEgressIPTracker(t)
1058+
1059+
// Create nodes...
1060+
updateHostSubnetEgress(eit, &networkapi.HostSubnet{
1061+
HostIP: "172.17.0.3",
1062+
EgressIPs: []string{},
1063+
EgressCIDRs: []string{"172.17.0.0/24", "172.17.1.0/24"},
1064+
})
1065+
updateHostSubnetEgress(eit, &networkapi.HostSubnet{
1066+
HostIP: "172.17.0.4",
1067+
EgressIPs: []string{},
1068+
EgressCIDRs: []string{"172.17.0.0/24"},
1069+
})
1070+
updateHostSubnetEgress(eit, &networkapi.HostSubnet{
1071+
HostIP: "172.17.0.5",
1072+
EgressIPs: []string{},
1073+
EgressCIDRs: []string{"172.17.1.0/24"},
1074+
})
1075+
1076+
// Create namespaces
1077+
updateNetNamespaceEgress(eit, &networkapi.NetNamespace{
1078+
NetID: 100,
1079+
EgressIPs: []string{"172.17.0.100"},
1080+
})
1081+
updateNetNamespaceEgress(eit, &networkapi.NetNamespace{
1082+
NetID: 101,
1083+
EgressIPs: []string{"172.17.0.101"},
1084+
})
1085+
updateNetNamespaceEgress(eit, &networkapi.NetNamespace{
1086+
NetID: 102,
1087+
EgressIPs: []string{"172.17.0.102"},
1088+
})
1089+
updateNetNamespaceEgress(eit, &networkapi.NetNamespace{
1090+
NetID: 200,
1091+
EgressIPs: []string{"172.17.1.200"},
1092+
})
1093+
updateNetNamespaceEgress(eit, &networkapi.NetNamespace{
1094+
NetID: 201,
1095+
EgressIPs: []string{"172.17.1.201"},
1096+
})
1097+
updateNetNamespaceEgress(eit, &networkapi.NetNamespace{
1098+
NetID: 202,
1099+
EgressIPs: []string{"172.17.1.202"},
1100+
})
1101+
1102+
// In a perfect world, we'd get 2 IPs on each node, but depending on processing
1103+
// order, this isn't guaranteed. E.g., if the three 172.17.0.x IPs get processed
1104+
// first, we could get two of them on node-3 and one on node-4. Then the first two
1105+
// 172.17.1.x IPs get assigned to node-5, and the last one could go to either
1106+
// node-3 or node-5. Regardless of order, node-3 is guaranteed to get at least
1107+
// two IPs since there's no way either node-4 or node-5 could be assigned a
1108+
// third IP if node-3 still only had one.
1109+
allocation := eit.ReallocateEgressIPs()
1110+
node3ips := allocation["node-3"]
1111+
node4ips := allocation["node-4"]
1112+
node5ips := allocation["node-5"]
1113+
if len(node3ips) < 2 || len(node4ips) == 0 || len(node5ips) == 0 ||
1114+
len(node3ips)+len(node4ips)+len(node5ips) != 6 {
1115+
t.Fatalf("Bad IP allocation: %#v", allocation)
1116+
}
1117+
updateAllocations(eit, allocation)
1118+
1119+
w.flushChanges()
1120+
1121+
// Now take node-3 offline
1122+
eit.SetNodeOffline("172.17.0.3", true)
1123+
err := w.assertUpdateEgressCIDRsNotification()
1124+
if err != nil {
1125+
t.Fatalf("%v", err)
1126+
}
1127+
1128+
// First reallocation should empty out node-3
1129+
allocation = eit.ReallocateEgressIPs()
1130+
if node3ips, ok := allocation["node-3"]; !ok || len(node3ips) != 0 {
1131+
t.Fatalf("Bad IP allocation: %#v", allocation)
1132+
}
1133+
updateAllocations(eit, allocation)
1134+
1135+
err = w.assertUpdateEgressCIDRsNotification()
1136+
if err != nil {
1137+
t.Fatalf("%v", err)
1138+
}
1139+
1140+
// Next reallocation should reassign egress IPs to node-4 and node-5
1141+
allocation = eit.ReallocateEgressIPs()
1142+
node3ips = allocation["node-3"]
1143+
node4ips = allocation["node-4"]
1144+
node5ips = allocation["node-5"]
1145+
if len(node3ips) != 0 || len(node4ips) != 3 || len(node5ips) != 3 {
1146+
t.Fatalf("Bad IP allocation: %#v", allocation)
1147+
}
1148+
updateAllocations(eit, allocation)
1149+
1150+
// Bring node-3 back
1151+
eit.SetNodeOffline("172.17.0.3", false)
1152+
err = w.assertUpdateEgressCIDRsNotification()
1153+
if err != nil {
1154+
t.Fatalf("%v", err)
1155+
}
1156+
1157+
// First reallocation should remove some IPs from node-4 and node-5 but not add
1158+
// them to node-3. As above, the "balanced" allocation we're aiming for may not
1159+
// be perfect, but it has to be planning to assign at least 2 IPs to node-3.
1160+
allocation = eit.ReallocateEgressIPs()
1161+
node3ips = allocation["node-3"]
1162+
node4ips = allocation["node-4"]
1163+
node5ips = allocation["node-5"]
1164+
if len(node3ips) != 0 || len(node4ips)+len(node5ips) > 4 {
1165+
t.Fatalf("Bad IP allocation: %#v", allocation)
1166+
}
1167+
updateAllocations(eit, allocation)
1168+
1169+
err = w.assertUpdateEgressCIDRsNotification()
1170+
if err != nil {
1171+
t.Fatalf("%v", err)
1172+
}
1173+
1174+
// Next reallocation should reassign egress IPs to node-3
1175+
allocation = eit.ReallocateEgressIPs()
1176+
node3ips = allocation["node-3"]
1177+
node4ips = allocation["node-4"]
1178+
node5ips = allocation["node-5"]
1179+
if len(node3ips) < 2 || len(node4ips) == 0 || len(node5ips) == 0 {
1180+
t.Fatalf("Bad IP allocation: %#v", allocation)
1181+
}
1182+
updateAllocations(eit, allocation)
1183+
}

0 commit comments

Comments
 (0)