Skip to content

Commit 308bb2e

Browse files
Merge pull request #18758 from pravisankar/fix-clear-nodenetwork
Automatic merge from submit-queue. Bug 1550266 - Fix clearInitialNodeNetworkUnavailableCondition() in sdn master #This change fixes these 2 issues: - Currently, clearing NodeNetworkUnavailable node condition only works if we are successful in updating the node status during the first iteration. Subsequent retries will not work because: 1. knode != node 2. node.Status is updated in memory 3. UpdateNodeStatus(knode) (3) will have no effect as in step (2) node.Status is updated but not knode.Status - Node object passed to this method is pointer to an item in the informer cache and it should not be modified directly. Avoid NodeNetworkUnavailable condition check for every node status update - We know that kubelet sets NodeNetworkUnavailable condition when the node is created/registered with api server. - So we only need to call clearInitialNodeNetworkUnavailableCondition() for the first time and not during subsequent node status update events.
2 parents 8f99915 + 7d5f2ac commit 308bb2e

File tree

1 file changed

+21
-29
lines changed

1 file changed

+21
-29
lines changed

pkg/network/master/subnets.go

+21-29
Original file line numberDiff line numberDiff line change
@@ -190,55 +190,46 @@ func (master *OsdnMaster) deleteNode(nodeName string) error {
190190
// TODO: make upstream kubelet more flexible with overlays and GCE so this
191191
// condition doesn't get added for network plugins that don't want it, and then
192192
// we can remove this function.
193-
func (master *OsdnMaster) clearInitialNodeNetworkUnavailableCondition(node *kapi.Node) {
193+
func (master *OsdnMaster) clearInitialNodeNetworkUnavailableCondition(origNode *kapi.Node) {
194+
// Informer cache should not be mutated, so get a copy of the object
195+
node := origNode.DeepCopy()
194196
knode := node
195197
cleared := false
196198
resultErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
197199
var err error
198200

199201
if knode != node {
200-
knode, err = master.kClient.Core().Nodes().Get(node.ObjectMeta.Name, metav1.GetOptions{})
202+
knode, err = master.kClient.Core().Nodes().Get(node.Name, metav1.GetOptions{})
201203
if err != nil {
202204
return err
203205
}
204206
}
205207

206-
// Let caller modify knode's status, then push to api server.
207-
_, condition := GetNodeCondition(&node.Status, kapi.NodeNetworkUnavailable)
208-
if condition != nil && condition.Status != kapi.ConditionFalse && condition.Reason == "NoRouteCreated" {
209-
condition.Status = kapi.ConditionFalse
210-
condition.Reason = "RouteCreated"
211-
condition.Message = "openshift-sdn cleared kubelet-set NoRouteCreated"
212-
condition.LastTransitionTime = metav1.Now()
213-
knode, err = master.kClient.Core().Nodes().UpdateStatus(knode)
214-
if err == nil {
215-
cleared = true
208+
for i := range knode.Status.Conditions {
209+
if knode.Status.Conditions[i].Type == kapi.NodeNetworkUnavailable {
210+
condition := &knode.Status.Conditions[i]
211+
if condition.Status != kapi.ConditionFalse && condition.Reason == "NoRouteCreated" {
212+
condition.Status = kapi.ConditionFalse
213+
condition.Reason = "RouteCreated"
214+
condition.Message = "openshift-sdn cleared kubelet-set NoRouteCreated"
215+
condition.LastTransitionTime = metav1.Now()
216+
217+
if knode, err = master.kClient.Core().Nodes().UpdateStatus(knode); err == nil {
218+
cleared = true
219+
}
220+
}
221+
break
216222
}
217223
}
218224
return err
219225
})
220226
if resultErr != nil {
221227
utilruntime.HandleError(fmt.Errorf("status update failed for local node: %v", resultErr))
222228
} else if cleared {
223-
glog.Infof("Cleared node NetworkUnavailable/NoRouteCreated condition for %s", node.ObjectMeta.Name)
229+
glog.Infof("Cleared node NetworkUnavailable/NoRouteCreated condition for %s", node.Name)
224230
}
225231
}
226232

227-
// TODO remove this and switch to external
228-
// GetNodeCondition extracts the provided condition from the given status and returns that.
229-
// Returns nil and -1 if the condition is not present, and the index of the located condition.
230-
func GetNodeCondition(status *kapi.NodeStatus, conditionType kapi.NodeConditionType) (int, *kapi.NodeCondition) {
231-
if status == nil {
232-
return -1, nil
233-
}
234-
for i := range status.Conditions {
235-
if status.Conditions[i].Type == conditionType {
236-
return i, &status.Conditions[i]
237-
}
238-
}
239-
return -1, nil
240-
}
241-
242233
func (master *OsdnMaster) watchNodes() {
243234
funcs := common.InformerFuncs(&kapi.Node{}, master.handleAddOrUpdateNode, master.handleDeleteNode)
244235
master.kubeInformers.Core().InternalVersion().Nodes().Informer().AddEventHandler(funcs)
@@ -258,14 +249,15 @@ func (master *OsdnMaster) handleAddOrUpdateNode(obj, _ interface{}, eventType wa
258249
utilruntime.HandleError(fmt.Errorf("Node IP is not set for node %s, skipping %s event, node: %v", node.Name, eventType, node))
259250
return
260251
}
261-
master.clearInitialNodeNetworkUnavailableCondition(node)
262252

263253
if oldNodeIP, ok := master.hostSubnetNodeIPs[node.UID]; ok && (nodeIP == oldNodeIP) {
264254
return
265255
}
266256
// Node status is frequently updated by kubelet, so log only if the above condition is not met
267257
glog.V(5).Infof("Watch %s event for Node %q", eventType, node.Name)
268258

259+
master.clearInitialNodeNetworkUnavailableCondition(node)
260+
269261
usedNodeIP, err := master.addNode(node.Name, string(node.UID), nodeIP, nil)
270262
if err != nil {
271263
utilruntime.HandleError(fmt.Errorf("Error creating subnet for node %s, ip %s: %v", node.Name, nodeIP, err))

0 commit comments

Comments
 (0)