Skip to content

Commit fc95b86

Browse files
committed
sdn: fix initialization order to prevent crash on node startup
OsdnNode.Start() (node.pm == nil at this point) -> node.policy.Start() (which is multitenant policy) -> mp.vnids.Start() -> go vmap.watchNetNamespaces() -> (net namespace event happens) -> watchNetNamespaces() -> vmap.policy.AddNetNamespace() (policy is multitenant) -> mp.updatePodNetwork() -> mp.node.podManager.UpdateLocalMulticastRules() (and podManager is still nil) Create the PodManager earlier so it's not nil if we get early events. Fixes: #13742
1 parent c2640b6 commit fc95b86

File tree

3 files changed

+35
-30
lines changed

3 files changed

+35
-30
lines changed

pkg/sdn/plugin/node.go

+8-6
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,11 @@ func (node *OsdnNode) Start() error {
220220
return err
221221
}
222222

223+
// podManager must be created early because other goroutines
224+
// may call into it before its started due to event watches
225+
log.V(5).Infof("Creating openshift-sdn pod manager")
226+
node.podManager = newPodManager(node.kClient, node.policy, node.mtu, node.oc)
227+
223228
err = node.SubnetStartNode()
224229
if err != nil {
225230
return err
@@ -242,12 +247,9 @@ func (node *OsdnNode) Start() error {
242247
return true, nil
243248
})
244249

245-
log.V(5).Infof("Creating and initializing openshift-sdn pod manager")
246-
node.podManager, err = newPodManager(node.host, node.localSubnetCIDR, node.networkInfo, node.kClient, node.policy, node.mtu, node.oc)
247-
if err != nil {
248-
return err
249-
}
250-
if err := node.podManager.Start(cniserver.CNIServerSocketPath); err != nil {
250+
// Kubelet has initialized, now we have a valid node.host
251+
log.V(5).Infof("Starting openshift-sdn pod manager")
252+
if err := node.podManager.Start(cniserver.CNIServerSocketPath, node.host, node.localSubnetCIDR, node.networkInfo.ClusterNetwork); err != nil {
251253
return err
252254
}
253255

pkg/sdn/plugin/pod.go

+21-20
Original file line numberDiff line numberDiff line change
@@ -36,45 +36,38 @@ type podManager struct {
3636
cniServer *cniserver.CNIServer
3737
// Request queue for pod operations incoming from the CNIServer
3838
requests chan (*cniserver.PodRequest)
39-
// Tracks pod :: IP address for hostport handling
39+
// Tracks pod :: IP address for hostport and multicast handling
4040
runningPods map[string]*runningPod
4141
runningPodsLock sync.Mutex
4242

4343
// Live pod setup/teardown stuff not used in testing code
44-
kClient *kclientset.Clientset
45-
policy osdnPolicy
44+
kClient *kclientset.Clientset
45+
policy osdnPolicy
46+
mtu uint32
47+
oc *ovsController
48+
49+
// Things only accessed through the processCNIRequests() goroutine
50+
// and thus can be set from Start()
4651
ipamConfig []byte
47-
mtu uint32
4852
hostportHandler kubehostport.HostportHandler
4953
host knetwork.Host
50-
oc *ovsController
5154
}
5255

5356
// Creates a new live podManager; used by node code
54-
func newPodManager(host knetwork.Host, localSubnetCIDR string, netInfo *NetworkInfo, kClient *kclientset.Clientset, policy osdnPolicy, mtu uint32, oc *ovsController) (*podManager, error) {
55-
pm := newDefaultPodManager(host)
57+
func newPodManager(kClient *kclientset.Clientset, policy osdnPolicy, mtu uint32, oc *ovsController) *podManager {
58+
pm := newDefaultPodManager()
5659
pm.kClient = kClient
5760
pm.policy = policy
5861
pm.mtu = mtu
59-
pm.hostportHandler = kubehostport.NewHostportHandler()
60-
pm.podHandler = pm
6162
pm.oc = oc
62-
63-
var err error
64-
pm.ipamConfig, err = getIPAMConfig(netInfo.ClusterNetwork, localSubnetCIDR)
65-
if err != nil {
66-
return nil, err
67-
}
68-
69-
return pm, nil
63+
return pm
7064
}
7165

7266
// Creates a new basic podManager; used by testcases
73-
func newDefaultPodManager(host knetwork.Host) *podManager {
67+
func newDefaultPodManager() *podManager {
7468
return &podManager{
7569
runningPods: make(map[string]*runningPod),
7670
requests: make(chan *cniserver.PodRequest, 20),
77-
host: host,
7871
}
7972
}
8073

@@ -132,7 +125,15 @@ func getIPAMConfig(clusterNetwork *net.IPNet, localSubnet string) ([]byte, error
132125
}
133126

134127
// Start the CNI server and start processing requests from it
135-
func (m *podManager) Start(socketPath string) error {
128+
func (m *podManager) Start(socketPath string, host knetwork.Host, localSubnetCIDR string, clusterNetwork *net.IPNet) error {
129+
m.host = host
130+
m.hostportHandler = kubehostport.NewHostportHandler()
131+
132+
var err error
133+
if m.ipamConfig, err = getIPAMConfig(clusterNetwork, localSubnetCIDR); err != nil {
134+
return err
135+
}
136+
136137
go m.processCNIRequests()
137138

138139
m.cniServer = cniserver.NewCNIServer(socketPath)

pkg/sdn/plugin/pod_test.go

+6-4
Original file line numberDiff line numberDiff line change
@@ -368,9 +368,10 @@ func TestPodManager(t *testing.T) {
368368

369369
for k, tc := range testcases {
370370
podTester := newPodTester(t, k, socketPath)
371-
podManager := newDefaultPodManager(newFakeHost())
371+
podManager := newDefaultPodManager()
372372
podManager.podHandler = podTester
373-
podManager.Start(socketPath)
373+
_, net, _ := net.ParseCIDR("1.2.0.0/16")
374+
podManager.Start(socketPath, newFakeHost(), "1.2.3.0/24", net)
374375

375376
// Add pods to our expected pod list before kicking off the
376377
// actual pod setup to ensure we don't concurrently access
@@ -463,9 +464,10 @@ func TestDirectPodUpdate(t *testing.T) {
463464
socketPath := filepath.Join(tmpDir, "cni-server.sock")
464465

465466
podTester := newPodTester(t, "update", socketPath)
466-
podManager := newDefaultPodManager(newFakeHost())
467+
podManager := newDefaultPodManager()
467468
podManager.podHandler = podTester
468-
podManager.Start(socketPath)
469+
_, net, _ := net.ParseCIDR("1.2.0.0/16")
470+
podManager.Start(socketPath, newFakeHost(), "1.2.3.0/24", net)
469471

470472
op := &operation{
471473
command: cniserver.CNI_UPDATE,

0 commit comments

Comments
 (0)