Skip to content

Commit 7e74be9

Browse files
authored Oct 16, 2017
Merge pull request #16848 from sosiouxme/20171012-NetworkCheck-interrupt
Automatic merge from submit-queue (batch tested with PRs 16848, 16874). Fix some diagnostic error handling (NetworkCheck and DiagnosticPod). Fixes #16847. A keyboard interrupt on the NetworkCheck diagnostic will now actually abort it (giving it a chance to clean up) and proceed to the next diagnostic. The same is done for DiagnosticPod (which previously did not catch the signal or clean up at all).
2 parents 7f10b2d + 4c90bb6 commit 7e74be9

File tree

2 files changed

+49
-22
lines changed

2 files changed

+49
-22
lines changed
 

Diff for: ‎pkg/diagnostics/client/run_diagnostics_pod.go

+31-9
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,11 @@ package client
33
import (
44
"bufio"
55
"fmt"
6+
"os"
7+
"os/signal"
68
"regexp"
79
"strconv"
10+
"syscall"
811
"time"
912

1013
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -51,11 +54,11 @@ func (d *DiagnosticPod) CanRun() (bool, error) {
5154
// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
5255
func (d *DiagnosticPod) Check() types.DiagnosticResult {
5356
r := types.NewDiagnosticResult("DiagnosticPod")
54-
d.runDiagnosticPod(nil, r)
57+
d.runDiagnosticPod(r)
5558
return r
5659
}
5760

58-
func (d *DiagnosticPod) runDiagnosticPod(service *kapi.Service, r types.DiagnosticResult) {
61+
func (d *DiagnosticPod) runDiagnosticPod(r types.DiagnosticResult) {
5962
loglevel := d.Level
6063
if loglevel > 2 {
6164
loglevel = 2 // need to show summary at least
@@ -78,14 +81,33 @@ func (d *DiagnosticPod) runDiagnosticPod(service *kapi.Service, r types.Diagnost
7881
r.Error("DCli2001", err, fmt.Sprintf("Creating diagnostic pod with image %s failed. Error: (%[2]T) %[2]v", imageName, err))
7982
return
8083
}
81-
defer func() { // delete what we created, or notify that we couldn't
82-
zero := int64(0)
83-
delOpts := metav1.DeleteOptions{TypeMeta: pod.TypeMeta, GracePeriodSeconds: &zero}
84-
if err := d.KubeClient.Core().Pods(d.Namespace).Delete(pod.ObjectMeta.Name, &delOpts); err != nil {
85-
r.Error("DCl2002", err, fmt.Sprintf("Deleting diagnostic pod '%s' failed. Error: %s", pod.ObjectMeta.Name, fmt.Sprintf("(%T) %[1]s", err)))
86-
}
84+
85+
// Jump straight to clean up if there is an interrupt/terminate signal while running diagnostic
86+
done := make(chan bool, 1)
87+
sig := make(chan os.Signal, 1)
88+
signal.Notify(sig, os.Interrupt, syscall.SIGTERM)
89+
go func() {
90+
<-sig
91+
r.Warn("DCli2014", nil, "Interrupt received; aborting diagnostic.")
92+
done <- true
8793
}()
88-
pod, err = d.KubeClient.Core().Pods(d.Namespace).Get(pod.ObjectMeta.Name, metav1.GetOptions{}) // status is filled in post-create
94+
go func() {
95+
d.processDiagnosticPodResults(pod, imageName, r)
96+
done <- true
97+
}()
98+
99+
<-done
100+
signal.Stop(sig)
101+
// delete what we created, or notify that we couldn't
102+
zero := int64(0)
103+
delOpts := metav1.DeleteOptions{TypeMeta: pod.TypeMeta, GracePeriodSeconds: &zero}
104+
if err := d.KubeClient.Core().Pods(d.Namespace).Delete(pod.ObjectMeta.Name, &delOpts); err != nil {
105+
r.Error("DCl2002", err, fmt.Sprintf("Deleting diagnostic pod '%s' failed. Error: %s", pod.ObjectMeta.Name, fmt.Sprintf("(%T) %[1]s", err)))
106+
}
107+
}
108+
109+
func (d *DiagnosticPod) processDiagnosticPodResults(protoPod *kapi.Pod, imageName string, r types.DiagnosticResult) {
110+
pod, err := d.KubeClient.Core().Pods(d.Namespace).Get(protoPod.ObjectMeta.Name, metav1.GetOptions{}) // status is filled in post-create
89111
if err != nil {
90112
r.Error("DCli2003", err, fmt.Sprintf("Retrieving the diagnostic pod definition failed. Error: (%T) %[1]v", err))
91113
return

Diff for: ‎pkg/diagnostics/network/run_pod.go

+18-13
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,7 @@ func (d *NetworkDiagnostic) Check() types.DiagnosticResult {
8585
return d.res
8686
}
8787
if !ok {
88-
d.res.Warn("DNet2002", nil, "Skipping network diagnostics check. Reason: Not using openshift network plugin.")
88+
d.res.Info("DNet2002", "Skipping network diagnostics check. Reason: Not using openshift network plugin.")
8989
return d.res
9090
}
9191

@@ -99,22 +99,27 @@ func (d *NetworkDiagnostic) Check() types.DiagnosticResult {
9999
return d.res
100100
}
101101

102-
d.runNetworkDiagnostic()
102+
// Abort and clean up if there is an interrupt/terminate signal while running network diagnostics
103+
done := make(chan bool, 1)
104+
sig := make(chan os.Signal, 1)
105+
signal.Notify(sig, os.Interrupt, syscall.SIGTERM)
106+
go func() {
107+
<-sig
108+
d.res.Warn("DNet2014", nil, "Interrupt received; aborting network diagnostic.")
109+
done <- true
110+
}()
111+
go func() {
112+
d.runNetworkDiagnostic()
113+
done <- true
114+
}()
115+
<-done
116+
signal.Stop(sig)
117+
d.Cleanup()
118+
103119
return d.res
104120
}
105121

106122
func (d *NetworkDiagnostic) runNetworkDiagnostic() {
107-
// Do clean up if there is an interrupt/terminate signal while running network diagnostics
108-
c := make(chan os.Signal, 2)
109-
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
110-
go func() {
111-
<-c
112-
d.Cleanup()
113-
}()
114-
115-
defer func() {
116-
d.Cleanup()
117-
}()
118123
// Setup test environment
119124
if err := d.TestSetup(); err != nil {
120125
d.res.Error("DNet2005", err, fmt.Sprintf("Setting up test environment for network diagnostics failed: %v", err))

0 commit comments

Comments
 (0)
Please sign in to comment.