Skip to content

Commit 766d0df

Browse files
deads2kbertinatto
authored andcommitted
UPSTREAM: <carry>: provide events, messages, and bodies for probe failures of important pods
UPSTREAM: <carry>: provide unique reason for pod probe event during termination OpenShift-Rebase-Source: 01542fc
1 parent 2cfee6e commit 766d0df

File tree

4 files changed

+91
-8
lines changed

4 files changed

+91
-8
lines changed

pkg/kubelet/prober/patch_prober.go

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package prober
2+
3+
import (
4+
"net/http"
5+
"strings"
6+
"time"
7+
8+
v1 "k8s.io/api/core/v1"
9+
"k8s.io/klog/v2"
10+
"k8s.io/kubernetes/pkg/probe"
11+
httpprobe "k8s.io/kubernetes/pkg/probe/http"
12+
)
13+
14+
func (pb *prober) maybeProbeForBody(prober httpprobe.Prober, req *http.Request, timeout time.Duration, pod *v1.Pod, container v1.Container, probeType probeType) (probe.Result, string, error) {
15+
if !isInterestingPod(pod) {
16+
return prober.Probe(req, timeout)
17+
}
18+
bodyProber, ok := prober.(httpprobe.DetailedProber)
19+
if !ok {
20+
return prober.Probe(req, timeout)
21+
}
22+
result, output, body, probeError := bodyProber.ProbeForBody(req, timeout)
23+
switch result {
24+
case probe.Success:
25+
return result, output, probeError
26+
case probe.Warning, probe.Failure, probe.Unknown:
27+
// these pods are interesting enough to show the body content
28+
klog.Infof("interesting pod/%s container/%s namespace/%s: %s probe status=%v output=%q start-of-body=%s",
29+
pod.Name, container.Name, pod.Namespace, probeType, result, output, body)
30+
31+
reason := "ProbeError" // this is the normal value
32+
if pod.DeletionTimestamp != nil {
33+
// If the container was sent a sig-term, we want to have a different reason so we can distinguish this in our
34+
// monitoring and watching code.
35+
// Pod delete does this, but there are other possible reasons as well. We'll start with pod delete to improve the state of the world.
36+
reason = "TerminatingPodProbeError"
37+
}
38+
39+
// in fact, they are so interesting we'll try to send events for them
40+
pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, reason, "%s probe error: %s\nbody: %s\n", probeType, output, body)
41+
return result, output, probeError
42+
default:
43+
return result, output, probeError
44+
}
45+
}
46+
47+
func isInterestingPod(pod *v1.Pod) bool {
48+
if pod == nil {
49+
return false
50+
}
51+
if strings.HasPrefix(pod.Namespace, "openshift-") {
52+
return true
53+
}
54+
55+
return false
56+
}

pkg/kubelet/prober/prober.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ func (pb *prober) runProbe(ctx context.Context, probeType probeType, p *v1.Probe
169169
headers := p.HTTPGet.HTTPHeaders
170170
klogV4.InfoS("HTTP-Probe", "scheme", scheme, "host", host, "port", port, "path", path, "timeout", timeout, "headers", headers, "probeType", probeType)
171171
}
172-
return pb.http.Probe(req, timeout)
172+
return pb.maybeProbeForBody(pb.http, req, timeout, pod, container, probeType)
173173

174174
case p.TCPSocket != nil:
175175
port, err := probe.ResolveContainerPort(p.TCPSocket.Port, &container)

pkg/probe/http/http.go

+9-7
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ func (pr httpProber) Probe(req *http.Request, timeout time.Duration) (probe.Resu
7878
Transport: pr.transport,
7979
CheckRedirect: RedirectChecker(pr.followNonLocalRedirects),
8080
}
81-
return DoHTTPProbe(req, client)
81+
result, details, _, err := DoHTTPProbe(req, client)
82+
return result, details, err
8283
}
8384

8485
// GetHTTPInterface is an interface for making HTTP requests, that returns a response and error.
@@ -90,36 +91,37 @@ type GetHTTPInterface interface {
9091
// If the HTTP response code is successful (i.e. 400 > code >= 200), it returns Success.
9192
// If the HTTP response code is unsuccessful or HTTP communication fails, it returns Failure.
9293
// This is exported because some other packages may want to do direct HTTP probes.
93-
func DoHTTPProbe(req *http.Request, client GetHTTPInterface) (probe.Result, string, error) {
94+
func DoHTTPProbe(req *http.Request, client GetHTTPInterface) (probe.Result, string, string, error) {
9495
url := req.URL
9596
headers := req.Header
9697
res, err := client.Do(req)
9798
if err != nil {
9899
// Convert errors into failures to catch timeouts.
99-
return probe.Failure, err.Error(), nil
100+
return probe.Failure, err.Error(), "", nil
100101
}
101102
defer res.Body.Close()
102103
b, err := utilio.ReadAtMost(res.Body, maxRespBodyLength)
103104
if err != nil {
104105
if err == utilio.ErrLimitReached {
105106
klog.V(4).Infof("Non fatal body truncation for %s, Response: %v", url.String(), *res)
106107
} else {
107-
return probe.Failure, "", err
108+
return probe.Failure, "", "", err
108109
}
109110
}
110111
body := string(b)
111112
if res.StatusCode >= http.StatusOK && res.StatusCode < http.StatusBadRequest {
112113
if res.StatusCode >= http.StatusMultipleChoices { // Redirect
113114
klog.V(4).Infof("Probe terminated redirects for %s, Response: %v", url.String(), *res)
114-
return probe.Warning, fmt.Sprintf("Probe terminated redirects, Response body: %v", body), nil
115+
return probe.Warning, fmt.Sprintf("Probe terminated redirects, Response body: %v", body), body, nil
115116
}
116117
klog.V(4).Infof("Probe succeeded for %s, Response: %v", url.String(), *res)
117-
return probe.Success, body, nil
118+
return probe.Success, body, body, nil
118119
}
119120
klog.V(4).Infof("Probe failed for %s with request headers %v, response body: %v", url.String(), headers, body)
120121
// Note: Until https://issue.k8s.io/99425 is addressed, this user-facing failure message must not contain the response body.
122+
// @deads2k recommended we return the body. Slack discussion: https://redhat-internal.slack.com/archives/C04UQLWQAP3/p1679590747021409
121123
failureMsg := fmt.Sprintf("HTTP probe failed with statuscode: %d", res.StatusCode)
122-
return probe.Failure, failureMsg, nil
124+
return probe.Failure, failureMsg, body, nil
123125
}
124126

125127
// RedirectChecker returns a function that can be used to check HTTP redirects.

pkg/probe/http/patch_http.go

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package http
2+
3+
import (
4+
"net/http"
5+
"time"
6+
7+
"k8s.io/kubernetes/pkg/probe"
8+
)
9+
10+
// Prober is an interface that defines the Probe function for doing HTTP readiness/liveness checks.
11+
type DetailedProber interface {
12+
ProbeForBody(req *http.Request, timeout time.Duration) (probe.Result, string, string, error)
13+
}
14+
15+
// ProbeForBody returns a ProbeRunner capable of running an HTTP check.
16+
// returns result, details, body, error
17+
func (pr httpProber) ProbeForBody(req *http.Request, timeout time.Duration) (probe.Result, string, string, error) {
18+
pr.transport.DisableCompression = true // removes Accept-Encoding header
19+
client := &http.Client{
20+
Timeout: timeout,
21+
Transport: pr.transport,
22+
CheckRedirect: RedirectChecker(pr.followNonLocalRedirects),
23+
}
24+
return DoHTTPProbe(req, client)
25+
}

0 commit comments

Comments
 (0)