Skip to content

Commit 0db79d4

Browse files
committed
refactor: update the mux server to expose both healthz and metrics, add graceful shutdown
1 parent 63b787c commit 0db79d4

File tree

2 files changed

+90
-15
lines changed

2 files changed

+90
-15
lines changed

Diff for: cmd/kar-controllers/app/server.go

+74-11
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,21 @@ limitations under the License.
3131
package app
3232

3333
import (
34+
"context"
35+
"fmt"
36+
"net/http"
37+
"os"
38+
"os/signal"
39+
"syscall"
40+
"time"
41+
3442
"k8s.io/client-go/rest"
3543
"k8s.io/client-go/tools/clientcmd"
36-
"net/http"
3744

3845
"github.com/project-codeflare/multi-cluster-app-dispatcher/cmd/kar-controllers/app/options"
3946
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/queuejob"
4047
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/health"
48+
"github.com/prometheus/client_golang/prometheus/promhttp"
4149

4250
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
4351
)
@@ -67,7 +75,7 @@ func Run(opt *options.ServerOption) error {
6775
jobctrl.Run(neverStop)
6876

6977
// This call blocks until one of the servers fails or an OS shutdown signal (interrupt/SIGTERM) is received
70-
err = listenHealthProbe(opt)
78+
err = startHealthAndMetricsServers(opt)
7179
if err != nil {
7280
return err
7381
}
@@ -76,14 +84,69 @@ func Run(opt *options.ServerOption) error {
7684
}
7785

7886
// Starts the health probe and metrics listeners
79-
func listenHealthProbe(opt *options.ServerOption) error {
80-
handler := http.NewServeMux()
81-
handler.Handle("/healthz", &health.Handler{})
82-
err := http.ListenAndServe(opt.HealthProbeListenAddr, handler)
83-
if err != nil {
84-
return err
85-
}
86-
87-
return nil
87+
func startHealthAndMetricsServers(opt *options.ServerOption) error {
88+
// Start the metrics server
89+
RecordMetrics()
90+
91+
metricsHandler := http.NewServeMux()
92+
metricsHandler.Handle("/metrics", promhttp.Handler())
93+
healthHandler := http.NewServeMux()
94+
healthHandler.Handle("/healthz", &health.Handler{})
95+
96+
metricsServer := &http.Server{
97+
Addr: opt.MetricsListenAddr,
98+
Handler: metricsHandler,
99+
}
100+
101+
healthServer := &http.Server{
102+
Addr: opt.HealthProbeListenAddr,
103+
Handler: healthHandler,
104+
}
105+
106+
// make a channel for errors for each server
107+
metricsServerErrChan := make(chan error)
108+
healthServerErrChan := make(chan error)
109+
110+
// start servers in their own goroutines
111+
go func() {
112+
err := metricsServer.ListenAndServe()
113+
if err != nil && err != http.ErrServerClosed {
114+
metricsServerErrChan <- err
115+
}
116+
}()
117+
118+
go func() {
119+
err := healthServer.ListenAndServe()
120+
if err != nil && err != http.ErrServerClosed {
121+
healthServerErrChan <- err
122+
}
123+
}()
124+
125+
// make a channel to listen for OS shutdown signal
126+
stop := make(chan os.Signal, 1)
127+
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
128+
129+
// use select to wait for either a shutdown signal or an error
130+
select {
131+
case <-stop:
132+
// received an OS shutdown signal, shut down servers gracefully
133+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
134+
defer cancel()
135+
136+
errM := metricsServer.Shutdown(ctx)
137+
if errM != nil {
138+
return fmt.Errorf("metrics server shutdown error: %v", errM)
139+
}
140+
errH := healthServer.Shutdown(ctx)
141+
if errH != nil {
142+
return fmt.Errorf("health server shutdown error: %v", errH)
143+
}
144+
case err := <-metricsServerErrChan:
145+
return fmt.Errorf("metrics server error: %v", err)
146+
case err := <-healthServerErrChan:
147+
return fmt.Errorf("health server error: %v", err)
148+
}
149+
150+
return nil
88151
}
89152

Diff for: deployment/mcad-controller/templates/deployment.yaml

+16-4
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,25 @@ spec:
1111
- name: http
1212
port: 80
1313
targetPort: 8080
14+
- name: metrics
15+
port: 8083
16+
targetPort: 8083
1417
selector:
1518
app: custom-metrics-apiserver
1619
---
20+
apiVersion: v1
21+
kind: Service
22+
metadata:
23+
name: metrics
24+
namespace: kube-system
25+
spec:
26+
ports:
27+
- name: metrics
28+
port: 8083
29+
targetPort: 8083
30+
selector:
31+
app: metrics
32+
---
1733
#{{ if .Values.configMap.quotaRestUrl }}
1834
apiVersion: v1
1935
kind: Service
@@ -373,10 +389,6 @@ spec:
373389
port: 8081
374390
periodSeconds: 5
375391
timeoutSeconds: 5
376-
metrics:
377-
httpGet:
378-
path: /metrics
379-
port: 8083
380392
#{{ if .Values.configMap.name }}
381393
envFrom:
382394
- configMapRef:

0 commit comments

Comments
 (0)