Skip to content

Commit 4bcf9e1

Browse files
committed
refactor: update the mux server to expose both healthz and metrics, add graceful shutdown
1 parent 82a4e8c commit 4bcf9e1

File tree

3 files changed

+99
-14
lines changed

3 files changed

+99
-14
lines changed

Diff for: cmd/kar-controllers/app/server.go

+74-11
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,21 @@ limitations under the License.
3131
package app
3232

3333
import (
34+
"context"
35+
"fmt"
36+
"net/http"
37+
"os"
38+
"os/signal"
39+
"syscall"
40+
"time"
41+
3442
"k8s.io/client-go/rest"
3543
"k8s.io/client-go/tools/clientcmd"
36-
"net/http"
3744

3845
"github.com/project-codeflare/multi-cluster-app-dispatcher/cmd/kar-controllers/app/options"
3946
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/queuejob"
4047
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/health"
48+
"github.com/prometheus/client_golang/prometheus/promhttp"
4149

4250
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
4351
)
@@ -67,7 +75,7 @@ func Run(opt *options.ServerOption) error {
6775
jobctrl.Run(neverStop)
6876

6977
// This call blocks until one of the servers fails or an OS shutdown signal (interrupt/SIGTERM) is received
70-
err = listenHealthProbe(opt)
78+
err = startHealthAndMetricsServers(opt)
7179
if err != nil {
7280
return err
7381
}
@@ -76,14 +84,69 @@ func Run(opt *options.ServerOption) error {
7684
}
7785

7886
// Starts the health probe and metrics listeners and blocks until shutdown or a server error
79-
func listenHealthProbe(opt *options.ServerOption) error {
80-
handler := http.NewServeMux()
81-
handler.Handle("/healthz", &health.Handler{})
82-
err := http.ListenAndServe(opt.HealthProbeListenAddr, handler)
83-
if err != nil {
84-
return err
85-
}
86-
87-
return nil
87+
func startHealthAndMetricsServers(opt *options.ServerOption) error {
88+
// Start the metrics server
89+
RecordMetrics()
90+
91+
metricsHandler := http.NewServeMux()
92+
metricsHandler.Handle("/metrics", promhttp.Handler())
93+
healthHandler := http.NewServeMux()
94+
healthHandler.Handle("/healthz", &health.Handler{})
95+
96+
metricsServer := &http.Server{
97+
Addr: opt.MetricsListenAddr,
98+
Handler: metricsHandler,
99+
}
100+
101+
healthServer := &http.Server{
102+
Addr: opt.HealthProbeListenAddr,
103+
Handler: healthHandler,
104+
}
105+
106+
// make a channel for errors for each server
107+
metricsServerErrChan := make(chan error)
108+
healthServerErrChan := make(chan error)
109+
110+
// start servers in their own goroutines
111+
go func() {
112+
err := metricsServer.ListenAndServe()
113+
if err != nil && err != http.ErrServerClosed {
114+
metricsServerErrChan <- err
115+
}
116+
}()
117+
118+
go func() {
119+
err := healthServer.ListenAndServe()
120+
if err != nil && err != http.ErrServerClosed {
121+
healthServerErrChan <- err
122+
}
123+
}()
124+
125+
// make a channel to listen for OS shutdown signal
126+
stop := make(chan os.Signal, 1)
127+
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
128+
129+
// use select to wait for either a shutdown signal or an error
130+
select {
131+
case <-stop:
132+
// received an OS shutdown signal, shut down servers gracefully
133+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
134+
defer cancel()
135+
136+
errM := metricsServer.Shutdown(ctx)
137+
if errM != nil {
138+
return fmt.Errorf("metrics server shutdown error: %v", errM)
139+
}
140+
errH := healthServer.Shutdown(ctx)
141+
if errH != nil {
142+
return fmt.Errorf("health server shutdown error: %v", errH)
143+
}
144+
case err := <-metricsServerErrChan:
145+
return fmt.Errorf("metrics server error: %v", err)
146+
case err := <-healthServerErrChan:
147+
return fmt.Errorf("health server error: %v", err)
148+
}
149+
150+
return nil
88151
}
89152

Diff for: deployment/mcad-controller/templates/deployment.yaml

+5-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ spec:
1111
- name: http
1212
port: 80
1313
targetPort: 8080
14+
- name: metrics
15+
port: 8083
16+
targetPort: 8083
1417
selector:
1518
app: custom-metrics-apiserver
1619
---
@@ -353,7 +356,7 @@ spec:
353356
- containerPort: 8080
354357
name: http
355358
- containerPort: 8083
356-
name: metrics
359+
name: metrics-server
357360
volumeMounts:
358361
- mountPath: /tmp
359362
name: temp-vol
@@ -373,7 +376,7 @@ spec:
373376
port: 8081
374377
periodSeconds: 5
375378
timeoutSeconds: 5
376-
metrics:
379+
metrics-server:
377380
httpGet:
378381
path: /metrics
379382
port: 8083

Diff for: deployment/mcad-operator/helm-charts/mcad-controller/templates/deployment.yaml

+20-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,24 @@ spec:
1414
selector:
1515
app: custom-metrics-apiserver
1616
---
17+
apiVersion: v1
18+
kind: Service
19+
metadata:
20+
name: metrics-server
21+
spec:
22+
ports:
23+
- name: https
24+
port: 443
25+
targetPort: 6443
26+
- name: http
27+
port: 80
28+
targetPort: 8080
29+
- name: metrics-server
30+
port: 8083
31+
targetPort: 8083
32+
selector:
33+
app: metrics-server
34+
---
1735
#{{ if .Values.configMap.multiCluster }}
1836
apiVersion: apiregistration.k8s.io/v1beta1
1937
kind: APIService
@@ -8025,7 +8043,6 @@ spec:
80258043
#{{ if .Values.volumes.hostPath }}
80268044
- name: agent-config-vol
80278045
mountPath: /root/kubernetes
8028-
#{{ end }}
80298046
#{{ end }}
80308047
- name: {{ .Chart.Name }}
80318048
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
@@ -8038,6 +8055,8 @@ spec:
80388055
name: https
80398056
- containerPort: 8080
80408057
name: http
8058+
- containerPort: 8083
8059+
name: metrics-server
80418060
volumeMounts:
80428061
- mountPath: /tmp
80438062
name: temp-vol

0 commit comments

Comments
 (0)