@@ -31,13 +31,21 @@ limitations under the License.
31
31
package app
32
32
33
33
import (
34
+ "context"
35
+ "fmt"
36
+ "net/http"
37
+ "os"
38
+ "os/signal"
39
+ "syscall"
40
+ "time"
41
+
34
42
"k8s.io/client-go/rest"
35
43
"k8s.io/client-go/tools/clientcmd"
36
- "net/http"
37
44
38
45
"github.com/project-codeflare/multi-cluster-app-dispatcher/cmd/kar-controllers/app/options"
39
46
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/queuejob"
40
47
"github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/health"
48
+ "github.com/prometheus/client_golang/prometheus/promhttp"
41
49
42
50
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
43
51
)
@@ -67,7 +75,7 @@ func Run(opt *options.ServerOption) error {
67
75
jobctrl .Run (neverStop )
68
76
69
77
// This call is blocking (unless an error occurs) which equates to <-neverStop
70
- err = listenHealthProbe (opt )
78
+ err = startHealthAndMetricsServers (opt )
71
79
if err != nil {
72
80
return err
73
81
}
@@ -76,14 +84,69 @@ func Run(opt *options.ServerOption) error {
76
84
}
77
85
78
86
// Starts the health probe listener
79
- func listenHealthProbe (opt * options.ServerOption ) error {
80
- handler := http .NewServeMux ()
81
- handler .Handle ("/healthz" , & health.Handler {})
82
- err := http .ListenAndServe (opt .HealthProbeListenAddr , handler )
83
- if err != nil {
84
- return err
85
- }
86
-
87
- return nil
87
+ func startHealthAndMetricsServers (opt * options.ServerOption ) error {
88
+ // Start the metrics server
89
+ RecordMetrics ()
90
+
91
+ metricsHandler := http .NewServeMux ()
92
+ metricsHandler .Handle ("/metrics" , promhttp .Handler ())
93
+ healthHandler := http .NewServeMux ()
94
+ healthHandler .Handle ("/healthz" , & health.Handler {})
95
+
96
+ metricsServer := & http.Server {
97
+ Addr : opt .MetricsListenAddr ,
98
+ Handler : metricsHandler ,
99
+ }
100
+
101
+ healthServer := & http.Server {
102
+ Addr : opt .HealthProbeListenAddr ,
103
+ Handler : healthHandler ,
104
+ }
105
+
106
+ // make a channel for errors for each server
107
+ metricsServerErrChan := make (chan error )
108
+ healthServerErrChan := make (chan error )
109
+
110
+ // start servers in their own goroutines
111
+ go func () {
112
+ err := metricsServer .ListenAndServe ()
113
+ if err != nil && err != http .ErrServerClosed {
114
+ metricsServerErrChan <- err
115
+ }
116
+ }()
117
+
118
+ go func () {
119
+ err := healthServer .ListenAndServe ()
120
+ if err != nil && err != http .ErrServerClosed {
121
+ healthServerErrChan <- err
122
+ }
123
+ }()
124
+
125
+ // make a channel to listen for OS shutdown signal
126
+ stop := make (chan os.Signal , 1 )
127
+ signal .Notify (stop , os .Interrupt , syscall .SIGTERM )
128
+
129
+ // use select to wait for either a shutdown signal or an error
130
+ select {
131
+ case <- stop :
132
+ // received an OS shutdown signal, shut down servers gracefully
133
+ ctx , cancel := context .WithTimeout (context .Background (), 5 * time .Second )
134
+ defer cancel ()
135
+
136
+ errM := metricsServer .Shutdown (ctx )
137
+ if errM != nil {
138
+ return fmt .Errorf ("metrics server shutdown error: %v" , errM )
139
+ }
140
+ errH := healthServer .Shutdown (ctx )
141
+ if errH != nil {
142
+ return fmt .Errorf ("health server shutdown error: %v" , errH )
143
+ }
144
+ case err := <- metricsServerErrChan :
145
+ return fmt .Errorf ("metrics server error: %v" , err )
146
+ case err := <- healthServerErrChan :
147
+ return fmt .Errorf ("health server error: %v" , err )
148
+ }
149
+
150
+ return nil
88
151
}
89
152
0 commit comments