Skip to content

Add prometheus worker process metrics #1076

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cmd/nginx-ingress/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ func main() {
registry = prometheus.NewRegistry()
managerCollector = collectors.NewLocalManagerMetricsCollector(constLabels)
controllerCollector = collectors.NewControllerMetricsCollector(*enableCustomResources, constLabels)
processCollector := collectors.NewNginxProcessesMetricsCollector(constLabels)

err = managerCollector.Register(registry)
if err != nil {
Expand All @@ -317,6 +318,11 @@ func main() {
if err != nil {
glog.Errorf("Error registering Controller Prometheus metrics: %v", err)
}

err = processCollector.Register(registry)
if err != nil {
glog.Errorf("Error registering NginxProcess Prometheus metrics: %v", err)
}
}

useFakeNginxManager := *proxyURL != ""
Expand Down
1 change: 1 addition & 0 deletions docs-web/logging-and-monitoring/prometheus.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ The Ingress Controller exports the following metrics:
* `controller_nginx_reload_errors_total`. Number of unsuccessful NGINX reloads.
* `controller_nginx_last_reload_status`. Status of the last NGINX reload, 0 meaning down and 1 up.
* `controller_nginx_last_reload_milliseconds`. Duration in milliseconds of the last NGINX reload.
* `controller_nginx_worker_processes_total`. Number of NGINX worker processes. This metric includes the label `generation`, which has two possible values: `old` (worker processes of previous generations that are shutting down) and `current` (worker processes of the current generation).
* `controller_ingress_resources_total`. Number of handled Ingress resources. This metric includes the label type, that groups the Ingress resources by their type (regular, [minion or master](/nginx-ingress-controller/configuration/ingress-resources/cross-namespace-configuration)). **Note**: The metric doesn't count minions without a master.
* `controller_virtualserver_resources_total`. Number of handled VirtualServer resources.
* `controller_virtualserverroute_resources_total`. Number of handled VirtualServerRoute resources. **Note**: The metric counts only VirtualServerRoutes that have a reference from a VirtualServer.
Expand Down
91 changes: 91 additions & 0 deletions internal/metrics/collectors/processes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package collectors

import (
"bytes"
"fmt"
"io/ioutil"
"strconv"

"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
)

// NginxProcessesMetricsCollector reports the number of NGINX worker processes.
// It implements the prometheus.Collector interface through its Collect and
// Describe methods.
type NginxProcessesMetricsCollector struct {
// Metrics
// workerProcessTotal is a gauge vector keyed by the "generation" label; this
// collector sets the "current" and "old" label values.
workerProcessTotal *prometheus.GaugeVec
}

// NewNginxProcessesMetricsCollector creates a new NginxProcessesMetricsCollector.
//
// constLabels is attached as constant labels to the worker process gauge. The
// gauge is named "nginx_worker_processes_total" under metricsNamespace and is
// partitioned by a single variable label, "generation".
func NewNginxProcessesMetricsCollector(constLabels map[string]string) *NginxProcessesMetricsCollector {
	opts := prometheus.GaugeOpts{
		Name:        "nginx_worker_processes_total",
		Namespace:   metricsNamespace,
		Help:        "Number of NGINX worker processes",
		ConstLabels: constLabels,
	}
	labelNames := []string{"generation"}

	return &NginxProcessesMetricsCollector{
		workerProcessTotal: prometheus.NewGaugeVec(opts, labelNames),
	}
}

// updateWorkerProcessCount refreshes the worker process gauge from the result
// of getWorkerProcesses: the first count is stored under generation "current"
// and the second under generation "old". If the scan fails, the error is
// logged and the previously stored values are left untouched.
func (pc *NginxProcessesMetricsCollector) updateWorkerProcessCount() {
	current, old, err := getWorkerProcesses()
	if err != nil {
		glog.Errorf("unable to collect process metrics : %v", err)
		return
	}

	gauge := pc.workerProcessTotal
	gauge.WithLabelValues("current").Set(float64(current))
	gauge.WithLabelValues("old").Set(float64(old))
}

// getWorkerProcesses scans /proc and counts NGINX worker processes by their
// command line.
//
// It returns, in order:
//   - the number of processes whose command line is exactly
//     "nginx: worker process" (workers of the current generation),
//   - the number of processes whose command line is exactly
//     "nginx: worker process is shutting down" (workers of old generations),
//   - an error, which is non-nil only when /proc itself cannot be listed.
//
// Trailing NUL bytes in the cmdline content are stripped before comparison.
func getWorkerProcesses() (int, int, error) {
	procFolders, err := ioutil.ReadDir("/proc")
	if err != nil {
		return 0, 0, fmt.Errorf("unable to read directory /proc : %v", err)
	}

	var workerProcesses, prevWorkerProcesses int
	for _, folder := range procFolders {
		// Only numerically named entries are examined (the per-process
		// directories of procfs); everything else is ignored.
		if _, err := strconv.Atoi(folder.Name()); err != nil {
			continue
		}

		cmdlineFile := fmt.Sprintf("/proc/%v/cmdline", folder.Name())
		content, err := ioutil.ReadFile(cmdlineFile)
		if err != nil {
			// A process can exit between the directory listing and this
			// read, which makes its cmdline file disappear. That must not
			// abort the whole scan (and drop the metric update), so the
			// entry is skipped instead.
			continue
		}

		switch string(bytes.TrimRight(content, "\x00")) {
		case "nginx: worker process":
			workerProcesses++
		case "nginx: worker process is shutting down":
			prevWorkerProcesses++
		}
	}
	return workerProcesses, prevWorkerProcesses, nil
}

// Collect implements the prometheus.Collector interface Collect method.
// It refreshes the worker process counts and then sends the gauge vector's
// metrics on ch.
func (pc *NginxProcessesMetricsCollector) Collect(ch chan<- prometheus.Metric) {
// Refresh first so the values sent on ch reflect the scan made during this
// call; if that scan fails, the previously stored values are sent instead.
pc.updateWorkerProcessCount()
pc.workerProcessTotal.Collect(ch)
}

// Describe implements the prometheus.Collector interface Describe method.
// It delegates to the worker process gauge vector, which sends its
// descriptor on ch.
func (pc *NginxProcessesMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
pc.workerProcessTotal.Describe(ch)
}

// Register registers the collector itself (and thereby all of its metrics)
// with the given registry. It returns whatever error registry.Register
// reports.
func (pc *NginxProcessesMetricsCollector) Register(registry *prometheus.Registry) error {
return registry.Register(pc)
}
5 changes: 2 additions & 3 deletions internal/nginx/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ const appPluginParams = "tmm_count 4 proc_cpuinfo_cpu_mhz 2000000 total_xml_memo
const appProtectPluginStartCmd = "/usr/share/ts/bin/bd-socket-plugin"
const appProtectAgentStartCmd = "/opt/app_protect/bin/bd_agent"


// ServerConfig holds the config data for an upstream server in NGINX Plus.
type ServerConfig struct {
MaxFails int
Expand Down Expand Up @@ -233,7 +232,7 @@ func (lm *LocalManager) CreateDHParam(content string) (string, error) {
}

// CreateAppProtectResourceFile writes contents of An App Protect resource to a file
func (lm *LocalManager) CreateAppProtectResourceFile(name string, content []byte){
func (lm *LocalManager) CreateAppProtectResourceFile(name string, content []byte) {
glog.V(3).Infof("Writing App Protect Resource to %v", name)
err := createFileAndWrite(name, content)
if err != nil {
Expand Down Expand Up @@ -433,7 +432,7 @@ func (lm *LocalManager) AppProtectAgentStart(apaDone chan error, debug bool) {
err = createFileAndWrite(appProtectLogConfigFileName, []byte(appProtectDebugLogConfigFileContent))
if err != nil {
glog.Fatalf("Failed Writing App Protect Log configuration file")
}
}
}
glog.V(3).Info("Starting AppProtect Agent")

Expand Down