Skip to content

Commit af54dc3

Browse files
authored
Merge pull request #22 from Tommassino/prometheus
report the memory usage metrics as prometheus metrics
2 parents dd2d21c + d75cdca commit af54dc3

File tree

4 files changed

+166
-110
lines changed

4 files changed

+166
-110
lines changed

nbresuse/__init__.py

Lines changed: 11 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,100 +1,17 @@
11
import os
2-
import json
3-
import psutil
2+
3+
from tornado import ioloop
44
from traitlets import Bool, Float, Int, Union, default
55
from traitlets.config import Configurable
6-
from notebook.utils import url_path_join
7-
from notebook.base.handlers import IPythonHandler
8-
from tornado import web
6+
7+
from nbresuse.prometheus import PrometheusHandler
98

109
try:
1110
# Traitlets >= 4.3.3
1211
from traitlets import Callable
1312
except ImportError:
1413
from .utils import Callable
1514

16-
from concurrent.futures import ThreadPoolExecutor
17-
from tornado.concurrent import run_on_executor
18-
19-
class MetricsHandler(IPythonHandler):
    """Serve the current resource usage of this process tree as JSON.

    The response always contains ``rss`` and ``limits``; ``cpu_percent`` and
    ``cpu_count`` are added when ``track_cpu_percent`` is enabled in the
    ``nbresuse_display_config`` settings entry.
    """

    # One pool shared by all requests. ``initialize`` runs for every request,
    # so building the pool there left a new, never shut down, 10-worker pool
    # behind on each poll.
    # https://www.tornadoweb.org/en/stable/concurrent.html#tornado.concurrent.run_on_executor
    executor = ThreadPoolExecutor(max_workers=10)

    def initialize(self):
        super().initialize()
        self.cpu_percent = 0

        # A better approach would use cpu_affinity to account for the
        # fact that the number of logical CPUs in the system is not
        # necessarily the same as the number of CPUs the process
        # can actually use. But cpu_affinity isn't available for OS X.
        self.cpu_count = psutil.cpu_count()

    @run_on_executor
    def update_cpu_percent(self, all_processes):
        """Return the summed CPU percentage of ``all_processes``.

        Runs on ``executor`` because every sample blocks for its interval.
        """

        def get_cpu_percent(p):
            try:
                return p.cpu_percent(interval=0.05)
            # Avoid littering logs with stack traces complaining
            # about dead processes having no CPU usage
            except psutil.Error:
                return 0

        return sum(get_cpu_percent(p) for p in all_processes)

    @web.authenticated
    async def get(self):
        """
        Calculate and return current resource usage metrics
        """
        config = self.settings['nbresuse_display_config']
        cur_process = psutil.Process()
        all_processes = [cur_process] + cur_process.children(recursive=True)
        limits = {}

        # Get memory information
        rss = sum(p.memory_info().rss for p in all_processes)

        if callable(config.mem_limit):
            mem_limit = config.mem_limit(rss=rss)
        else:  # mem_limit is an Int
            mem_limit = config.mem_limit

        if config.mem_limit != 0:
            limits['memory'] = {
                'rss': mem_limit
            }
            if config.mem_warning_threshold != 0:
                limits['memory']['warn'] = (mem_limit - rss) < (mem_limit * config.mem_warning_threshold)

        # Optionally get CPU information. The usage is sampled exactly once;
        # an earlier duplicate sample was overwritten here and only added
        # latency to the request.
        # NOTE(review): the CPU limit is assumed to apply only while CPU
        # tracking is on -- confirm against the original indentation.
        if config.track_cpu_percent:
            self.cpu_percent = await self.update_cpu_percent(all_processes)

            if config.cpu_limit != 0:
                limits['cpu'] = {
                    'cpu': config.cpu_limit
                }
                if config.cpu_warning_threshold != 0:
                    limits['cpu']['warn'] = (config.cpu_limit - self.cpu_percent) < (config.cpu_limit * config.cpu_warning_threshold)

        metrics = {
            'rss': rss,
            'limits': limits,
        }
        if config.track_cpu_percent:
            metrics.update(cpu_percent=self.cpu_percent,
                           cpu_count=self.cpu_count)

        self.log.debug("NBResuse metrics: %s", metrics)
        self.write(json.dumps(metrics))
97-
9815

9916
def _jupyter_server_extension_paths():
10017
"""
@@ -104,6 +21,7 @@ def _jupyter_server_extension_paths():
10421
'module': 'nbresuse',
10522
}]
10623

24+
10725
def _jupyter_nbextension_paths():
10826
"""
10927
Set up the notebook extension for displaying metrics
@@ -115,6 +33,7 @@ def _jupyter_nbextension_paths():
11533
"require": "nbresuse/main"
11634
}]
11735

36+
11837
class ResourceUseDisplay(Configurable):
11938
"""
12039
Holds server-side configuration for nbresuse
@@ -142,7 +61,7 @@ class ResourceUseDisplay(Configurable):
14261
Note that this does not actually limit the user's memory usage!
14362
14463
Defaults to reading from the `MEM_LIMIT` environment variable. If
145-
set to 0, no memory limit is displayed.
64+
set to 0, the max memory available is displayed.
14665
"""
14766
).tag(config=True)
14867

@@ -178,19 +97,20 @@ def _mem_limit_default(self):
17897
Note that this does not actually limit the user's CPU usage!
17998
18099
Defaults to reading from the `CPU_LIMIT` environment variable. If
181-
set to 0, no CPU usage limit is displayed.
100+
set to 0, the total CPU count available is displayed.
182101
"""
183102
).tag(config=True)
184103

185104
@default('cpu_limit')
def _cpu_limit_default(self):
    """Default ``cpu_limit``: the ``CPU_LIMIT`` environment variable as a float, or 0 when unset."""
    raw_limit = os.environ.get('CPU_LIMIT', 0)
    return float(raw_limit)
188107

108+
189109
def load_jupyter_server_extension(nbapp):
    """
    Called during notebook start

    Stores the nbresuse configuration in the web application settings and
    schedules the Prometheus gauges to be refreshed every 1000 ms.
    """
    settings = nbapp.web_app.settings
    settings['nbresuse_display_config'] = ResourceUseDisplay(parent=nbapp)

    # PrometheusHandler reads 'nbresuse_display_config' from the settings in
    # its constructor, so it has to be created after the line above.
    refresher = PrometheusHandler(nbapp)
    ioloop.PeriodicCallback(refresher, 1000).start()

nbresuse/metrics.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from typing import NamedTuple
2+
3+
import psutil
4+
5+
6+
class MemoryMetrics(NamedTuple):
    """Immutable snapshot of memory usage and the ceiling it is shown against."""

    # Memory currently in use.
    current_memory: int
    # Upper bound that current_memory is compared with.
    max_memory: int
9+
10+
11+
class CPUMetrics(NamedTuple):
    """Immutable snapshot of CPU usage and the ceiling it is shown against."""

    # Upper bound that cpu_usage is compared with.
    cpu_max: float
    # CPU usage currently measured.
    cpu_usage: float
14+
15+
16+
def memory_metrics() -> MemoryMetrics:
    """Measure the memory used by the current process tree.

    Returns:
        A ``MemoryMetrics`` whose ``current_memory`` is the summed RSS of the
        current process and all of its recursive children, and whose
        ``max_memory`` is ``psutil.virtual_memory().total``.
    """
    cur_process = psutil.Process()
    all_processes = [cur_process] + cur_process.children(recursive=True)

    def get_rss(p):
        try:
            return p.memory_info().rss
        # A child can exit between being listed and being queried; count it
        # as using no memory instead of failing the whole measurement, the
        # same way cpu_metrics() treats dead processes.
        except psutil.Error:
            return 0

    rss = sum(get_rss(p) for p in all_processes)
    virtual_memory = psutil.virtual_memory()

    return MemoryMetrics(
        rss,
        virtual_memory.total
    )
27+
28+
29+
def cpu_metrics() -> CPUMetrics:
    """Measure the CPU usage of the current process tree.

    Every process is sampled with a blocking 0.05 s interval, one after the
    other, so the call takes at least ``0.05 * number_of_processes`` seconds.

    Returns:
        A ``CPUMetrics`` whose ``cpu_max`` is 100.0 per CPU reported by
        ``psutil.cpu_count()`` and whose ``cpu_usage`` is the summed
        ``cpu_percent`` of the current process and its recursive children.
    """
    cur_process = psutil.Process()
    all_processes = [cur_process] + cur_process.children(recursive=True)

    # psutil.cpu_count() returns None when the count cannot be determined;
    # fall back to a single CPU instead of raising on ``None * 100.0``.
    cpu_count = psutil.cpu_count() or 1

    def get_cpu_percent(p):
        try:
            return p.cpu_percent(interval=0.05)
        # Avoid littering logs with stack traces complaining
        # about dead processes having no CPU usage
        except psutil.Error:
            return 0

    cpu_percent = sum(get_cpu_percent(p) for p in all_processes)

    return CPUMetrics(
        cpu_count * 100.0,
        cpu_percent
    )

nbresuse/prometheus.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from notebook.notebookapp import NotebookApp
2+
from prometheus_client import Gauge
3+
from tornado import gen
4+
5+
from nbresuse.metrics import CPUMetrics, MemoryMetrics, cpu_metrics, memory_metrics
6+
7+
try:
8+
# Traitlets >= 4.3.3
9+
from traitlets import Callable
10+
except ImportError:
11+
from .utils import Callable
12+
13+
# Gauges updated by PrometheusHandler. The metric names are parsed by the
# notebook front end, so only the help texts (which wrongly described these
# gauges as counters) are changed here.
TOTAL_MEMORY_USAGE = Gauge(
    'total_memory_usage',
    'Current memory usage (RSS) of the current process and its child processes',
    []
)

MAX_MEMORY_USAGE = Gauge(
    'max_memory_usage',
    'Maximum memory available: the configured limit or the total system memory',
    []
)

TOTAL_CPU_USAGE = Gauge(
    'total_cpu_usage',
    'Current CPU usage of the current process and its child processes',
    []
)

MAX_CPU_USAGE = Gauge(
    'max_cpu_usage',
    'Maximum CPU usage available: the configured limit or 100 per CPU',
    []
)
36+
37+
38+
class PrometheusHandler(Callable):
    """Callable that refreshes the nbresuse Prometheus gauges.

    Each call measures memory (and, when ``track_cpu_percent`` is enabled,
    CPU) usage, replaces the measured maximum with the configured limit when
    one is set, and stores the values in the module-level gauges.
    """

    def __init__(self, nbapp: NotebookApp):
        """Read the nbresuse configuration and session manager from ``nbapp``."""
        super().__init__()
        self.config = nbapp.web_app.settings['nbresuse_display_config']
        self.session_manager = nbapp.session_manager

    @gen.coroutine
    def __call__(self, *args, **kwargs):
        """Update the memory gauges and, if enabled, the CPU gauges."""
        memory = self.apply_memory_limits(memory_metrics())
        TOTAL_MEMORY_USAGE.set(memory.current_memory)
        MAX_MEMORY_USAGE.set(memory.max_memory)
        if self.config.track_cpu_percent:
            cpu = self.apply_cpu_limits(cpu_metrics())
            TOTAL_CPU_USAGE.set(cpu.cpu_usage)
            MAX_CPU_USAGE.set(cpu.cpu_max)

    def apply_memory_limits(self, metrics: MemoryMetrics) -> MemoryMetrics:
        """Return ``metrics`` with ``max_memory`` replaced by the configured limit.

        A callable ``mem_limit`` is called with the current usage as ``rss``;
        a positive integer is used as is; otherwise ``metrics`` is returned
        unchanged.
        """
        mem_limit = self.config.mem_limit
        # MemoryMetrics is a NamedTuple: assigning to a field raises
        # AttributeError, so build an updated copy with _replace instead.
        if callable(mem_limit):
            return metrics._replace(
                max_memory=mem_limit(rss=metrics.current_memory)
            )
        if mem_limit > 0:  # mem_limit is an Int
            return metrics._replace(max_memory=mem_limit)
        return metrics

    def apply_cpu_limits(self, metrics: CPUMetrics) -> CPUMetrics:
        """Return ``metrics`` with ``cpu_max`` replaced by a positive ``cpu_limit``."""
        cpu_limit = self.config.cpu_limit
        if cpu_limit > 0:
            # NamedTuple fields are read-only; see apply_memory_limits.
            return metrics._replace(cpu_max=cpu_limit)
        return metrics

nbresuse/static/main.js

Lines changed: 44 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
define(['jquery', 'base/js/utils'], function ($, utils) {
1+
define([
2+
'jquery',
3+
'base/js/utils'
4+
], function ($, utils) {
25
function setupDOM() {
36
$('#maintoolbar-container').append(
47
$('<div>').attr('id', 'nbresuse-display')
@@ -20,32 +23,54 @@ define(['jquery', 'base/js/utils'], function ($, utils) {
2023
);
2124
}
2225

26+
/**
 * Format a byte count with a binary unit, using at most one decimal.
 *
 * @param {number} size - Number of bytes.
 * @returns {string} For example "1.5 kB"; 0 is rendered as "0 B".
 */
function humanFileSize(size) {
    var units = ['B', 'kB', 'MB', 'GB', 'TB'];
    var i = 0;
    // Math.log(0) is -Infinity and sizes below 1 give a negative exponent, so
    // only derive the unit from positive finite sizes and clamp it to the
    // table: small values stay in bytes, huge values stay in the last unit.
    if (size > 0 && isFinite(size)) {
        i = Math.floor(Math.log(size) / Math.log(1024));
        i = Math.max(0, Math.min(i, units.length - 1));
    }
    return (size / Math.pow(1024, i)).toFixed(1) * 1 + ' ' + units[i];
}
30+
31+
32+
/**
 * Find the sample lines of one metric in a "name{labels} value" text dump.
 *
 * @param {string} metric_name - Exact metric name to look for.
 * @param {string} text - Text to search, one sample per line.
 * @param {boolean} [multiple=false] - Return every match instead of the first.
 * @returns {?Array|Array[]} RegExp match(es): index 1 holds the text between
 *     the braces ("" when there are none), index 2 the rest of the line after
 *     the separating space. Without `multiple`, null when nothing matches.
 */
function metric(metric_name, text, multiple=false) {
    // Escape the name so characters such as "." are matched literally.
    var escapedName = String(metric_name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // The lookahead requires the name to end here, so that "foo" no longer
    // matches the samples of a longer metric such as "foo_total".
    var regex = new RegExp("^" + escapedName + "(?=[{ ])\\{?([^ }]*)\\}? (.*)$", "gm");
    var matches = [];
    var match;

    while ((match = regex.exec(text)) !== null) {
        matches.push(match);
    }

    if (multiple) {
        return matches;
    }
    return matches.length > 0 ? matches[0] : null;
}
52+
2353
// Fetch the metrics endpoint and show "used/maximum" memory in the toolbar.
var displayMetrics = function() {
    if (document.hidden) {
        // Don't poll when nobody is looking
        return;
    }
    $.ajax({
        url: utils.get_body_data('baseUrl') + 'metrics',
        // The body is searched line by line with metric(); request it as
        // plain text so jQuery never converts it based on the content type.
        dataType: 'text',
        success: function(data) {
            var totalMatch = metric("total_memory_usage", data);
            var maxMatch = metric("max_memory_usage", data);

            if (!totalMatch || !maxMatch)
                return;

            var totalMemoryUsage = parseFloat(totalMatch[2]);
            var maxMemoryUsage = parseFloat(maxMatch[2]);
            // Keep the previous text instead of rendering "NaN" when a
            // sample value cannot be parsed.
            if (isNaN(totalMemoryUsage) || isNaN(maxMemoryUsage))
                return;

            var display = humanFileSize(totalMemoryUsage) + "/" + humanFileSize(maxMemoryUsage);
            $('#nbresuse-mem').text(display);
        }
    });
};
4974

5075
var load_ipython_extension = function () {
5176
setupDOM();

0 commit comments

Comments
 (0)