diff --git a/tools/dashboards/inference_gateway.json b/tools/dashboards/inference_gateway.json index 4e872739..cf00420d 100644 --- a/tools/dashboards/inference_gateway.json +++ b/tools/dashboards/inference_gateway.json @@ -28,7 +28,7 @@ }, "gridPos": { "h": 3, - "w": 23, + "w": 20, "x": 0, "y": 0 }, @@ -42,7 +42,7 @@ - "content": "# Inferece Gateway Dashboard\n\nPlease see https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/pkg/epp/metrics for more details of underlying metrics used in the dashboard.", + "content": "# Inference Gateway Dashboard\n\nPlease see https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/pkg/epp/metrics for more details of underlying metrics used in the dashboard.", "mode": "markdown" }, - "pluginVersion": "11.5.0", + "pluginVersion": "11.5.2", "title": "", "type": "text" }, @@ -54,15 +54,15 @@ "x": 0, "y": 3 }, - "id": 3, + "id": 15, "panels": [], - "title": "Inference Model", + "title": "Inference Pool", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -125,7 +125,7 @@ "x": 0, "y": 4 }, - "id": 1, + "id": 16, "options": { "legend": { "calcs": [], @@ -139,33 +139,27 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "11.5.2", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, "disableTextWrap": false, "editorMode": "builder", - "exemplar": false, - "expr": "sum by(model_name, target_model_name) (rate(inference_model_request_total{}[$__rate_interval]))", + "expr": "sum by(name) (inference_pool_average_kv_cache_utilization)", "fullMetaSearch": false, "includeNullMetadata": true, - "interval": "", "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false } ], - "title": "Request / s", + "title": "Average KV Cache Utilization", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { @@ -228,7 +222,7 @@ "x": 10, "y": 4 }, - "id": 2, + "id": 17, "options": { "legend": { "calcs": [], @@ -242,55 +236,36 @@ "sort": "none" } }, - "pluginVersion": 
"11.5.0", + "pluginVersion": "11.5.2", "targets": [ { "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(inference_model_request_duration_seconds_bucket{}[$__rate_interval])))", + "expr": "sum by(name) (inference_pool_average_queue_size)", "fullMetaSearch": false, - "includeNullMetadata": false, - "legendFormat": "95%", + "includeNullMetadata": true, + "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) (rate(inference_model_request_duration_seconds_bucket{}[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "legendFormat": "90%", - "range": true, - "refId": "B", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(inference_model_request_duration_seconds_bucket{}[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "legendFormat": "50%", - "range": true, - "refId": "C", - "useBackend": false } ], - "title": "E2E Request Latency", + "title": "Average Queue Size", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 3, + "panels": [], + "title": "Inference Model", + "type": "row" + }, { "datasource": { "type": "prometheus", @@ -353,11 +328,11 @@ }, "gridPos": { "h": 8, - "w": 10, + "w": 20, "x": 0, - "y": 12 + "y": 13 }, - "id": 6, + "id": 2, "options": { "legend": { "calcs": [], @@ -371,12 +346,12 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "11.5.2", "targets": [ { "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) 
(rate(inference_model_request_sizes_bucket{}[$__rate_interval])))", + "expr": "histogram_quantile(0.95, sum by(le) (rate(inference_model_request_duration_seconds_bucket{}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "legendFormat": "95%", @@ -391,7 +366,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) (rate(inference_model_request_sizes_bucket{}[$__rate_interval])))", + "expr": "histogram_quantile(0.9, sum by(le) (rate(inference_model_request_duration_seconds_bucket{}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -407,7 +382,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(inference_model_request_sizes_bucket{}[$__rate_interval])))", + "expr": "histogram_quantile(0.5, sum by(le) (rate(inference_model_request_duration_seconds_bucket{}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -417,7 +392,7 @@ "useBackend": false } ], - "title": "Request Size", + "title": "E2E Request Latency", "type": "timeseries" }, { @@ -483,10 +458,10 @@ "gridPos": { "h": 8, "w": 10, - "x": 10, - "y": 12 + "x": 0, + "y": 21 }, - "id": 7, + "id": 1, "options": { "legend": { "calcs": [], @@ -500,35 +475,8 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "11.5.2", "targets": [ - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(inference_model_response_sizes_bucket{}[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "legendFormat": "95%", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) 
(rate(inference_model_response_sizes_bucket{}[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "legendFormat": "90%", - "range": true, - "refId": "B", - "useBackend": false - }, { "datasource": { "type": "prometheus", @@ -536,17 +484,18 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(inference_model_response_sizes_bucket{}[$__rate_interval])))", + "exemplar": false, + "expr": "sum by(model_name, target_model_name) (rate(inference_model_request_total{}[$__rate_interval]))", "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "legendFormat": "50%", + "includeNullMetadata": true, + "interval": "", + "legendFormat": "__auto", "range": true, - "refId": "C", + "refId": "A", "useBackend": false } ], - "title": "Response Size", + "title": "Request / s", "type": "timeseries" }, { @@ -612,10 +561,10 @@ "gridPos": { "h": 8, "w": 10, - "x": 0, - "y": 20 + "x": 10, + "y": 21 }, - "id": 8, + "id": 18, "options": { "legend": { "calcs": [], @@ -629,19 +578,8 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "11.5.2", "targets": [ - { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(inference_model_input_tokens_bucket{}[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "legendFormat": "95%", - "range": true, - "refId": "A", - "useBackend": false - }, { "datasource": { "type": "prometheus", @@ -649,33 +587,18 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) (rate(inference_model_input_tokens_bucket{}[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "legendFormat": "90%", - "range": true, - "refId": "B", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "disableTextWrap": 
false, - "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(inference_model_input_tokens_bucket{}[$__rate_interval])))", + "exemplar": false, + "expr": "sum by(error_code, model_name, target_model_name) (rate(inference_model_request_error_total[$__rate_interval]))", "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "legendFormat": "50%", + "includeNullMetadata": true, + "interval": "", + "legendFormat": "__auto", "range": true, - "refId": "C", + "refId": "A", "useBackend": false } ], - "title": "Input Token Count", + "title": "Request Error / s", "type": "timeseries" }, { @@ -741,10 +664,10 @@ "gridPos": { "h": 8, "w": 10, - "x": 10, - "y": 20 + "x": 0, + "y": 29 }, - "id": 9, + "id": 6, "options": { "legend": { "calcs": [], @@ -758,12 +681,12 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "11.5.2", "targets": [ { "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(inference_model_output_tokens_bucket{}[$__rate_interval])))", + "expr": "histogram_quantile(0.95, sum by(le) (rate(inference_model_request_sizes_bucket{}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "legendFormat": "95%", @@ -778,7 +701,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) (rate(inference_model_output_tokens_bucket{}[$__rate_interval])))", + "expr": "histogram_quantile(0.9, sum by(le) (rate(inference_model_request_sizes_bucket{}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -794,7 +717,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(inference_model_output_tokens_bucket{}[$__rate_interval])))", + "expr": "histogram_quantile(0.5, sum by(le) (rate(inference_model_request_sizes_bucket{}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": 
false, @@ -804,22 +727,9 @@ "useBackend": false } ], - "title": "Output Token Count", + "title": "Request Size", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 10, - "panels": [], - "title": "vLLM", - "type": "row" - }, { "datasource": { "type": "prometheus", @@ -881,12 +791,12 @@ "overrides": [] }, "gridPos": { - "h": 7, + "h": 8, "w": 10, - "x": 0, + "x": 10, "y": 29 }, - "id": 14, + "id": 7, "options": { "legend": { "calcs": [], @@ -900,15 +810,15 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "11.5.2", "targets": [ { "disableTextWrap": false, "editorMode": "builder", - "expr": "sum by(model_name) (rate(vllm:prompt_tokens_total[$__rate_interval]))", + "expr": "histogram_quantile(0.95, sum by(le) (rate(inference_model_response_sizes_bucket{}[$__rate_interval])))", "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "Prompt Tokens/Sec", + "includeNullMetadata": false, + "legendFormat": "95%", "range": true, "refId": "A", "useBackend": false @@ -920,17 +830,33 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum by(model_name) (rate(vllm:generation_tokens_total[$__rate_interval]))", + "expr": "histogram_quantile(0.9, sum by(le) (rate(inference_model_response_sizes_bucket{}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, - "includeNullMetadata": true, - "legendFormat": "Generation Tokens/Sec", + "includeNullMetadata": false, + "legendFormat": "90%", "range": true, "refId": "B", "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.5, sum by(le) (rate(inference_model_response_sizes_bucket{}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "legendFormat": "50%", + "range": true, + "refId": "C", + "useBackend": false } ], - "title": 
"Token Throughput", + "title": "Response Size", "type": "timeseries" }, { @@ -994,12 +920,12 @@ "overrides": [] }, "gridPos": { - "h": 7, + "h": 8, "w": 10, - "x": 10, - "y": 29 + "x": 0, + "y": 37 }, - "id": 11, + "id": 8, "options": { "legend": { "calcs": [], @@ -1013,14 +939,14 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "11.5.2", "targets": [ { "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.95, sum by(le) (rate(inference_model_input_tokens_bucket{}[$__rate_interval])))", "fullMetaSearch": false, - "includeNullMetadata": true, + "includeNullMetadata": false, "legendFormat": "95%", "range": true, "refId": "A", @@ -1033,10 +959,10 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.9, sum by(le) (rate(inference_model_input_tokens_bucket{}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, - "includeNullMetadata": true, + "includeNullMetadata": false, "legendFormat": "90%", "range": true, "refId": "B", @@ -1049,17 +975,17 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.5, sum by(le) (rate(inference_model_input_tokens_bucket{}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, - "includeNullMetadata": true, + "includeNullMetadata": false, "legendFormat": "50%", "range": true, "refId": "C", "useBackend": false } ], - "title": "E2E Request Latency", + "title": "Input Token Count", "type": "timeseries" }, { @@ -1123,12 +1049,12 @@ "overrides": [] }, "gridPos": { - "h": 7, + "h": 8, "w": 10, - "x": 0, - "y": 36 + "x": 10, + "y": 37 }, - "id": 13, + "id": 9, "options": 
{ "legend": { "calcs": [], @@ -1142,14 +1068,14 @@ "sort": "none" } }, - "pluginVersion": "11.5.0", + "pluginVersion": "11.5.2", "targets": [ { "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.95, sum by(le) (rate(inference_model_output_tokens_bucket{}[$__rate_interval])))", "fullMetaSearch": false, - "includeNullMetadata": true, + "includeNullMetadata": false, "legendFormat": "95%", "range": true, "refId": "A", @@ -1162,10 +1088,10 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.9, sum by(le) (rate(inference_model_output_tokens_bucket{}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, - "includeNullMetadata": true, + "includeNullMetadata": false, "legendFormat": "90%", "range": true, "refId": "B", @@ -1178,147 +1104,532 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.5, sum by(le) (rate(inference_model_output_tokens_bucket{}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, - "includeNullMetadata": true, + "includeNullMetadata": false, "legendFormat": "50%", "range": true, "refId": "C", "useBackend": false } ], - "title": "Time Per Output Token Latency", + "title": "Output Token Count", "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 45 }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 10, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "custom": { - "axisBorderShow": false, - 
"axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 0, + "y": 52 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "pluginVersion": "11.5.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(model_name) 
(rate(vllm:prompt_tokens_total[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "Prompt Tokens/Sec", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 10, - "x": 10, - "y": 36 - }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(model_name) (rate(vllm:generation_tokens_total[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "legendFormat": "Generation Tokens/Sec", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Token Throughput", + "type": "timeseries" }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.5.0", - "targets": [ { - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "legendFormat": "95%", - "range": true, - "refId": "A", - "useBackend": false + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + 
}, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 10, + "y": 52 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.5.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "95%", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "legendFormat": "90%", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "legendFormat": "50%", + "range": true, + "refId": "C", + "useBackend": false + } + ], + "title": "E2E Request Latency", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": 
"histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "legendFormat": "90%", - "range": true, - "refId": "B", - "useBackend": false + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 0, + "y": 59 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.5.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "95%", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.9, sum by(le) 
(rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "legendFormat": "90%", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "legendFormat": "50%", + "range": true, + "refId": "C", + "useBackend": false + } + ], + "title": "Time Per Output Token Latency", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "legendFormat": "50%", - "range": true, - "refId": "C", - "useBackend": false + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": 
[] + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 10, + "y": 59 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.5.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "legendFormat": "95%", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "legendFormat": "90%", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "legendFormat": "50%", + "range": true, + "refId": "C", + "useBackend": false + } + ], + "title": "Time To First Token Latency", + "type": "timeseries" } ], - "title": "Time To First Token Latency", - "type": "timeseries" + "title": "vLLM", + "type": "row" } ], "preload": false, @@ -1350,6 +1661,6 @@ "timezone": "browser", "title": "Inference Gateway", "uid": "aeap3g4ujefb4b", - "version": 16, + "version": 20, "weekStart": "" }