Skip to content
This repository was archived by the owner on Oct 11, 2024. It is now read-only.

Commit 6a8a97b

Browse files
ronensc, robertgshaw2-redhat, and Robert Shaw
committed
Add more Prometheus metrics (vllm-project#2764)
Co-authored-by: Robert Shaw <[email protected]>
Co-authored-by: Robert Shaw <[email protected]>
1 parent b24aae6 commit 6a8a97b

File tree

6 files changed: +582 additions, -114 deletions

examples/production_monitoring/grafana.json

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,289 @@
873873
],
874874
"title": "Cache Utilization",
875875
"type": "timeseries"
876+
},
877+
{
878+
"type": "heatmap",
879+
"title": "Request Prompt Length",
880+
"description": "Heatmap of request prompt length",
881+
"gridPos": {
882+
"x": 0,
883+
"y": 24,
884+
"w": 12,
885+
"h": 8
886+
},
887+
"datasource": {
888+
"uid": "prometheus",
889+
"type": "prometheus"
890+
},
891+
"id": 12,
892+
"targets": [
893+
{
894+
"datasource": {
895+
"type": "prometheus",
896+
"uid": "prometheus"
897+
},
898+
"refId": "A",
899+
"expr": "sum by(le) (increase(vllm:request_prompt_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))",
900+
"range": true,
901+
"instant": false,
902+
"editorMode": "builder",
903+
"legendFormat": "{{le}}",
904+
"useBackend": false,
905+
"disableTextWrap": false,
906+
"fullMetaSearch": false,
907+
"includeNullMetadata": true,
908+
"format": "heatmap"
909+
}
910+
],
911+
"options": {
912+
"calculate": false,
913+
"yAxis": {
914+
"axisPlacement": "left",
915+
"reverse": false,
916+
"unit": "none",
917+
"axisLabel": "Prompt Length"
918+
},
919+
"rowsFrame": {
920+
"layout": "auto",
921+
"value": "Request count"
922+
},
923+
"color": {
924+
"mode": "scheme",
925+
"fill": "dark-orange",
926+
"scale": "exponential",
927+
"exponent": 0.5,
928+
"scheme": "Spectral",
929+
"steps": 64,
930+
"reverse": false,
931+
"min": 0
932+
},
933+
"cellGap": 1,
934+
"filterValues": {
935+
"le": 1e-9
936+
},
937+
"tooltip": {
938+
"show": true,
939+
"yHistogram": true
940+
},
941+
"legend": {
942+
"show": true
943+
},
944+
"exemplars": {
945+
"color": "rgba(255,0,255,0.7)"
946+
},
947+
"cellValues": {
948+
"unit": "none"
949+
}
950+
},
951+
"fieldConfig": {
952+
"defaults": {
953+
"custom": {
954+
"scaleDistribution": {
955+
"type": "linear"
956+
},
957+
"hideFrom": {
958+
"tooltip": false,
959+
"viz": false,
960+
"legend": false
961+
}
962+
}
963+
},
964+
"overrides": []
965+
},
966+
"pluginVersion": "10.2.0"
967+
},
968+
{
969+
"datasource": {
970+
"uid": "prometheus",
971+
"type": "prometheus"
972+
},
973+
"type": "heatmap",
974+
"title": "Request Generation Length",
975+
"description": "Heatmap of request generation length",
976+
"gridPos": {
977+
"x": 12,
978+
"y": 24,
979+
"w": 12,
980+
"h": 8
981+
},
982+
"id": 13,
983+
"targets": [
984+
{
985+
"datasource": {
986+
"type": "prometheus",
987+
"uid": "prometheus"
988+
},
989+
"refId": "A",
990+
"expr": "sum by(le) (increase(vllm:request_generation_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))",
991+
"range": true,
992+
"instant": false,
993+
"editorMode": "builder",
994+
"legendFormat": "{{le}}",
995+
"useBackend": false,
996+
"disableTextWrap": false,
997+
"fullMetaSearch": false,
998+
"includeNullMetadata": true,
999+
"format": "heatmap"
1000+
}
1001+
],
1002+
"options": {
1003+
"calculate": false,
1004+
"yAxis": {
1005+
"axisPlacement": "left",
1006+
"reverse": false,
1007+
"unit": "none",
1008+
"axisLabel": "Generation Length"
1009+
},
1010+
"rowsFrame": {
1011+
"layout": "auto",
1012+
"value": "Request count"
1013+
},
1014+
"color": {
1015+
"mode": "scheme",
1016+
"fill": "dark-orange",
1017+
"scale": "exponential",
1018+
"exponent": 0.5,
1019+
"scheme": "Spectral",
1020+
"steps": 64,
1021+
"reverse": false,
1022+
"min": 0
1023+
},
1024+
"cellGap": 1,
1025+
"filterValues": {
1026+
"le": 1e-9
1027+
},
1028+
"tooltip": {
1029+
"show": true,
1030+
"yHistogram": true
1031+
},
1032+
"legend": {
1033+
"show": true
1034+
},
1035+
"exemplars": {
1036+
"color": "rgba(255,0,255,0.7)"
1037+
},
1038+
"cellValues": {
1039+
"unit": "none"
1040+
}
1041+
},
1042+
"fieldConfig": {
1043+
"defaults": {
1044+
"custom": {
1045+
"scaleDistribution": {
1046+
"type": "linear"
1047+
},
1048+
"hideFrom": {
1049+
"tooltip": false,
1050+
"viz": false,
1051+
"legend": false
1052+
}
1053+
}
1054+
},
1055+
"overrides": []
1056+
},
1057+
"pluginVersion": "10.2.0"
1058+
},
1059+
{
1060+
"datasource": {
1061+
"type": "prometheus",
1062+
"uid": "prometheus"
1063+
},
1064+
"fieldConfig": {
1065+
"defaults": {
1066+
"custom": {
1067+
"drawStyle": "line",
1068+
"lineInterpolation": "linear",
1069+
"barAlignment": 0,
1070+
"lineWidth": 1,
1071+
"fillOpacity": 0,
1072+
"gradientMode": "none",
1073+
"spanNulls": false,
1074+
"insertNulls": false,
1075+
"showPoints": "auto",
1076+
"pointSize": 5,
1077+
"stacking": {
1078+
"mode": "none",
1079+
"group": "A"
1080+
},
1081+
"axisPlacement": "auto",
1082+
"axisLabel": "",
1083+
"axisColorMode": "text",
1084+
"axisBorderShow": false,
1085+
"scaleDistribution": {
1086+
"type": "linear"
1087+
},
1088+
"axisCenteredZero": false,
1089+
"hideFrom": {
1090+
"tooltip": false,
1091+
"viz": false,
1092+
"legend": false
1093+
},
1094+
"thresholdsStyle": {
1095+
"mode": "off"
1096+
}
1097+
},
1098+
"color": {
1099+
"mode": "palette-classic"
1100+
},
1101+
"mappings": [],
1102+
"thresholds": {
1103+
"mode": "absolute",
1104+
"steps": [
1105+
{
1106+
"color": "green",
1107+
"value": null
1108+
},
1109+
{
1110+
"color": "red",
1111+
"value": 80
1112+
}
1113+
]
1114+
}
1115+
},
1116+
"overrides": []
1117+
},
1118+
"gridPos": {
1119+
"h": 8,
1120+
"w": 12,
1121+
"x": 0,
1122+
"y": 32
1123+
},
1124+
"id": 11,
1125+
"options": {
1126+
"tooltip": {
1127+
"mode": "single",
1128+
"sort": "none"
1129+
},
1130+
"legend": {
1131+
"showLegend": true,
1132+
"displayMode": "list",
1133+
"placement": "bottom",
1134+
"calcs": []
1135+
}
1136+
},
1137+
"targets": [
1138+
{
1139+
"datasource": {
1140+
"type": "prometheus",
1141+
"uid": "prometheus"
1142+
},
1143+
"disableTextWrap": false,
1144+
"editorMode": "builder",
1145+
"expr": "sum by(finished_reason) (increase(vllm:request_success_total{model_name=\"$model_name\"}[$__rate_interval]))",
1146+
"fullMetaSearch": false,
1147+
"includeNullMetadata": true,
1148+
"instant": false,
1149+
"interval": "",
1150+
"legendFormat": "__auto",
1151+
"range": true,
1152+
"refId": "A",
1153+
"useBackend": false
1154+
}
1155+
],
1156+
"title": "Finish Reason",
1157+
"description": "Number of finished requests by their finish reason: either an EOS token was generated or the max sequence length was reached.",
1158+
"type": "timeseries"
8761159
}
8771160
],
8781161
"refresh": "",

requirements-common.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ openai
1212
uvicorn[standard]
1313
pydantic >= 2.0 # Required for OpenAI server.
1414
prometheus_client >= 0.18.0
15+
prometheus-fastapi-instrumentator >= 7.0.0
1516
tiktoken == 0.6.0 # Required for DBRX tokenizer
1617
lm-format-enforcer == 0.9.8
1718
outlines == 0.0.34 # Requires torch >= 2.1.0

vllm/core/scheduler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ def abort_seq_group(self, request_id: Union[str, Iterable[str]]) -> None:
320320
for seq_group in state_queue:
321321
if not request_ids:
322322
# Using 'break' here may add two extra iterations,
323-
# but is acceptable to reduce complexity .
323+
# but is acceptable to reduce complexity.
324324
break
325325
if seq_group.request_id in request_ids:
326326
# Appending aborted group into pending list.

0 commit comments

Comments
 (0)