@@ -9,37 +9,37 @@ import (
9
9
)
10
10
11
11
const (
	// InferenceModelComponent is the Prometheus metrics subsystem shared by all
	// inference-model metrics in this package.
	// NOTE(review): the pasted diff shows a trailing space inside the quoted value;
	// that is almost certainly a scrape artifact (spaces are injected around every
	// token), and a trailing space would be an invalid Prometheus name fragment —
	// confirm against the committed file.
	InferenceModelComponent = "inference_model"
)
14
14
15
15
var (
16
16
requestCounter = compbasemetrics .NewCounterVec (
17
17
& compbasemetrics.CounterOpts {
18
- Subsystem : LLMServiceModelComponent ,
18
+ Subsystem : InferenceModelComponent ,
19
19
Name : "request_total" ,
20
- Help : "Counter of LLM service requests broken out for each model and target model." ,
20
+ Help : "Counter of inference model requests broken out for each model and target model." ,
21
21
StabilityLevel : compbasemetrics .ALPHA ,
22
22
},
23
- []string {"llmservice_name" , " model_name" , "target_model_name" },
23
+ []string {"model_name" , "target_model_name" },
24
24
)
25
25
26
26
requestLatencies = compbasemetrics .NewHistogramVec (
27
27
& compbasemetrics.HistogramOpts {
28
- Subsystem : LLMServiceModelComponent ,
28
+ Subsystem : InferenceModelComponent ,
29
29
Name : "request_duration_seconds" ,
30
- Help : "LLM service response latency distribution in seconds for each model and target model." ,
30
+ Help : "Inference model response latency distribution in seconds for each model and target model." ,
31
31
Buckets : []float64 {0.005 , 0.025 , 0.05 , 0.1 , 0.2 , 0.4 , 0.6 , 0.8 , 1.0 , 1.25 , 1.5 , 2 , 3 ,
32
32
4 , 5 , 6 , 8 , 10 , 15 , 20 , 30 , 45 , 60 , 120 , 180 , 240 , 300 , 360 , 480 , 600 , 900 , 1200 , 1800 , 2700 , 3600 },
33
33
StabilityLevel : compbasemetrics .ALPHA ,
34
34
},
35
- []string {"llmservice_name" , " model_name" , "target_model_name" },
35
+ []string {"model_name" , "target_model_name" },
36
36
)
37
37
38
38
requestSizes = compbasemetrics .NewHistogramVec (
39
39
& compbasemetrics.HistogramOpts {
40
- Subsystem : LLMServiceModelComponent ,
40
+ Subsystem : InferenceModelComponent ,
41
41
Name : "request_sizes" ,
42
- Help : "LLM service requests size distribution in bytes for each model and target model." ,
42
+ Help : "Inference model requests size distribution in bytes for each model and target model." ,
43
43
// Use buckets ranging from 1000 bytes (1KB) to 10^9 bytes (1GB).
44
44
Buckets : []float64 {
45
45
64 , 128 , 256 , 512 , 1024 , 2048 , 4096 , 8192 , 16384 , 32768 , 65536 , // More fine-grained up to 64KB
48
48
},
49
49
StabilityLevel : compbasemetrics .ALPHA ,
50
50
},
51
- []string {"llmservice_name" , " model_name" , "target_model_name" },
51
+ []string {"model_name" , "target_model_name" },
52
52
)
53
53
)
54
54
@@ -64,9 +64,9 @@ func Register() {
64
64
}
65
65
66
66
// MonitorRequest handles monitoring requests.
67
- func MonitorRequest (llmserviceName , modelName , targetModelName string , reqSize int , elapsed time.Duration ) {
67
+ func MonitorRequest (modelName , targetModelName string , reqSize int , elapsed time.Duration ) {
68
68
elapsedSeconds := elapsed .Seconds ()
69
- requestCounter .WithLabelValues (llmserviceName , modelName , targetModelName ).Inc ()
70
- requestLatencies .WithLabelValues (llmserviceName , modelName , targetModelName ).Observe (elapsedSeconds )
71
- requestSizes .WithLabelValues (llmserviceName , modelName , targetModelName ).Observe (float64 (reqSize ))
69
+ requestCounter .WithLabelValues (modelName , targetModelName ).Inc ()
70
+ requestLatencies .WithLabelValues (modelName , targetModelName ).Observe (elapsedSeconds )
71
+ requestSizes .WithLabelValues (modelName , targetModelName ).Observe (float64 (reqSize ))
72
72
}
0 commit comments