11import time
22from abc import ABC , abstractmethod
3- from typing import Dict , List
3+ from typing import List
44
55import numpy as np
66import prometheus_client
77
8+ from vllm .config import ModelConfig
89from vllm .logger import init_logger
910from vllm .v1 .metrics .stats import IterationStats , SchedulerStats
1011
@@ -78,13 +79,13 @@ def log(self, scheduler_stats: SchedulerStats,
7879
7980class PrometheusStatLogger (StatLoggerBase ):
8081
81- def __init__ (self , labels : Dict [ str , str ] ):
82- self .labels = labels
82+ def __init__ (self , model_config : ModelConfig ):
83+ self ._unregister_vllm_metrics ()
8384
84- labelnames = self . labels . keys ()
85- labelvalues = self . labels . values ()
85+ labelnames = [ "model_name" ]
86+ labelvalues = [ model_config . served_model_name ]
8687
87- self . _unregister_vllm_metrics ()
88+ max_model_len = model_config . max_model_len
8889
8990 self .gauge_scheduler_running = prometheus_client .Gauge (
9091 name = "vllm:num_requests_running" ,
@@ -106,6 +107,20 @@ def __init__(self, labels: Dict[str, str]):
106107 documentation = "Number of generation tokens processed." ,
107108 labelnames = labelnames ).labels (* labelvalues )
108109
110+ self .histogram_num_prompt_tokens_request = \
111+ prometheus_client .Histogram (
112+ name = "vllm:request_prompt_tokens" ,
113+ documentation = "Number of prefill tokens processed." ,
114+ buckets = build_1_2_5_buckets (max_model_len ),
115+ labelnames = labelnames ).labels (* labelvalues )
116+
117+ self .histogram_num_generation_tokens_request = \
118+ prometheus_client .Histogram (
119+ name = "vllm:request_generation_tokens" ,
120+ documentation = "Number of generation tokens processed." ,
121+ buckets = build_1_2_5_buckets (max_model_len ),
122+ labelnames = labelnames ).labels (* labelvalues )
123+
109124 def log (self , scheduler_stats : SchedulerStats ,
110125 iteration_stats : IterationStats ):
111126 """Log to prometheus."""
@@ -116,9 +131,42 @@ def log(self, scheduler_stats: SchedulerStats,
116131 self .counter_generation_tokens .inc (
117132 iteration_stats .num_generation_tokens )
118133
134+ for finished_request in iteration_stats .finished_requests :
135+ self .histogram_num_prompt_tokens_request .observe (
136+ finished_request .num_prompt_tokens )
137+ self .histogram_num_generation_tokens_request .observe (
138+ finished_request .num_generation_tokens )
139+
119140 @staticmethod
120141 def _unregister_vllm_metrics ():
121142 # Unregister any existing vLLM collectors (for CI/CD
122143 for collector in list (prometheus_client .REGISTRY ._collector_to_names ):
123144 if hasattr (collector , "_name" ) and "vllm" in collector ._name :
124145 prometheus_client .REGISTRY .unregister (collector )
146+
147+
def build_buckets(mantissa_lst: List[int], max_value: int) -> List[int]:
    """
    Build an ascending list of histogram bucket bounds of the form
    ``m * 10**k`` for each mantissa ``m`` and ``k = 0, 1, 2, ...``,
    stopping as soon as a value would exceed ``max_value``.

    Args:
        mantissa_lst: Ascending mantissa values (e.g. ``[1, 2, 5]``).
        max_value: Inclusive upper bound for generated bucket values.

    Returns:
        All generated bucket values ``<= max_value``, in ascending order
        (empty if no value qualifies).
    """
    # Guard: an empty mantissa list would otherwise loop forever, since
    # the only `return` is reachable from inside the inner `for`.
    if not mantissa_lst:
        return []
    exponent = 0
    buckets: List[int] = []
    while True:
        for m in mantissa_lst:
            value = m * 10**exponent
            if value <= max_value:
                buckets.append(value)
            else:
                return buckets
        exponent += 1
164+
165+
def build_1_2_5_buckets(max_value: int) -> List[int]:
    """
    Return bucket bounds following the 1-2-5 series up to ``max_value``.

    Example:
        >>> build_1_2_5_buckets(100)
        [1, 2, 5, 10, 20, 50, 100]
    """
    mantissas = [1, 2, 5]
    return build_buckets(mantissas, max_value)
0 commit comments