+{"metrics": {"num_prompts_attempted": 59999, "num_prompts_succeeded": 59999, "request_rate": 200.0, "server_metrics": {}, "benchmark_time": 377.69680404663086, "throughput_rps": 158.85757929948576, "throughput": 35786.07723228514, "total_output_token": 13516287, "output_tokens_per_min": 2147164.6339371083, "total_input_tokens": 15092072, "input_tokens_per_min": 2397490.024533549, "total_tokens": 28608359, "tokens_per_min": 4544654.658470658, "avg_per_token_latency": 0.038136584066158385, "median_per_token_latency": 0.03260710797991071, "sd_per_token_latency": 0.039995399094383204, "min_per_token_latency": 0.00010268625128206123, "max_per_token_latency": 0.8718070238828659, "p90_per_token_latency": 0.07052694590421603, "p99_per_token_latency": 0.19175863699585777, "avg_latency": 13490.14784723948, "median_latency": 10904.660940170288, "sd_latency": 10759.461472867813, "min_latency": 53.10511589050293, "max_latency": 55610.99076271057, "p90_latency": 28706.796979904175, "p99_latency": 45658.41965198513, "avg_per_output_token_latency": 148.97623456610614, "median_per_output_token_latency": 60.334928053662296, "sd_per_output_token_latency": 232.28505133364948, "min_per_output_token_latency": 7.44791825612386, "max_per_output_token_latency": 3108.849883079529, "p90_per_output_token_latency": 393.8944477023501, "p99_per_output_token_latency": 1193.081065813697, "avg_input_len": 251.53872564542743, "median_input_len": 109.0, "sd_input_len": 281.6475735479433, "min_input_len": 4.0, "max_input_len": 1024.0, "p90_input_len": 714.0, "p99_input_len": 987.0, "avg_output_len": 225.27520458674311, "median_output_len": 144.0, "sd_output_len": 234.48900674005114, "min_output_len": 3.0, "max_output_len": 1025.0, "p90_output_len": 564.0, "p99_output_len": 948.0, "ClientConnectorError": 0, "TimeoutError": 0, "ContentTypeError": 1, "ClientOSError": 0, "ServerDisconnectedError": 0, "unknown_error": 0}, "dimensions": {"date": "20250328-043623", "backend": "vllm", "model_id": "meta-llama/Llama-2-7b-hf", "tokenizer_id": "meta-llama/Llama-2-7b-hf"}, "config": {"model": "meta-llama/Llama-2-7b-hf", "num_models": 1, "model_server": "vllm", "start_time": {"seconds": 1743136583, "nanos": 238149000}}, "summary_stats": {"stats": [{"request_rate": 200.0, "request_latency": {"mean": 13490.14784723948, "median": 10904.660940170288, "sd": 10759.461472867813, "min": 53.10511589050293, "max": 55610.99076271057, "p90": 28706.796979904175, "p99": 45658.41965198513}, "throughput": {"mean": 35786.07723228514}, "input_length": {"mean": 251.53872564542743, "median": 109.0, "sd": 281.6475735479433, "min": 4.0, "max": 1024.0, "p90": 714.0, "p99": 987.0}, "output_length": {"mean": 225.27520458674311, "median": 144.0, "sd": 234.48900674005114, "min": 3.0, "max": 1025.0, "p90": 564.0, "p99": 948.0}, "tpot": {"mean": 148.97623456610614, "median": 60.334928053662296, "sd": 232.28505133364948, "min": 7.44791825612386, "max": 3108.849883079529, "p90": 393.8944477023501, "p99": 1193.081065813697}, "model_server_metrics": []}]}}
0 commit comments