File tree (Expand file tree / Collapse file tree): 1 file changed, +3 -9 lines changed
Original file line number | Diff line number | Diff line change
1- from time import time
2-
31from vllm import LLM , SamplingParams
42
3+ # NOTE: This is just a running example. For benchmarking purposes,
4+ # please see benchmarks/benchmark_prefix_caching.py
5+
56# Common prefix.
67prefix = (
78 "You are an expert school principal, skilled in effectively managing "
3738
3839# Generate texts from the prompts. The output is a list of RequestOutput objects
3940# that contain the prompt, generated text, and other information.
40- start_time_regular = time ()
4141outputs = regular_llm .generate (generating_prompts , sampling_params )
42- duration_regular = time () - start_time_regular
4342
4443regular_generated_texts = []
4544# Print the outputs.
5554prefix_cached_llm .generate (generating_prompts [0 ], sampling_params )
5655
5756# Generate with prefix caching.
58- start_time_cached = time ()
5957outputs = prefix_cached_llm .generate (generating_prompts , sampling_params )
60- duration_cached = time () - start_time_cached
6158
6259print ("Results with `enable_prefix_caching`" )
6360
7774 for i in range (len (prompts ))
7875])
7976print (f"Generated answers are the same: { generated_same } " )
80-
81- speedup = round (duration_regular / duration_cached , 2 )
82- print (f"Speed up of cached generation compared to the regular is: { speedup } " )
You can’t perform that action at this time.
0 commit comments