You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
"--http-port", type=int, default=8080, help="HTTP port for the engine (u16)."
34
42
)
# Routing strategy selector. Only the three listed choices are accepted;
# argparse rejects any other value. Defaults to "round-robin".
parser.add_argument(
    "--router-mode",
    type=str,
    choices=["round-robin", "random", "kv"],
    default="round-robin",
    help="How to route the request",
)
# KV-router tuning knob: a float weight (default 1.0) applied to the overlap
# score during worker selection, as described in the help text.
parser.add_argument(
    "--kv-overlap-score-weight",
    type=float,
    default=1.0,
    help="KV Router: Weight for overlap score in worker selection. Higher values prioritize KV cache reuse.",
)
# KV-router tuning knob: a float softmax temperature for worker sampling.
# The default of 0.0 corresponds to the deterministic case per the help text.
parser.add_argument(
    "--router-temperature",
    type=float,
    default=0.0,
    help="KV Router: Temperature for worker sampling via softmax. Higher values promote more randomness, and 0 fallbacks to deterministic.",
)
62
+
parser.add_argument(
63
+
"--kv-events",
64
+
action="store_true",
65
+
dest="use_kv_events",
66
+
help=" KV Router: Whether to use KV events to maintain the view of cached blocks. If false, would use ApproxKvRouter for predicting block creation / deletion based only on incoming requests at a timer.",
0 commit comments