
Commit c285452

[Bugfix] Fix harmony library format mismatch for streaming tool calls
This commit addresses an issue where the harmony library passes data to the model in a different format than what the model outputs, causing the model to become confused and stop responding when /v1/chat/completions is used with stream and tools.

The fix updates the message format to match the model's expected output:

- Move the recipient info from the assistant start tag to the channel tag
- Change the content type from 'json' to '<|constrain|>json'
- Replace the <|end|> token with the <|call|> token for tool calls

This is a temporary fix until the underlying format mismatch is properly resolved.

Signed-off-by: kyt <[email protected]>
1 parent: 7920de0
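
To make the mismatch concrete, here is roughly what the rendered harmony text for a single assistant tool call looks like before and after the fix. This is an illustrative sketch assembled from the bullet points above; the get_weather function and its arguments are hypothetical, and the exact rendering is up to the harmony encoding:

```
# What the library rendered before this fix:
<|start|>assistant to=functions.get_weather<|channel|>commentary json<|message|>{"city": "Tokyo"}<|end|>

# What the model actually emits for a tool call:
<|start|>assistant<|channel|>commentary to=functions.get_weather <|constrain|>json<|message|>{"city": "Tokyo"}<|call|>
```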

File tree (2 files changed: +43 −11 lines)

- vllm/entrypoints/harmony_utils.py
- vllm/entrypoints/openai/serving_chat.py

vllm/entrypoints/harmony_utils.py
Lines changed: 40 additions & 10 deletions

@@ -16,9 +16,9 @@
 from openai.types.responses.response_reasoning_item import (
     Content as ResponseReasoningTextContent)
 from openai.types.responses.tool import Tool
-from openai_harmony import (Author, ChannelConfig, Conversation,
-                            DeveloperContent, HarmonyEncodingName, Message,
-                            ReasoningEffort, Role, StreamableParser,
+from openai_harmony import (Author, ChannelConfig, DeveloperContent,
+                            HarmonyEncodingName, Message, ReasoningEffort,
+                            RenderOptions, Role, StreamableParser,
                             SystemContent, TextContent, ToolDescription,
                             load_harmony_encoding)

@@ -213,14 +213,18 @@ def parse_chat_input(chat_msg) -> list[Message]:
     tool_calls = chat_msg.get("tool_calls")
     if role == "assistant" and tool_calls:
         msgs: list[Message] = []
+        content = chat_msg.get("content") or ""
+        analysis_msg = Message.from_role_and_content(Role.ASSISTANT, content)
+        analysis_msg = analysis_msg.with_channel("analysis")
+        msgs.append(analysis_msg)
+
         for call in tool_calls:
             func = call.get("function", {})
             name = func.get("name", "")
             arguments = func.get("arguments", "") or ""
             msg = Message.from_role_and_content(Role.ASSISTANT, arguments)
-            msg = msg.with_channel("commentary")
-            msg = msg.with_recipient(f"functions.{name}")
-            msg = msg.with_content_type("json")
+            msg = msg.with_channel(f"commentary to=functions.{name}")
+            msg.with_content_type("json")
             msgs.append(msg)
         return msgs

@@ -230,7 +234,7 @@ def parse_chat_input(chat_msg) -> list[Message]:
         content = chat_msg.get("content", "") or ""
         msg = Message.from_author_and_content(
             Author.new(Role.TOOL, f"functions.{name}"),
-            content).with_channel("commentary")
+            content).with_channel("commentary").with_recipient("assistant")
         return [msg]

     # Default: user/assistant/system messages with content
@@ -245,9 +249,35 @@ def parse_chat_input(chat_msg) -> list[Message]:


 def render_for_completion(messages: list[Message]) -> list[int]:
-    conversation = Conversation.from_messages(messages)
-    token_ids = get_encoding().render_conversation_for_completion(
-        conversation, Role.ASSISTANT)
+    if not messages:
+        return []
+
+    token_ids = []
+    encoding = get_encoding()
+    end_token_ids = encoding.encode("<|end|>", allowed_special={"<|end|>"})
+    call_token_ids = encoding.encode("<|call|>", allowed_special={"<|call|>"})
+
+    has_function_tools = any(
+        msg.author.role == Role.DEVELOPER and msg.content[0] and hasattr(
+            msg.content[0], 'tools') and msg.content[0].tools is not None
+        and msg.content[0].tools["functions"] is not None for msg in messages)
+
+    for i, msg in enumerate(messages):
+        msg_tokens = encoding.render(
+            msg,
+            RenderOptions(conversation_has_function_tools=has_function_tools))
+        is_tool_call = (msg.author.role == Role.ASSISTANT and msg.channel
+                        and "functions." in msg.channel)
+        if (i < len(messages) - 1 and is_tool_call and end_token_ids
+                and call_token_ids and msg_tokens
+                and msg_tokens[-1] == end_token_ids[0]):
+            msg_tokens[-1] = call_token_ids[0]
+        token_ids.extend(msg_tokens)
+
+    start_assistant_tokens = encoding.encode("<|start|>assistant",
+                                             allowed_special={"<|start|>"})
+    token_ids.extend(start_assistant_tokens)
+
     return token_ids

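For context, a minimal sketch of how the two patched helpers fit together. The chat history and the get_weather tool are hypothetical, and this assumes the module path shown in the diff header; it is not part of the commit:

```python
# Sketch: feed an OpenAI-style chat history through the patched helpers.
# Because the tool-call turn is not the last message, render_for_completion
# swaps its trailing <|end|> token for <|call|>, then ends the prompt with
# <|start|>assistant so the model continues from there.
from vllm.entrypoints.harmony_utils import (parse_chat_input,
                                            render_for_completion)

chat_history = [
    {"role": "user", "content": "What's the weather in Tokyo?"},
    {
        "role": "assistant",
        "content": "",
        "tool_calls": [{
            "function": {
                "name": "get_weather",  # hypothetical tool
                "arguments": '{"city": "Tokyo"}',
            },
        }],
    },
    {"role": "tool", "name": "get_weather", "content": '{"temp_c": 21}'},
]

# parse_chat_input turns each chat-completions message into harmony Messages.
messages = []
for chat_msg in chat_history:
    messages.extend(parse_chat_input(chat_msg))

token_ids = render_for_completion(messages)  # prompt token ids for the model
```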
vllm/entrypoints/openai/serving_chat.py
Lines changed: 3 additions & 1 deletion

@@ -1569,7 +1569,9 @@ def _make_request_with_harmony(
         sys_msg = get_system_message(
             reasoning_effort=request.reasoning_effort,
             browser_description=None,
-            python_description=None)
+            python_description=None,
+            with_custom_tools=request.tools is not None
+        )
         messages.append(sys_msg)

         # Add developer message.
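
A short sketch of the call-site change above: the system message is now told whether the request actually carries custom tools. The with_custom_tools parameter mirrors `request.tools is not None` from the diff; the reasoning-effort value is illustrative, and this assumes get_system_message is imported from vllm/entrypoints/harmony_utils.py:

```python
from vllm.entrypoints.harmony_utils import get_system_message

# Build the harmony system message; pass with_custom_tools=True only when
# the chat request declares tools, so function calling is advertised
# on demand rather than unconditionally.
sys_msg = get_system_message(
    reasoning_effort="medium",  # illustrative value
    browser_description=None,
    python_description=None,
    with_custom_tools=True,
)
```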
