Skip to content

Commit 65d4cba

Browse files
committed
Streaming improvements
1 parent 8c26807 commit 65d4cba

File tree

1 file changed

+243
-34
lines changed

1 file changed

+243
-34
lines changed

src/app/endpoints/streaming_query.py

Lines changed: 243 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,19 @@
33
import json
44
import logging
55
import re
6-
from typing import Any, AsyncIterator
6+
from typing import Any, AsyncIterator, Iterator
77

88
from cachetools import TTLCache # type: ignore
99

1010
from llama_stack_client import APIConnectionError
1111
from llama_stack_client.lib.agents.agent import AsyncAgent # type: ignore
1212
from llama_stack_client import AsyncLlamaStackClient # type: ignore
13-
from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
1413
from llama_stack_client.types import UserMessage # type: ignore
1514

15+
from llama_stack_client.lib.agents.event_logger import interleaved_content_as_str
16+
from llama_stack_client.types.shared import ToolCall
17+
from llama_stack_client.types.shared.interleaved_content_item import TextContentItem
18+
1619
from fastapi import APIRouter, HTTPException, Request, Depends, status
1720
from fastapi.responses import StreamingResponse
1821

@@ -122,7 +125,9 @@ def stream_end_event(metadata_map: dict) -> str:
122125
)
123126

124127

125-
# pylint: disable=R1702
# pylint: disable=R0912
def stream_build_event(chunk: Any, chunk_id: int, metadata_map: dict) -> Iterator[str]:
    """Build streaming events from a chunk response.

    This function processes chunks from the LLama Stack streaming response and
    formats them as Server-Sent Events (SSE) data strings.  Chunks are
    dispatched by event/step type to dedicated private helpers.

    Args:
        chunk: A chunk object from the streaming response.
        chunk_id: The current chunk ID counter.
        metadata_map: Mutable mapping updated in place with document metadata
            extracted from knowledge-search tool responses.

    Yields:
        Iterator[str]: Formatted SSE data strings with event information.
    """
    # -----------------------------------
    # Error handling
    # -----------------------------------
    if hasattr(chunk, "error"):
        yield format_stream_data(
            {
                "event": "error",
                "data": {
                    "id": chunk_id,
                    "token": chunk.error["message"],
                },
            }
        )
        return

    event_type = chunk.event.payload.event_type
    # Turn-level payloads carry no 'step_type'; default to None so the
    # dispatch below cannot raise AttributeError for unexpected payloads.
    step_type = getattr(chunk.event.payload, "step_type", None)

    # -----------------------------------
    # Turn handling
    # -----------------------------------
    if event_type in {"turn_start", "turn_awaiting_input", "turn_complete"}:
        yield from _handle_turn_event(chunk, chunk_id)
        return

    # -----------------------------------
    # Shield handling
    # -----------------------------------
    if event_type == "step_complete" and step_type == "shield_call":
        yield from _handle_shield_event(chunk, chunk_id)
        return

    # -----------------------------------
    # Inference handling
    # -----------------------------------
    if step_type == "inference":
        yield from _handle_inference_event(chunk, chunk_id)
        return

    # -----------------------------------
    # Tool Execution handling
    # -----------------------------------
    if step_type == "tool_execution":
        yield from _handle_tool_execution_event(chunk, chunk_id, metadata_map)
        return

    # -----------------------------------
    # Catch-all for everything else
    # -----------------------------------
    # Surface unknown chunks explicitly instead of emitting a debug
    # placeholder token (previously the literal string "manstis").
    yield format_stream_data(
        {
            "event": "error",
            "data": {
                "id": chunk_id,
                "token": f"Unhandled event type: {event_type}",
            },
        }
    )


def _handle_turn_event(chunk: Any, chunk_id: int) -> Iterator[str]:
    """Emit SSE events for turn-level chunks.

    'turn_complete' carries the final output message; 'turn_start' and
    'turn_awaiting_input' emit an empty token as a keep-alive.
    """
    if chunk.event.payload.event_type == "turn_complete":
        yield format_stream_data(
            {
                "event": "turn_complete",
                "data": {
                    "id": chunk_id,
                    "token": chunk.event.payload.turn.output_message.content,
                },
            }
        )
    else:
        yield format_stream_data(
            {
                "event": "token",
                "data": {
                    "id": chunk_id,
                    "token": "",
                },
            }
        )


def _handle_shield_event(chunk: Any, chunk_id: int) -> Iterator[str]:
    """Emit an SSE event describing the outcome of a shield (safety) call."""
    violation = chunk.event.payload.step_details.violation
    if not violation:
        token = "No Violation"
    else:
        token = f"{violation.metadata} {violation.user_message}"
    yield format_stream_data(
        {
            "event": "token",
            "data": {
                "id": chunk_id,
                "role": chunk.event.payload.step_type,
                "token": token,
            },
        }
    )


def _handle_inference_event(chunk: Any, chunk_id: int) -> Iterator[str]:
    """Emit SSE events for inference steps (text and tool-call deltas)."""
    payload = chunk.event.payload
    if payload.event_type == "step_start":
        yield format_stream_data(
            {
                "event": "token",
                "data": {
                    "id": chunk_id,
                    "role": payload.step_type,
                    "token": "",
                },
            }
        )

    elif payload.event_type == "step_progress":
        if payload.delta.type == "tool_call":
            # The tool-call delta is either a raw string fragment or a
            # structured ToolCall object; emit the name in the latter case.
            if isinstance(payload.delta.tool_call, str):
                yield format_stream_data(
                    {
                        "event": "tool_call",
                        "data": {
                            "id": chunk_id,
                            "role": payload.step_type,
                            "token": payload.delta.tool_call,
                        },
                    }
                )
            elif isinstance(payload.delta.tool_call, ToolCall):
                yield format_stream_data(
                    {
                        "event": "tool_call",
                        "data": {
                            "id": chunk_id,
                            "role": payload.step_type,
                            "token": payload.delta.tool_call.tool_name,
                        },
                    }
                )

        elif payload.delta.type == "text":
            yield format_stream_data(
                {
                    "event": "token",
                    "data": {
                        "id": chunk_id,
                        "role": payload.step_type,
                        "token": payload.delta.text,
                    },
                }
            )

    elif payload.event_type == "step_complete":
        yield format_stream_data(
            {
                "event": "step_complete",
                "data": {
                    "id": chunk_id,
                    "token": "",
                },
            }
        )


def _handle_tool_execution_event(
    chunk: Any, chunk_id: int, metadata_map: dict
) -> Iterator[str]:
    """Emit SSE events for tool-execution steps.

    Updates ``metadata_map`` in place with document metadata parsed from
    knowledge-search tool responses.
    """
    payload = chunk.event.payload
    if payload.event_type == "step_start":
        yield format_stream_data(
            {
                "event": "tool_call",
                "data": {
                    "id": chunk_id,
                    # PatternFly Chat UI expects 'role=inference' to render correctly
                    "role": "inference",  # payload.step_type,
                    "token": "",
                },
            }
        )

    elif payload.event_type == "step_complete":
        for tool_call in payload.step_details.tool_calls:
            yield format_stream_data(
                {
                    "event": "tool_call",
                    "data": {
                        "id": chunk_id,
                        # PatternFly Chat UI expects 'role=inference' to render correctly
                        "role": "inference",  # payload.step_type,
                        "token": f"Tool:{tool_call.tool_name} arguments:{tool_call.arguments}",
                    },
                }
            )

        for response in payload.step_details.tool_responses:
            if response.tool_name == "query_from_memory":
                inserted_context = interleaved_content_as_str(response.content)
                yield format_stream_data(
                    {
                        "event": "tool_call",
                        "data": {
                            "id": chunk_id,
                            # PatternFly Chat UI expects 'role=inference' to render correctly
                            "role": "inference",  # payload.step_type,
                            "token": f"Fetched {len(inserted_context)} bytes from memory",
                        },
                    }
                )

            elif response.tool_name == "knowledge_search" and response.content:
                summary = ""
                for index, text_content_item in enumerate(response.content):
                    if isinstance(text_content_item, TextContentItem):
                        if index == 0:
                            # First line of the first text item is the summary.
                            # partition() is safe when no newline is present
                            # (slicing by find() == -1 dropped the last char).
                            summary = text_content_item.text.partition("\n")[0]
                        for match in METADATA_PATTERN.findall(
                            text_content_item.text
                        ):
                            meta = json.loads(match.replace("'", '"'))
                            metadata_map[meta["document_id"]] = meta
                yield format_stream_data(
                    {
                        "event": "tool_call",
                        "data": {
                            "id": chunk_id,
                            # PatternFly Chat UI expects 'role=inference' to render correctly
                            "role": "inference",  # payload.step_type,
                            "token": f"Tool:{response.tool_name} summary:{summary}\n",
                        },
                    }
                )

            else:
                yield format_stream_data(
                    {
                        "event": "tool_call",
                        "data": {
                            "id": chunk_id,
                            # PatternFly Chat UI expects 'role=inference' to render correctly
                            "role": "inference",  # payload.step_type,
                            "token": f"Tool:{response.tool_name} response:{response.content}",
                        },
                    }
                )

        # We swallow the 'step_complete' event and re-emit 'token' events with the tool details.
        # Ensure we send a 'step_complete' event so the UI knows the 'tool_execution' completed.
        yield format_stream_data(
            {
                "event": "step_complete",
                "data": {
                    "id": chunk_id,
                    "token": "",
                },
            }
        )
186395

187396

188397
@router.post("/streaming_query")
@@ -222,7 +431,7 @@ async def response_generator(turn_response: Any) -> AsyncIterator[str]:
222431
yield stream_start_event(conversation_id)
223432

224433
async for chunk in turn_response:
225-
if event := stream_build_event(chunk, chunk_id, metadata_map):
434+
for event in stream_build_event(chunk, chunk_id, metadata_map):
226435
complete_response += json.loads(event.replace("data: ", ""))[
227436
"data"
228437
]["token"]

0 commit comments

Comments (0)