Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ line-length = 100
[tool.mypy]
disable_error_code = ["union-attr", "return-value", "arg-type", "import-untyped"]
ignore_missing_imports = true
mypy_path = ["src"]
namespace_packages = true
explicit_package_bases = true

[tool.pdm]
distribution = true
Expand All @@ -34,19 +37,19 @@ version = "0.1.0"
description = "RAG content for OpenShift LightSpeed."
authors = []
dependencies = [
"PyYAML==6.0.2",
"PyYAML>=6.0.2",
"huggingface_hub>=0.33.4",
"llama-index==0.12.51",
"llama-index-vector-stores-faiss==0.3.0",
"llama-index-embeddings-huggingface==0.4.0",
"llama-index-readers-file==0.4.11",
"faiss-cpu==1.11.0.post1",
"llama-index>=0.12.51",
"llama-index-vector-stores-faiss>=0.3.0",
"llama-index-embeddings-huggingface>=0.4.0",
"llama-index-readers-file>=0.4.11",
"faiss-cpu>=1.11.0.post1",
"llama-index-vector-stores-postgres>=0.5.4",
"torch==2.7.1",
"llama-stack==0.2.16",
"llama-stack-client==0.2.16",
"aiosqlite==0.21.0",
"sqlite-vec==0.1.6",
"torch>=2.7.1",
"llama-stack==0.2.22",
"llama-stack-client==0.2.22",
"aiosqlite>=0.21.0",
"sqlite-vec>=0.1.6",
"tomlkit",
]
requires-python = "==3.12.*"
Expand Down
62 changes: 41 additions & 21 deletions scripts/query_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import yaml
from llama_index.core import Settings, load_index_from_storage
from llama_index.core.llms.utils import resolve_llm
from llama_index.core.schema import NodeWithScore, TextNode
from llama_index.core.storage.storage_context import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.faiss import FaissVectorStore
Expand All @@ -35,20 +36,34 @@ def _llama_index_query(args: argparse.Namespace) -> None:

if args.node is not None:
node = storage_context.docstore.get_node(args.node)
result = {
"query": args.query,
"type": "single_node",
"node_id": args.node,
"node": {
"id": node.node_id,
"text": node.text,
"metadata": node.metadata if hasattr(node, "metadata") else {},
},
}
if args.json:
print(json.dumps(result, indent=2))
if isinstance(node, TextNode):
result = {
"query": args.query,
"type": "single_node",
"node_id": args.node,
Comment on lines 37 to +43
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Handle “node not found” explicitly and avoid ambiguous type message; also prefer sys.exit.

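docstore.get_node can raise KeyError or return None. Treat that as “not found” instead of “not a TextNode”, and exit via sys.exit rather than the exit() builtin, which is installed by the site module for interactive use and is not guaranteed to exist in non-interactive runs.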

-    if args.node is not None:
-        node = storage_context.docstore.get_node(args.node)
+    if args.node is not None:
+        try:
+            node = storage_context.docstore.get_node(args.node)
+        except KeyError:
+            msg = f"Node {args.node} not found"
+            logging.warning(msg)
+            if args.json:
+                print(json.dumps({
+                    "query": args.query,
+                    "type": "single_node",
+                    "node_id": args.node,
+                    "error": msg,
+                }, indent=2))
+            sys.exit(1)
         if isinstance(node, TextNode):
           ...
-        else:
+        else:
             logging.warning(
                 f"Node {args.node} is not a TextNode, type: {type(node).__name__}"
             )
             if args.json:
                 result = {
                     "query": args.query,
                     "type": "single_node",
                     "node_id": args.node,
                     "error": f"Node is not a TextNode (type: {type(node).__name__})",
                 }
                 print(json.dumps(result, indent=2))
-            exit(1)
+            sys.exit(1)

Also applies to: 55-66

🤖 Prompt for AI Agents
In scripts/query_rag.py around lines 37-43 (and similarly 55-66), handle the
case where storage_context.docstore.get_node may raise KeyError or return None:
wrap the get_node call in a try/except KeyError (and check for None) and if
missing print/log a clear "node not found" message and call sys.exit(1); if the
node is present but not a TextNode print/log a clear "node is not a TextNode"
message and call sys.exit(1) instead of falling through to ambiguous behavior.
Ensure sys is imported at the top if not already.

"node": {
"id": node.node_id,
"text": node.text,
"metadata": node.metadata if hasattr(node, "metadata") else {},
},
}
if args.json:
print(json.dumps(result, indent=2))
else:
print(node)
else:
print(node)
logging.warning(
f"Node {args.node} is not a TextNode, type: {type(node).__name__}"
)
if args.json:
result = {
"query": args.query,
"type": "single_node",
"node_id": args.node,
"error": f"Node is not a TextNode (type: {type(node).__name__})",
}
print(json.dumps(result, indent=2))
exit(1)
else:
retriever = vector_index.as_retriever(similarity_top_k=args.top_k)
nodes = retriever.retrieve(args.query)
Expand Down Expand Up @@ -88,13 +103,18 @@ def _llama_index_query(args: argparse.Namespace) -> None:
"nodes": [],
}
for node in nodes: # type: ignore
node_data = {
"id": node.node_id,
"score": node.score,
"text": node.text,
"metadata": node.metadata if hasattr(node, "metadata") else {},
}
result["nodes"].append(node_data)
if isinstance(node, NodeWithScore):
node_data = {
"id": node.node_id,
"score": node.score,
"text": node.text,
"metadata": node.metadata if hasattr(node, "metadata") else {},
}
result["nodes"].append(node_data)
else:
logging.debug(
f"Skipping node of type {type(node).__name__}, expected NodeWithScore"
)
Comment on lines 105 to +117
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Access fields via NodeWithScore.node to avoid AttributeError across versions; drop the loop’s type: ignore.

In llama-index 0.12, NodeWithScore holds the node in .node. Accessing .node_id, .text, .metadata directly on the wrapper may fail.

-        for node in nodes:  # type: ignore
-            if isinstance(node, NodeWithScore):
-                node_data = {
-                    "id": node.node_id,
-                    "score": node.score,
-                    "text": node.text,
-                    "metadata": node.metadata if hasattr(node, "metadata") else {},
-                }
-                result["nodes"].append(node_data)
+        from typing import Sequence, cast
+        retrieved = cast(Sequence[NodeWithScore], nodes)
+        for nw in retrieved:
+            if isinstance(nw, NodeWithScore):
+                base = nw.node
+                # Prefer get_content() to work for non-TextNode nodes too
+                text = getattr(base, "text", None)
+                if text is None and hasattr(base, "get_content"):
+                    text = base.get_content() or ""
+                node_data = {
+                    "id": getattr(base, "node_id", getattr(base, "id_", "")),
+                    "score": nw.score,
+                    "text": text,
+                    "metadata": getattr(base, "metadata", {}) or {},
+                }
+                result["nodes"].append(node_data)
             else:
                 logging.debug(
                     f"Skipping node of type {type(node).__name__}, expected NodeWithScore"
                 )

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In scripts/query_rag.py around lines 105–117, the loop currently assumes
NodeWithScore exposes node_id/text/metadata directly and uses a "# type:
ignore"; change it to handle both wrapper and plain Node without ignoring types:
if isinstance(node, NodeWithScore) set inner = node.node and score = node.score,
otherwise set inner = node and score = None; then read inner.node_id, inner.text
and inner.metadata from the inner node and take the score only from the wrapper.
Remove the "# type: ignore" comment so the code works for llama-index 0.12+ and
earlier versions.


if args.json:
print(json.dumps(result, indent=2))
Expand Down Expand Up @@ -134,7 +154,7 @@ def _llama_stack_query(args: argparse.Namespace) -> None:
yaml.safe_dump(cfg, open(cfg_file, "w", encoding="utf-8"))

stack_lib = importlib.import_module("llama_stack")
client = stack_lib.distribution.library_client.LlamaStackAsLibraryClient(cfg_file)
client = stack_lib.core.library_client.LlamaStackAsLibraryClient(cfg_file)
client.initialize()

# No need to register the DB as it's defined in llama-stack.yaml
Expand Down
10 changes: 4 additions & 6 deletions src/lightspeed_rag_content/document_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,12 +280,10 @@ def __init__(self, config: _Config):
# Not using importlib to help with typechecking
import llama_stack # pylint: disable=C0415

self.document_class = llama_stack.apis.tools.rag_tool.RAGDocument # type: ignore
self.client_class = (
llama_stack.distribution.library_client.LlamaStackAsLibraryClient # type: ignore
)
self.document_class = llama_stack.apis.tools.rag_tool.RAGDocument
self.client_class = llama_stack.core.library_client.LlamaStackAsLibraryClient
self.documents: list[
dict[str, Any] | llama_stack.apis.tools.rag_tool.RAGDocument # type: ignore
dict[str, Any] | llama_stack.apis.tools.rag_tool.RAGDocument
] = []

def write_yaml_config(self, index_id: str, filename: str, db_file: str) -> None:
Expand Down Expand Up @@ -313,7 +311,7 @@ def _start_llama_stack(self, cfg_file: str) -> Any:
"""Start llama-stack as a library and return the client.

Return type is really
llama_stack.distribution.library_client.LlamaStackAsLibraryClient
llama_stack.core.library_client.LlamaStackAsLibraryClient

But we do dynamic import, so we don't have it for static typechecking
"""
Expand Down
Loading