diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/bochasearch.py b/src/memos/memories/textual/tree_text_memory/retrieve/bochasearch.py index 1a84ce52..07f2c0a5 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/bochasearch.py @@ -218,7 +218,9 @@ def _process_result( memory_items = [] for read_item_i in read_items[0]: read_item_i.memory = ( - f"Title: {title}\nNewsTime: {publish_time}\nSummary: {summary}\n" + f"[Outer internet view] Title: {title}\nNewsTime:" + f" {publish_time}\nSummary:" + f" {summary}\n" f"Content: {read_item_i.memory}" ) read_item_i.metadata.source = "web" diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py index 3f5cc7cf..1f6a5a41 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/recall.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/recall.py @@ -179,7 +179,7 @@ def _vector_recall( query_embedding: list[list[float]], memory_scope: str, top_k: int = 20, - max_num: int = 5, + max_num: int = 3, cube_name: str | None = None, ) -> list[TextualMemoryItem]: """ diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py index 340490c7..9ac1646e 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/searcher.py @@ -87,6 +87,12 @@ def search( self._update_usage_history(final_results, info) logger.info(f"[SEARCH] Done. Total {len(final_results)} results.") + res_results = "" + for _num_i, result in enumerate(final_results): + res_results += "\n" + ( + result.id + "|" + result.metadata.memory_type + "|" + result.memory + ) + logger.info(f"[SEARCH] Results. {res_results}") return final_results @timed @@ -108,9 +114,10 @@ def _parse_task(self, query, info, mode, top_k=5): context = list({node["memory"] for node in related_nodes}) # optional: supplement context with internet knowledge - if self.internet_retriever: + """if self.internet_retriever: extra = self.internet_retriever.retrieve_from_internet(query=query, top_k=3) context.extend(item.memory.partition("\nContent: ")[-1] for item in extra) + """ # parse goal using LLM parsed_goal = self.task_goal_parser.parse( diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/utils.py b/src/memos/memories/textual/tree_text_memory/retrieve/utils.py index de389ef2..1b7b2894 100644 --- a/src/memos/memories/textual/tree_text_memory/retrieve/utils.py +++ b/src/memos/memories/textual/tree_text_memory/retrieve/utils.py @@ -8,18 +8,20 @@ 5. Need for internet search: If the user's task instruction only involves objective facts or can be completed without introducing external knowledge, set "internet_search" to False. Otherwise, set it to True. 6. Memories: Provide 2–5 short semantic expansions or rephrasings of the rephrased/original user task instruction. These are used for improved embedding search coverage. Each should be clear, concise, and meaningful for retrieval. -Task description: -\"\"\"$task\"\"\" - Former conversation (if any): \"\"\" $conversation \"\"\" +Task description(User Question): +\"\"\"$task\"\"\" + Context (if any): \"\"\"$context\"\"\" -Return strictly in this JSON format: +Return strictly in this JSON format, note that the +keys/tags/rephrased_instruction/memories should use the same language as the +input query: { "keys": [...], "tags": [...], diff --git a/src/memos/reranker/http_bge.py b/src/memos/reranker/http_bge.py index 08ff295a..a852f325 100644 --- a/src/memos/reranker/http_bge.py +++ b/src/memos/reranker/http_bge.py @@ -1,6 +1,8 @@ # memos/reranker/http_bge.py from __future__ import annotations +import re + from typing import TYPE_CHECKING import requests @@ -11,6 +13,8 @@ if TYPE_CHECKING: from memos.memories.textual.item import TextualMemoryItem +_TAG1 = re.compile(r"^\s*\[[^\]]*\]\s*") + class HTTPBGEReranker(BaseReranker): """ @@ -43,7 +47,10 @@ def rerank( if not graph_results: return [] - documents = [getattr(item, "memory", None) for item in graph_results] + documents = [ + (_TAG1.sub("", m) if isinstance((m := getattr(item, "memory", None)), str) else m) + for item in graph_results + ] documents = [d for d in documents if isinstance(d, str) and d] if not documents: return []