Skip to content

Commit 7bb26a9

Browse files
authored
feat: internet search speed and reranker (#282)
* feat: modify search rephrase prompt
* feat: modify task parser prompt
* feat: add searched log
1 parent a6f9649 commit 7bb26a9

File tree

5 files changed

+26
-8
lines changed

5 files changed

+26
-8
lines changed

src/memos/memories/textual/tree_text_memory/retrieve/bochasearch.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,9 @@ def _process_result(
218218
memory_items = []
219219
for read_item_i in read_items[0]:
220220
read_item_i.memory = (
221-
f"Title: {title}\nNewsTime: {publish_time}\nSummary: {summary}\n"
221+
f"[Outer internet view] Title: {title}\nNewsTime:"
222+
f" {publish_time}\nSummary:"
223+
f" {summary}\n"
222224
f"Content: {read_item_i.memory}"
223225
)
224226
read_item_i.metadata.source = "web"

src/memos/memories/textual/tree_text_memory/retrieve/recall.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def _vector_recall(
179179
query_embedding: list[list[float]],
180180
memory_scope: str,
181181
top_k: int = 20,
182-
max_num: int = 5,
182+
max_num: int = 3,
183183
cube_name: str | None = None,
184184
) -> list[TextualMemoryItem]:
185185
"""

src/memos/memories/textual/tree_text_memory/retrieve/searcher.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,12 @@ def search(
8787
self._update_usage_history(final_results, info)
8888

8989
logger.info(f"[SEARCH] Done. Total {len(final_results)} results.")
90+
res_results = ""
91+
for _num_i, result in enumerate(final_results):
92+
res_results += "\n" + (
93+
result.id + "|" + result.metadata.memory_type + "|" + result.memory
94+
)
95+
logger.info(f"[SEARCH] Results. {res_results}")
9096
return final_results
9197

9298
@timed
@@ -108,9 +114,10 @@ def _parse_task(self, query, info, mode, top_k=5):
108114
context = list({node["memory"] for node in related_nodes})
109115

110116
# optional: supplement context with internet knowledge
111-
if self.internet_retriever:
117+
"""if self.internet_retriever:
112118
extra = self.internet_retriever.retrieve_from_internet(query=query, top_k=3)
113119
context.extend(item.memory.partition("\nContent: ")[-1] for item in extra)
120+
"""
114121

115122
# parse goal using LLM
116123
parsed_goal = self.task_goal_parser.parse(

src/memos/memories/textual/tree_text_memory/retrieve/utils.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,20 @@
88
5. Need for internet search: If the user's task instruction only involves objective facts or can be completed without introducing external knowledge, set "internet_search" to False. Otherwise, set it to True.
99
6. Memories: Provide 2–5 short semantic expansions or rephrasings of the rephrased/original user task instruction. These are used for improved embedding search coverage. Each should be clear, concise, and meaningful for retrieval.
1010
11-
Task description:
12-
\"\"\"$task\"\"\"
13-
1411
Former conversation (if any):
1512
\"\"\"
1613
$conversation
1714
\"\"\"
1815
16+
Task description(User Question):
17+
\"\"\"$task\"\"\"
18+
1919
Context (if any):
2020
\"\"\"$context\"\"\"
2121
22-
Return strictly in this JSON format:
22+
Return strictly in this JSON format, note that the
23+
keys/tags/rephrased_instruction/memories should use the same language as the
24+
input query:
2325
{
2426
"keys": [...],
2527
"tags": [...],

src/memos/reranker/http_bge.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# memos/reranker/http_bge.py
22
from __future__ import annotations
33

4+
import re
5+
46
from typing import TYPE_CHECKING
57

68
import requests
@@ -11,6 +13,8 @@
1113
if TYPE_CHECKING:
1214
from memos.memories.textual.item import TextualMemoryItem
1315

16+
_TAG1 = re.compile(r"^\s*\[[^\]]*\]\s*")
17+
1418

1519
class HTTPBGEReranker(BaseReranker):
1620
"""
@@ -43,7 +47,10 @@ def rerank(
4347
if not graph_results:
4448
return []
4549

46-
documents = [getattr(item, "memory", None) for item in graph_results]
50+
documents = [
51+
(_TAG1.sub("", m) if isinstance((m := getattr(item, "memory", None)), str) else m)
52+
for item in graph_results
53+
]
4754
documents = [d for d in documents if isinstance(d, str) and d]
4855
if not documents:
4956
return []

0 commit comments

Comments
 (0)