13 changes: 12 additions & 1 deletion examples/core_memories/tree_textual_memory.py
@@ -203,6 +203,18 @@ def embed_memory_item(memory: str) -> list[float]:
print(f"{i}'th similar result is: " + str(r["memory"]))
print(f"Successfully search {len(results)} memories")

# Try this when using 'fine' mode (note that you must pass the internet
# retriever config; see examples/core_memories/textual_internet_memoy.py)
results_fine_search = my_tree_textual_memory.search(
    "Recent news in New York",
    top_k=10,
    mode="fine",
    info={"query": "Recent news in New York", "user_id": "111", "session": "2234"},
)
for i, r in enumerate(results_fine_search):
    r = r.to_dict()
    print(f"{i}'th similar result is: " + str(r["memory"]))
print(f"Successfully searched {len(results_fine_search)} memories")

# find related nodes
related_nodes = my_tree_textual_memory.get_relevant_subgraph("Painting")

@@ -235,7 +247,6 @@ def embed_memory_item(memory: str) -> list[float]:
# close the synchronous thread in memory manager
my_tree_textual_memory.memory_manager.close()


# my_tree_textual_memory.dump
my_tree_textual_memory.dump("tmp/my_tree_textual_memory")
my_tree_textual_memory.drop()
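
For contrast, a minimal sketch of how the two retrieval modes behave after this change (the searcher diff below skips internet retrieval entirely in "fast" mode); the query string and info fields mirror the example above, everything else is illustrative:

# Sketch: "fast" answers from the local graph only; "fine" also pulls
# internet results and requires the internet retriever config.
local_only = my_tree_textual_memory.search(
    "Recent news in New York",
    top_k=10,
    mode="fast",  # internet retriever is skipped in fast mode
)
with_internet = my_tree_textual_memory.search(
    "Recent news in New York",
    top_k=10,
    mode="fine",
    info={"query": "Recent news in New York", "user_id": "111", "session": "2234"},
)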
6 changes: 6 additions & 0 deletions src/memos/configs/internet_retriever.py
@@ -4,6 +4,7 @@

from pydantic import Field, field_validator, model_validator

from memos.chunkers.factory import ChunkerConfigFactory
from memos.configs.base import BaseConfig
from memos.exceptions import ConfigurationError

@@ -47,6 +48,11 @@ class XinyuSearchConfig(BaseInternetRetrieverConfig):
num_per_request: int = Field(
default=10, description="Number of results per API request (not used for Xinyu)"
)
    chunker: ChunkerConfigFactory = Field(
        default_factory=ChunkerConfigFactory,
        description="Chunker configuration",
    )


class InternetRetrieverConfigFactory(BaseConfig):
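A minimal sketch of constructing the updated config; `api_key`, `search_engine_id`, and `max_results` are taken from elsewhere in this diff, and default-constructing `ChunkerConfigFactory` assumes all of its fields have defaults:

from memos.chunkers.factory import ChunkerConfigFactory
from memos.configs.internet_retriever import XinyuSearchConfig

config = XinyuSearchConfig(
    api_key="your-xinyu-access-key",  # used as access_key in the factory below
    search_engine_id="your-engine-id",
    max_results=20,
    # chunker may also be omitted entirely, thanks to default_factory:
    chunker=ChunkerConfigFactory(),
)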
2 changes: 1 addition & 1 deletion src/memos/memories/textual/item.py
@@ -59,7 +59,7 @@ def __str__(self) -> str:
class TreeNodeTextualMemoryMetadata(TextualMemoryMetadata):
"""Extended metadata for structured memory, layered retrieval, and lifecycle tracking."""

memory_type: Literal["WorkingMemory", "LongTermMemory", "UserMemory"] = Field(
memory_type: Literal["WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"] = Field(
default="WorkingMemory", description="Memory lifecycle type."
)
sources: list[str] | None = Field(
@@ -2,6 +2,7 @@

from typing import Any, ClassVar

from memos.chunkers.factory import ChunkerFactory
from memos.configs.internet_retriever import InternetRetrieverConfigFactory
from memos.embedders.base import BaseEmbedder
from memos.memories.textual.tree_text_memory.retrieve.internet_retriever import (
@@ -66,6 +67,7 @@ def from_config(
access_key=config.api_key, # Use api_key as access_key for xinyu
search_engine_id=config.search_engine_id,
embedder=embedder,
chunker=ChunkerFactory.from_config(config.chunker),
max_results=config.max_results,
)
else:
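A hedged sketch of the wiring this factory change produces; the surrounding variable names (`config`, `embedder`) are assumptions:

# Sketch: the factory now threads a chunker into the Xinyu retriever.
retriever = XinyuSearchRetriever(
    access_key=config.api_key,  # api_key doubles as the Xinyu access key
    search_engine_id=config.search_engine_id,
    embedder=embedder,
    chunker=ChunkerFactory.from_config(config.chunker),  # new required argument
    max_results=config.max_results,
)
memories = retriever.retrieve_from_internet("Recent news in New York", top_k=10)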
12 changes: 2 additions & 10 deletions src/memos/memories/textual/tree_text_memory/retrieve/searcher.py
@@ -136,7 +136,7 @@ def retrieve_from_internet():
"""
Retrieve information from the internet using Google Custom Search API.
"""
if not self.internet_retriever:
if not self.internet_retriever or mode == "fast":
return []
if memory_type not in ["All"]:
return []
@@ -149,7 +149,7 @@ def retrieve_from_internet():
query=query,
query_embedding=query_embedding[0],
graph_results=internet_items,
top_k=top_k * 2,
top_k=max(top_k, 10),
parsed_goal=parsed_goal,
)
return ranked_memories
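
A quick worked comparison of the reranker budget before and after this change, as a sketch:

# Old: rerank 2 * top_k internet candidates; new: at least 10 and never
# fewer than top_k, so small top_k values now get a larger candidate pool.
for top_k in (3, 10, 50):
    print(top_k, top_k * 2, max(top_k, 10))
# top_k=3:  old 6,   new 10
# top_k=10: old 20,  new 10
# top_k=50: old 100, new 50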
@@ -184,14 +184,6 @@ def retrieve_from_internet():
TextualMemoryItem(id=item.id, memory=item.memory, metadata=new_meta)
)

# Step 4: Reasoning over all retrieved and ranked memory
if mode == "fine":
searched_res = self.reasoner.reason(
query=query,
ranked_memories=searched_res,
parsed_goal=parsed_goal,
)

# Step 5: Update usage history with current timestamp
now_time = datetime.now().isoformat()
usage_record = json.dumps(
147 changes: 92 additions & 55 deletions src/memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py
@@ -3,10 +3,12 @@
import json
import uuid

from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

import requests

from memos.chunkers.base import BaseChunker
from memos.embedders.factory import OllamaEmbedder
from memos.log import get_logger
from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
@@ -93,8 +95,8 @@ def search(self, query: str, max_results: int | None = None) -> list[dict]:
"online_search": {
"max_entries": max_results,
"cache_switch": False,
"baidu_field": {"switch": True, "mode": "relevance", "type": "page"},
"bing_field": {"switch": False, "mode": "relevance", "type": "page_web"},
"baidu_field": {"switch": False, "mode": "relevance", "type": "page"},
"bing_field": {"switch": True, "mode": "relevance", "type": "page"},
"sogou_field": {"switch": False, "mode": "relevance", "type": "page"},
},
"request_id": "memos" + str(uuid.uuid4()),
@@ -112,6 +114,7 @@ def __init__(
access_key: str,
search_engine_id: str,
embedder: OllamaEmbedder,
chunker: BaseChunker,
max_results: int = 20,
):
"""
@@ -124,6 +127,7 @@
"""
self.xinyu_api = XinyuSearchAPI(access_key, search_engine_id, max_results=max_results)
self.embedder = embedder
self.chunker = chunker

def retrieve_from_internet(
self, query: str, top_k: int = 10, parsed_goal=None
@@ -143,63 +147,25 @@ def retrieve_from_internet(
        search_results = self.xinyu_api.search(query, max_results=top_k)

        # Convert to TextualMemoryItem format
        memory_items = []

        for _, result in enumerate(search_results):
            # Extract basic information from Xinyu response format
            title = result.get("title", "")
            content = result.get("content", "")
            summary = result.get("summary", "")
            url = result.get("url", "")
            publish_time = result.get("publish_time", "")
            if publish_time:
                try:
                    publish_time = datetime.strptime(publish_time, "%Y-%m-%d %H:%M:%S").strftime(
                        "%Y-%m-%d"
                    )
                except Exception as e:
                    logger.error(f"xinyu search error: {e}")
                    publish_time = datetime.now().strftime("%Y-%m-%d")
            else:
                publish_time = datetime.now().strftime("%Y-%m-%d")
            source = result.get("source", "")
            site = result.get("site", "")
            if site:
                site = site.split("|")[0]

            # Combine memory content
            memory_content = (
                f"Title: {title}\nSummary: {summary}\nContent: {content[:200]}...\nSource: {url}"
            )

            # Create metadata
            metadata = TreeNodeTextualMemoryMetadata(
                user_id=None,
                session_id=None,
                status="activated",
                type="fact",  # Search results are usually factual information
                memory_time=publish_time,
                source="web",
                confidence=85.0,  # Confidence level for search information
                entities=self._extract_entities(title, content, summary),
                tags=self._extract_tags(title, content, summary, parsed_goal),
                visibility="public",
                memory_type="LongTermMemory",
                key=title,
                sources=[url] if url else [],
                embedding=self.embedder.embed([memory_content])[0],
                created_at=datetime.now().isoformat(),
                usage=[],
                background=f"Xinyu search result from {site or source}",
            )
            # Create TextualMemoryItem
            memory_item = TextualMemoryItem(
                id=str(uuid.uuid4()), memory=memory_content, metadata=metadata
            )

            memory_items.append(memory_item)

        return memory_items

        memory_items: list[TextualMemoryItem] = []

        with ThreadPoolExecutor(max_workers=8) as executor:
            futures = [
                executor.submit(self._process_result, result, query, parsed_goal)
                for result in search_results
            ]
            for future in as_completed(futures):
                try:
                    memory_items.extend(future.result())
                except Exception as e:
                    logger.error(f"Error processing search result: {e}")

        unique_memory_items = {}
        for item in memory_items:
            if item.memory not in unique_memory_items:
                unique_memory_items[item.memory] = item

        return list(unique_memory_items.values())
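
The dedup step above keys items by their full memory text, so identical chunks surfaced by different search results collapse to the first one seen; a minimal sketch (metadata construction elided):

# meta: any TreeNodeTextualMemoryMetadata instance (construction elided).
a = TextualMemoryItem(id=str(uuid.uuid4()), memory="same chunk", metadata=meta)
b = TextualMemoryItem(id=str(uuid.uuid4()), memory="same chunk", metadata=meta)
unique = {}
for item in (a, b):
    if item.memory not in unique:
        unique[item.memory] = item
assert list(unique.values()) == [a]  # b is dropped as a duplicate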

def _extract_entities(self, title: str, content: str, summary: str) -> list[str]:
"""
@@ -333,3 +299,74 @@ def _extract_tags(self, title: str, content: str, summary: str, parsed_goal=None
tags.extend(parsed_goal.tags)

return list(set(tags))[:15] # Limit to 15 tags

def _process_result(
self, result: dict, query: str, parsed_goal: str
) -> list[TextualMemoryItem]:
title = result.get("title", "")
content = result.get("content", "")
summary = result.get("summary", "")
url = result.get("url", "")
publish_time = result.get("publish_time", "")
if publish_time:
try:
publish_time = datetime.strptime(publish_time, "%Y-%m-%d %H:%M:%S").strftime(
"%Y-%m-%d"
)
except Exception as e:
logger.error(f"xinyu search error: {e}")
publish_time = datetime.now().strftime("%Y-%m-%d")
else:
publish_time = datetime.now().strftime("%Y-%m-%d")
source = result.get("source", "")
site = result.get("site", "")
if site:
site = site.split("|")[0]

qualified_chunks = self._chunk(content)

memory_items = []
for chunk_text, chunk_emb, score in qualified_chunks:
memory_content = (
f"Title: {title}\nNewsTime: {publish_time}\nSummary: {summary}\n"
f"Content: {chunk_text}\nSource: {url}"
)
metadata = TreeNodeTextualMemoryMetadata(
user_id=None,
session_id=None,
status="activated",
type="fact",
source="web",
confidence=score,
entities=self._extract_entities(title, content, summary),
tags=self._extract_tags(title, content, summary, parsed_goal),
visibility="public",
memory_type="OuterMemory",
key=f"[{source}]" + title,
sources=[url] if url else [],
embedding=chunk_emb,
created_at=datetime.now().isoformat(),
usage=[],
background=f"Xinyu search result from {site or source}",
)
memory_items.append(
TextualMemoryItem(id=str(uuid.uuid4()), memory=memory_content, metadata=metadata)
)

return memory_items
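
For orientation, a hedged sketch of the result dict `_process_result` consumes, inferred from the `.get()` calls above; real Xinyu payloads may carry more fields, and the values here are illustrative:

sample_result = {
    "title": "Subway expansion approved",
    "summary": "City approves new subway line.",
    "content": "The city council today approved ...",
    "url": "https://example.com/news/subway",
    "publish_time": "2025-01-01 08:00:00",  # falls back to today if absent
    "source": "example-news",
    "site": "Example News|politics",  # only the part before '|' is kept
}
items = retriever._process_result(sample_result, query="subway news", parsed_goal=None)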

def _chunk(self, content: str) -> list[tuple[str, list[float], float]]:
"""
Use SentenceChunker to split content into chunks and embed each.

Returns:
List of (chunk_text, chunk_embedding, dummy_score)
"""
chunks = self.chunker.chunk(content)
if not chunks:
return []

chunk_texts = [c.text for c in chunks]
chunk_embeddings = self.embedder.embed(chunk_texts)

return [(text, emb, 1.0) for text, emb in zip(chunk_texts, chunk_embeddings, strict=False)]
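
And a short usage sketch of `_chunk`; the placeholder article text is illustrative, and 1.0 is the dummy relevance score that `_process_result` carries through to `confidence`:

for text, embedding, score in retriever._chunk("A long news article ..."):
    print(len(text), len(embedding), score)  # chunk size, embedding dim, 1.0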
1 change: 0 additions & 1 deletion tests/memories/textual/test_tree_searcher.py
@@ -94,7 +94,6 @@ def test_searcher_fine_mode_triggers_reasoner(mock_searcher):
top_k=1,
mode="fine",
)
assert mock_searcher.reasoner.reason.called
assert len(result) == 1

