Merged
40 commits
15142e3
fix: time hallucination
CaralHsi Aug 8, 2025
60890ee
fix: bug in src/memos/graph_dbs/neo4j_community.py
CaralHsi Aug 8, 2025
748c547
feat: use different template for different language input (#232)
Nyakult Aug 8, 2025
fc269d3
Merge branch 'main' into fix/chat_time_bug
CaralHsi Aug 8, 2025
0c0fd8b
fix: time hallucination (#234)
CaralHsi Aug 8, 2025
cb450be
fix: chat time bug (#235)
CaralHsi Aug 12, 2025
1718b64
push locomo rag eval code (#180)
CSLiuPeng Aug 13, 2025
b1ec575
feat: add further questions for dialogue (#236)
fridayL Aug 13, 2025
9851e22
Fix: fix list user bugs and multi-user-examples get_all args (#237)
fridayL Aug 14, 2025
084b14e
fix: nebula bug (#242)
CaralHsi Aug 14, 2025
ebc4cde
Feat: change reference position and reorganize code (#240)
fridayL Aug 14, 2025
bdcc6d7
feat: reject answer (#243)
CaralHsi Aug 14, 2025
9824ed2
feat: support retrieval from specified memos_cube (#244)
CaralHsi Aug 15, 2025
f1a58c9
Feat/reject answer (#245)
CaralHsi Aug 15, 2025
6477d6e
feat: modify reference format (#246)
CaralHsi Aug 15, 2025
49eb6cb
feat: memos add moscube turnoff (#247)
fridayL Aug 15, 2025
4f49ee6
Fix: fix memcube path bug for docker and change further question prom…
fridayL Aug 19, 2025
c141a02
Feat: add chat complete api for no-stream and rewrite chat func for …
fridayL Aug 26, 2025
c14868b
fix: mem-reader bug (#255)
CaralHsi Aug 26, 2025
d58e548
feat: modify nebula session pool (#259)
CaralHsi Aug 28, 2025
0ac8355
fix: general_text add user_id (#260)
fridayL Aug 29, 2025
e729218
feat: Asynchronous processing of logs, notifications and memory addit…
lijicode Aug 29, 2025
fe0624e
feat: mos add load sdk for user (#263)
fridayL Sep 1, 2025
0d85609
feat: enhance NebulaGraph pool management & improve Searcher usage lo…
CaralHsi Sep 2, 2025
9e347b8
Feat/add custom logger (#217)
CarltonXiang Sep 2, 2025
1b195a5
Feat: update chatbot for postprocessing memory (#267)
fridayL Sep 4, 2025
25f7a5a
Feat/add traceid (#270)
CarltonXiang Sep 4, 2025
d60ad8b
feat: modify mem-reader prompt (#273)
CaralHsi Sep 4, 2025
d86b0b5
Feat/add traceid (#274)
CarltonXiang Sep 8, 2025
296bc92
Feat: fix stream output and add openai stream (#276)
fridayL Sep 8, 2025
79ad733
feat: add reranker (#277)
CaralHsi Sep 9, 2025
909df45
fix: reranker config bug (#278)
CaralHsi Sep 9, 2025
d48a7c8
feat: adjust similarity threshold (#279)
CaralHsi Sep 9, 2025
48da7ca
feat: set minimum returned memories back to 3 (#280)
CaralHsi Sep 9, 2025
a6f9649
Feat: change mem prompt (#281)
fridayL Sep 9, 2025
7bb26a9
feat: internet search speed and reranker (#282)
CaralHsi Sep 9, 2025
78e14ea
feat: update filter mem (#285)
fridayL Sep 9, 2025
cc564b1
feat: updatebug (#287)
fridayL Sep 9, 2025
10fc2be
feat: modify self intro (#288)
CaralHsi Sep 10, 2025
584fae8
Chore: Change version to v1.0.1 (#290)
fridayL Sep 10, 2025
1 change: 1 addition & 0 deletions README.md
@@ -252,6 +252,7 @@ MemOS is licensed under the [Apache 2.0 License](./LICENSE).
Stay up to date with the latest MemOS announcements, releases, and community highlights.


- **2025-09-10** - 🎉 *MemOS v1.0.1 (Group Q&A Bot)*: Group Q&A bot based on MemOS Cube, updated KV-Cache performance comparison data across different GPU deployment schemes, optimized test benchmarks and statistics, added plaintext memory Reranker sorting, optimized plaintext memory hallucination issues, and Playground version updates. [Try PlayGround](https://memos-playground.openmem.net/login/)
- **2025-08-07** - 🎉 *MemOS v1.0.0 (MemCube Release)*: First MemCube with word game demo, LongMemEval evaluation, BochaAISearchRetriever integration, NebulaGraph support, enhanced search capabilities, and official Playground launch.
- **2025-07-29** – 🎉 *MemOS v0.2.2 (Nebula Update)*: Internet search+Nebula DB integration, refactored memory scheduler, KV Cache stress tests, MemCube Cookbook release (CN/EN), and 4b/1.7b/0.6b memory ops models.
- **2025-07-21** – 🎉 *MemOS v0.2.1 (Neo Release)*: Lightweight Neo version with plaintext+KV Cache functionality, Docker/multi-tenant support, MCP expansion, and new Cookbook/Mud game examples.
337 changes: 337 additions & 0 deletions evaluation/scripts/locomo/locomo_rag.py
@@ -0,0 +1,337 @@
"""
Adapted from the mem0 project.
"""

import argparse
import concurrent.futures
import json
import os
import threading
import time

from collections import defaultdict

import numpy as np
import tiktoken

from dotenv import load_dotenv
from jinja2 import Template
from openai import OpenAI
from tqdm import tqdm


load_dotenv()

PROMPT = """
# Question:
{{QUESTION}}

# Context:
{{CONTEXT}}

# Short answer:
"""

TECHNIQUES = ["mem0", "rag"]


class RAGManager:
def __init__(self, data_path="data/locomo/locomo10_rag.json", chunk_size=500, k=2):
self.model = os.getenv("MODEL")
self.client = OpenAI()
self.data_path = data_path
self.chunk_size = chunk_size
self.k = k

def generate_response(self, question, context):
template = Template(PROMPT)
prompt = template.render(CONTEXT=context, QUESTION=question)

max_retries = 3
retries = 0

while retries <= max_retries:
try:
t1 = time.time()
response = self.client.chat.completions.create(
model=self.model,
messages=[
{
"role": "system",
"content": "You are a helpful assistant that can answer "
"questions based on the provided context."
"If the question involves timing, use the conversation date for reference."
"Provide the shortest possible answer."
"Use words directly from the conversation when possible."
"Avoid using subjects in your answer.",
},
{"role": "user", "content": prompt},
],
temperature=0,
)
t2 = time.time()
if response and response.choices:
content = response.choices[0].message.content
if content is not None:
return content.strip(), t2 - t1
else:
print("❎ No content returned!")
return "No content returned", t2 - t1
else:
return "Empty response", t2 - t1
except Exception as e:
retries += 1
if retries > max_retries:
raise e
time.sleep(1) # Wait before retrying

def clean_chat_history(self, chat_history):
cleaned_chat_history = ""
for c in chat_history:
cleaned_chat_history += f"{c['timestamp']} | {c['speaker']}: {c['text']}\n"

return cleaned_chat_history

def calculate_embedding(self, document):
response = self.client.embeddings.create(model=os.getenv("EMBEDDING_MODEL"), input=document)
return response.data[0].embedding

def calculate_similarity(self, embedding1, embedding2):
return np.dot(embedding1, embedding2) / (
np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
)

def search(self, query, chunks, embeddings, k=1):
"""
Search for the top-k most similar chunks to the query.

Args:
query: The query string
chunks: List of text chunks
embeddings: List of embeddings for each chunk
k: Number of top chunks to return (default: 1)

Returns:
combined_chunks: The combined text of the top-k chunks
search_time: Time taken for the search
"""
t1 = time.time()
query_embedding = self.calculate_embedding(query)
similarities = [
self.calculate_similarity(query_embedding, embedding) for embedding in embeddings
]

# Get indices of top-k most similar chunks
top_indices = [np.argmax(similarities)] if k == 1 else np.argsort(similarities)[-k:][::-1]
# Combine the top-k chunks
combined_chunks = "\n<->\n".join([chunks[i] for i in top_indices])

t2 = time.time()
return combined_chunks, t2 - t1

def create_chunks(self, chat_history, chunk_size=500):
"""
Create chunks using tiktoken for more accurate token counting
"""
# Get the encoding for the model
encoding = tiktoken.encoding_for_model(os.getenv("EMBEDDING_MODEL"))

documents = self.clean_chat_history(chat_history)

if chunk_size == -1:
return [documents], []

chunks = []

# Encode the document
tokens = encoding.encode(documents)

# Split into chunks based on token count
for i in range(0, len(tokens), chunk_size):
chunk_tokens = tokens[i : i + chunk_size]
chunk = encoding.decode(chunk_tokens)
chunks.append(chunk)

embeddings = []
for chunk in chunks:
embedding = self.calculate_embedding(chunk)
embeddings.append(embedding)

return chunks, embeddings

def process_all_conversations(self, output_file_path):
with open(self.data_path) as f:
data = json.load(f)

final_results = defaultdict(list)
for key, value in tqdm(data.items(), desc="Processing conversations"):
chat_history = value["conversation"]
questions = value["question"]

chunks, embeddings = self.create_chunks(chat_history, self.chunk_size)

for item in tqdm(questions, desc="Answering questions", leave=False):
question = item["question"]
answer = item.get("answer", "")
category = item["category"]

if self.chunk_size == -1:
context = chunks[0]
search_time = 0
else:
context, search_time = self.search(question, chunks, embeddings, k=self.k)
response, response_time = self.generate_response(question, context)

final_results[key].append(
{
"question": question,
"answer": answer,
"category": category,
"context": context,
"response": response,
"search_time": search_time,
"response_time": response_time,
}
)
with open(output_file_path, "w+") as f:
json.dump(final_results, f, indent=4)

# Save results
with open(output_file_path, "w+") as f:
json.dump(final_results, f, indent=4)
print("The original rag file have been generated!")


class Experiment:
def __init__(self, technique_type, chunk_size):
self.technique_type = technique_type
self.chunk_size = chunk_size

def run(self):
print(
f"Running experiment with technique: {self.technique_type}, chunk size: {self.chunk_size}"
)


def process_item(item_data):
k, v = item_data
local_results = defaultdict(list)

for item in tqdm(v):
gt_answer = str(item["answer"])
pred_answer = str(item["response"])
category = str(item["category"])
question = str(item["question"])
search_time = str(item["search_time"])
response_time = str(item["response_time"])
search_context = str(item["context"])

# Skip category 5
if category == "5":
continue

local_results[k].append(
{
"question": question,
"golden_answer": gt_answer,
"answer": pred_answer,
"category": int(category),
"response_duration_ms": float(response_time) * 1000,
"search_duration_ms": float(search_time) * 1000,
"search_context": search_context,
# "llm_score_std":np.std(llm_score)
}
)

return local_results


def rename_json_keys(file_path):
with open(file_path, encoding="utf-8") as f:
data = json.load(f)

new_data = {}
for old_key in data:
new_key = f"locomo_exp_user_{old_key}"
new_data[new_key] = data[old_key]

with open(file_path, "w", encoding="utf-8") as f:
json.dump(new_data, f, indent=2, ensure_ascii=False)


def generate_response_file(file_path):
parser = argparse.ArgumentParser(description="Evaluate RAG results")

parser.add_argument(
"--output_folder",
type=str,
default="default_locomo_responses.json",
help="Path to save the evaluation results",
)
parser.add_argument(
"--max_workers", type=int, default=10, help="Maximum number of worker threads"
)
parser.add_argument("--chunk_size", type=int, default=2000, help="Chunk size for processing")
parser.add_argument("--num_chunks", type=int, default=2, help="Number of chunks to process")

# Parse only the flags this helper knows about; ignore main()'s extra CLI options
args, _ = parser.parse_known_args()
with open(file_path) as f:
data = json.load(f)

results = defaultdict(list)
results_lock = threading.Lock()

# Use ThreadPoolExecutor with specified workers
with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
futures = [executor.submit(process_item, item_data) for item_data in data.items()]

for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
local_results = future.result()
with results_lock:
for k, items in local_results.items():
results[k].extend(items)

# Save results to JSON file
with open(file_path, "w") as f:
json.dump(results, f, indent=4)

rename_json_keys(file_path)
print(f"Results saved to {file_path}")


def main():
parser = argparse.ArgumentParser(description="Run memory experiments")
parser.add_argument(
"--technique_type", choices=TECHNIQUES, default="rag", help="Memory technique to use"
)
parser.add_argument("--chunk_size", type=int, default=2000, help="Chunk size for processing")
parser.add_argument(
"--output_folder",
type=str,
default="results/locomo/mem0-default/",
help="Output path for results",
)
parser.add_argument("--top_k", type=int, default=30, help="Number of top memories to retrieve")
parser.add_argument("--num_chunks", type=int, default=2, help="Number of chunks to process")
parser.add_argument("--frame", type=str, default="mem0")
parser.add_argument("--version", type=str, default="default")

args = parser.parse_args()

response_path = f"{args.frame}_locomo_responses.json"

if args.technique_type == "rag":
os.makedirs(args.output_folder, exist_ok=True)  # ensure the results folder exists
output_file_path = os.path.join(args.output_folder, response_path)
rag_manager = RAGManager(
data_path="data/locomo/locomo10_rag.json", chunk_size=args.chunk_size, k=args.num_chunks
)
rag_manager.process_all_conversations(output_file_path)
"""Generate response files"""
generate_response_file(output_file_path)


if __name__ == "__main__":
start = time.time()
main()
end = time.time()
print(f"Execution time is:{end - start}")