From 3ec40353748974366d9aa386bce4d6b85535dc73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B8=AD=E9=98=B3=E9=98=B3?= Date: Sun, 13 Jul 2025 21:12:23 +0800 Subject: [PATCH 1/4] feat: add multi-language feature in mem-reader --- src/memos/templates/mem_reader_prompts.py | 30 ++++++++++++++--------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py index 6fb7a13c..7a103257 100644 --- a/src/memos/templates/mem_reader_prompts.py +++ b/src/memos/templates/mem_reader_prompts.py @@ -1,10 +1,10 @@ SIMPLE_STRUCT_MEM_READER_PROMPT = """ You are a memory extraction expert. -Your task is to extract memories from the perspective of ${user_a}, based on a conversation between ${user_a} and ${user_b}. This means identifying what ${user_a} would plausibly remember — including their own experiences, thoughts, plans, or relevant statements and actions made by others (such as ${user_b}) that impacted or were acknowledged by ${user_a}. +Your task is to extract memories from the perspective of user, based on a conversation between user and assistant. This means identifying what user would plausibly remember — including their own experiences, thoughts, plans, or relevant statements and actions made by others (such as assistant) that impacted or were acknowledged by user. Please perform: -1. Identify information that reflects ${user_a}'s experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful input from ${user_b} that ${user_a} acknowledged or responded to. +1. Identify information that reflects user's experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful input from assistant that user acknowledged or responded to. 2. Resolve all time, person, and event references clearly: - Convert relative time expressions (e.g., “yesterday,” “next Friday”) into absolute dates using the message timestamp if possible. 
- Clearly distinguish between event time and message time. @@ -12,33 +12,36 @@ - Include specific locations if mentioned. - Resolve all pronouns, aliases, and ambiguous references into full names or identities. - Disambiguate people with the same name if applicable. -3. Always write from a third-person perspective, referring to ${user_a} as +3. Always write from a third-person perspective, referring to user as "The user" or by name if name mentioned, rather than using first-person ("I", "me", "my"). For example, write "The user felt exhausted..." instead of "I felt exhausted...". -4. Do not omit any information that ${user_a} is likely to remember. +4. Do not omit any information that user is likely to remember. - Include all key experiences, thoughts, emotional responses, and plans — even if they seem minor. - Prioritize completeness and fidelity over conciseness. - - Do not generalize or skip details that could be personally meaningful to ${user_a}. + - Do not generalize or skip details that could be personally meaningful to user. Return a single valid JSON object with the following structure: { "memory list": [ { - "key": , + "key": , "memory_type": , - "value": , - "tags": + "value": , + "tags": }, ... ], - "summary": + "summary": } Language rules: -- The `value` fields and `summary` must match the language of the input conversation. -- All metadata fields (`key`, `memory_type`, `tags`) must be in English. +- The `key`, `value`, `tags`, `summary` fields must match the language of the input conversation. +- Keep `memory_type` in English. Example: Conversation: @@ -78,6 +81,9 @@ You are an expert text analyst for a search and retrieval system. Your task is to process a document chunk and generate a single, structured JSON object. The input is a single piece of text: `[DOCUMENT_CHUNK]`. You must generate a single JSON object with two top-level keys: `summary` and `tags`. 
+Written in English if the input conversation is in English, or in Chinese if +the conversation is in Chinese, or any language which aligns with the conversation language. + 1. `summary`: - A dense, searchable summary of the ENTIRE `[DOCUMENT_CHUNK]`. - The purpose is for semantic search embedding. From a17756d197fd88e0c0ed5633620013727df07bbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B8=AD=E9=98=B3=E9=98=B3?= Date: Sun, 13 Jul 2025 21:14:50 +0800 Subject: [PATCH 2/4] feat: add multi-language feature in mem-reader --- src/memos/templates/mem_reader_prompts.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py index 7a103257..b89a5ca4 100644 --- a/src/memos/templates/mem_reader_prompts.py +++ b/src/memos/templates/mem_reader_prompts.py @@ -1,10 +1,9 @@ SIMPLE_STRUCT_MEM_READER_PROMPT = """ You are a memory extraction expert. -Your task is to extract memories from the perspective of user, based on a conversation between user and assistant. This means identifying what user would plausibly remember — including their own experiences, thoughts, plans, or relevant statements and actions made by others (such as assistant) that impacted or were acknowledged by user. - +Your task is to extract memories from the perspective of ${user_a}, based on a conversation between ${user_a} and ${user_b}. This means identifying what ${user_a} would plausibly remember — including their own experiences, thoughts, plans, or relevant statements and actions made by others (such as ${user_b}) that impacted or were acknowledged by ${user_a}. Please perform: -1. 
Identify information that reflects ${user_a}'s experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful input from ${user_b} that ${user_a} acknowledged or responded to. 2. Resolve all time, person, and event references clearly: - Convert relative time expressions (e.g., “yesterday,” “next Friday”) into absolute dates using the message timestamp if possible. - Clearly distinguish between event time and message time. @@ -12,13 +11,13 @@ - Include specific locations if mentioned. - Resolve all pronouns, aliases, and ambiguous references into full names or identities. - Disambiguate people with the same name if applicable. -3. Always write from a third-person perspective, referring to user as +3. Always write from a third-person perspective, referring to ${user_a} as "The user" or by name if name mentioned, rather than using first-person ("I", "me", "my"). For example, write "The user felt exhausted..." instead of "I felt exhausted...". -4. Do not omit any information that user is likely to remember. +4. Do not omit any information that ${user_a} is likely to remember. - Include all key experiences, thoughts, emotional responses, and plans — even if they seem minor. - Prioritize completeness and fidelity over conciseness. - - Do not generalize or skip details that could be personally meaningful to user. + - Do not generalize or skip details that could be personally meaningful to ${user_a}. 
Return a single valid JSON object with the following structure: From 61b5da2e675731862e02ca86121ac10eaef41c53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B8=AD=E9=98=B3=E9=98=B3?= Date: Sun, 13 Jul 2025 21:20:58 +0800 Subject: [PATCH 3/4] feat: add multi-language feature in tree reorganizer --- src/memos/templates/tree_reorganize_prompts.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/memos/templates/tree_reorganize_prompts.py b/src/memos/templates/tree_reorganize_prompts.py index 8b62a254..04435eeb 100644 --- a/src/memos/templates/tree_reorganize_prompts.py +++ b/src/memos/templates/tree_reorganize_prompts.py @@ -17,6 +17,9 @@ - Provide a list of 5–10 relevant English `tags`. - Write a short `background` note (50–100 words) covering any extra context, sources, or traceability info. +Language rules: +- The `key`, `value`, `tags`, `background` fields must match the language of the input conversation. + Return valid JSON: {{ "key": "", @@ -41,6 +44,9 @@ Example: If you have items about a project across multiple phases, group them by milestone, team, or event. +Language rules: +- The `theme` fields must match the language of the input conversation. + Return valid JSON: {{ "clusters": [ @@ -84,7 +90,7 @@ - Node 2: "The venue was booked for a wedding in August." Answer: CONFLICT -Always respond with ONE word: [CAUSE | CONDITION | RELATE_TO | CONFLICT | NONE] +Always respond with ONE word, no matter what language the input nodes are in: [CAUSE | CONDITION | RELATE_TO | CONFLICT | NONE] """ INFER_FACT_PROMPT = """ @@ -125,6 +131,9 @@ - "Mary organized the 2023 sustainability summit in Berlin." - "Mary presented a keynote on renewable energy at the same summit." +Language rules: +- The `key`, `value`, `tags`, `background` fields must match the language of the input. 
+ Good Aggregate: {{ "key": "Mary's Sustainability Summit Role", From 771a09fe74b11c7843d866a933da9cef727277a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B8=AD=E9=98=B3=E9=98=B3?= Date: Sun, 13 Jul 2025 21:53:20 +0800 Subject: [PATCH 4/4] feat: reinforce language --- src/memos/templates/mem_reader_prompts.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py index b89a5ca4..e8808653 100644 --- a/src/memos/templates/mem_reader_prompts.py +++ b/src/memos/templates/mem_reader_prompts.py @@ -1,5 +1,6 @@ SIMPLE_STRUCT_MEM_READER_PROMPT = """ You are a memory extraction expert. +Always respond in the same language as the conversation. If the conversation is in Chinese, respond in Chinese. Your task is to extract memories from the perspective of ${user_a}, based on a conversation between ${user_a} and ${user_b}. This means identifying what ${user_a} would plausibly remember — including their own experiences, thoughts, plans, or relevant statements and actions made by others (such as ${user_b}) that impacted or were acknowledged by ${user_a}. Please perform: @@ -70,6 +71,21 @@ "summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach." 
} +Another Example in Chinese (Note: You MUST output the SAME language as the +input conversation!!): +{ + "memory list": [ + { + "key": "项目会议", + "memory_type": "LongTermMemory", + "value": "在2025年6月25日下午3点,Tom与团队开会讨论了新项目,涉及时间表,并提出了对12月15日截止日期可行性的担忧。", + "tags": ["项目", "时间表", "会议", "截止日期"] + }, + ... + ], + "summary": "Tom 目前专注于管理一个进度紧张的新项目..." +} + Conversation: ${conversation} @@ -78,6 +94,9 @@ SIMPLE_STRUCT_DOC_READER_PROMPT = """ You are an expert text analyst for a search and retrieval system. Your task is to process a document chunk and generate a single, structured JSON object. +Always respond in the same language as the conversation. If the conversation +is in Chinese, you MUST respond in Chinese. + The input is a single piece of text: `[DOCUMENT_CHUNK]`. You must generate a single JSON object with two top-level keys: `summary` and `tags`. Written in English if the input conversation is in English, or in Chinese if