Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 5 additions & 68 deletions src/memos/mem_os/product.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
filter_nodes_by_tree_ids,
remove_embedding_recursive,
sort_children_by_memory_type,
split_continuous_references,
)
from memos.mem_os.utils.reference_utils import (
process_streaming_references_complete,
)
from memos.mem_scheduler.schemas.general_schemas import (
ANSWER_LABEL,
Expand Down Expand Up @@ -406,71 +408,6 @@ def _build_enhance_system_prompt(
return MEMOS_PRODUCT_ENHANCE_PROMPT + personal_memory_context + outer_memory_context
return MEMOS_PRODUCT_ENHANCE_PROMPT

def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str, str]:
    """
    Split a streaming buffer so that reference tags are never emitted half-written.

    The buffer is divided into text that can be emitted now and a tail that
    may still grow into a ``[refid:memoriesID]`` tag and therefore has to wait
    for more input. The emitted part is passed through
    ``split_continuous_references``.

    Args:
        text_buffer (str): The accumulated text buffer.

    Returns:
        tuple[str, str]: (processed_text, remaining_buffer)
    """
    import re

    # Complete reference tags look like [refid:memoriesID].
    complete_matches = list(re.finditer(r"\[\d+:[^\]]+\]", text_buffer))
    if complete_matches:
        # Emit everything up to the end of the last complete tag; the rest
        # stays buffered and is re-examined on the next call.
        end_pos = complete_matches[-1].end()
        return split_continuous_references(text_buffer[:end_pos]), text_buffer[end_pos:]

    # No complete tag: look for a tag that has been opened ("[<digits>:").
    opening_matches = list(re.finditer(r"\[\d+:", text_buffer))
    if opening_matches:
        last_opening = opening_matches[-1]
        if "]" in text_buffer[last_opening.end() :]:
            # The bracket is already closed without forming a complete tag
            # (e.g. "[1:]"), so waiting for more input cannot help.
            return split_continuous_references(text_buffer), ""
        # Hold back only the unfinished tag; the text in front of it is
        # safe to emit right away.
        opening_start = last_opening.start()
        return (
            split_continuous_references(text_buffer[:opening_start]),
            text_buffer[opening_start:],
        )

    # Only a tag prefix at the very end of the buffer ("[", "[1", "[12:")
    # can still become a reference tag. Any other "[" is ordinary text and
    # must not stall the stream.
    partial_match = re.search(r"\[\d*:?$", text_buffer)
    if partial_match:
        ref_start = partial_match.start()
        return (
            split_continuous_references(text_buffer[:ref_start]),
            text_buffer[ref_start:],
        )

    # Nothing reference-like is pending: emit the whole buffer.
    return split_continuous_references(text_buffer), ""

def _extract_references_from_response(self, response: str) -> tuple[str, list[dict]]:
"""
Extract reference information from the response and return clean text.
Expand Down Expand Up @@ -868,7 +805,7 @@ def chat_with_references(
full_response += chunk

# Process buffer to ensure complete reference tags
processed_chunk, remaining_buffer = self._process_streaming_references_complete(buffer)
processed_chunk, remaining_buffer = process_streaming_references_complete(buffer)

if processed_chunk:
chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
Expand All @@ -877,7 +814,7 @@ def chat_with_references(

# Process any remaining buffer
if buffer:
processed_chunk, remaining_buffer = self._process_streaming_references_complete(buffer)
processed_chunk, remaining_buffer = process_streaming_references_complete(buffer)
if processed_chunk:
chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
yield chunk_data
Expand Down
44 changes: 0 additions & 44 deletions src/memos/mem_os/utils/format_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1363,47 +1363,3 @@ def clean_json_response(response: str) -> str:
str: Clean JSON string without markdown formatting
"""
return response.replace("```json", "").replace("```", "").strip()


def split_continuous_references(text: str) -> str:
    """
    Split a comma-separated reference tag into individual reference tags.

    Converts patterns like [1:92ff35fb, 4:bfe6f044] to [1:92ff35fb][4:bfe6f044].
    Each comma (with or without one following space) becomes "][".

    Only processes text if:
    1. '[' appears exactly once
    2. ']' appears exactly once
    3. '[' comes before ']'
    4. Contains commas between '[' and ']'

    Args:
        text (str): Text containing reference tags

    Returns:
        str: Text with split reference tags, or original text if conditions not met
    """
    # Empty text, or anything other than exactly one bracket pair, is returned as-is.
    if not text or text.count("[") != 1 or text.count("]") != 1:
        return text

    open_bracket_pos = text.find("[")
    close_bracket_pos = text.find("]")
    # Brackets must be in the correct order.
    if open_bracket_pos >= close_bracket_pos:
        return text

    content_between_brackets = text[open_bracket_pos + 1 : close_bracket_pos]
    if "," not in content_between_brackets:
        return text

    # Handle ", " before "," so a tag that mixes both separators is fully split.
    split_content = content_between_brackets.replace(", ", "][").replace(",", "][")
    # Splice by position so identical text outside the brackets is left untouched.
    return text[: open_bracket_pos + 1] + split_content + text[close_bracket_pos:]
133 changes: 133 additions & 0 deletions src/memos/mem_os/utils/reference_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
def split_continuous_references(text: str) -> str:
    """
    Split a comma-separated reference tag into individual reference tags.

    Converts patterns like [1:92ff35fb, 4:bfe6f044] to [1:92ff35fb][4:bfe6f044].
    Each comma (with or without one following space) becomes "][".

    Only processes text if:
    1. '[' appears exactly once
    2. ']' appears exactly once
    3. '[' comes before ']'
    4. Contains commas between '[' and ']'

    Args:
        text (str): Text containing reference tags

    Returns:
        str: Text with split reference tags, or original text if conditions not met
    """
    # Empty text, or anything other than exactly one bracket pair, is returned as-is.
    if not text or text.count("[") != 1 or text.count("]") != 1:
        return text

    open_bracket_pos = text.find("[")
    close_bracket_pos = text.find("]")
    # Brackets must be in the correct order.
    if open_bracket_pos >= close_bracket_pos:
        return text

    content_between_brackets = text[open_bracket_pos + 1 : close_bracket_pos]
    if "," not in content_between_brackets:
        return text

    # Handle ", " before "," so a tag that mixes both separators is fully split.
    split_content = content_between_brackets.replace(", ", "][").replace(",", "][")
    # Splice by position so identical text outside the brackets is left untouched.
    return text[: open_bracket_pos + 1] + split_content + text[close_bracket_pos:]


def process_streaming_references_complete(text_buffer: str) -> tuple[str, str]:
    """
    Split a streaming buffer so that reference tags are never emitted half-written.

    The buffer is divided into text that can be emitted now and a tail that
    may still grow into a ``[refid:memoriesID]`` tag and therefore has to wait
    for more input. The emitted part is passed through
    ``split_continuous_references``.

    Args:
        text_buffer (str): The accumulated text buffer.

    Returns:
        tuple[str, str]: (processed_text, remaining_buffer)
    """
    import re

    # Complete reference tags look like [refid:memoriesID].
    complete_matches = list(re.finditer(r"\[\d+:[^\]]+\]", text_buffer))

    if complete_matches:
        end_pos = complete_matches[-1].end()

        # After the last complete tag, an opening bracket with no closing
        # bracket before the end of the buffer may be an unfinished tag:
        # emit what precedes it and keep the rest buffered.
        incomplete_match = re.search(r"\[\d*:?[^\]]*$", text_buffer[end_pos:])
        if incomplete_match:
            incomplete_start = end_pos + incomplete_match.start()
            return (
                split_continuous_references(text_buffer[:incomplete_start]),
                text_buffer[incomplete_start:],
            )

        # Nothing pending after the last complete tag: emit everything.
        return split_continuous_references(text_buffer), ""

    # No complete tag: look for a tag that has been opened ("[<digits>:").
    opening_matches = list(re.finditer(r"\[\d+:", text_buffer))

    if opening_matches:
        last_opening = opening_matches[-1]

        if "]" in text_buffer[last_opening.end() :]:
            # The bracket is already closed, so there is nothing to wait for.
            return split_continuous_references(text_buffer), ""

        # Unfinished tag: emit the text in front of it, keep the tag buffered.
        opening_start = last_opening.start()
        return (
            split_continuous_references(text_buffer[:opening_start]),
            text_buffer[opening_start:],
        )

    # Hold text back only for a tag prefix at the very end of the buffer:
    # "[", "[1", "[12:", etc. A lone trailing "[" is matched by this pattern
    # too, so it needs no separate check. A "[" followed by other text (for
    # example a math expression such as [ \Delta U = Q - W ]) is not held.
    potential_match = re.search(r"\[\d*:?$", text_buffer)
    if potential_match:
        ref_start = potential_match.start()
        return (
            split_continuous_references(text_buffer[:ref_start]),
            text_buffer[ref_start:],
        )

    # No reference-like pattern is pending: emit the whole buffer.
    return split_continuous_references(text_buffer), ""
Loading