fix: filter history to one user and one assistant per turn

2026-03-02 08:09:43 +08:00 · 2026-02-28 18:09:02 +08:00
parent 6ed85029c5
commit a773eb7893
1 changed files with 53 additions and 40 deletions
--- a/bridge/agent_initializer.py
+++ b/bridge/agent_initializer.py
@@ -168,62 +168,75 @@ class AgentInitializer:
    @staticmethod
    def _filter_text_only_messages(messages: list) -> list:
        """
-        Filter messages to keep only user text and assistant text.
+        Extract clean user/assistant turn pairs from raw message history.

-        Strips out:
-        - assistant messages that only contain tool_use (no text)
-        - user messages that only contain tool_result (no text)
-        - internal hint messages injected by the agent loop
+        Groups messages into turns (each starting with a real user query),
+        then keeps only:
+        - The first user text in each turn (the actual user input)
+        - The last assistant text in each turn (the final answer)

-        Keeps:
-        - user messages with actual text content
-        - assistant messages with text content (tool_use blocks removed,
-          only the text portion is kept)
+        All tool_use, tool_result, intermediate assistant thoughts, and
+        internal hint messages injected by the agent loop are discarded.
        """
-        filtered = []
-        for msg in messages:
-            role = msg.get("role")
-            content = msg.get("content")

+        def _extract_text(content) -> str:
            if isinstance(content, str):
-                if content.strip():
-                    filtered.append(msg)
-                continue
-
-            if not isinstance(content, list):
-                continue
-
-            if role == "user":
-                text_parts = [
+                return content.strip()
+            if isinstance(content, list):
+                parts = [
                    b.get("text", "")
                    for b in content
                    if isinstance(b, dict) and b.get("type") == "text"
                ]
+                return "\n".join(p for p in parts if p).strip()
+            return ""
+
+        def _is_real_user_msg(msg: dict) -> bool:
+            """True for actual user input, False for tool_result or internal hints."""
+            if msg.get("role") != "user":
+                return False
+            content = msg.get("content")
+            if isinstance(content, list):
                has_tool_result = any(
                    isinstance(b, dict) and b.get("type") == "tool_result"
                    for b in content
                )
                if has_tool_result:
-                    continue
-                text = "\n".join(p for p in text_parts if p).strip()
-                if text:
-                    filtered.append({
-                        "role": "user",
-                        "content": [{"type": "text", "text": text}]
-                    })
+                    return False
+            text = _extract_text(content)
+            return bool(text)

-            elif role == "assistant":
-                text_parts = [
-                    b.get("text", "")
-                    for b in content
-                    if isinstance(b, dict) and b.get("type") == "text"
-                ]
-                text = "\n".join(p for p in text_parts if p).strip()
+        # Group into turns: each turn starts with a real user message
+        turns = []
+        current_turn = None
+        for msg in messages:
+            if _is_real_user_msg(msg):
+                if current_turn is not None:
+                    turns.append(current_turn)
+                current_turn = {"user": msg, "assistants": []}
+            elif current_turn is not None and msg.get("role") == "assistant":
+                text = _extract_text(msg.get("content"))
                if text:
-                    filtered.append({
-                        "role": "assistant",
-                        "content": [{"type": "text", "text": text}]
-                    })
+                    current_turn["assistants"].append(text)
+        if current_turn is not None:
+            turns.append(current_turn)
+
+        # Build result: one user msg + one assistant msg per turn
+        filtered = []
+        for turn in turns:
+            user_text = _extract_text(turn["user"].get("content"))
+            if not user_text:
+                continue
+            filtered.append({
+                "role": "user",
+                "content": [{"type": "text", "text": user_text}]
+            })
+            if turn["assistants"]:
+                final_reply = turn["assistants"][-1]
+                filtered.append({
+                    "role": "assistant",
+                    "content": [{"type": "text", "text": final_reply}]
+                })

        return filtered