fix: remove 8-line AI response truncation in convo_miner (#692) (#708)

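The _chunk_by_exchange() function was silently truncating AI responses to 8 lines via ai_lines[:8]; any content beyond line 8 was discarded, violating the project's verbatim storage principle. The AI response is no longer cut off at 8 lines. When a combined exchange (user turn + response) exceeds CHUNK_SIZE (800 chars, aligned with miner.py), it is split across consecutive CHUNK_SIZE-sized drawers instead of being truncated. Note that each piece is still subject to the existing MIN_CHUNK_SIZE filter, so a piece whose stripped length is 30 characters or fewer (e.g. a short trailing remainder) is not stored.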
2026-04-12 17:23:57 -04:00
parent d52d6c9622
commit 9b60c6edd7
1 changed file with 25 additions and 2 deletions
@@ -28,6 +28,7 @@ CONVO_EXTENSIONS = {
 }
 MIN_CHUNK_SIZE = 30
 CHUNK_SIZE = 800  # chars per drawer — align with miner.py
 MAX_FILE_SIZE = 10 * 1024 * 1024  # 10 MB — skip files larger than this
@@ -51,7 +52,12 @@ def chunk_exchanges(content: str) -> list:
 def _chunk_by_exchange(lines: list) -> list:
-    """One user turn (>) + the AI response that follows = one chunk."""
+    """One user turn (>) + the AI response that follows = one or more chunks.
    The full AI response is preserved verbatim.  When the combined
    user-turn + response exceeds CHUNK_SIZE the response is split across
    consecutive drawers so nothing is silently discarded.
    """
    chunks = []
    i = 0
@@ -73,7 +79,24 @@ def _chunk_by_exchange(lines: list) -> list:
            ai_response = " ".join(ai_lines)
            content = f"{user_turn}\n{ai_response}" if ai_response else user_turn
-            if len(content.strip()) > MIN_CHUNK_SIZE:
+            # Split into multiple drawers when the exchange exceeds CHUNK_SIZE
            if len(content) > CHUNK_SIZE:
                # First chunk: user turn + as much response as fits
                first_part = content[:CHUNK_SIZE]
                if len(first_part.strip()) > MIN_CHUNK_SIZE:
                    chunks.append(
                        {"content": first_part, "chunk_index": len(chunks)}
                    )
                # Remaining response in CHUNK_SIZE-sized continuation drawers
                remainder = content[CHUNK_SIZE:]
                while remainder:
                    part = remainder[:CHUNK_SIZE]
                    remainder = remainder[CHUNK_SIZE:]
                    if len(part.strip()) > MIN_CHUNK_SIZE:
                        chunks.append(
                            {"content": part, "chunk_index": len(chunks)}
                        )
            elif len(content.strip()) > MIN_CHUNK_SIZE:
                chunks.append(
                    {
                        "content": content,