fix(normalize): discard user/gemini turns before session_metadata sentinel
Agent-Logs-Url: https://github.com/MemPalace/mempalace/sessions/4511e9aa-38e7-440e-a6f8-eda91e576f0f
Co-authored-by: igorls <4753812+igorls@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
a3e3691e86
commit
e7fe6cae14
+11
-3
@@ -296,9 +296,12 @@ def _try_gemini_jsonl(content: str) -> Optional[str]:
|
||||
|
||||
Detection requires a ``session_metadata`` record so this parser does
|
||||
not false-positive against Claude Code or Codex JSONL passed through
|
||||
the dispatch chain. ``message_update`` entries are skipped — they
|
||||
have no message text. Multiple text blocks within a single message's
|
||||
content array are concatenated in order, separated by newlines.
|
||||
the dispatch chain. Any ``user``/``gemini`` lines that appear before
|
||||
``session_metadata`` are discarded — they are treated as preamble
|
||||
noise, not conversational turns. ``message_update`` entries are
|
||||
skipped — they have no message text. Multiple text blocks within a
|
||||
single message's content array are concatenated in order, separated
|
||||
by newlines.
|
||||
"""
|
||||
lines = [line.strip() for line in content.strip().split("\n") if line.strip()]
|
||||
messages = []
|
||||
@@ -316,6 +319,11 @@ def _try_gemini_jsonl(content: str) -> Optional[str]:
|
||||
has_session_metadata = True
|
||||
continue
|
||||
|
||||
# Discard everything (including user/gemini turns) until the
|
||||
# session_metadata sentinel has been seen.
|
||||
if not has_session_metadata:
|
||||
continue
|
||||
|
||||
if entry_type not in ("user", "gemini"):
|
||||
# Skips message_update, system events, anything else.
|
||||
continue
|
||||
|
||||
@@ -594,6 +594,25 @@ def test_gemini_jsonl_does_not_match_codex():
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_gemini_jsonl_messages_before_session_metadata_discarded():
    """Turns preceding the ``session_metadata`` record are dropped.

    The fixture places one user/gemini exchange ahead of the sentinel and
    one after it. Only the exchange after the sentinel may show up in the
    normalized transcript; the earlier one must leave no trace.
    """
    # Records in file order: preamble exchange, sentinel, real exchange.
    records = [
        {"type": "user", "content": [{"text": "preamble Q"}]},
        {"type": "gemini", "content": [{"text": "preamble A"}]},
        {"type": "session_metadata", "sessionId": "s"},
        {"type": "user", "content": [{"text": "real Q"}]},
        {"type": "gemini", "content": [{"text": "real A"}]},
    ]
    # One JSON document per line, exactly as a JSONL export would look.
    jsonl = "\n".join(json.dumps(record) for record in records)

    transcript = _try_gemini_jsonl(jsonl)

    # The sentinel is present, so the parser must accept the input.
    assert transcript is not None
    # Nothing from before the sentinel may leak into the output.
    assert "preamble Q" not in transcript
    assert "preamble A" not in transcript
    # The post-sentinel exchange is kept; the user turn carries the "> " prefix.
    assert "> real Q" in transcript
    assert "real A" in transcript
|
||||
|
||||
|
||||
# ── _try_claude_ai_json ───────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user