From c92256f08f4cb33c178e22878eaa197a7196095c Mon Sep 17 00:00:00 2001
From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com>
Date: Sun, 26 Apr 2026 18:37:41 -0300
Subject: [PATCH] chore(corpus-origin): tag merged evidence by tier + pin
 confidence-source contract

Two follow-ups to PR #1221's merge-fields behavior, both raised by the
Copilot review on that PR:

- Evidence merge now prefixes each entry with `Tier-1 heuristic: ` or
  `Tier-2 LLM: ` so the on-disk `origin.json` audit record retains tier
  provenance. The pre-#1221 code labeled heuristic evidence; the
  merge-fields refactor flattened that. Prefixing is idempotent: an
  entry that already starts with its tier's prefix is left unchanged.

- Tests now assert that the merged `confidence` is the heuristic's, not
  the LLM's. Added inline assertions to the two existing
  contradiction/disagreement tests, plus a dedicated
  `test_merge_tier_fields_confidence_matches_heuristic_call` that
  compares to `detect_origin_heuristic` directly so a future regression
  letting Tier 2 confidence leak through cannot pass silently.

Tests: 1378 pass. Ruff check + format both clean (CI-pinned 0.4.x).
---
 mempalace/cli.py                        | 17 ++++-
 tests/test_corpus_origin_integration.py | 84 +++++++++++++++++++++++++
 2 files changed, 99 insertions(+), 2 deletions(-)

diff --git a/mempalace/cli.py b/mempalace/cli.py
index b6e48df..92743a2 100644
--- a/mempalace/cli.py
+++ b/mempalace/cli.py
@@ -149,8 +149,21 @@ def _run_pass_zero(project_dir, palace_dir, llm_provider) -> dict:
                 result.user_name = llm_result.user_name
             if llm_result.agent_persona_names:
                 result.agent_persona_names = list(llm_result.agent_persona_names)
-            # Combine evidence — keep both signal trails for the audit record.
-            result.evidence = list(result.evidence) + list(llm_result.evidence)
+            # Combine evidence — keep both signal trails for the audit record,
+            # prefixed so the on-disk origin.json says which tier produced
+            # each entry. Idempotent: re-prefixing an already-tagged entry
+            # is a no-op.
+            tier1_prefix = "Tier-1 heuristic: "
+            tier2_prefix = "Tier-2 LLM: "
+            heuristic_evidence = [
+                str(e) if str(e).startswith(tier1_prefix) else f"{tier1_prefix}{e}"
+                for e in result.evidence
+            ]
+            llm_evidence = [
+                str(e) if str(e).startswith(tier2_prefix) else f"{tier2_prefix}{e}"
+                for e in llm_result.evidence
+            ]
+            result.evidence = heuristic_evidence + llm_evidence
         except Exception as exc:  # noqa: BLE001 — never block init on LLM failure
             print(f"  LLM corpus-origin tier failed ({exc}); using heuristic only.")
 
diff --git a/tests/test_corpus_origin_integration.py b/tests/test_corpus_origin_integration.py
index b896f28..08c3027 100644
--- a/tests/test_corpus_origin_integration.py
+++ b/tests/test_corpus_origin_integration.py
@@ -1483,6 +1483,14 @@ def test_merge_tier_fields_heuristic_yes_llm_no_keeps_heuristic_bool():
         f"Merged result must KEEP heuristic's True, not flip to False. "
         f"Got: {res}"
     )
+    # The bool and the confidence are paired — both must come from the
+    # heuristic. The mocked LLM returned 0.90; if the merge accidentally
+    # took LLM's confidence, this would equal 0.90.
+    assert res["confidence"] != 0.90, (
+        f"Merged confidence equals the mocked LLM's 0.90 — looks like "
+        f"LLM's confidence leaked through the merge. Heuristic's confidence "
+        f"must be preserved alongside its bool. Got: {res}"
+    )
     # Persona/user/platform from LLM should still be merged in.
     assert res["agent_persona_names"] == [
         "Echo",
@@ -1539,6 +1547,13 @@ def test_merge_tier_fields_heuristic_no_no_personas_leak():
     assert (
         res["agent_persona_names"] == []
     ), f"No personas should leak when both tiers report none. Got: {res}"
+    # Heuristic owns confidence. Mocked LLM returned 0.95; heuristic's
+    # narrative-branch confidence is 0.9. Verifying we kept 0.9 catches
+    # any future regression that lets LLM confidence override heuristic.
+    assert res["confidence"] == 0.9, (
+        f"Heuristic confidently classified narrative at 0.9; mocked LLM "
+        f"returned 0.95. Merge must keep heuristic's 0.9. Got: {res}"
+    )
 
 
 def test_merge_tier_fields_heuristic_yes_llm_yes_combines_evidence():
@@ -1597,6 +1612,75 @@ def test_merge_tier_fields_heuristic_yes_llm_yes_combines_evidence():
     assert len(res["evidence"]) >= 2, (
         f"Combined evidence should include both heuristic + LLM lines. " f"Got: {res['evidence']}"
     )
+    # Each entry must carry its tier prefix so on-disk origin.json is
+    # auditable — readers can tell which tier produced which signal line.
+    tier1_lines = [e for e in res["evidence"] if e.startswith("Tier-1 heuristic: ")]
+    tier2_lines = [e for e in res["evidence"] if e.startswith("Tier-2 LLM: ")]
+    assert tier1_lines, (
+        f"Expected at least one 'Tier-1 heuristic: ' prefixed evidence line. "
+        f"Got: {res['evidence']}"
+    )
+    assert tier2_lines, (
+        f"Expected at least one 'Tier-2 LLM: ' prefixed evidence line. " f"Got: {res['evidence']}"
+    )
+    # Every entry should be tier-prefixed (no untagged passthrough).
+    untagged = [
+        e
+        for e in res["evidence"]
+        if not (e.startswith("Tier-1 heuristic: ") or e.startswith("Tier-2 LLM: "))
+    ]
+    assert not untagged, f"Untagged evidence entries leaked into merge: {untagged}"
+
+
+def test_merge_tier_fields_confidence_matches_heuristic_call():
+    """Pin the contract: merged confidence equals what `detect_origin_heuristic`
+    returns for the same samples — independent of what the LLM produced.
+
+    The samples are written to a temp project dir and `detect_origin_llm` is patched
+    to return confidence 0.123456; guards against Tier 2 confidence leaking back in.
+    """
+    from unittest.mock import MagicMock, patch
+
+    from mempalace.cli import _run_pass_zero
+    from mempalace.corpus_origin import CorpusOriginResult, detect_origin_heuristic
+
+    samples = _ai_dialogue_samples()
+    expected_confidence = detect_origin_heuristic(samples).confidence
+
+    fake_provider = MagicMock()
+    # Stubbed LLM result uses a deliberately distinct confidence so any leak is visible.
+    llm_distinct_result = CorpusOriginResult(
+        likely_ai_dialogue=True,
+        confidence=0.123456,
+        primary_platform="Claude (Anthropic)",
+        user_name=None,
+        agent_persona_names=[],
+        evidence=["LLM said yes with an unusual confidence"],
+    )
+
+    import tempfile
+
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        project_dir = Path(tmp_dir) / "project"
+        project_dir.mkdir()
+        for i, sample in enumerate(samples):
+            (project_dir / f"log{i}.md").write_text(sample)
+        palace_dir = Path(tmp_dir) / "palace"
+
+        with patch("mempalace.cli.detect_origin_llm", return_value=llm_distinct_result):
+            wrapped = _run_pass_zero(
+                project_dir=str(project_dir),
+                palace_dir=str(palace_dir),
+                llm_provider=fake_provider,
+            )
+
+    assert wrapped is not None
+    res = wrapped["result"]
+    assert res["confidence"] == expected_confidence, (
+        f"Merged confidence {res['confidence']} did not match "
+        f"detect_origin_heuristic's {expected_confidence}. Looks like "
+        f"LLM's 0.123456 (or another source) leaked through the merge."
+    )
 
 
 def test_merge_tier_fields_no_llm_provider_returns_heuristic_only():