From c92256f08f4cb33c178e22878eaa197a7196095c Mon Sep 17 00:00:00 2001 From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com> Date: Sun, 26 Apr 2026 18:37:41 -0300 Subject: [PATCH] chore(corpus-origin): tag merged evidence by tier + pin confidence-source contract Two follow-ups to PR #1221's merge-fields behavior, both raised by the Copilot review on that PR: - Evidence merge now prefixes each entry with `Tier-1 heuristic: ` or `Tier-2 LLM: ` so the on-disk `origin.json` audit record retains tier provenance. The pre-#1221 code labeled heuristic evidence; the merge-fields refactor flattened that. Re-prefixing is idempotent. - Tests now assert that the merged `confidence` is the heuristic's, not the LLM's. Added inline assertions to the two existing contradiction/disagreement tests, plus a dedicated `test_merge_tier_fields_confidence_matches_heuristic_call` that compares to `detect_origin_heuristic` directly so a future regression letting Tier 2 confidence leak through cannot pass silently. Tests: 1378 pass. Ruff check + format both clean (CI-pinned 0.4.x). --- mempalace/cli.py | 17 ++++- tests/test_corpus_origin_integration.py | 84 +++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/mempalace/cli.py b/mempalace/cli.py index b6e48df..92743a2 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -149,8 +149,21 @@ def _run_pass_zero(project_dir, palace_dir, llm_provider) -> dict: result.user_name = llm_result.user_name if llm_result.agent_persona_names: result.agent_persona_names = list(llm_result.agent_persona_names) - # Combine evidence — keep both signal trails for the audit record. - result.evidence = list(result.evidence) + list(llm_result.evidence) + # Combine evidence — keep both signal trails for the audit record, + # prefixed so the on-disk origin.json says which tier produced + # each entry. Idempotent: re-prefixing an already-tagged entry + # is a no-op. + tier1_prefix = "Tier-1 heuristic: " + tier2_prefix = "Tier-2 LLM: " + heuristic_evidence = [ + str(e) if str(e).startswith(tier1_prefix) else f"{tier1_prefix}{e}" + for e in result.evidence + ] + llm_evidence = [ + str(e) if str(e).startswith(tier2_prefix) else f"{tier2_prefix}{e}" + for e in llm_result.evidence + ] + result.evidence = heuristic_evidence + llm_evidence except Exception as exc: # noqa: BLE001 — never block init on LLM failure print(f" LLM corpus-origin tier failed ({exc}); using heuristic only.") diff --git a/tests/test_corpus_origin_integration.py b/tests/test_corpus_origin_integration.py index b896f28..08c3027 100644 --- a/tests/test_corpus_origin_integration.py +++ b/tests/test_corpus_origin_integration.py @@ -1483,6 +1483,14 @@ def test_merge_tier_fields_heuristic_yes_llm_no_keeps_heuristic_bool(): f"Merged result must KEEP heuristic's True, not flip to False. " f"Got: {res}" ) + # The bool and the confidence are paired — both must come from the + # heuristic. The mocked LLM returned 0.90; if the merge accidentally + # took LLM's confidence, this would equal 0.90. + assert res["confidence"] != 0.90, ( + f"Merged confidence equals the mocked LLM's 0.90 — looks like " + f"LLM's confidence leaked through the merge. Heuristic's confidence " + f"must be preserved alongside its bool. Got: {res}" + ) # Persona/user/platform from LLM should still be merged in. assert res["agent_persona_names"] == [ "Echo", @@ -1539,6 +1547,13 @@ def test_merge_tier_fields_heuristic_no_no_personas_leak(): assert ( res["agent_persona_names"] == [] ), f"No personas should leak when both tiers report none. Got: {res}" + # Heuristic owns confidence. Mocked LLM returned 0.95; heuristic's + # narrative-branch confidence is 0.9. Verifying we kept 0.9 catches + # any future regression that lets LLM confidence override heuristic. + assert res["confidence"] == 0.9, ( + f"Heuristic confidently classified narrative at 0.9; mocked LLM " + f"returned 0.95. Merge must keep heuristic's 0.9. Got: {res}" + ) def test_merge_tier_fields_heuristic_yes_llm_yes_combines_evidence(): @@ -1597,6 +1612,75 @@ def test_merge_tier_fields_heuristic_yes_llm_yes_combines_evidence(): assert len(res["evidence"]) >= 2, ( f"Combined evidence should include both heuristic + LLM lines. " f"Got: {res['evidence']}" ) + # Each entry must carry its tier prefix so on-disk origin.json is + # auditable — readers can tell which tier produced which signal line. + tier1_lines = [e for e in res["evidence"] if e.startswith("Tier-1 heuristic: ")] + tier2_lines = [e for e in res["evidence"] if e.startswith("Tier-2 LLM: ")] + assert tier1_lines, ( + f"Expected at least one 'Tier-1 heuristic: ' prefixed evidence line. " + f"Got: {res['evidence']}" + ) + assert tier2_lines, ( + f"Expected at least one 'Tier-2 LLM: ' prefixed evidence line. " f"Got: {res['evidence']}" + ) + # Every entry should be tier-prefixed (no untagged passthrough). + untagged = [ + e + for e in res["evidence"] + if not (e.startswith("Tier-1 heuristic: ") or e.startswith("Tier-2 LLM: ")) + ] + assert not untagged, f"Untagged evidence entries leaked into merge: {untagged}" + + +def test_merge_tier_fields_confidence_matches_heuristic_call(): + """Pin the contract: merged confidence equals what `detect_origin_heuristic` + returns for the same samples — independent of what the LLM produced. + + Catches a regression class where some future refactor lets Tier 2's + confidence creep back into the merged result. + """ + from unittest.mock import MagicMock, patch + + from mempalace.cli import _run_pass_zero + from mempalace.corpus_origin import CorpusOriginResult, detect_origin_heuristic + + samples = _ai_dialogue_samples() + expected_confidence = detect_origin_heuristic(samples).confidence + + fake_provider = MagicMock() + # LLM picks a deliberately distinct confidence so any leak is visible. + llm_distinct_result = CorpusOriginResult( + likely_ai_dialogue=True, + confidence=0.123456, + primary_platform="Claude (Anthropic)", + user_name=None, + agent_persona_names=[], + evidence=["LLM said yes with an unusual confidence"], + ) + + import tempfile + + with tempfile.TemporaryDirectory() as tmp_dir: + project_dir = Path(tmp_dir) / "project" + project_dir.mkdir() + for i, sample in enumerate(samples): + (project_dir / f"log{i}.md").write_text(sample) + palace_dir = Path(tmp_dir) / "palace" + + with patch("mempalace.cli.detect_origin_llm", return_value=llm_distinct_result): + wrapped = _run_pass_zero( + project_dir=str(project_dir), + palace_dir=str(palace_dir), + llm_provider=fake_provider, + ) + + assert wrapped is not None + res = wrapped["result"] + assert res["confidence"] == expected_confidence, ( + f"Merged confidence {res['confidence']} did not match " + f"detect_origin_heuristic's {expected_confidence}. Looks like " + f"LLM's 0.123456 (or another source) leaked through the merge." + ) def test_merge_tier_fields_no_llm_provider_returns_heuristic_only():