Merge pull request #1224 from MemPalace/feat/privacy-warn-external-llm

feat(privacy): warn when LLM tier sends content to external API
This commit is contained in:
Igor Lins e Silva
2026-04-26 19:18:13 -03:00
committed by GitHub
4 changed files with 248 additions and 0 deletions
+112
View File
@@ -1629,3 +1629,115 @@ def test_merge_tier_fields_no_llm_provider_returns_heuristic_only():
assert res["agent_persona_names"] == []
assert res["user_name"] is None
assert res["primary_platform"] is None
# ─────────────────────────────────────────────────────────────────────────
# External-API privacy warning (issue #24).
#
# When mempalace init resolves an LLM provider whose endpoint will send
# user content off the local machine/network, init MUST print a clear
# warning naming the provider, stating that MemPalace doesn't control
# how the provider logs/retains/uses the data, and pointing at --no-llm.
# Local providers (Ollama on localhost, LM Studio on LAN, etc.) MUST NOT
# trigger the warning.
# ─────────────────────────────────────────────────────────────────────────
def test_init_prints_privacy_warning_when_provider_is_external(
    ai_dialogue_corpus: Path, tmp_path: Path, capsys
):
    """An external provider must make ``cmd_init`` print the privacy warning.

    ``get_provider`` is patched to return a stub that reports itself
    available and has ``is_external_service = True``. The captured stdout
    must then contain the ``EXTERNAL API`` marker, the ``--no-llm``
    opt-out hint, and wording about how the provider handles the data.
    """
    from mempalace.cli import cmd_init

    palace_dir = tmp_path / "palace"
    init_args = _init_args(ai_dialogue_corpus)  # default = LLM ON

    # Stub provider: reachable, flagged external, classify() yields an
    # empty "classifications" payload.
    external_provider = MagicMock()
    external_provider.check_available.return_value = (True, "ok")
    external_provider.is_external_service = True
    external_provider.classify.return_value = MagicMock(text='{"classifications": []}')

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace_dir)),
        patch("mempalace.cli.get_provider", return_value=external_provider),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(init_args)

    captured = capsys.readouterr().out
    lowered = captured.lower()

    assert "EXTERNAL API" in captured, (
        "Privacy warning must mention 'EXTERNAL API' when provider is external. "
        f"Got: {captured!r}"
    )
    assert "--no-llm" in captured, (
        f"Privacy warning must point users at --no-llm to opt out. Got: {captured!r}"
    )

    # The warning should also tell users MemPalace isn't responsible
    # for downstream provider behavior; any one of these phrases suffices.
    disclaimer_phrases = ("does not control", "not responsible", "logs", "retains")
    assert any(phrase in lowered for phrase in disclaimer_phrases), (
        "Privacy warning must clarify MemPalace doesn't control how the "
        f"provider handles the data. Got: {captured!r}"
    )
def test_init_no_privacy_warning_when_provider_is_local(
    ai_dialogue_corpus: Path, tmp_path: Path, capsys
):
    """A local provider must NOT make ``cmd_init`` print the privacy warning.

    ``get_provider`` is patched to return a stub that reports itself
    available and has ``is_external_service = False`` (standing in for
    e.g. Ollama on localhost or LM Studio on LAN). The captured stdout
    must not contain the ``EXTERNAL API`` marker.
    """
    from mempalace.cli import cmd_init

    palace_dir = tmp_path / "palace"
    init_args = _init_args(ai_dialogue_corpus)  # default = LLM ON

    # Stub provider: reachable, flagged local, classify() yields an
    # empty "classifications" payload.
    local_provider = MagicMock()
    local_provider.check_available.return_value = (True, "ok")
    local_provider.is_external_service = False  # Local provider — no warning
    local_provider.classify.return_value = MagicMock(text='{"classifications": []}')

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace_dir)),
        patch("mempalace.cli.get_provider", return_value=local_provider),
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(init_args)

    captured = capsys.readouterr().out
    assert "EXTERNAL API" not in captured, (
        f"Privacy warning fired for a LOCAL provider — should not have. Got: {captured!r}"
    )
def test_init_no_privacy_warning_with_no_llm_flag(ai_dialogue_corpus: Path, tmp_path: Path, capsys):
    """With ``--no-llm``, no provider is requested and no warning is printed.

    ``cmd_init`` is run with ``no_llm=True`` while ``get_provider`` is
    patched. The test checks two things: the patched ``get_provider`` was
    never called, and the captured stdout does not contain the
    ``EXTERNAL API`` marker.
    """
    from mempalace.cli import cmd_init

    palace = tmp_path / "palace"
    args = _init_args(ai_dialogue_corpus, no_llm=True)

    with (
        patch("mempalace.cli.MempalaceConfig", return_value=_stub_cfg(palace)),
        patch("mempalace.cli.get_provider") as mock_get,
        patch("mempalace.cli._maybe_run_mine_after_init"),
        patch("mempalace.room_detector_local.detect_rooms_local"),
    ):
        cmd_init(args)

    # Use a real assert so the explanation shows up in the failure report.
    # Writing `mock_get.assert_not_called(), "msg"` only builds a throwaway
    # tuple and silently drops the message.
    assert not mock_get.called, "--no-llm must short-circuit before provider acquisition"

    out = capsys.readouterr().out
    assert (
        "EXTERNAL API" not in out
    ), f"Privacy warning fired on --no-llm path — should not have. Got: {out!r}"
+53
View File
@@ -325,3 +325,56 @@ def test_anthropic_no_key_raises_on_classify(monkeypatch):
p = AnthropicProvider(model="claude-haiku")
with pytest.raises(LLMError, match="requires ANTHROPIC_API_KEY"):
p.classify("s", "u")
# ── is_external_service property (issue #24 — privacy warning support) ──
#
# `is_external_service` is True when this provider's endpoint sends data
# off the user's machine/network. Used by mempalace init to print a
# privacy warning before first run when an external API will receive
# folder content. URL-based heuristic: localhost, 127.0.0.0/8, ::1, .local,
# RFC 1918 private ranges (10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16), and
# IPv6 ULA (fc00::/7) are all treated as local. Everything else is
# treated as external.
def test_ollama_provider_default_endpoint_is_local():
    """A default-constructed ``OllamaProvider`` must be classified as local.

    The provider is built with only a model name, so it uses whatever
    endpoint the class defaults to (expected to be
    http://localhost:11434 — verify against the provider definition).
    No privacy warning should fire for a user running Ollama on their
    own machine.
    """
    provider = OllamaProvider(model="gemma4:e4b")

    # The message is built only on failure, so the attributes are re-read
    # lazily rather than cached up front.
    assert provider.is_external_service is False, (
        "Default OllamaProvider endpoint must be local; got "
        f"is_external_service={provider.is_external_service} for endpoint={provider.endpoint}"
    )
def test_openai_compat_provider_localhost_endpoint_is_local():
    """Loopback and LAN endpoints must not be classified as external.

    Covers three ``OpenAICompatProvider`` endpoints: ``localhost``,
    ``127.0.0.1`` and an RFC1918 address (``192.168.1.50``). These are the
    kinds of addresses LM Studio / llama.cpp server / vLLM setups
    commonly bind to, and none may trigger the external-API warning.
    """
    via_hostname = OpenAICompatProvider(model="any", endpoint="http://localhost:1234")
    assert via_hostname.is_external_service is False

    via_loopback_ip = OpenAICompatProvider(model="any", endpoint="http://127.0.0.1:8000")
    assert via_loopback_ip.is_external_service is False

    via_lan_ip = OpenAICompatProvider(model="any", endpoint="http://192.168.1.50:1234")
    assert via_lan_ip.is_external_service is False, "LAN (RFC1918) endpoints must be local"
def test_openai_compat_provider_cloud_endpoint_is_external():
    """A hosted, non-local endpoint must be classified as external.

    Points ``OpenAICompatProvider`` at ``https://api.openai.com`` and
    expects ``is_external_service`` to be ``True``, which is what makes
    the external-API warning fire.
    """
    hosted = OpenAICompatProvider(model="gpt-4o", endpoint="https://api.openai.com")

    # The message is built only on failure, so the attribute is re-read lazily.
    assert hosted.is_external_service is True, (
        "https://api.openai.com must be classified external; got "
        f"is_external_service={hosted.is_external_service}"
    )
def test_anthropic_provider_default_endpoint_is_external():
    """A default-constructed ``AnthropicProvider`` must be classified external.

    The provider is built with a model name and a dummy API key only, so
    it uses whatever endpoint the class defaults to (expected to be
    https://api.anthropic.com — verify against the provider definition).
    The privacy warning must therefore fire by default for users who
    pass ``--llm-provider anthropic``.
    """
    provider = AnthropicProvider(model="claude-haiku-4-5", api_key="sk-test")

    # The message is built only on failure, so the attributes are re-read
    # lazily rather than cached up front.
    assert provider.is_external_service is True, (
        "Default AnthropicProvider endpoint must be external; got "
        f"is_external_service={provider.is_external_service} for endpoint={provider.endpoint}"
    )