From 4d98b0524084f7682f2b7df04c77be2e4312c081 Mon Sep 17 00:00:00 2001 From: Arnold Wender Date: Fri, 24 Apr 2026 11:09:16 +0200 Subject: [PATCH 1/2] fix(kg): validate ISO-8601 date formats at MCP boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tool_kg_query (as_of), tool_kg_add (valid_from), and tool_kg_invalidate (ended) accepted any string and forwarded it to SQLite without format validation. Parameterized queries prevent SQL injection, but invalid date strings silently produce empty result sets — callers cannot distinguish "no fact at this time" from "your date format was unrecognized." This is especially painful for natural-language LLM callers that synthesize dates like "March 2026" or "Jan 2025". Add sanitize_iso_date() in config.py alongside the other input validators. It accepts YYYY, YYYY-MM, and YYYY-MM-DD forms; passes through None/empty; and raises ValueError with a field-named message on anything else. Call it from the three kg MCP tool wrappers before values reach the storage layer so the caller gets a clear error instead of a silent miss. Closes #1164 --- mempalace/config.py | 28 ++++++++++++++++ mempalace/mcp_server.py | 4 +++ tests/test_config.py | 70 +++++++++++++++++++++++++++++++++++++++- tests/test_mcp_server.py | 46 ++++++++++++++++++++++++++ 4 files changed, 147 insertions(+), 1 deletion(-) diff --git a/mempalace/config.py b/mempalace/config.py index cacd1f9..4005779 100644 --- a/mempalace/config.py +++ b/mempalace/config.py @@ -81,6 +81,34 @@ def sanitize_kg_value(value: str, field_name: str = "value") -> str: return value +# ISO-8601 date validator for knowledge-graph temporal parameters +# (as_of, valid_from, valid_to, ended). Parameterized queries already +# prevent SQL injection, but unvalidated date strings silently miss +# every row — callers cannot distinguish "no fact at this time" from +# "your date format was unrecognized." Accept YYYY, YYYY-MM, YYYY-MM-DD. +_ISO_DATE_RE = re.compile(r"^\d{4}(?:-(?:0[1-9]|1[0-2])(?:-(?:0[1-9]|[12]\d|3[01]))?)?$") + + +def sanitize_iso_date(value, field_name: str = "date"): + """Validate an ISO-8601 date string, accepting None or empty as-is. + + Accepts ``YYYY``, ``YYYY-MM``, or ``YYYY-MM-DD``. Raises ValueError + on any other non-empty input so the MCP layer can surface a clear + error to the caller instead of silently returning empty results. + """ + if value is None or value == "": + return value + if not isinstance(value, str): + raise ValueError(f"{field_name} must be a string") + value = value.strip() + if not _ISO_DATE_RE.match(value): + raise ValueError( + f"{field_name}={value!r} is not a valid ISO-8601 date " + f"(expected YYYY, YYYY-MM, or YYYY-MM-DD)" + ) + return value + + def sanitize_content(value: str, max_length: int = 100_000) -> str: """Validate drawer/diary content length.""" if not isinstance(value, str) or not value.strip(): diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py index 43897c8..8aecd05 100644 --- a/mempalace/mcp_server.py +++ b/mempalace/mcp_server.py @@ -55,6 +55,7 @@ from .config import ( # noqa: E402 sanitize_kg_value, sanitize_name, sanitize_content, + sanitize_iso_date, ) from .version import __version__ # noqa: E402 from .backends.chroma import ( # noqa: E402 @@ -1021,6 +1022,7 @@ def tool_kg_query(entity: str, as_of: str = None, direction: str = "both"): """Query the knowledge graph for an entity's relationships.""" try: entity = sanitize_kg_value(entity, "entity") + as_of = sanitize_iso_date(as_of, "as_of") except ValueError as e: return {"error": str(e)} if direction not in ("outgoing", "incoming", "both"): @@ -1037,6 +1039,7 @@ def tool_kg_add( subject = sanitize_kg_value(subject, "subject") predicate = sanitize_name(predicate, "predicate") object = sanitize_kg_value(object, "object") + valid_from = sanitize_iso_date(valid_from, "valid_from") except ValueError as e: return {"success": False, "error": str(e)} @@ -1062,6 +1065,7 @@ def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = N subject = sanitize_kg_value(subject, "subject") predicate = sanitize_name(predicate, "predicate") object = sanitize_kg_value(object, "object") + ended = sanitize_iso_date(ended, "ended") except ValueError as e: return {"success": False, "error": str(e)} _wal_log( diff --git a/tests/test_config.py b/tests/test_config.py index d7707d9..f5064e2 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -3,7 +3,13 @@ import json import tempfile import pytest -from mempalace.config import MempalaceConfig, normalize_wing_name, sanitize_kg_value, sanitize_name +from mempalace.config import ( + MempalaceConfig, + normalize_wing_name, + sanitize_iso_date, + sanitize_kg_value, + sanitize_name, +) def test_default_config(): @@ -212,3 +218,65 @@ def test_kg_value_rejects_null_bytes(): def test_kg_value_rejects_over_length(): with pytest.raises(ValueError): sanitize_kg_value("a" * 129) + + +# --- sanitize_iso_date --- + + +def test_iso_date_accepts_year_only(): + assert sanitize_iso_date("2026") == "2026" + + +def test_iso_date_accepts_year_month(): + assert sanitize_iso_date("2026-03") == "2026-03" + + +def test_iso_date_accepts_full_date(): + assert sanitize_iso_date("2026-03-15") == "2026-03-15" + + +def test_iso_date_passes_through_none(): + assert sanitize_iso_date(None) is None + + +def test_iso_date_passes_through_empty_string(): + assert sanitize_iso_date("") == "" + + +def test_iso_date_strips_whitespace(): + assert sanitize_iso_date(" 2026-03-15 ") == "2026-03-15" + + +def test_iso_date_rejects_natural_language(): + with pytest.raises(ValueError): + sanitize_iso_date("March 2026") + + +def test_iso_date_rejects_abbreviated_month(): + with pytest.raises(ValueError): + sanitize_iso_date("Jan 2025") + + +def test_iso_date_rejects_us_format(): + with pytest.raises(ValueError): + sanitize_iso_date("03/15/2026") + + +def test_iso_date_rejects_invalid_month(): + with pytest.raises(ValueError): + sanitize_iso_date("2026-13") + + +def test_iso_date_rejects_invalid_day(): + with pytest.raises(ValueError): + sanitize_iso_date("2026-02-32") + + +def test_iso_date_rejects_non_string(): + with pytest.raises(ValueError): + sanitize_iso_date(20260315) + + +def test_iso_date_error_names_field(): + with pytest.raises(ValueError, match="valid_from"): + sanitize_iso_date("yesterday", "valid_from") diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 480b6bd..1b80f36 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -665,6 +665,52 @@ class TestKGTools: result = tool_kg_stats() assert result["entities"] >= 4 + # --- Date validation at the MCP boundary (issue #1164) --- + + def test_kg_add_rejects_invalid_valid_from(self, monkeypatch, config, palace_path, kg): + _patch_mcp_server(monkeypatch, config, kg) + from mempalace.mcp_server import tool_kg_add + + result = tool_kg_add( + subject="Alice", + predicate="likes", + object="coffee", + valid_from="Jan 2025", + ) + assert result["success"] is False + assert "valid_from" in result["error"] + assert "ISO-8601" in result["error"] + + def test_kg_query_rejects_invalid_as_of(self, monkeypatch, config, palace_path, seeded_kg): + _patch_mcp_server(monkeypatch, config, seeded_kg) + from mempalace.mcp_server import tool_kg_query + + result = tool_kg_query(entity="Max", as_of="March 2026") + assert "error" in result + assert "as_of" in result["error"] + + def test_kg_invalidate_rejects_invalid_ended(self, monkeypatch, config, palace_path, seeded_kg): + _patch_mcp_server(monkeypatch, config, seeded_kg) + from mempalace.mcp_server import tool_kg_invalidate + + result = tool_kg_invalidate( + subject="Max", + predicate="does", + object="chess", + ended="yesterday", + ) + assert result["success"] is False + assert "ended" in result["error"] + + def test_kg_query_accepts_partial_iso_dates(self, monkeypatch, config, palace_path, seeded_kg): + _patch_mcp_server(monkeypatch, config, seeded_kg) + from mempalace.mcp_server import tool_kg_query + + # YYYY and YYYY-MM are valid ISO-8601 forms — must not be rejected. + for value in ("2026", "2026-03", "2026-03-15"): + result = tool_kg_query(entity="Max", as_of=value) + assert "error" not in result, f"rejected valid date {value!r}: {result}" + # ── Diary Tools ───────────────────────────────────────────────────────── From abe85763d4da974b8652bdfe3654bee3a7534034 Mon Sep 17 00:00:00 2001 From: Arnold Wender Date: Sun, 26 Apr 2026 12:50:43 +0200 Subject: [PATCH 2/2] fix(kg): reject partial ISO dates to avoid silent empty result sets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per qodo-ai review on PR #1167: sanitize_iso_date() previously accepted YYYY and YYYY-MM, but KnowledgeGraph.query_entity() compares valid_from/ valid_to TEXT columns lexicographically against as_of. Lexicographic comparison treats '2026-01-01' as greater than '2026' (because '-' > end-of-string), so partial as_of values silently excluded valid facts — re-introducing the silent-empty-results problem this PR was meant to fix. Tighten _ISO_DATE_RE to require YYYY-MM-DD only. Update docstring and error message accordingly. Invert the two test cases that asserted partials were accepted. --- mempalace/config.py | 18 +++++++++++------- tests/test_config.py | 12 ++++++++---- tests/test_mcp_server.py | 15 +++++++++++---- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/mempalace/config.py b/mempalace/config.py index 4005779..2252a49 100644 --- a/mempalace/config.py +++ b/mempalace/config.py @@ -85,16 +85,21 @@ def sanitize_kg_value(value: str, field_name: str = "value") -> str: # (as_of, valid_from, valid_to, ended). Parameterized queries already # prevent SQL injection, but unvalidated date strings silently miss # every row — callers cannot distinguish "no fact at this time" from -# "your date format was unrecognized." Accept YYYY, YYYY-MM, YYYY-MM-DD. -_ISO_DATE_RE = re.compile(r"^\d{4}(?:-(?:0[1-9]|1[0-2])(?:-(?:0[1-9]|[12]\d|3[01]))?)?$") +# "your date format was unrecognized." Require full YYYY-MM-DD: KG +# queries compare TEXT dates lexicographically, so partials like "2026" +# would re-introduce silent empty results (e.g. "2026-01-01" <= "2026" +# is False), defeating the purpose of validation. +_ISO_DATE_RE = re.compile(r"^\d{4}-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\d|3[01])$") def sanitize_iso_date(value, field_name: str = "date"): """Validate an ISO-8601 date string, accepting None or empty as-is. - Accepts ``YYYY``, ``YYYY-MM``, or ``YYYY-MM-DD``. Raises ValueError - on any other non-empty input so the MCP layer can surface a clear - error to the caller instead of silently returning empty results. + Accepts only ``YYYY-MM-DD``. Raises ValueError on any other + non-empty input so the MCP layer can surface a clear error to the + caller instead of silently returning empty results. Partial dates + (``YYYY``, ``YYYY-MM``) are rejected because KG queries compare + TEXT dates lexicographically and would silently exclude valid facts. """ if value is None or value == "": return value @@ -103,8 +108,7 @@ def sanitize_iso_date(value, field_name: str = "date"): value = value.strip() if not _ISO_DATE_RE.match(value): raise ValueError( - f"{field_name}={value!r} is not a valid ISO-8601 date " - f"(expected YYYY, YYYY-MM, or YYYY-MM-DD)" + f"{field_name}={value!r} is not a valid ISO-8601 date " f"(expected YYYY-MM-DD)" ) return value diff --git a/tests/test_config.py b/tests/test_config.py index f5064e2..204faae 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -223,12 +223,16 @@ def test_kg_value_rejects_over_length(): # --- sanitize_iso_date --- -def test_iso_date_accepts_year_only(): - assert sanitize_iso_date("2026") == "2026" +def test_iso_date_rejects_year_only(): + # Partial dates re-introduce silent empty result sets via lexicographic + # TEXT comparison in KG queries (e.g. "2026-01-01" <= "2026" is False). + with pytest.raises(ValueError): + sanitize_iso_date("2026") -def test_iso_date_accepts_year_month(): - assert sanitize_iso_date("2026-03") == "2026-03" +def test_iso_date_rejects_year_month(): + with pytest.raises(ValueError): + sanitize_iso_date("2026-03") def test_iso_date_accepts_full_date(): diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 1b80f36..136b6f3 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -702,14 +702,21 @@ class TestKGTools: assert result["success"] is False assert "ended" in result["error"] - def test_kg_query_accepts_partial_iso_dates(self, monkeypatch, config, palace_path, seeded_kg): + def test_kg_query_rejects_partial_iso_dates(self, monkeypatch, config, palace_path, seeded_kg): _patch_mcp_server(monkeypatch, config, seeded_kg) from mempalace.mcp_server import tool_kg_query - # YYYY and YYYY-MM are valid ISO-8601 forms — must not be rejected. - for value in ("2026", "2026-03", "2026-03-15"): + # Partial ISO dates are rejected: KG queries compare TEXT dates + # lexicographically, so "2026-01-01" <= "2026" is False, which + # silently excludes facts. Reject at the boundary — only YYYY-MM-DD + # produces correct results. + for value in ("2026", "2026-03"): result = tool_kg_query(entity="Max", as_of=value) - assert "error" not in result, f"rejected valid date {value!r}: {result}" + assert "error" in result, f"accepted partial date {value!r}: {result}" + + # Full ISO-8601 dates still pass. + result = tool_kg_query(entity="Max", as_of="2026-03-15") + assert "error" not in result, f"rejected valid date: {result}" # ── Diary Tools ─────────────────────────────────────────────────────────