fix(kg): validate ISO-8601 date formats at MCP boundary

tool_kg_query (as_of), tool_kg_add (valid_from), and tool_kg_invalidate
(ended) accepted any string and forwarded it to SQLite without format
validation. Parameterized queries prevent SQL injection, but invalid
date strings silently produce empty result sets — callers cannot
distinguish "no fact at this time" from "your date format was
unrecognized." This is especially painful for natural-language LLM
callers that synthesize dates like "March 2026" or "Jan 2025".

Add sanitize_iso_date() in config.py alongside the other input
validators. It accepts YYYY, YYYY-MM, and YYYY-MM-DD forms; passes
through None/empty; and raises ValueError with a field-named message
on anything else. Call it from the three kg MCP tool wrappers before
values reach the storage layer so the caller gets a clear error
instead of a silent miss.

Closes #1164
This commit is contained in:
Arnold Wender
2026-04-24 11:09:16 +02:00
parent fdfaf017ab
commit 4d98b05240
4 changed files with 147 additions and 1 deletions
+4
View File
@@ -55,6 +55,7 @@ from .config import ( # noqa: E402
sanitize_kg_value,
sanitize_name,
sanitize_content,
sanitize_iso_date,
)
from .version import __version__ # noqa: E402
from .backends.chroma import ( # noqa: E402
@@ -1021,6 +1022,7 @@ def tool_kg_query(entity: str, as_of: str = None, direction: str = "both"):
"""Query the knowledge graph for an entity's relationships."""
try:
entity = sanitize_kg_value(entity, "entity")
as_of = sanitize_iso_date(as_of, "as_of")
except ValueError as e:
return {"error": str(e)}
if direction not in ("outgoing", "incoming", "both"):
@@ -1037,6 +1039,7 @@ def tool_kg_add(
subject = sanitize_kg_value(subject, "subject")
predicate = sanitize_name(predicate, "predicate")
object = sanitize_kg_value(object, "object")
valid_from = sanitize_iso_date(valid_from, "valid_from")
except ValueError as e:
return {"success": False, "error": str(e)}
@@ -1062,6 +1065,7 @@ def tool_kg_invalidate(subject: str, predicate: str, object: str, ended: str = N
subject = sanitize_kg_value(subject, "subject")
predicate = sanitize_name(predicate, "predicate")
object = sanitize_kg_value(object, "object")
ended = sanitize_iso_date(ended, "ended")
except ValueError as e:
return {"success": False, "error": str(e)}
_wal_log(