abe85763d4
Per qodo-ai review on PR #1167: sanitize_iso_date() previously accepted YYYY and YYYY-MM, but KnowledgeGraph.query_entity() compares the valid_from/valid_to TEXT columns lexicographically against as_of. Lexicographic comparison treats '2026-01-01' as greater than '2026' (because '-' > end-of-string), so partial as_of values silently excluded valid facts — re-introducing the silent-empty-results problem this PR was meant to fix. Tighten _ISO_DATE_RE to require YYYY-MM-DD only. Update the docstring and error message accordingly. Invert the two test cases that asserted partials were accepted.
287 lines
8.2 KiB
Python
287 lines
8.2 KiB
Python
import os
|
|
import json
|
|
import tempfile
|
|
|
|
import pytest
|
|
from mempalace.config import (
|
|
MempalaceConfig,
|
|
normalize_wing_name,
|
|
sanitize_iso_date,
|
|
sanitize_kg_value,
|
|
sanitize_name,
|
|
)
|
|
|
|
|
|
def test_default_config():
    """Check the default palace path and collection name of a fresh config.

    The config directory is a ``TemporaryDirectory`` so the scratch folder is
    removed when the test ends instead of accumulating in the system temp dir.
    """
    with tempfile.TemporaryDirectory() as config_dir:
        cfg = MempalaceConfig(config_dir=config_dir)
        assert "palace" in cfg.palace_path
        assert cfg.collection_name == "mempalace_drawers"
|
|
|
|
|
|
def test_config_from_file():
    """A ``palace_path`` written to config.json is returned by the config.

    The config directory is a ``TemporaryDirectory`` so it is cleaned up once
    the assertions have run.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, "config.json"), "w") as f:
            json.dump({"palace_path": "/custom/palace"}, f)
        cfg = MempalaceConfig(config_dir=tmpdir)
        assert cfg.palace_path == "/custom/palace"
|
|
|
|
|
|
def test_embedding_device_defaults_to_auto(monkeypatch):
    """With no env override and no config file, embedding_device is "auto".

    The config directory is a ``TemporaryDirectory`` so the scratch folder is
    removed when the test ends.
    """
    # Make sure an ambient override cannot influence the default.
    monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)
    with tempfile.TemporaryDirectory() as config_dir:
        cfg = MempalaceConfig(config_dir=config_dir)
        assert cfg.embedding_device == "auto"
|
|
|
|
|
|
def test_embedding_device_from_config_is_normalized(tmp_path, monkeypatch):
    """A padded, upper-case device in config.json is expected back as "cuda"."""
    # Remove any ambient override so only the file value is in play.
    monkeypatch.delenv("MEMPALACE_EMBEDDING_DEVICE", raising=False)

    config_file = tmp_path / "config.json"
    config_file.write_text(json.dumps({"embedding_device": " CUDA "}))

    device = MempalaceConfig(config_dir=str(tmp_path)).embedding_device
    assert device == "cuda"
|
|
|
|
|
|
def test_embedding_device_env_overrides_config(tmp_path, monkeypatch):
    """The env var value is expected to win over the one in config.json."""
    config_file = tmp_path / "config.json"
    config_file.write_text(json.dumps({"embedding_device": "cpu"}))
    monkeypatch.setenv("MEMPALACE_EMBEDDING_DEVICE", " CoreML ")

    device = MempalaceConfig(config_dir=str(tmp_path)).embedding_device
    assert device == "coreml"
|
|
|
|
|
|
def test_env_override():
    """MEMPALACE_PALACE_PATH from the environment is reflected in palace_path.

    The variable is set through ``mock.patch.dict`` so a value that already
    existed in the environment is restored afterwards rather than deleted, and
    the scratch config directory is removed when the test finishes.
    """
    from unittest import mock

    raw = "/env/palace"
    with mock.patch.dict(
        os.environ, {"MEMPALACE_PALACE_PATH": raw}
    ), tempfile.TemporaryDirectory() as config_dir:
        cfg = MempalaceConfig(config_dir=config_dir)
        # palace_path normalizes with abspath + expanduser to match the
        # --palace CLI code path. On Unix that's a no-op for "/env/palace";
        # on Windows abspath prepends the current drive letter.
        assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
|
|
|
|
|
|
def test_env_path_expanduser():
    """A leading "~" in MEMPALACE_PALACE_PATH is expanded in palace_path.

    The variable is set through ``mock.patch.dict`` so a value that already
    existed in the environment is restored afterwards rather than deleted, and
    the scratch config directory is removed when the test finishes.
    """
    from unittest import mock

    # Tilde must be expanded to match the --palace CLI code path. We don't
    # assert "~" is absent from the final string because Windows 8.3 short
    # paths (e.g. C:\Users\RUNNER~1\...) legitimately contain tildes — the
    # equality check is authoritative.
    raw = os.path.join("~", "mempalace-test")
    with mock.patch.dict(
        os.environ, {"MEMPALACE_PALACE_PATH": raw}
    ), tempfile.TemporaryDirectory() as config_dir:
        cfg = MempalaceConfig(config_dir=config_dir)
        assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
        assert cfg.palace_path.endswith("mempalace-test")
|
|
|
|
|
|
def test_env_path_abspath_collapses_traversal():
    """A ".." segment in MEMPALACE_PALACE_PATH is collapsed in palace_path.

    The variable is set through ``mock.patch.dict`` so a value that already
    existed in the environment is restored afterwards rather than deleted, and
    the scratch config directory is removed when the test finishes.
    """
    from unittest import mock

    # Build a raw path with a .. segment using the platform separator so
    # the assertion is portable (Windows uses \, POSIX uses /).
    raw = os.path.join(tempfile.gettempdir(), "palace", "..", "mempalace-test")
    expected = os.path.abspath(os.path.expanduser(raw))
    with mock.patch.dict(
        os.environ, {"MEMPALACE_PALACE_PATH": raw}
    ), tempfile.TemporaryDirectory() as config_dir:
        cfg = MempalaceConfig(config_dir=config_dir)
        # .. segments must be collapsed, not preserved literally.
        assert ".." not in cfg.palace_path
        assert cfg.palace_path == expected
|
|
|
|
|
|
def test_env_path_legacy_alias_normalized():
    """The legacy MEMPAL_PALACE_PATH variable is normalized like the new one.

    All environment changes happen inside ``mock.patch.dict``, which restores
    ``os.environ`` on exit. That covers both the legacy variable set here and
    the MEMPALACE_PALACE_PATH removal, so later tests see the environment they
    started with. The scratch config directory is removed as well.
    """
    from unittest import mock

    # Legacy MEMPAL_PALACE_PATH gets the same normalization treatment as
    # MEMPALACE_PALACE_PATH. We don't assert "~" is absent from the final
    # string because Windows 8.3 short paths (e.g. C:\Users\RUNNER~1\...)
    # legitimately contain tildes — the equality check below is authoritative.
    raw = os.path.join("~", "legacy-alias", "..", "mempalace-test")
    with mock.patch.dict(os.environ, {"MEMPAL_PALACE_PATH": raw}):
        # Drop the primary variable so only the legacy alias is visible;
        # patch.dict puts it back when the block exits.
        os.environ.pop("MEMPALACE_PALACE_PATH", None)
        with tempfile.TemporaryDirectory() as config_dir:
            cfg = MempalaceConfig(config_dir=config_dir)
            assert ".." not in cfg.palace_path
            assert cfg.palace_path == os.path.abspath(os.path.expanduser(raw))
|
|
|
|
|
|
def test_init():
    """Calling ``init()`` leaves a config.json in the config directory.

    The config directory is a ``TemporaryDirectory`` so the files written by
    ``init()`` are removed when the test ends.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        cfg = MempalaceConfig(config_dir=tmpdir)
        cfg.init()
        assert os.path.exists(os.path.join(tmpdir, "config.json"))
|
|
|
|
|
|
# --- normalize_wing_name ---
|
|
|
|
|
|
def test_normalize_wing_name_hyphen():
    """A hyphenated wing name is expected back with an underscore."""
    normalized = normalize_wing_name("mempal-private")
    assert normalized == "mempal_private"
|
|
|
|
|
|
def test_normalize_wing_name_space():
    """A mixed-case name with a space is expected lower-cased and underscored."""
    normalized = normalize_wing_name("My Project")
    assert normalized == "my_project"
|
|
|
|
|
|
def test_normalize_wing_name_already_clean():
    """An already lower-case, separator-free name is expected back unchanged."""
    wing = "memorymark"
    assert normalize_wing_name(wing) == wing
|
|
|
|
|
|
def test_normalize_wing_name_mixed():
    """Hyphen, space, and capitals together are expected to normalize."""
    normalized = normalize_wing_name("My-Cool App")
    assert normalized == "my_cool_app"
|
|
|
|
|
|
# --- sanitize_name ---
|
|
|
|
|
|
def test_sanitize_name_ascii():
    """A plain ASCII name is expected back unchanged."""
    name = "hello"
    assert sanitize_name(name) == name
|
|
|
|
|
|
def test_sanitize_name_latvian():
    """A Latvian name with a macron is expected back unchanged."""
    name = "Jānis"
    assert sanitize_name(name) == name
|
|
|
|
|
|
def test_sanitize_name_cjk():
    """A CJK name is expected back unchanged."""
    name = "太郎"
    assert sanitize_name(name) == name
|
|
|
|
|
|
def test_sanitize_name_cyrillic():
    """A Cyrillic name is expected back unchanged."""
    name = "Алексей"
    assert sanitize_name(name) == name
|
|
|
|
|
|
def test_sanitize_name_rejects_leading_underscore():
    """A name starting with an underscore is expected to raise ValueError."""
    bad_name = "_foo"
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
|
|
|
|
|
def test_sanitize_name_rejects_path_traversal():
    """A path-traversal style name is expected to raise ValueError."""
    bad_name = "../etc/passwd"
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
|
|
|
|
|
def test_sanitize_name_rejects_empty():
    """The empty string is expected to raise ValueError."""
    bad_name = ""
    with pytest.raises(ValueError):
        sanitize_name(bad_name)
|
|
|
|
|
|
# --- sanitize_kg_value ---
|
|
|
|
|
|
def test_kg_value_accepts_commas():
    """A value containing commas is expected back unchanged."""
    value = "Alice, Bob, and Carol"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_colons():
    """A value containing a colon is expected back unchanged."""
    value = "role: engineer"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_parentheses():
    """A value containing parentheses is expected back unchanged."""
    value = "Python (programming)"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_slashes():
    """A value containing a forward slash is expected back unchanged."""
    value = "owner/repo"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_hash():
    """A value containing a hash sign is expected back unchanged."""
    value = "issue #123"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_accepts_unicode():
    """A value with non-ASCII letters is expected back unchanged."""
    value = "Jānis Bērziņš"
    assert sanitize_kg_value(value) == value
|
|
|
|
|
|
def test_kg_value_strips_whitespace():
    """Surrounding whitespace is expected to be removed from the value."""
    cleaned = sanitize_kg_value(" hello ")
    assert cleaned == "hello"
|
|
|
|
|
|
def test_kg_value_rejects_empty():
    """The empty string is expected to raise ValueError."""
    bad_value = ""
    with pytest.raises(ValueError):
        sanitize_kg_value(bad_value)
|
|
|
|
|
|
def test_kg_value_rejects_whitespace_only():
    """A value made only of whitespace is expected to raise ValueError."""
    bad_value = " "
    with pytest.raises(ValueError):
        sanitize_kg_value(bad_value)
|
|
|
|
|
|
def test_kg_value_rejects_null_bytes():
    """A value with an embedded NUL byte is expected to raise ValueError."""
    bad_value = "hello\x00world"
    with pytest.raises(ValueError):
        sanitize_kg_value(bad_value)
|
|
|
|
|
|
def test_kg_value_rejects_over_length():
    """A 129-character value is expected to raise ValueError."""
    # NOTE(review): presumably one past the maximum allowed length — confirm
    # against the limit used by sanitize_kg_value.
    too_long = "a" * 129
    with pytest.raises(ValueError):
        sanitize_kg_value(too_long)
|
|
|
|
|
|
# --- sanitize_iso_date ---
|
|
|
|
|
|
def test_iso_date_rejects_year_only():
    """A bare year is expected to raise ValueError."""
    # Partial dates re-introduce silent empty result sets via lexicographic
    # TEXT comparison in KG queries (e.g. "2026-01-01" <= "2026" is False).
    partial = "2026"
    with pytest.raises(ValueError):
        sanitize_iso_date(partial)
|
|
|
|
|
|
def test_iso_date_rejects_year_month():
    """A year-month value without a day is expected to raise ValueError."""
    partial = "2026-03"
    with pytest.raises(ValueError):
        sanitize_iso_date(partial)
|
|
|
|
|
|
def test_iso_date_accepts_full_date():
    """A complete YYYY-MM-DD date is expected back unchanged."""
    full_date = "2026-03-15"
    assert sanitize_iso_date(full_date) == full_date
|
|
|
|
|
|
def test_iso_date_passes_through_none():
    """``None`` is expected to be returned as ``None``."""
    result = sanitize_iso_date(None)
    assert result is None
|
|
|
|
|
|
def test_iso_date_passes_through_empty_string():
    """The empty string is expected to be returned as the empty string."""
    result = sanitize_iso_date("")
    assert result == ""
|
|
|
|
|
|
def test_iso_date_strips_whitespace():
    """Surrounding whitespace is expected to be removed from a valid date."""
    cleaned = sanitize_iso_date(" 2026-03-15 ")
    assert cleaned == "2026-03-15"
|
|
|
|
|
|
def test_iso_date_rejects_natural_language():
    """A spelled-out month and year is expected to raise ValueError."""
    bad_date = "March 2026"
    with pytest.raises(ValueError):
        sanitize_iso_date(bad_date)
|
|
|
|
|
|
def test_iso_date_rejects_abbreviated_month():
    """An abbreviated month name and year is expected to raise ValueError."""
    bad_date = "Jan 2025"
    with pytest.raises(ValueError):
        sanitize_iso_date(bad_date)
|
|
|
|
|
|
def test_iso_date_rejects_us_format():
    """A slash-separated MM/DD/YYYY date is expected to raise ValueError."""
    bad_date = "03/15/2026"
    with pytest.raises(ValueError):
        sanitize_iso_date(bad_date)
|
|
|
|
|
|
def test_iso_date_rejects_invalid_month():
    """A full YYYY-MM-DD date whose month is 13 must raise ValueError.

    The input has to be a complete date. A partial such as "2026-13" is
    rejected for its shape alone (see test_iso_date_rejects_year_month), so it
    would pass here even if month-range validation were missing.
    """
    with pytest.raises(ValueError):
        sanitize_iso_date("2026-13-01")
|
|
|
|
|
|
def test_iso_date_rejects_invalid_day():
    """A full-shaped date with day 32 is expected to raise ValueError."""
    bad_date = "2026-02-32"
    with pytest.raises(ValueError):
        sanitize_iso_date(bad_date)
|
|
|
|
|
|
def test_iso_date_rejects_non_string():
    """An integer argument is expected to raise ValueError."""
    not_a_string = 20260315
    with pytest.raises(ValueError):
        sanitize_iso_date(not_a_string)
|
|
|
|
|
|
def test_iso_date_error_names_field():
    """The ValueError message is expected to mention the supplied field name."""
    field = "valid_from"
    with pytest.raises(ValueError, match=field):
        sanitize_iso_date("yesterday", field)
|