fix(hooks): always mine the active transcript as convos, additive to MEMPAL_DIR
#1230 fixed --mode convos for the case where MEMPAL_DIR was unset, but left two configurations broken: - MEMPAL_DIR set to a project dir: convos never mined (MEMPAL_DIR overrode the transcript path); only project files were ingested. - MEMPAL_DIR set to a conversations dir per the old hooks/README: the projects miner ran on JSONL — same wrong-miner behaviour. The shell hooks (mempal_save_hook.sh, mempal_precompact_hook.sh) had the same MEMPAL_DIR-overrides-transcript bug AND were missing --mode on every spawned `mempalace mine` call. Make the auto-ingest *additive*. _get_mine_dir → _get_mine_targets, returning a list of (dir, mode) pairs: - MEMPAL_DIR (when valid) contributes (dir, "projects") - A valid transcript JSONL contributes (parent, "convos") - Both can appear together; the hook spawns one ingest per target Same change applied to the shell save and precompact hooks. Precompact also gained transcript_path parsing so it can run the convos mine synchronously before context is compressed. hooks/README.md updated to describe MEMPAL_DIR as a project-files target, never a convos target.
This commit is contained in:
+88
-31
@@ -12,7 +12,7 @@ from mempalace.hooks_cli import (
|
||||
SAVE_INTERVAL,
|
||||
_count_human_messages,
|
||||
_extract_recent_messages,
|
||||
_get_mine_dir,
|
||||
_get_mine_targets,
|
||||
_log,
|
||||
_maybe_auto_ingest,
|
||||
_mempalace_python,
|
||||
@@ -494,6 +494,43 @@ def test_mine_sync_with_env_uses_projects_mode(tmp_path):
|
||||
assert cmd[cmd.index("--mode") + 1] == "projects"
|
||||
|
||||
|
||||
def test_maybe_auto_ingest_with_both_set(tmp_path):
|
||||
"""When MEMPAL_DIR and a transcript are both set, BOTH spawns happen."""
|
||||
mempal_dir = tmp_path / "project"
|
||||
mempal_dir.mkdir()
|
||||
convo_dir = tmp_path / "convos"
|
||||
convo_dir.mkdir()
|
||||
transcript = convo_dir / "session.jsonl"
|
||||
transcript.write_text("")
|
||||
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"):
|
||||
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||
_maybe_auto_ingest(str(transcript))
|
||||
assert mock_popen.call_count == 2
|
||||
cmds = [call.args[0] for call in mock_popen.call_args_list]
|
||||
modes = {cmd[cmd.index("--mode") + 1] for cmd in cmds}
|
||||
assert modes == {"projects", "convos"}
|
||||
|
||||
|
||||
def test_mine_sync_with_both_set(tmp_path):
|
||||
"""Precompact sync runs BOTH mines when MEMPAL_DIR + transcript are set."""
|
||||
mempal_dir = tmp_path / "project"
|
||||
mempal_dir.mkdir()
|
||||
convo_dir = tmp_path / "convos"
|
||||
convo_dir.mkdir()
|
||||
transcript = convo_dir / "session.jsonl"
|
||||
transcript.write_text("")
|
||||
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||
with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
|
||||
_mine_sync(str(transcript))
|
||||
assert mock_run.call_count == 2
|
||||
cmds = [call.args[0] for call in mock_run.call_args_list]
|
||||
modes = {cmd[cmd.index("--mode") + 1] for cmd in cmds}
|
||||
assert modes == {"projects", "convos"}
|
||||
|
||||
|
||||
def test_maybe_auto_ingest_oserror(tmp_path):
|
||||
"""OSError during subprocess spawn is silenced."""
|
||||
mempal_dir = tmp_path / "project"
|
||||
@@ -550,70 +587,90 @@ def test_mine_already_running_corrupt_file(tmp_path):
|
||||
assert _mine_already_running() is False
|
||||
|
||||
|
||||
# --- _get_mine_dir ---
|
||||
# --- _get_mine_targets ---
|
||||
|
||||
|
||||
def test_get_mine_dir_mempal_dir(tmp_path):
|
||||
"""MEMPAL_DIR takes priority, is expanded/resolved, and is treated as projects mode."""
|
||||
def test_get_mine_targets_mempal_dir_only(tmp_path):
|
||||
"""MEMPAL_DIR alone yields a single projects target, expanded/resolved."""
|
||||
mempal_dir = tmp_path / "project"
|
||||
mempal_dir.mkdir()
|
||||
transcript = tmp_path / "t.jsonl"
|
||||
transcript.write_text("")
|
||||
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
||||
result_dir, result_mode = _get_mine_dir(str(transcript))
|
||||
assert Path(result_dir).resolve() == mempal_dir.resolve()
|
||||
assert result_mode == "projects"
|
||||
targets = _get_mine_targets("")
|
||||
assert len(targets) == 1
|
||||
assert Path(targets[0][0]).resolve() == mempal_dir.resolve()
|
||||
assert targets[0][1] == "projects"
|
||||
|
||||
|
||||
def test_get_mine_dir_mempal_dir_tilde(tmp_path):
|
||||
def test_get_mine_targets_mempal_dir_tilde(tmp_path):
|
||||
"""MEMPAL_DIR with a tilde prefix is expanded correctly."""
|
||||
mempal_dir = tmp_path / "project"
|
||||
mempal_dir.mkdir()
|
||||
home = Path.home()
|
||||
# Build a ~-relative path only if tmp_path is inside home
|
||||
try:
|
||||
rel = mempal_dir.relative_to(home)
|
||||
except ValueError:
|
||||
pytest.skip("tmp_path is not under home, cannot build ~-relative path")
|
||||
tilde_path = "~/" + str(rel)
|
||||
with patch.dict("os.environ", {"MEMPAL_DIR": tilde_path}):
|
||||
result_dir, result_mode = _get_mine_dir("")
|
||||
assert Path(result_dir).resolve() == mempal_dir.resolve()
|
||||
assert result_mode == "projects"
|
||||
targets = _get_mine_targets("")
|
||||
assert len(targets) == 1
|
||||
assert Path(targets[0][0]).resolve() == mempal_dir.resolve()
|
||||
assert targets[0][1] == "projects"
|
||||
|
||||
|
||||
def test_get_mine_dir_transcript_fallback(tmp_path):
|
||||
"""Transcript fallback resolves to its parent dir in convos mode."""
|
||||
def test_get_mine_targets_transcript_only(tmp_path):
|
||||
"""A valid transcript JSONL alone yields a single convos target."""
|
||||
transcript = tmp_path / "t.jsonl"
|
||||
transcript.write_text("")
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
result_dir, result_mode = _get_mine_dir(str(transcript))
|
||||
assert Path(result_dir).resolve() == tmp_path.resolve()
|
||||
assert result_mode == "convos"
|
||||
targets = _get_mine_targets(str(transcript))
|
||||
assert len(targets) == 1
|
||||
assert Path(targets[0][0]).resolve() == tmp_path.resolve()
|
||||
assert targets[0][1] == "convos"
|
||||
|
||||
|
||||
def test_get_mine_dir_transcript_path_traversal_rejected(tmp_path):
|
||||
"""Transcript paths with '..' components are rejected and return no dir."""
|
||||
def test_get_mine_targets_both_set(tmp_path):
|
||||
"""When MEMPAL_DIR and a valid transcript are both set, BOTH targets appear.
|
||||
|
||||
This is the regression that motivates the additive-targets design:
|
||||
users who set MEMPAL_DIR previously had their conversations silently
|
||||
skipped because MEMPAL_DIR overrode the transcript path.
|
||||
"""
|
||||
mempal_dir = tmp_path / "project"
|
||||
mempal_dir.mkdir()
|
||||
convo_dir = tmp_path / "convos"
|
||||
convo_dir.mkdir()
|
||||
transcript = convo_dir / "session.jsonl"
|
||||
transcript.write_text("")
|
||||
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
||||
targets = _get_mine_targets(str(transcript))
|
||||
modes = {mode for _, mode in targets}
|
||||
assert modes == {"projects", "convos"}
|
||||
by_mode = {mode: Path(d) for d, mode in targets}
|
||||
assert by_mode["projects"].resolve() == mempal_dir.resolve()
|
||||
assert by_mode["convos"].resolve() == convo_dir.resolve()
|
||||
|
||||
|
||||
def test_get_mine_targets_transcript_path_traversal_rejected(tmp_path):
|
||||
"""Transcript paths with '..' components are rejected (no convos target)."""
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
result_dir, result_mode = _get_mine_dir("../../etc/passwd")
|
||||
assert result_dir == ""
|
||||
assert result_mode == "projects"
|
||||
targets = _get_mine_targets("../../etc/passwd")
|
||||
assert targets == []
|
||||
|
||||
|
||||
def test_get_mine_dir_transcript_non_jsonl_rejected(tmp_path):
|
||||
def test_get_mine_targets_transcript_non_jsonl_rejected(tmp_path):
|
||||
"""Transcript paths without .jsonl/.json extension are rejected."""
|
||||
bad = tmp_path / "notes.txt"
|
||||
bad.write_text("content")
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
result_dir, result_mode = _get_mine_dir(str(bad))
|
||||
assert result_dir == ""
|
||||
assert result_mode == "projects"
|
||||
targets = _get_mine_targets(str(bad))
|
||||
assert targets == []
|
||||
|
||||
|
||||
def test_get_mine_dir_empty():
|
||||
"""Returns empty dir when nothing is available."""
|
||||
def test_get_mine_targets_empty():
|
||||
"""Returns empty list when nothing is available."""
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
assert _get_mine_dir("") == ("", "projects")
|
||||
assert _get_mine_targets("") == []
|
||||
|
||||
|
||||
# --- _parse_harness_input ---
|
||||
|
||||
@@ -16,7 +16,8 @@ class TestSaveHookAutoMines:
|
||||
|
||||
def test_hook_mines_transcript_path(self):
|
||||
"""The hook receives TRANSCRIPT_PATH from Claude Code.
|
||||
It should use that to mine the conversation, not depend on MEMPAL_DIR."""
|
||||
It should use that to mine the conversation as --mode convos,
|
||||
independently of MEMPAL_DIR (which is for project files only)."""
|
||||
hook_path = os.path.join(
|
||||
os.path.dirname(os.path.dirname(__file__)),
|
||||
"hooks",
|
||||
@@ -24,23 +25,17 @@ class TestSaveHookAutoMines:
|
||||
)
|
||||
src = open(hook_path).read()
|
||||
|
||||
# The hook ALREADY receives TRANSCRIPT_PATH in the JSON input.
|
||||
# It should use this to mine the current session's transcript
|
||||
# regardless of whether MEMPAL_DIR is set.
|
||||
# The hook must have a path that uses TRANSCRIPT_PATH to determine
|
||||
# what to mine, separate from the MEMPAL_DIR path.
|
||||
uses_transcript = "TRANSCRIPT_PATH" in src
|
||||
has_mine = "mempalace mine" in src
|
||||
# TRANSCRIPT_PATH must appear in the mining logic, not just the parse block
|
||||
transcript_drives_mine = "MINE_DIR" in src and "dirname" in src and "TRANSCRIPT_PATH" in src
|
||||
|
||||
assert uses_transcript and has_mine and transcript_drives_mine, (
|
||||
"Save hook only mines when MEMPAL_DIR is set (defaults to empty). "
|
||||
"The hook receives TRANSCRIPT_PATH from Claude Code — it should "
|
||||
"mine that file automatically so conversations are saved without "
|
||||
"the user setting an env var. Currently the hook says 'saved in "
|
||||
"background' but nothing actually saves."
|
||||
)
|
||||
# The hook must drive the conversation mine off TRANSCRIPT_PATH,
|
||||
# using `dirname` to derive the parent dir, and tagging it with
|
||||
# `--mode convos` so the convo miner runs (not the projects miner).
|
||||
assert "TRANSCRIPT_PATH" in src, "hook must read transcript_path"
|
||||
assert "mempalace mine" in src, "hook must invoke `mempalace mine`"
|
||||
assert (
|
||||
'dirname "$TRANSCRIPT_PATH"' in src
|
||||
), "hook must mine the transcript's parent directory"
|
||||
assert (
|
||||
"--mode convos" in src
|
||||
), "transcript mine must use --mode convos, not the projects miner"
|
||||
|
||||
def test_mempal_dir_default_not_empty(self):
|
||||
"""If MEMPAL_DIR is still used, it should have a sensible default,
|
||||
|
||||
Reference in New Issue
Block a user