diff --git a/hooks/README.md b/hooks/README.md index 7794527..469d231 100644 --- a/hooks/README.md +++ b/hooks/README.md @@ -67,7 +67,7 @@ Edit `mempal_save_hook.sh` to change: - **`SAVE_INTERVAL=15`** — How many human messages between saves. Lower = more frequent saves, higher = less interruption. - **`STATE_DIR`** — Where hook state is stored (defaults to `~/.mempalace/hook_state/`) -- **`MEMPAL_DIR`** — Optional. Set to a conversations directory to auto-run `mempalace mine ` on each save trigger. Leave blank (default) to let the AI handle saving via the block reason message. +- **`MEMPAL_DIR`** — Optional **project directory** (code, notes, docs) to also mine on each save trigger, with `--mode projects`. The hook ALWAYS mines the active conversation transcript automatically with `--mode convos` — `MEMPAL_DIR` is purely additive, never an override. Leave blank if you don't want to ingest project files. - **`MEMPALACE_PYTHON`** — Optional env var. Python interpreter with mempalace + chromadb installed. Auto-detects: `MEMPALACE_PYTHON` env var → repo `venv/bin/python3` → system `python3`. Set this if your venv is in a non-standard location. ### mempalace CLI diff --git a/hooks/mempal_precompact_hook.sh b/hooks/mempal_precompact_hook.sh index a14a0d0..70377f6 100755 --- a/hooks/mempal_precompact_hook.sh +++ b/hooks/mempal_precompact_hook.sh @@ -41,17 +41,18 @@ # to save everything. After the AI saves, compaction proceeds normally. # # === MEMPALACE CLI === -# This repo uses: mempalace mine -# or: mempalace mine --mode convos -# Set MEMPAL_DIR below if you want the hook to auto-ingest before compaction. -# Leave blank to rely on the AI's own save instructions. +# The hook ALWAYS mines the active conversation transcript synchronously +# before compaction (via `mempalace mine --mode convos`). +# MEMPAL_DIR is an *additional*, optional target for project files — it +# does not replace the conversation mine. STATE_DIR="$HOME/.mempalace/hook_state" mkdir -p "$STATE_DIR" -# Optional: set to the directory you want auto-ingested before compaction. -# Example: MEMPAL_DIR="$HOME/conversations" -# Leave empty to skip auto-ingest (AI handles saving via the block reason). +# Optional: project directory (code / notes / docs) to also mine before +# compaction. Mined with `--mode projects`. The conversation transcript +# is always mined regardless — this is purely additive. +# Example: MEMPAL_DIR="$HOME/projects/my_app" MEMPAL_DIR="" # Resolve the Python interpreter. Same contract as mempal_save_hook.sh: @@ -64,15 +65,34 @@ fi # Read JSON input from stdin INPUT=$(cat) -SESSION_ID=$(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "import sys,json; print(json.load(sys.stdin).get('session_id','unknown'))" 2>/dev/null) +# Parse session_id and transcript_path in one call. Sanitize both before +# interpolating into shell — same contract as mempal_save_hook.sh. +eval $(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c " +import sys, json, re +data = json.load(sys.stdin) +sid = data.get('session_id', 'unknown') +tp = data.get('transcript_path', '') +safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s)) +print(f'SESSION_ID=\"{safe(sid)}\"') +print(f'TRANSCRIPT_PATH=\"{safe(tp)}\"') +" 2>/dev/null) + +# Expand ~ in path +TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}" echo "[$(date '+%H:%M:%S')] PRE-COMPACT triggered for session $SESSION_ID" >> "$STATE_DIR/hook.log" -# Optional: run mempalace ingest synchronously so memories land before compaction +# Run ingest synchronously so memories land before compaction. Two +# independent targets — both run if both are set: +# 1. TRANSCRIPT_PATH (from Claude Code) → parent dir, --mode convos +# 2. MEMPAL_DIR → --mode projects +if [ -n "$TRANSCRIPT_PATH" ] && [ -f "$TRANSCRIPT_PATH" ]; then + mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \ + >> "$STATE_DIR/hook.log" 2>&1 +fi if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then - SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - REPO_DIR="$(dirname "$SCRIPT_DIR")" - mempalace mine "$MEMPAL_DIR" >> "$STATE_DIR/hook.log" 2>&1 + mempalace mine "$MEMPAL_DIR" --mode projects \ + >> "$STATE_DIR/hook.log" 2>&1 fi # Silent: return empty JSON to not block. "decision": "allow" is invalid — diff --git a/hooks/mempal_save_hook.sh b/hooks/mempal_save_hook.sh index 228b41b..d69ae85 100755 --- a/hooks/mempal_save_hook.sh +++ b/hooks/mempal_save_hook.sh @@ -45,10 +45,10 @@ # stop_hook_active=true so we let it through. No infinite loop. # # === MEMPALACE CLI === -# This repo uses: mempalace mine -# or: mempalace mine --mode convos -# Set MEMPAL_DIR below if you want the hook to auto-ingest after blocking. -# Leave blank to rely on the AI's own save instructions. +# The hook ALWAYS mines the active conversation transcript automatically +# (via `mempalace mine --mode convos`). MEMPAL_DIR is an +# *additional*, optional target for project files — it does not replace +# the conversation mine. # # === CONFIGURATION === @@ -56,9 +56,10 @@ SAVE_INTERVAL=15 # Save every N human messages (adjust to taste) STATE_DIR="$HOME/.mempalace/hook_state" mkdir -p "$STATE_DIR" -# Optional: set to the directory you want auto-ingested on each save trigger. -# Example: MEMPAL_DIR="$HOME/conversations" -# Leave empty to skip auto-ingest (AI handles saving via the block reason). +# Optional: project directory (code / notes / docs) to also mine each +# save trigger. Mined with `--mode projects`. The conversation transcript +# is always mined regardless — this is purely additive. +# Example: MEMPAL_DIR="$HOME/projects/my_app" MEMPAL_DIR="" # Resolve the Python interpreter the hook should use. @@ -157,19 +158,20 @@ if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then echo "[$(date '+%H:%M:%S')] TRIGGERING SAVE at exchange $EXCHANGE_COUNT" >> "$STATE_DIR/hook.log" - # Auto-mine the transcript. Two paths: - # 1. TRANSCRIPT_PATH (from Claude Code) — mine the directory it lives in - # 2. MEMPAL_DIR (user-configured) — mine that directory - # At least one should work. If neither is set, nothing mines. - MINE_DIR="" + # Auto-mine. Two independent targets — both run if both are set: + # 1. TRANSCRIPT_PATH (from Claude Code) → parent dir, --mode convos + # (Claude Code session JSONL — must use the convo miner) + # 2. MEMPAL_DIR (user-configured project) → --mode projects + # (code, notes, docs) + # MEMPAL_DIR is *additive*, not an override: a user with MEMPAL_DIR + # pointed at their project still gets the active conversation mined. if [ -n "$TRANSCRIPT_PATH" ] && [ -f "$TRANSCRIPT_PATH" ]; then - MINE_DIR="$(dirname "$TRANSCRIPT_PATH")" + mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \ + >> "$STATE_DIR/hook.log" 2>&1 & fi if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then - MINE_DIR="$MEMPAL_DIR" - fi - if [ -n "$MINE_DIR" ]; then - mempalace mine "$MINE_DIR" >> "$STATE_DIR/hook.log" 2>&1 & + mempalace mine "$MEMPAL_DIR" --mode projects \ + >> "$STATE_DIR/hook.log" 2>&1 & fi # MEMPAL_VERBOSE toggle: diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py index 49b77e2..0645c41 100644 --- a/mempalace/hooks_cli.py +++ b/mempalace/hooks_cli.py @@ -197,27 +197,27 @@ def _output(data: dict): sys.stdout.buffer.flush() -def _get_mine_dir(transcript_path: str = "") -> tuple[str, str]: - """Determine directory to mine and the miner mode to use. +def _get_mine_targets(transcript_path: str = "") -> list[tuple[str, str]]: + """Return the list of ``(dir, mode)`` targets for auto-ingest. - Returns ``(dir, mode)`` where ``mode`` is ``"projects"`` or ``"convos"``. - Empty ``dir`` means no ingest should run. + MEMPAL_DIR (when set and resolvable) contributes a ``"projects"`` + target. A valid transcript JSONL contributes a ``"convos"`` target + on its parent dir. Both may appear together — the hook runs one + ingest per target, so a user with MEMPAL_DIR pointed at their + project dir still gets the active conversation mined verbatim. - MEMPAL_DIR is treated as a project directory ("projects" mode). The - transcript-path fallback resolves to the parent of a Claude Code - session JSONL, which must be mined with the conversation miner — - running the projects miner there ingests JSONL as if it were source - code. + An empty list means no ingest should run. """ + targets: list[tuple[str, str]] = [] mempal_dir = os.environ.get("MEMPAL_DIR", "") if mempal_dir: resolved = Path(mempal_dir).expanduser().resolve() if resolved.is_dir(): - return str(resolved), "projects" + targets.append((str(resolved), "projects")) path = _validate_transcript_path(transcript_path) if path is not None and path.is_file(): - return str(path.parent), "convos" - return "", "projects" + targets.append((str(path.parent), "convos")) + return targets _MINE_PID_FILE = STATE_DIR / "mine.pid" @@ -275,36 +275,46 @@ def _spawn_mine(cmd: list) -> None: def _maybe_auto_ingest(transcript_path: str = ""): - """Run mempalace mine in background if a mine directory is available.""" - mine_dir, mode = _get_mine_dir(transcript_path) - if not mine_dir: + """Run mempalace mine in background for every available target.""" + targets = _get_mine_targets(transcript_path) + if not targets: return if _mine_already_running(): _log("Skipping auto-ingest: mine already running") return - try: - _spawn_mine([sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode]) - except OSError: - pass + for mine_dir, mode in targets: + try: + _spawn_mine([sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode]) + except OSError: + pass def _mine_sync(transcript_path: str = ""): - """Run mempalace mine synchronously (for precompact -- data must land first).""" - mine_dir, mode = _get_mine_dir(transcript_path) - if not mine_dir: + """Run mempalace mine synchronously for every target (precompact).""" + targets = _get_mine_targets(transcript_path) + if not targets: return - try: - STATE_DIR.mkdir(parents=True, exist_ok=True) - log_path = STATE_DIR / "hook.log" - with open(log_path, "a") as log_f: - subprocess.run( - [sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode], - stdout=log_f, - stderr=log_f, - timeout=60, - ) - except (OSError, subprocess.TimeoutExpired): - pass + STATE_DIR.mkdir(parents=True, exist_ok=True) + log_path = STATE_DIR / "hook.log" + for mine_dir, mode in targets: + try: + with open(log_path, "a") as log_f: + subprocess.run( + [ + sys.executable, + "-m", + "mempalace", + "mine", + mine_dir, + "--mode", + mode, + ], + stdout=log_f, + stderr=log_f, + timeout=60, + ) + except (OSError, subprocess.TimeoutExpired): + pass def _desktop_toast(body: str, title: str = "MemPalace"): diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py index 6763439..c105eae 100644 --- a/tests/test_hooks_cli.py +++ b/tests/test_hooks_cli.py @@ -12,7 +12,7 @@ from mempalace.hooks_cli import ( SAVE_INTERVAL, _count_human_messages, _extract_recent_messages, - _get_mine_dir, + _get_mine_targets, _log, _maybe_auto_ingest, _mempalace_python, @@ -494,6 +494,43 @@ def test_mine_sync_with_env_uses_projects_mode(tmp_path): assert cmd[cmd.index("--mode") + 1] == "projects" +def test_maybe_auto_ingest_with_both_set(tmp_path): + """When MEMPAL_DIR and a transcript are both set, BOTH spawns happen.""" + mempal_dir = tmp_path / "project" + mempal_dir.mkdir() + convo_dir = tmp_path / "convos" + convo_dir.mkdir() + transcript = convo_dir / "session.jsonl" + transcript.write_text("") + with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): + with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): + with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"): + with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen: + _maybe_auto_ingest(str(transcript)) + assert mock_popen.call_count == 2 + cmds = [call.args[0] for call in mock_popen.call_args_list] + modes = {cmd[cmd.index("--mode") + 1] for cmd in cmds} + assert modes == {"projects", "convos"} + + +def test_mine_sync_with_both_set(tmp_path): + """Precompact sync runs BOTH mines when MEMPAL_DIR + transcript are set.""" + mempal_dir = tmp_path / "project" + mempal_dir.mkdir() + convo_dir = tmp_path / "convos" + convo_dir.mkdir() + transcript = convo_dir / "session.jsonl" + transcript.write_text("") + with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): + with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): + with patch("mempalace.hooks_cli.subprocess.run") as mock_run: + _mine_sync(str(transcript)) + assert mock_run.call_count == 2 + cmds = [call.args[0] for call in mock_run.call_args_list] + modes = {cmd[cmd.index("--mode") + 1] for cmd in cmds} + assert modes == {"projects", "convos"} + + def test_maybe_auto_ingest_oserror(tmp_path): """OSError during subprocess spawn is silenced.""" mempal_dir = tmp_path / "project" @@ -550,70 +587,90 @@ def test_mine_already_running_corrupt_file(tmp_path): assert _mine_already_running() is False -# --- _get_mine_dir --- +# --- _get_mine_targets --- -def test_get_mine_dir_mempal_dir(tmp_path): - """MEMPAL_DIR takes priority, is expanded/resolved, and is treated as projects mode.""" +def test_get_mine_targets_mempal_dir_only(tmp_path): + """MEMPAL_DIR alone yields a single projects target, expanded/resolved.""" mempal_dir = tmp_path / "project" mempal_dir.mkdir() - transcript = tmp_path / "t.jsonl" - transcript.write_text("") with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): - result_dir, result_mode = _get_mine_dir(str(transcript)) - assert Path(result_dir).resolve() == mempal_dir.resolve() - assert result_mode == "projects" + targets = _get_mine_targets("") + assert len(targets) == 1 + assert Path(targets[0][0]).resolve() == mempal_dir.resolve() + assert targets[0][1] == "projects" -def test_get_mine_dir_mempal_dir_tilde(tmp_path): +def test_get_mine_targets_mempal_dir_tilde(tmp_path): """MEMPAL_DIR with a tilde prefix is expanded correctly.""" mempal_dir = tmp_path / "project" mempal_dir.mkdir() home = Path.home() - # Build a ~-relative path only if tmp_path is inside home try: rel = mempal_dir.relative_to(home) except ValueError: pytest.skip("tmp_path is not under home, cannot build ~-relative path") tilde_path = "~/" + str(rel) with patch.dict("os.environ", {"MEMPAL_DIR": tilde_path}): - result_dir, result_mode = _get_mine_dir("") - assert Path(result_dir).resolve() == mempal_dir.resolve() - assert result_mode == "projects" + targets = _get_mine_targets("") + assert len(targets) == 1 + assert Path(targets[0][0]).resolve() == mempal_dir.resolve() + assert targets[0][1] == "projects" -def test_get_mine_dir_transcript_fallback(tmp_path): - """Transcript fallback resolves to its parent dir in convos mode.""" +def test_get_mine_targets_transcript_only(tmp_path): + """A valid transcript JSONL alone yields a single convos target.""" transcript = tmp_path / "t.jsonl" transcript.write_text("") with patch.dict("os.environ", {}, clear=True): - result_dir, result_mode = _get_mine_dir(str(transcript)) - assert Path(result_dir).resolve() == tmp_path.resolve() - assert result_mode == "convos" + targets = _get_mine_targets(str(transcript)) + assert len(targets) == 1 + assert Path(targets[0][0]).resolve() == tmp_path.resolve() + assert targets[0][1] == "convos" -def test_get_mine_dir_transcript_path_traversal_rejected(tmp_path): - """Transcript paths with '..' components are rejected and return no dir.""" +def test_get_mine_targets_both_set(tmp_path): + """When MEMPAL_DIR and a valid transcript are both set, BOTH targets appear. + + This is the regression that motivates the additive-targets design: + users who set MEMPAL_DIR previously had their conversations silently + skipped because MEMPAL_DIR overrode the transcript path. + """ + mempal_dir = tmp_path / "project" + mempal_dir.mkdir() + convo_dir = tmp_path / "convos" + convo_dir.mkdir() + transcript = convo_dir / "session.jsonl" + transcript.write_text("") + with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): + targets = _get_mine_targets(str(transcript)) + modes = {mode for _, mode in targets} + assert modes == {"projects", "convos"} + by_mode = {mode: Path(d) for d, mode in targets} + assert by_mode["projects"].resolve() == mempal_dir.resolve() + assert by_mode["convos"].resolve() == convo_dir.resolve() + + +def test_get_mine_targets_transcript_path_traversal_rejected(tmp_path): + """Transcript paths with '..' components are rejected (no convos target).""" with patch.dict("os.environ", {}, clear=True): - result_dir, result_mode = _get_mine_dir("../../etc/passwd") - assert result_dir == "" - assert result_mode == "projects" + targets = _get_mine_targets("../../etc/passwd") + assert targets == [] -def test_get_mine_dir_transcript_non_jsonl_rejected(tmp_path): +def test_get_mine_targets_transcript_non_jsonl_rejected(tmp_path): """Transcript paths without .jsonl/.json extension are rejected.""" bad = tmp_path / "notes.txt" bad.write_text("content") with patch.dict("os.environ", {}, clear=True): - result_dir, result_mode = _get_mine_dir(str(bad)) - assert result_dir == "" - assert result_mode == "projects" + targets = _get_mine_targets(str(bad)) + assert targets == [] -def test_get_mine_dir_empty(): - """Returns empty dir when nothing is available.""" +def test_get_mine_targets_empty(): + """Returns empty list when nothing is available.""" with patch.dict("os.environ", {}, clear=True): - assert _get_mine_dir("") == ("", "projects") + assert _get_mine_targets("") == [] # --- _parse_harness_input --- diff --git a/tests/test_save_hook_mines.py b/tests/test_save_hook_mines.py index a702a42..93a51f2 100644 --- a/tests/test_save_hook_mines.py +++ b/tests/test_save_hook_mines.py @@ -16,7 +16,8 @@ class TestSaveHookAutoMines: def test_hook_mines_transcript_path(self): """The hook receives TRANSCRIPT_PATH from Claude Code. - It should use that to mine the conversation, not depend on MEMPAL_DIR.""" + It should use that to mine the conversation as --mode convos, + independently of MEMPAL_DIR (which is for project files only).""" hook_path = os.path.join( os.path.dirname(os.path.dirname(__file__)), "hooks", @@ -24,23 +25,17 @@ class TestSaveHookAutoMines: ) src = open(hook_path).read() - # The hook ALREADY receives TRANSCRIPT_PATH in the JSON input. - # It should use this to mine the current session's transcript - # regardless of whether MEMPAL_DIR is set. - # The hook must have a path that uses TRANSCRIPT_PATH to determine - # what to mine, separate from the MEMPAL_DIR path. - uses_transcript = "TRANSCRIPT_PATH" in src - has_mine = "mempalace mine" in src - # TRANSCRIPT_PATH must appear in the mining logic, not just the parse block - transcript_drives_mine = "MINE_DIR" in src and "dirname" in src and "TRANSCRIPT_PATH" in src - - assert uses_transcript and has_mine and transcript_drives_mine, ( - "Save hook only mines when MEMPAL_DIR is set (defaults to empty). " - "The hook receives TRANSCRIPT_PATH from Claude Code — it should " - "mine that file automatically so conversations are saved without " - "the user setting an env var. Currently the hook says 'saved in " - "background' but nothing actually saves." - ) + # The hook must drive the conversation mine off TRANSCRIPT_PATH, + # using `dirname` to derive the parent dir, and tagging it with + # `--mode convos` so the convo miner runs (not the projects miner). + assert "TRANSCRIPT_PATH" in src, "hook must read transcript_path" + assert "mempalace mine" in src, "hook must invoke `mempalace mine`" + assert ( + 'dirname "$TRANSCRIPT_PATH"' in src + ), "hook must mine the transcript's parent directory" + assert ( + "--mode convos" in src + ), "transcript mine must use --mode convos, not the projects miner" def test_mempal_dir_default_not_empty(self): """If MEMPAL_DIR is still used, it should have a sensible default,