diff --git a/README.md b/README.md index acbeb14..8157fca 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,10 @@ system prompt: Two Claude Code hooks save periodically and before context compression: [mempalaceofficial.com/guide/hooks](https://mempalaceofficial.com/guide/hooks.html). +For per-message recall on top of the file-level chunks the hooks produce, +run `mempalace sweep ` periodically — it stores one +verbatim drawer per user/assistant message, idempotent and resume-safe. + --- ## Requirements diff --git a/hooks/README.md b/hooks/README.md index 7794527..469d231 100644 --- a/hooks/README.md +++ b/hooks/README.md @@ -67,7 +67,7 @@ Edit `mempal_save_hook.sh` to change: - **`SAVE_INTERVAL=15`** — How many human messages between saves. Lower = more frequent saves, higher = less interruption. - **`STATE_DIR`** — Where hook state is stored (defaults to `~/.mempalace/hook_state/`) -- **`MEMPAL_DIR`** — Optional. Set to a conversations directory to auto-run `mempalace mine ` on each save trigger. Leave blank (default) to let the AI handle saving via the block reason message. +- **`MEMPAL_DIR`** — Optional **project directory** (code, notes, docs) to also mine on each save trigger, with `--mode projects`. The hook ALWAYS mines the active conversation transcript automatically with `--mode convos` — `MEMPAL_DIR` is purely additive, never an override. Leave blank if you don't want to ingest project files. - **`MEMPALACE_PYTHON`** — Optional env var. Python interpreter with mempalace + chromadb installed. Auto-detects: `MEMPALACE_PYTHON` env var → repo `venv/bin/python3` → system `python3`. Set this if your venv is in a non-standard location. ### mempalace CLI diff --git a/hooks/mempal_precompact_hook.sh b/hooks/mempal_precompact_hook.sh index a14a0d0..e811d65 100755 --- a/hooks/mempal_precompact_hook.sh +++ b/hooks/mempal_precompact_hook.sh @@ -41,17 +41,18 @@ # to save everything. After the AI saves, compaction proceeds normally. # # === MEMPALACE CLI === -# This repo uses: mempalace mine -# or: mempalace mine --mode convos -# Set MEMPAL_DIR below if you want the hook to auto-ingest before compaction. -# Leave blank to rely on the AI's own save instructions. +# The hook ALWAYS mines the active conversation transcript synchronously +# before compaction (via `mempalace mine --mode convos`). +# MEMPAL_DIR is an *additional*, optional target for project files — it +# does not replace the conversation mine. STATE_DIR="$HOME/.mempalace/hook_state" mkdir -p "$STATE_DIR" -# Optional: set to the directory you want auto-ingested before compaction. -# Example: MEMPAL_DIR="$HOME/conversations" -# Leave empty to skip auto-ingest (AI handles saving via the block reason). +# Optional: project directory (code / notes / docs) to also mine before +# compaction. Mined with `--mode projects`. The conversation transcript +# is always mined regardless — this is purely additive. +# Example: MEMPAL_DIR="$HOME/projects/my_app" MEMPAL_DIR="" # Resolve the Python interpreter. Same contract as mempal_save_hook.sh: @@ -64,15 +65,57 @@ fi # Read JSON input from stdin INPUT=$(cat) -SESSION_ID=$(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c "import sys,json; print(json.load(sys.stdin).get('session_id','unknown'))" 2>/dev/null) +# Parse session_id and transcript_path in one call. Sanitize both, then +# read sanitized values from one-per-line stdout into shell variables — +# avoids ``eval`` on generated code (#1231 review). Same contract as +# mempal_save_hook.sh. +mapfile -t _mempal_parsed < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c " +import sys, json, re +data = json.load(sys.stdin) +sid = data.get('session_id', 'unknown') +tp = data.get('transcript_path', '') +safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s)) +print(safe(sid)) +print(safe(tp)) +" 2>/dev/null) +SESSION_ID="${_mempal_parsed[0]:-unknown}" +TRANSCRIPT_PATH="${_mempal_parsed[1]:-}" + +# Expand ~ in path +TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}" + +# Validate that TRANSCRIPT_PATH looks like a transcript file. Mirrors +# mempalace.hooks_cli._validate_transcript_path so the shell hook +# rejects the same shapes the Python hook rejects (#1231 review). +is_valid_transcript_path() { + local path="$1" + [ -n "$path" ] || return 1 + case "$path" in + *.json|*.jsonl) ;; + *) return 1 ;; + esac + case "/$path/" in + */../*) return 1 ;; + esac + return 0 +} echo "[$(date '+%H:%M:%S')] PRE-COMPACT triggered for session $SESSION_ID" >> "$STATE_DIR/hook.log" -# Optional: run mempalace ingest synchronously so memories land before compaction +# Run ingest synchronously so memories land before compaction. Two +# independent targets — both run if both are set: +# 1. TRANSCRIPT_PATH (from Claude Code) → parent dir, --mode convos +# 2. MEMPAL_DIR → --mode projects +if is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then + mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \ + >> "$STATE_DIR/hook.log" 2>&1 +elif [ -n "$TRANSCRIPT_PATH" ]; then + echo "[$(date '+%H:%M:%S')] Skipping invalid transcript path: $TRANSCRIPT_PATH" \ + >> "$STATE_DIR/hook.log" +fi if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then - SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - REPO_DIR="$(dirname "$SCRIPT_DIR")" - mempalace mine "$MEMPAL_DIR" >> "$STATE_DIR/hook.log" 2>&1 + mempalace mine "$MEMPAL_DIR" --mode projects \ + >> "$STATE_DIR/hook.log" 2>&1 fi # Silent: return empty JSON to not block. "decision": "allow" is invalid — diff --git a/hooks/mempal_save_hook.sh b/hooks/mempal_save_hook.sh index 228b41b..5efd157 100755 --- a/hooks/mempal_save_hook.sh +++ b/hooks/mempal_save_hook.sh @@ -45,10 +45,10 @@ # stop_hook_active=true so we let it through. No infinite loop. # # === MEMPALACE CLI === -# This repo uses: mempalace mine -# or: mempalace mine --mode convos -# Set MEMPAL_DIR below if you want the hook to auto-ingest after blocking. -# Leave blank to rely on the AI's own save instructions. +# The hook ALWAYS mines the active conversation transcript automatically +# (via `mempalace mine --mode convos`). MEMPAL_DIR is an +# *additional*, optional target for project files — it does not replace +# the conversation mine. # # === CONFIGURATION === @@ -56,9 +56,10 @@ SAVE_INTERVAL=15 # Save every N human messages (adjust to taste) STATE_DIR="$HOME/.mempalace/hook_state" mkdir -p "$STATE_DIR" -# Optional: set to the directory you want auto-ingested on each save trigger. -# Example: MEMPAL_DIR="$HOME/conversations" -# Leave empty to skip auto-ingest (AI handles saving via the block reason). +# Optional: project directory (code / notes / docs) to also mine each +# save trigger. Mined with `--mode projects`. The conversation transcript +# is always mined regardless — this is purely additive. +# Example: MEMPAL_DIR="$HOME/projects/my_app" MEMPAL_DIR="" # Resolve the Python interpreter the hook should use. @@ -82,9 +83,11 @@ fi INPUT=$(cat) # Parse all fields in a single Python call (3x faster than separate invocations) -# SECURITY: All values are sanitized before being interpolated into shell assignments. -# stop_hook_active is coerced to a strict True/False to prevent command injection via eval. -eval $(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c " +# without invoking ``eval`` on generated code: Python prints one sanitized +# value per line, the shell reads them via ``mapfile`` and does plain +# variable assignment — same data, smaller blast radius if the sanitizer +# is ever bypassed (#1231 review). +mapfile -t _mempal_parsed < <(echo "$INPUT" | "$MEMPAL_PYTHON_BIN" -c " import sys, json, re data = json.load(sys.stdin) sid = data.get('session_id', 'unknown') @@ -94,14 +97,36 @@ tp = data.get('transcript_path', '') safe = lambda s: re.sub(r'[^a-zA-Z0-9_/.\-~]', '', str(s)) # Coerce stop_hook_active to strict boolean string sha = 'True' if sha_raw is True or str(sha_raw).lower() in ('true', '1', 'yes') else 'False' -print(f'SESSION_ID=\"{safe(sid)}\"') -print(f'STOP_HOOK_ACTIVE=\"{sha}\"') -print(f'TRANSCRIPT_PATH=\"{safe(tp)}\"') +print(safe(sid)) +print(sha) +print(safe(tp)) " 2>/dev/null) +SESSION_ID="${_mempal_parsed[0]:-unknown}" +STOP_HOOK_ACTIVE="${_mempal_parsed[1]:-False}" +TRANSCRIPT_PATH="${_mempal_parsed[2]:-}" # Expand ~ in path TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}" +# Validate that TRANSCRIPT_PATH looks like a transcript file: +# - non-empty +# - .jsonl or .json suffix +# - no traversal segments (.. components) +# Mirrors mempalace.hooks_cli._validate_transcript_path so the shell hook +# rejects the same shapes the Python hook rejects (#1231 review). +is_valid_transcript_path() { + local path="$1" + [ -n "$path" ] || return 1 + case "$path" in + *.json|*.jsonl) ;; + *) return 1 ;; + esac + case "/$path/" in + */../*) return 1 ;; + esac + return 0 +} + # If we're already in a save cycle, let the AI stop normally # This is the infinite-loop prevention: block once → AI saves → tries to stop again → we let it through if [ "$STOP_HOOK_ACTIVE" = "True" ] || [ "$STOP_HOOK_ACTIVE" = "true" ]; then @@ -157,19 +182,23 @@ if [ "$SINCE_LAST" -ge "$SAVE_INTERVAL" ] && [ "$EXCHANGE_COUNT" -gt 0 ]; then echo "[$(date '+%H:%M:%S')] TRIGGERING SAVE at exchange $EXCHANGE_COUNT" >> "$STATE_DIR/hook.log" - # Auto-mine the transcript. Two paths: - # 1. TRANSCRIPT_PATH (from Claude Code) — mine the directory it lives in - # 2. MEMPAL_DIR (user-configured) — mine that directory - # At least one should work. If neither is set, nothing mines. - MINE_DIR="" - if [ -n "$TRANSCRIPT_PATH" ] && [ -f "$TRANSCRIPT_PATH" ]; then - MINE_DIR="$(dirname "$TRANSCRIPT_PATH")" + # Auto-mine. Two independent targets — both run if both are set: + # 1. TRANSCRIPT_PATH (from Claude Code) → parent dir, --mode convos + # (Claude Code session JSONL — must use the convo miner) + # 2. MEMPAL_DIR (user-configured project) → --mode projects + # (code, notes, docs) + # MEMPAL_DIR is *additive*, not an override: a user with MEMPAL_DIR + # pointed at their project still gets the active conversation mined. + if is_valid_transcript_path "$TRANSCRIPT_PATH" && [ -f "$TRANSCRIPT_PATH" ]; then + mempalace mine "$(dirname "$TRANSCRIPT_PATH")" --mode convos \ + >> "$STATE_DIR/hook.log" 2>&1 & + elif [ -n "$TRANSCRIPT_PATH" ]; then + echo "[$(date '+%H:%M:%S')] Skipping invalid transcript path: $TRANSCRIPT_PATH" \ + >> "$STATE_DIR/hook.log" fi if [ -n "$MEMPAL_DIR" ] && [ -d "$MEMPAL_DIR" ]; then - MINE_DIR="$MEMPAL_DIR" - fi - if [ -n "$MINE_DIR" ]; then - mempalace mine "$MINE_DIR" >> "$STATE_DIR/hook.log" 2>&1 & + mempalace mine "$MEMPAL_DIR" --mode projects \ + >> "$STATE_DIR/hook.log" 2>&1 & fi # MEMPAL_VERBOSE toggle: diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py index 49b77e2..d4f8317 100644 --- a/mempalace/hooks_cli.py +++ b/mempalace/hooks_cli.py @@ -197,27 +197,23 @@ def _output(data: dict): sys.stdout.buffer.flush() -def _get_mine_dir(transcript_path: str = "") -> tuple[str, str]: - """Determine directory to mine and the miner mode to use. +def _get_mine_targets() -> list[tuple[str, str]]: + """Return the list of ``(dir, mode)`` targets for auto-ingest. - Returns ``(dir, mode)`` where ``mode`` is ``"projects"`` or ``"convos"``. - Empty ``dir`` means no ingest should run. + MEMPAL_DIR (when set and resolvable) contributes a ``"projects"`` + target. Transcript ingestion is handled separately by + ``_ingest_transcript`` — emitting it here too would double-mine the + same JSONL into a different wing on every hook fire (#1231 review). - MEMPAL_DIR is treated as a project directory ("projects" mode). The - transcript-path fallback resolves to the parent of a Claude Code - session JSONL, which must be mined with the conversation miner — - running the projects miner there ingests JSONL as if it were source - code. + An empty list means no MEMPAL_DIR ingest should run. """ + targets: list[tuple[str, str]] = [] mempal_dir = os.environ.get("MEMPAL_DIR", "") if mempal_dir: resolved = Path(mempal_dir).expanduser().resolve() if resolved.is_dir(): - return str(resolved), "projects" - path = _validate_transcript_path(transcript_path) - if path is not None and path.is_file(): - return str(path.parent), "convos" - return "", "projects" + targets.append((str(resolved), "projects")) + return targets _MINE_PID_FILE = STATE_DIR / "mine.pid" @@ -274,37 +270,58 @@ def _spawn_mine(cmd: list) -> None: _MINE_PID_FILE.write_text(str(proc.pid)) -def _maybe_auto_ingest(transcript_path: str = ""): - """Run mempalace mine in background if a mine directory is available.""" - mine_dir, mode = _get_mine_dir(transcript_path) - if not mine_dir: +def _maybe_auto_ingest(): + """Background-mine MEMPAL_DIR (project files) if set. + + Transcript convos are ingested separately via ``_ingest_transcript`` + in the hook handlers — this function does not handle them, to avoid + asymmetric interpreter handling and PID-file overwrite when both + targets fire from a single hook call (#1231 review). + """ + targets = _get_mine_targets() + if not targets: return if _mine_already_running(): _log("Skipping auto-ingest: mine already running") return - try: - _spawn_mine([sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode]) - except OSError: - pass + for mine_dir, mode in targets: + try: + _spawn_mine([_mempalace_python(), "-m", "mempalace", "mine", mine_dir, "--mode", mode]) + except OSError: + pass -def _mine_sync(transcript_path: str = ""): - """Run mempalace mine synchronously (for precompact -- data must land first).""" - mine_dir, mode = _get_mine_dir(transcript_path) - if not mine_dir: +def _mine_sync(): + """Synchronously mine MEMPAL_DIR (precompact path). + + Transcript convos are ingested separately via ``_ingest_transcript`` + in ``hook_precompact`` — keeping them out of this function avoids + timeout stacking against the harness 30s ceiling (#1231 review). + """ + targets = _get_mine_targets() + if not targets: return - try: - STATE_DIR.mkdir(parents=True, exist_ok=True) - log_path = STATE_DIR / "hook.log" - with open(log_path, "a") as log_f: - subprocess.run( - [sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode], - stdout=log_f, - stderr=log_f, - timeout=60, - ) - except (OSError, subprocess.TimeoutExpired): - pass + STATE_DIR.mkdir(parents=True, exist_ok=True) + log_path = STATE_DIR / "hook.log" + for mine_dir, mode in targets: + try: + with open(log_path, "a") as log_f: + subprocess.run( + [ + _mempalace_python(), + "-m", + "mempalace", + "mine", + mine_dir, + "--mode", + mode, + ], + stdout=log_f, + stderr=log_f, + timeout=60, + ) + except (OSError, subprocess.TimeoutExpired): + pass def _desktop_toast(body: str, title: str = "MemPalace"): @@ -603,7 +620,7 @@ def hook_stop(data: dict, harness: str): transcript_path, session_id, wing=project_wing, toast=toast ) _ingest_transcript(transcript_path) - _maybe_auto_ingest(transcript_path) + _maybe_auto_ingest() # Only advance save marker after successful save count = result.get("count", 0) if count > 0: @@ -633,7 +650,7 @@ def hook_stop(data: dict, harness: str): pass if transcript_path: _ingest_transcript(transcript_path) - _maybe_auto_ingest(transcript_path) + _maybe_auto_ingest() reason = STOP_BLOCK_REASON + f" Write diary entry to wing={project_wing}." _output({"decision": "block", "reason": reason}) else: @@ -666,8 +683,10 @@ def hook_precompact(data: dict, harness: str): if transcript_path: _ingest_transcript(transcript_path) - # Mine synchronously so data lands before compaction proceeds - _mine_sync(transcript_path) + # Mine MEMPAL_DIR synchronously so project data lands before + # compaction proceeds. Transcript convos were already kicked off + # above via _ingest_transcript. + _mine_sync() _output({}) diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py index 6763439..1ceb530 100644 --- a/tests/test_hooks_cli.py +++ b/tests/test_hooks_cli.py @@ -12,7 +12,7 @@ from mempalace.hooks_cli import ( SAVE_INTERVAL, _count_human_messages, _extract_recent_messages, - _get_mine_dir, + _get_mine_targets, _log, _maybe_auto_ingest, _mempalace_python, @@ -450,35 +450,26 @@ def test_maybe_auto_ingest_with_env(tmp_path): assert cmd[cmd.index("--mode") + 1] == "projects" -def test_maybe_auto_ingest_with_transcript(tmp_path): - """Transcript fallback spawns mine in convos mode against the JSONL parent.""" - transcript = tmp_path / "t.jsonl" - transcript.write_text("") - with patch.dict("os.environ", {}, clear=True): +def test_maybe_auto_ingest_uses_mempalace_python(tmp_path): + """Spawned mine command uses _mempalace_python(), not bare sys.executable. + + Hook subprocesses inherit the harness PATH which on GUI-launched + Claude Code may resolve to a system Python without chromadb. The + interpreter used here must be the same one the hook itself runs + under (typically the venv that owns mempalace). + """ + mempal_dir = tmp_path / "project" + mempal_dir.mkdir() + with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"): - with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen: - _maybe_auto_ingest(str(transcript)) - mock_popen.assert_called_once() - cmd = mock_popen.call_args[0][0] - assert "mine" in cmd - assert str(tmp_path.resolve()) in cmd - assert cmd[cmd.index("--mode") + 1] == "convos" - - -def test_mine_sync_with_transcript_uses_convos_mode(tmp_path): - """Precompact sync path also picks convos mode for JSONL transcripts.""" - transcript = tmp_path / "t.jsonl" - transcript.write_text("") - with patch.dict("os.environ", {}, clear=True): - with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): - with patch("mempalace.hooks_cli.subprocess.run") as mock_run: - _mine_sync(str(transcript)) - mock_run.assert_called_once() - cmd = mock_run.call_args[0][0] - assert "mine" in cmd - assert str(tmp_path.resolve()) in cmd - assert cmd[cmd.index("--mode") + 1] == "convos" + with patch( + "mempalace.hooks_cli._mempalace_python", return_value="/fake/venv/python" + ): + with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen: + _maybe_auto_ingest() + cmd = mock_popen.call_args[0][0] + assert cmd[0] == "/fake/venv/python" def test_mine_sync_with_env_uses_projects_mode(tmp_path): @@ -494,6 +485,57 @@ def test_mine_sync_with_env_uses_projects_mode(tmp_path): assert cmd[cmd.index("--mode") + 1] == "projects" +def test_mine_sync_uses_mempalace_python(tmp_path): + """Sync mine command uses _mempalace_python(), not bare sys.executable.""" + mempal_dir = tmp_path / "project" + mempal_dir.mkdir() + with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): + with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): + with patch("mempalace.hooks_cli._mempalace_python", return_value="/fake/venv/python"): + with patch("mempalace.hooks_cli.subprocess.run") as mock_run: + _mine_sync() + cmd = mock_run.call_args[0][0] + assert cmd[0] == "/fake/venv/python" + + +def test_maybe_auto_ingest_ignores_transcript_arg_path(tmp_path): + """_maybe_auto_ingest does NOT mine the transcript directory. + + Transcript convos are handled by _ingest_transcript (called separately + in hook handlers). _maybe_auto_ingest only handles MEMPAL_DIR — even + when invoked in a context where a transcript is also being processed, + no second spawn for the transcript dir should appear here. + """ + convo_dir = tmp_path / "convos" + convo_dir.mkdir() + transcript = convo_dir / "session.jsonl" + transcript.write_text("") + with patch.dict("os.environ", {}, clear=True): + with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): + with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"): + with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen: + _maybe_auto_ingest() + mock_popen.assert_not_called() + + +def test_mine_sync_ignores_transcript(tmp_path): + """_mine_sync does not run a convos mine for the transcript dir. + + The precompact transcript ingest is the responsibility of + _ingest_transcript; routing it through _mine_sync would stack a + second 60s timeout against the harness 30s ceiling. + """ + convo_dir = tmp_path / "convos" + convo_dir.mkdir() + transcript = convo_dir / "session.jsonl" + transcript.write_text("") + with patch.dict("os.environ", {}, clear=True): + with patch("mempalace.hooks_cli.STATE_DIR", tmp_path): + with patch("mempalace.hooks_cli.subprocess.run") as mock_run: + _mine_sync() + mock_run.assert_not_called() + + def test_maybe_auto_ingest_oserror(tmp_path): """OSError during subprocess spawn is silenced.""" mempal_dir = tmp_path / "project" @@ -550,70 +592,78 @@ def test_mine_already_running_corrupt_file(tmp_path): assert _mine_already_running() is False -# --- _get_mine_dir --- +# --- _get_mine_targets --- -def test_get_mine_dir_mempal_dir(tmp_path): - """MEMPAL_DIR takes priority, is expanded/resolved, and is treated as projects mode.""" +def test_get_mine_targets_mempal_dir_only(tmp_path): + """MEMPAL_DIR alone yields a single projects target, expanded/resolved.""" mempal_dir = tmp_path / "project" mempal_dir.mkdir() - transcript = tmp_path / "t.jsonl" - transcript.write_text("") with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): - result_dir, result_mode = _get_mine_dir(str(transcript)) - assert Path(result_dir).resolve() == mempal_dir.resolve() - assert result_mode == "projects" + targets = _get_mine_targets() + assert len(targets) == 1 + assert Path(targets[0][0]).resolve() == mempal_dir.resolve() + assert targets[0][1] == "projects" -def test_get_mine_dir_mempal_dir_tilde(tmp_path): +def test_get_mine_targets_mempal_dir_tilde(tmp_path): """MEMPAL_DIR with a tilde prefix is expanded correctly.""" mempal_dir = tmp_path / "project" mempal_dir.mkdir() home = Path.home() - # Build a ~-relative path only if tmp_path is inside home try: rel = mempal_dir.relative_to(home) except ValueError: pytest.skip("tmp_path is not under home, cannot build ~-relative path") tilde_path = "~/" + str(rel) with patch.dict("os.environ", {"MEMPAL_DIR": tilde_path}): - result_dir, result_mode = _get_mine_dir("") - assert Path(result_dir).resolve() == mempal_dir.resolve() - assert result_mode == "projects" + targets = _get_mine_targets() + assert len(targets) == 1 + assert Path(targets[0][0]).resolve() == mempal_dir.resolve() + assert targets[0][1] == "projects" -def test_get_mine_dir_transcript_fallback(tmp_path): - """Transcript fallback resolves to its parent dir in convos mode.""" +def test_get_mine_targets_no_transcript_target(tmp_path): + """_get_mine_targets does not emit a convos target for the transcript path. + + Transcript ingestion is owned by _ingest_transcript; emitting it + here too would double-mine the same JSONL into a different wing on + every hook fire (#1231 review). + """ transcript = tmp_path / "t.jsonl" transcript.write_text("") with patch.dict("os.environ", {}, clear=True): - result_dir, result_mode = _get_mine_dir(str(transcript)) - assert Path(result_dir).resolve() == tmp_path.resolve() - assert result_mode == "convos" + targets = _get_mine_targets() + assert targets == [] -def test_get_mine_dir_transcript_path_traversal_rejected(tmp_path): - """Transcript paths with '..' components are rejected and return no dir.""" +def test_get_mine_targets_only_returns_mempal_dir(tmp_path): + """When MEMPAL_DIR is set, exactly one projects target — never a convos target.""" + mempal_dir = tmp_path / "project" + mempal_dir.mkdir() + with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}): + targets = _get_mine_targets() + assert len(targets) == 1 + assert targets[0][1] == "projects" + + +def test_validate_transcript_path_traversal_rejected_jsonl(tmp_path): + """Path traversal is rejected even when the path has a .jsonl suffix. + + The pre-fix test used "../../etc/passwd" which lacks an extension and + so was rejected by the suffix gate before the traversal check ever + fired (Copilot review on #1231). Use a .jsonl path with `..` + segments to exercise the traversal guard specifically. + """ + assert _validate_transcript_path("../t.jsonl") is None + assert _validate_transcript_path("a/../b.jsonl") is None + assert _validate_transcript_path("/tmp/../etc/t.jsonl") is None + + +def test_get_mine_targets_empty(): + """Returns empty list when MEMPAL_DIR is unset or invalid.""" with patch.dict("os.environ", {}, clear=True): - result_dir, result_mode = _get_mine_dir("../../etc/passwd") - assert result_dir == "" - assert result_mode == "projects" - - -def test_get_mine_dir_transcript_non_jsonl_rejected(tmp_path): - """Transcript paths without .jsonl/.json extension are rejected.""" - bad = tmp_path / "notes.txt" - bad.write_text("content") - with patch.dict("os.environ", {}, clear=True): - result_dir, result_mode = _get_mine_dir(str(bad)) - assert result_dir == "" - assert result_mode == "projects" - - -def test_get_mine_dir_empty(): - """Returns empty dir when nothing is available.""" - with patch.dict("os.environ", {}, clear=True): - assert _get_mine_dir("") == ("", "projects") + assert _get_mine_targets() == [] # --- _parse_harness_input --- @@ -733,22 +783,33 @@ def test_precompact_with_timeout(tmp_path): def test_precompact_mines_transcript_dir(tmp_path, monkeypatch): - """Precompact mines transcript directory when no MEMPAL_DIR.""" + """Precompact ingests the active transcript via _ingest_transcript. + + With no MEMPAL_DIR, _mine_sync is a no-op; the transcript ingest is + the only mining that should fire, and it goes through Popen + (background) inside _ingest_transcript. Pre-#1231-review this test + asserted against subprocess.run, which corresponded to the + duplicate-mine path that has now been removed. + """ transcript = tmp_path / "t.jsonl" - transcript.write_text("") + # _ingest_transcript skips files smaller than 100 bytes, so pad it. + transcript.write_text("x" * 200) monkeypatch.delenv("MEMPAL_DIR", raising=False) - with patch("mempalace.hooks_cli.subprocess.run") as mock_run: - result = _capture_hook_output( - hook_precompact, - {"session_id": "test", "transcript_path": str(transcript)}, - state_dir=tmp_path, - ) + with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen: + with patch("mempalace.hooks_cli.subprocess.run") as mock_run: + result = _capture_hook_output( + hook_precompact, + {"session_id": "test", "transcript_path": str(transcript)}, + state_dir=tmp_path, + ) assert result == {} - mock_run.assert_called_once() - # Mine dir is the transcript's parent (resolved) and mode is convos for JSONL. - call_args = mock_run.call_args[0][0] - assert str(tmp_path.resolve()) in call_args - assert call_args[call_args.index("--mode") + 1] == "convos" + mock_run.assert_not_called() + mock_popen.assert_called_once() + cmd = mock_popen.call_args[0][0] + # Mines the transcript's parent dir as convos, into wing "sessions". + assert str(tmp_path) in cmd + assert cmd[cmd.index("--mode") + 1] == "convos" + assert cmd[cmd.index("--wing") + 1] == "sessions" # --- run_hook --- diff --git a/tests/test_save_hook_mines.py b/tests/test_save_hook_mines.py index a702a42..e11234a 100644 --- a/tests/test_save_hook_mines.py +++ b/tests/test_save_hook_mines.py @@ -9,6 +9,9 @@ Written BEFORE the fix. """ import os +import sys + +import pytest class TestSaveHookAutoMines: @@ -16,7 +19,8 @@ class TestSaveHookAutoMines: def test_hook_mines_transcript_path(self): """The hook receives TRANSCRIPT_PATH from Claude Code. - It should use that to mine the conversation, not depend on MEMPAL_DIR.""" + It should use that to mine the conversation as --mode convos, + independently of MEMPAL_DIR (which is for project files only).""" hook_path = os.path.join( os.path.dirname(os.path.dirname(__file__)), "hooks", @@ -24,23 +28,17 @@ class TestSaveHookAutoMines: ) src = open(hook_path).read() - # The hook ALREADY receives TRANSCRIPT_PATH in the JSON input. - # It should use this to mine the current session's transcript - # regardless of whether MEMPAL_DIR is set. - # The hook must have a path that uses TRANSCRIPT_PATH to determine - # what to mine, separate from the MEMPAL_DIR path. - uses_transcript = "TRANSCRIPT_PATH" in src - has_mine = "mempalace mine" in src - # TRANSCRIPT_PATH must appear in the mining logic, not just the parse block - transcript_drives_mine = "MINE_DIR" in src and "dirname" in src and "TRANSCRIPT_PATH" in src - - assert uses_transcript and has_mine and transcript_drives_mine, ( - "Save hook only mines when MEMPAL_DIR is set (defaults to empty). " - "The hook receives TRANSCRIPT_PATH from Claude Code — it should " - "mine that file automatically so conversations are saved without " - "the user setting an env var. Currently the hook says 'saved in " - "background' but nothing actually saves." - ) + # The hook must drive the conversation mine off TRANSCRIPT_PATH, + # using `dirname` to derive the parent dir, and tagging it with + # `--mode convos` so the convo miner runs (not the projects miner). + assert "TRANSCRIPT_PATH" in src, "hook must read transcript_path" + assert "mempalace mine" in src, "hook must invoke `mempalace mine`" + assert ( + 'dirname "$TRANSCRIPT_PATH"' in src + ), "hook must mine the transcript's parent directory" + assert ( + "--mode convos" in src + ), "transcript mine must use --mode convos, not the projects miner" def test_mempal_dir_default_not_empty(self): """If MEMPAL_DIR is still used, it should have a sensible default, @@ -66,3 +64,69 @@ class TestSaveHookAutoMines: 'MEMPAL_DIR defaults to "" which silently disables mining. ' "Either set a default path or add transcript-based mining." ) + + +class TestShellHookTranscriptValidation: + """Both shell hooks must validate transcript paths before mining them. + + Mirrors mempalace.hooks_cli._validate_transcript_path so unsafe paths + (no extension, traversal segments) are rejected at the shell layer + too — added in #1231 review (Copilot #7, #8). + """ + + @staticmethod + def _hook_src(name: str) -> str: + path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "hooks", name) + return open(path).read() + + @staticmethod + def _strip_comments(src: str) -> str: + return "\n".join(line for line in src.splitlines() if not line.lstrip().startswith("#")) + + def test_save_hook_defines_and_uses_validator(self): + src = self._strip_comments(self._hook_src("mempal_save_hook.sh")) + assert "is_valid_transcript_path() {" in src, "validator function must be defined" + assert ( + 'is_valid_transcript_path "$TRANSCRIPT_PATH"' in src + ), "validator must be invoked against TRANSCRIPT_PATH before mining" + + def test_precompact_hook_defines_and_uses_validator(self): + src = self._strip_comments(self._hook_src("mempal_precompact_hook.sh")) + assert "is_valid_transcript_path() {" in src, "validator function must be defined" + assert ( + 'is_valid_transcript_path "$TRANSCRIPT_PATH"' in src + ), "validator must be invoked against TRANSCRIPT_PATH before mining" + + @pytest.mark.skipif( + sys.platform == "win32", + reason="shell hooks are POSIX-only; Windows CI bash maps to wsl.exe with no distro", + ) + def test_validators_run_via_bash(self, tmp_path): + """Source the validator out of each hook and exercise it directly.""" + import subprocess + + for name in ("mempal_save_hook.sh", "mempal_precompact_hook.sh"): + src = self._hook_src(name) + # Extract just the function definition (first occurrence). + start = src.index("is_valid_transcript_path() {") + end = src.index("\n}\n", start) + 2 + func_src = src[start:end] + script = tmp_path / "v.sh" + script.write_text( + f"{func_src}\n" 'is_valid_transcript_path "$1" && echo OK || echo NO\n' + ) + + def run(arg: str) -> str: + return subprocess.run( + ["bash", str(script), arg], + capture_output=True, + text=True, + check=False, + ).stdout.strip() + + assert run("/tmp/sessions/abc.jsonl") == "OK" + assert run("/tmp/sessions/abc.json") == "OK" + assert run("") == "NO" + assert run("/tmp/notes.txt") == "NO" + assert run("../etc/passwd.jsonl") == "NO" + assert run("/tmp/../etc/t.jsonl") == "NO"