From 1e3e89a78fe7e0ba15c3b8806c25dcbd19c8491f Mon Sep 17 00:00:00 2001
From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com>
Date: Sun, 26 Apr 2026 23:25:12 -0300
Subject: [PATCH 1/2] fix(hooks): pass --mode convos when mining a Claude Code
transcript dir
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The Stop and PreCompact hooks spawn `mempalace mine <dir>` with no
`--mode` flag, which defaults to `projects` in cli.py. When MEMPAL_DIR
is unset or is not a directory, _get_mine_dir falls back to the parent
of the transcript JSONL — and miner.py's READABLE_EXTENSIONS includes
`.jsonl`, so the projects miner happily ingests Claude Code session
JSONL as if it were source code rather than conversation data.
Make _get_mine_dir return (dir, mode): MEMPAL_DIR keeps `projects`,
the JSONL fallback yields `convos`. Both _maybe_auto_ingest and
_mine_sync now thread the mode into the spawned command.
---
mempalace/hooks_cli.py | 28 +++++++++++++-------
tests/test_hooks_cli.py | 58 ++++++++++++++++++++++++++++++++++-------
2 files changed, 67 insertions(+), 19 deletions(-)
diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py
index 01eca3f..e7b3981 100644
--- a/mempalace/hooks_cli.py
+++ b/mempalace/hooks_cli.py
@@ -197,16 +197,26 @@ def _output(data: dict):
sys.stdout.buffer.flush()
-def _get_mine_dir(transcript_path: str = "") -> str:
- """Determine directory to mine from MEMPAL_DIR or transcript path."""
+def _get_mine_dir(transcript_path: str = "") -> tuple[str, str]:
+ """Determine directory to mine and the miner mode to use.
+
+ Returns ``(dir, mode)`` where ``mode`` is ``"projects"`` or ``"convos"``.
+ Empty ``dir`` means no ingest should run.
+
+ MEMPAL_DIR is treated as a project directory ("projects" mode). The
+ transcript-path fallback resolves to the parent of a Claude Code
+ session JSONL, which must be mined with the conversation miner —
+ running the projects miner there ingests JSONL as if it were source
+ code.
+ """
mempal_dir = os.environ.get("MEMPAL_DIR", "")
if mempal_dir and os.path.isdir(mempal_dir):
- return mempal_dir
+ return mempal_dir, "projects"
if transcript_path:
path = Path(transcript_path).expanduser()
if path.is_file():
- return str(path.parent)
- return ""
+ return str(path.parent), "convos"
+ return "", "projects"
_MINE_PID_FILE = STATE_DIR / "mine.pid"
@@ -265,21 +275,21 @@ def _spawn_mine(cmd: list) -> None:
def _maybe_auto_ingest(transcript_path: str = ""):
"""Run mempalace mine in background if a mine directory is available."""
- mine_dir = _get_mine_dir(transcript_path)
+ mine_dir, mode = _get_mine_dir(transcript_path)
if not mine_dir:
return
if _mine_already_running():
_log("Skipping auto-ingest: mine already running")
return
try:
- _spawn_mine([sys.executable, "-m", "mempalace", "mine", mine_dir])
+ _spawn_mine([sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode])
except OSError:
pass
def _mine_sync(transcript_path: str = ""):
"""Run mempalace mine synchronously (for precompact -- data must land first)."""
- mine_dir = _get_mine_dir(transcript_path)
+ mine_dir, mode = _get_mine_dir(transcript_path)
if not mine_dir:
return
try:
@@ -287,7 +297,7 @@ def _mine_sync(transcript_path: str = ""):
log_path = STATE_DIR / "hook.log"
with open(log_path, "a") as log_f:
subprocess.run(
- [sys.executable, "-m", "mempalace", "mine", mine_dir],
+ [sys.executable, "-m", "mempalace", "mine", mine_dir, "--mode", mode],
stdout=log_f,
stderr=log_f,
timeout=60,
diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py
index c9a0022..7a19dda 100644
--- a/tests/test_hooks_cli.py
+++ b/tests/test_hooks_cli.py
@@ -17,6 +17,7 @@ from mempalace.hooks_cli import (
_maybe_auto_ingest,
_mempalace_python,
_mine_already_running,
+ _mine_sync,
_parse_harness_input,
_sanitize_session_id,
_validate_transcript_path,
@@ -434,7 +435,7 @@ def test_maybe_auto_ingest_no_env(tmp_path):
def test_maybe_auto_ingest_with_env(tmp_path):
- """With MEMPAL_DIR set to a valid directory, spawns subprocess."""
+ """With MEMPAL_DIR set, spawns mine in projects mode against that dir."""
mempal_dir = tmp_path / "project"
mempal_dir.mkdir()
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
@@ -443,10 +444,14 @@ def test_maybe_auto_ingest_with_env(tmp_path):
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
_maybe_auto_ingest()
mock_popen.assert_called_once()
+ cmd = mock_popen.call_args[0][0]
+ assert "mine" in cmd
+ assert str(mempal_dir) in cmd
+ assert cmd[cmd.index("--mode") + 1] == "projects"
def test_maybe_auto_ingest_with_transcript(tmp_path):
- """Falls back to transcript directory when MEMPAL_DIR is not set."""
+ """Transcript fallback spawns mine in convos mode against the JSONL parent."""
transcript = tmp_path / "t.jsonl"
transcript.write_text("")
with patch.dict("os.environ", {}, clear=True):
@@ -455,6 +460,38 @@ def test_maybe_auto_ingest_with_transcript(tmp_path):
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
_maybe_auto_ingest(str(transcript))
mock_popen.assert_called_once()
+ cmd = mock_popen.call_args[0][0]
+ assert "mine" in cmd
+ assert str(tmp_path) in cmd
+ assert cmd[cmd.index("--mode") + 1] == "convos"
+
+
+def test_mine_sync_with_transcript_uses_convos_mode(tmp_path):
+ """Precompact sync path also picks convos mode for JSONL transcripts."""
+ transcript = tmp_path / "t.jsonl"
+ transcript.write_text("")
+ with patch.dict("os.environ", {}, clear=True):
+ with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
+ with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
+ _mine_sync(str(transcript))
+ mock_run.assert_called_once()
+ cmd = mock_run.call_args[0][0]
+ assert "mine" in cmd
+ assert str(tmp_path) in cmd
+ assert cmd[cmd.index("--mode") + 1] == "convos"
+
+
+def test_mine_sync_with_env_uses_projects_mode(tmp_path):
+ """Precompact sync path uses projects mode when MEMPAL_DIR is set."""
+ mempal_dir = tmp_path / "project"
+ mempal_dir.mkdir()
+ with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
+ with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
+ with patch("mempalace.hooks_cli.subprocess.run") as mock_run:
+ _mine_sync()
+ mock_run.assert_called_once()
+ cmd = mock_run.call_args[0][0]
+ assert cmd[cmd.index("--mode") + 1] == "projects"
def test_maybe_auto_ingest_oserror(tmp_path):
@@ -517,27 +554,27 @@ def test_mine_already_running_corrupt_file(tmp_path):
def test_get_mine_dir_mempal_dir(tmp_path):
- """MEMPAL_DIR takes priority over transcript_path."""
+ """MEMPAL_DIR takes priority and is treated as projects mode."""
mempal_dir = tmp_path / "project"
mempal_dir.mkdir()
transcript = tmp_path / "t.jsonl"
transcript.write_text("")
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
- assert _get_mine_dir(str(transcript)) == str(mempal_dir)
+ assert _get_mine_dir(str(transcript)) == (str(mempal_dir), "projects")
def test_get_mine_dir_transcript_fallback(tmp_path):
- """Falls back to transcript parent dir when MEMPAL_DIR is not set."""
+ """Transcript fallback resolves to its parent dir in convos mode."""
transcript = tmp_path / "t.jsonl"
transcript.write_text("")
with patch.dict("os.environ", {}, clear=True):
- assert _get_mine_dir(str(transcript)) == str(tmp_path)
+ assert _get_mine_dir(str(transcript)) == (str(tmp_path), "convos")
def test_get_mine_dir_empty():
- """Returns empty string when nothing is available."""
+ """Returns empty dir when nothing is available."""
with patch.dict("os.environ", {}, clear=True):
- assert _get_mine_dir("") == ""
+ assert _get_mine_dir("") == ("", "projects")
# --- _parse_harness_input ---
@@ -669,9 +706,10 @@ def test_precompact_mines_transcript_dir(tmp_path, monkeypatch):
)
assert result == {}
mock_run.assert_called_once()
- # Verify mine dir is the transcript's parent
+ # Mine dir is the transcript's parent and mode is convos for JSONL.
call_args = mock_run.call_args[0][0]
- assert str(tmp_path) in call_args[-1]
+ assert str(tmp_path) in call_args
+ assert call_args[call_args.index("--mode") + 1] == "convos"
# --- run_hook ---
From 6a8beef604b5c614e842d6350750eaae1d431f55 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 27 Apr 2026 02:40:01 +0000
Subject: [PATCH 2/2] fix(hooks): harden _get_mine_dir path validation
- Normalize MEMPAL_DIR via Path.expanduser().resolve() so ~/proj paths
are correctly accepted instead of falling through to transcript fallback
- Replace bare Path.expanduser().is_file() transcript check with the
existing _validate_transcript_path() which adds .resolve(), enforces
.jsonl/.json extension, and rejects '..' path-traversal components
- Update tests to compare resolved paths (cross-platform correctness)
- Add tests for tilde expansion, path-traversal rejection, and
non-jsonl extension rejection in _get_mine_dir
Agent-Logs-Url: https://github.com/MemPalace/mempalace/sessions/f69176c7-d752-40ef-ba71-d0e4adc3a689
Co-authored-by: igorls <4753812+igorls@users.noreply.github.com>
---
mempalace/hooks_cli.py | 13 +++++-----
tests/test_hooks_cli.py | 55 +++++++++++++++++++++++++++++++++++------
2 files changed, 54 insertions(+), 14 deletions(-)
diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py
index e7b3981..49b77e2 100644
--- a/mempalace/hooks_cli.py
+++ b/mempalace/hooks_cli.py
@@ -210,12 +210,13 @@ def _get_mine_dir(transcript_path: str = "") -> tuple[str, str]:
code.
"""
mempal_dir = os.environ.get("MEMPAL_DIR", "")
- if mempal_dir and os.path.isdir(mempal_dir):
- return mempal_dir, "projects"
- if transcript_path:
- path = Path(transcript_path).expanduser()
- if path.is_file():
- return str(path.parent), "convos"
+ if mempal_dir:
+ resolved = Path(mempal_dir).expanduser().resolve()
+ if resolved.is_dir():
+ return str(resolved), "projects"
+ path = _validate_transcript_path(transcript_path)
+ if path is not None and path.is_file():
+ return str(path.parent), "convos"
return "", "projects"
diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py
index 7a19dda..6763439 100644
--- a/tests/test_hooks_cli.py
+++ b/tests/test_hooks_cli.py
@@ -446,7 +446,7 @@ def test_maybe_auto_ingest_with_env(tmp_path):
mock_popen.assert_called_once()
cmd = mock_popen.call_args[0][0]
assert "mine" in cmd
- assert str(mempal_dir) in cmd
+ assert str(mempal_dir.resolve()) in cmd
assert cmd[cmd.index("--mode") + 1] == "projects"
@@ -462,7 +462,7 @@ def test_maybe_auto_ingest_with_transcript(tmp_path):
mock_popen.assert_called_once()
cmd = mock_popen.call_args[0][0]
assert "mine" in cmd
- assert str(tmp_path) in cmd
+ assert str(tmp_path.resolve()) in cmd
assert cmd[cmd.index("--mode") + 1] == "convos"
@@ -477,7 +477,7 @@ def test_mine_sync_with_transcript_uses_convos_mode(tmp_path):
mock_run.assert_called_once()
cmd = mock_run.call_args[0][0]
assert "mine" in cmd
- assert str(tmp_path) in cmd
+ assert str(tmp_path.resolve()) in cmd
assert cmd[cmd.index("--mode") + 1] == "convos"
@@ -554,13 +554,32 @@ def test_mine_already_running_corrupt_file(tmp_path):
def test_get_mine_dir_mempal_dir(tmp_path):
- """MEMPAL_DIR takes priority and is treated as projects mode."""
+ """MEMPAL_DIR takes priority, is expanded/resolved, and is treated as projects mode."""
mempal_dir = tmp_path / "project"
mempal_dir.mkdir()
transcript = tmp_path / "t.jsonl"
transcript.write_text("")
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
- assert _get_mine_dir(str(transcript)) == (str(mempal_dir), "projects")
+ result_dir, result_mode = _get_mine_dir(str(transcript))
+ assert Path(result_dir).resolve() == mempal_dir.resolve()
+ assert result_mode == "projects"
+
+
+def test_get_mine_dir_mempal_dir_tilde(tmp_path):
+ """MEMPAL_DIR with a tilde prefix is expanded correctly."""
+ mempal_dir = tmp_path / "project"
+ mempal_dir.mkdir()
+ home = Path.home()
+ # Build a ~-relative path only if tmp_path is inside home
+ try:
+ rel = mempal_dir.relative_to(home)
+ except ValueError:
+ pytest.skip("tmp_path is not under home, cannot build ~-relative path")
+ tilde_path = "~/" + str(rel)
+ with patch.dict("os.environ", {"MEMPAL_DIR": tilde_path}):
+ result_dir, result_mode = _get_mine_dir("")
+ assert Path(result_dir).resolve() == mempal_dir.resolve()
+ assert result_mode == "projects"
def test_get_mine_dir_transcript_fallback(tmp_path):
@@ -568,7 +587,27 @@ def test_get_mine_dir_transcript_fallback(tmp_path):
transcript = tmp_path / "t.jsonl"
transcript.write_text("")
with patch.dict("os.environ", {}, clear=True):
- assert _get_mine_dir(str(transcript)) == (str(tmp_path), "convos")
+ result_dir, result_mode = _get_mine_dir(str(transcript))
+ assert Path(result_dir).resolve() == tmp_path.resolve()
+ assert result_mode == "convos"
+
+
+def test_get_mine_dir_transcript_path_traversal_rejected(tmp_path):
+ """Transcript paths with '..' components are rejected and return no dir."""
+ with patch.dict("os.environ", {}, clear=True):
+ result_dir, result_mode = _get_mine_dir("../../etc/passwd")
+ assert result_dir == ""
+ assert result_mode == "projects"
+
+
+def test_get_mine_dir_transcript_non_jsonl_rejected(tmp_path):
+ """Transcript paths without .jsonl/.json extension are rejected."""
+ bad = tmp_path / "notes.txt"
+ bad.write_text("content")
+ with patch.dict("os.environ", {}, clear=True):
+ result_dir, result_mode = _get_mine_dir(str(bad))
+ assert result_dir == ""
+ assert result_mode == "projects"
def test_get_mine_dir_empty():
@@ -706,9 +745,9 @@ def test_precompact_mines_transcript_dir(tmp_path, monkeypatch):
)
assert result == {}
mock_run.assert_called_once()
- # Mine dir is the transcript's parent and mode is convos for JSONL.
+ # Mine dir is the transcript's parent (resolved) and mode is convos for JSONL.
call_args = mock_run.call_args[0][0]
- assert str(tmp_path) in call_args
+ assert str(tmp_path.resolve()) in call_args
assert call_args[call_args.index("--mode") + 1] == "convos"