Merge pull request #1415 from MemPalace/fix/1212-per-target-pid-guard
fix(hooks): per-target PID guard with atomic claim (#1212, #1206)
This commit is contained in:
+144
-31
@@ -6,6 +6,7 @@ Supported hooks: session-start, stop, precompact
|
|||||||
Supported harnesses: claude-code, codex (extensible to cursor, gemini, etc.)
|
Supported harnesses: claude-code, codex (extensible to cursor, gemini, etc.)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@@ -13,6 +14,7 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
SAVE_INTERVAL = 15
|
SAVE_INTERVAL = 15
|
||||||
STATE_DIR = Path.home() / ".mempalace" / "hook_state"
|
STATE_DIR = Path.home() / ".mempalace" / "hook_state"
|
||||||
@@ -256,7 +258,45 @@ def _get_mine_targets() -> list[tuple[str, str]]:
|
|||||||
return targets
|
return targets
|
||||||
|
|
||||||
|
|
||||||
_MINE_PID_FILE = STATE_DIR / "mine.pid"
|
# Per-target PID guard.
|
||||||
|
#
|
||||||
|
# Hook fires ingest mines in the background. If a previous fire's child is
|
||||||
|
# still running for the *same* target (same source dir, mode, wing), the new
|
||||||
|
# fire should skip rather than pile up — multiple concurrent mines against the
|
||||||
|
# same source corrupt the HNSW index and exhaust disk via duplicate upserts
|
||||||
|
# (#1212, #1206). But mines targeting *different* sources / modes must remain
|
||||||
|
# independent so the user can have e.g. project-mining and transcript-ingest
|
||||||
|
# running in parallel.
|
||||||
|
#
|
||||||
|
# The single ``mine.pid`` global file used previously failed both ways: the
|
||||||
|
# guard was rebuilt every spawn (so two near-simultaneous fires both passed
|
||||||
|
# the check before either wrote), and the file was unconditionally overwritten
|
||||||
|
# (so the second spawn lost the first PID, orphaning it). The replacement is
|
||||||
|
# a directory of per-target slots, claimed via ``O_CREAT | O_EXCL`` so the
|
||||||
|
# claim is atomic and per-target.
|
||||||
|
_MINE_PID_DIR = STATE_DIR / "mine_pids"
|
||||||
|
|
||||||
|
# The per-process PID file path is communicated to the mine subprocess via
|
||||||
|
# this env var so the child's cleanup hook (in miner.py) can remove its
|
||||||
|
# own slot on exit without scanning the whole directory.
|
||||||
|
_MINE_PID_FILE_ENV = "MEMPALACE_MINE_PID_FILE"
|
||||||
|
|
||||||
|
|
||||||
|
def _pid_file_for_cmd(cmd: list[str]) -> Path:
|
||||||
|
"""Return the per-target PID file path for a mine subcommand.
|
||||||
|
|
||||||
|
The key is derived from the mine arguments (everything after ``mine``)
|
||||||
|
so different (dir, mode, wing) combinations get independent slots.
|
||||||
|
Two fires with the same arguments collapse to the same slot — which is
|
||||||
|
exactly the dedup we want.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
idx = cmd.index("mine")
|
||||||
|
key = " ".join(cmd[idx:])
|
||||||
|
except ValueError:
|
||||||
|
key = " ".join(cmd)
|
||||||
|
digest = hashlib.sha256(key.encode("utf-8")).hexdigest()[:16]
|
||||||
|
return _MINE_PID_DIR / f"mine_{digest}.pid"
|
||||||
|
|
||||||
|
|
||||||
def _pid_alive(pid: int) -> bool:
|
def _pid_alive(pid: int) -> bool:
|
||||||
@@ -292,22 +332,96 @@ def _pid_alive(pid: int) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _mine_already_running() -> bool:
|
def _mine_already_running(cmd: list[str]) -> bool:
|
||||||
"""Return True if a background mine process from a previous hook fire is still alive."""
|
"""Return True if a previous mine for ``cmd``'s target is still alive."""
|
||||||
|
pid_file = _pid_file_for_cmd(cmd)
|
||||||
try:
|
try:
|
||||||
pid = int(_MINE_PID_FILE.read_text().strip())
|
recorded = pid_file.read_text().strip()
|
||||||
except (OSError, ValueError):
|
except OSError:
|
||||||
return False
|
return False
|
||||||
return _pid_alive(pid)
|
if not recorded.isdigit():
|
||||||
|
return False
|
||||||
|
return _pid_alive(int(recorded))
|
||||||
|
|
||||||
|
|
||||||
|
def _claim_mine_slot(cmd: list[str]) -> Optional[Path]:
|
||||||
|
"""Atomically reserve the per-target PID slot for ``cmd``.
|
||||||
|
|
||||||
|
Returns the slot path on success, or ``None`` if the target is
|
||||||
|
already being mined by a live process. The reservation is done via
|
||||||
|
``O_CREAT | O_EXCL`` so two simultaneous hook fires can never both
|
||||||
|
pass the check; one wins, the other returns None.
|
||||||
|
|
||||||
|
A stale slot (file exists but the recorded PID is dead) is reclaimed
|
||||||
|
transparently — orphan miners that crashed without cleanup do not
|
||||||
|
block future hook fires forever.
|
||||||
|
"""
|
||||||
|
pid_file = _pid_file_for_cmd(cmd)
|
||||||
|
pid_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
try:
|
||||||
|
fd = os.open(str(pid_file), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
|
||||||
|
os.close(fd)
|
||||||
|
return pid_file
|
||||||
|
except FileExistsError:
|
||||||
|
pass
|
||||||
|
# Slot exists. If the holder is alive, defer.
|
||||||
|
if _mine_already_running(cmd):
|
||||||
|
return None
|
||||||
|
# Stale entry; reclaim. The unlink+create is racy against another hook
|
||||||
|
# firing right now, but the second create's O_EXCL will fail and that
|
||||||
|
# caller will see the live PID via the next round.
|
||||||
|
try:
|
||||||
|
pid_file.unlink()
|
||||||
|
except FileNotFoundError:
|
||||||
|
pass
|
||||||
|
except OSError:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
fd = os.open(str(pid_file), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
|
||||||
|
os.close(fd)
|
||||||
|
return pid_file
|
||||||
|
except FileExistsError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _spawn_mine(cmd: list) -> None:
|
def _spawn_mine(cmd: list) -> None:
|
||||||
"""Spawn a mine subprocess, write its PID to the lock file, log to hook.log."""
|
"""Spawn a mine subprocess if no live mine is already targeting it.
|
||||||
|
|
||||||
|
The PID slot is claimed atomically *before* the spawn, so two near-
|
||||||
|
simultaneous hook fires can't both proceed — the second sees the
|
||||||
|
claimed slot and silently skips. The spawned process inherits a
|
||||||
|
``MEMPALACE_MINE_PID_FILE`` env var so its cleanup hook can remove
|
||||||
|
the slot on exit without scanning the directory.
|
||||||
|
"""
|
||||||
STATE_DIR.mkdir(parents=True, exist_ok=True)
|
STATE_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
log_path = STATE_DIR / "hook.log"
|
log_path = STATE_DIR / "hook.log"
|
||||||
|
pid_file = _claim_mine_slot(cmd)
|
||||||
|
if pid_file is None:
|
||||||
|
_log(f"Skipping mine: target already running ({' '.join(cmd[-3:])})")
|
||||||
|
return
|
||||||
|
child_env = os.environ.copy()
|
||||||
|
child_env[_MINE_PID_FILE_ENV] = str(pid_file)
|
||||||
with open(log_path, "a") as log_f:
|
with open(log_path, "a") as log_f:
|
||||||
proc = subprocess.Popen(cmd, stdout=log_f, stderr=log_f, **_detached_popen_kwargs())
|
try:
|
||||||
_MINE_PID_FILE.write_text(str(proc.pid))
|
proc = subprocess.Popen(
|
||||||
|
cmd,
|
||||||
|
stdout=log_f,
|
||||||
|
stderr=log_f,
|
||||||
|
env=child_env,
|
||||||
|
**_detached_popen_kwargs(),
|
||||||
|
)
|
||||||
|
except OSError:
|
||||||
|
# Spawn failed; release the slot we just claimed so the next
|
||||||
|
# hook fire can try again rather than skipping forever.
|
||||||
|
try:
|
||||||
|
pid_file.unlink()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
raise
|
||||||
|
try:
|
||||||
|
pid_file.write_text(str(proc.pid))
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def _maybe_auto_ingest():
|
def _maybe_auto_ingest():
|
||||||
@@ -317,13 +431,15 @@ def _maybe_auto_ingest():
|
|||||||
in the hook handlers — this function does not handle them, to avoid
|
in the hook handlers — this function does not handle them, to avoid
|
||||||
asymmetric interpreter handling and PID-file overwrite when both
|
asymmetric interpreter handling and PID-file overwrite when both
|
||||||
targets fire from a single hook call (#1231 review).
|
targets fire from a single hook call (#1231 review).
|
||||||
|
|
||||||
|
Per-target dedup is done by ``_spawn_mine`` itself: each (dir, mode)
|
||||||
|
target gets its own PID slot, so distinct targets never block each
|
||||||
|
other but a re-fire of the same target while the previous one is
|
||||||
|
still running is silently skipped.
|
||||||
"""
|
"""
|
||||||
targets = _get_mine_targets()
|
targets = _get_mine_targets()
|
||||||
if not targets:
|
if not targets:
|
||||||
return
|
return
|
||||||
if _mine_already_running():
|
|
||||||
_log("Skipping auto-ingest: mine already running")
|
|
||||||
return
|
|
||||||
for mine_dir, mode in targets:
|
for mine_dir, mode in targets:
|
||||||
try:
|
try:
|
||||||
_spawn_mine([_mempalace_python(), "-m", "mempalace", "mine", mine_dir, "--mode", mode])
|
_spawn_mine([_mempalace_python(), "-m", "mempalace", "mine", mine_dir, "--mode", mode])
|
||||||
@@ -518,25 +634,22 @@ def _ingest_transcript(transcript_path: str):
|
|||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
log_path = STATE_DIR / "hook.log"
|
# Route through ``_spawn_mine`` so the per-target PID guard kicks
|
||||||
STATE_DIR.mkdir(parents=True, exist_ok=True)
|
# in here too — repeated Stop/PreCompact fires for the same
|
||||||
with open(log_path, "a") as log_f:
|
# transcript should not stack up parallel ingest mines.
|
||||||
subprocess.Popen(
|
_spawn_mine(
|
||||||
[
|
[
|
||||||
_mempalace_python(),
|
_mempalace_python(),
|
||||||
"-m",
|
"-m",
|
||||||
"mempalace",
|
"mempalace",
|
||||||
"mine",
|
"mine",
|
||||||
str(path.parent),
|
str(path.parent),
|
||||||
"--mode",
|
"--mode",
|
||||||
"convos",
|
"convos",
|
||||||
"--wing",
|
"--wing",
|
||||||
"sessions",
|
"sessions",
|
||||||
],
|
]
|
||||||
stdout=log_f,
|
)
|
||||||
stderr=log_f,
|
|
||||||
**_detached_popen_kwargs(),
|
|
||||||
)
|
|
||||||
_log(f"Transcript ingest started: {path.name}")
|
_log(f"Transcript ingest started: {path.name}")
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|||||||
+16
-17
@@ -1206,30 +1206,29 @@ def _mine_impl(
|
|||||||
|
|
||||||
|
|
||||||
def _cleanup_mine_pid_file() -> None:
|
def _cleanup_mine_pid_file() -> None:
|
||||||
"""Remove the global mine PID file if it currently points at us.
|
"""Remove this process's per-target PID slot on exit.
|
||||||
|
|
||||||
The PID file (``~/.mempalace/hook_state/mine.pid``, written by the
|
Hook-spawned mines receive ``MEMPALACE_MINE_PID_FILE`` in their env
|
||||||
hook in :func:`mempalace.hooks_cli._spawn_mine`) tracks the PID of
|
pointing at the slot the hook claimed for them
|
||||||
the most recently spawned mine subprocess so the hook can dedup
|
(``~/.mempalace/hook_state/mine_pids/mine_<sha>.pid``). When the
|
||||||
concurrent auto-ingest fires. When that subprocess exits — cleanly,
|
subprocess exits — cleanly, on error, or via Ctrl-C — it removes its
|
||||||
on error, or via Ctrl-C — it should remove its own entry so the
|
own slot so the next hook fire isn't briefly fooled by a stale PID
|
||||||
next hook fire isn't briefly fooled by a stale PID before
|
before ``_pid_alive`` returns False.
|
||||||
``_pid_alive`` returns False.
|
|
||||||
|
|
||||||
We only delete the file if it claims our own PID; any other PID is
|
Only delete the slot if it claims our own PID; any other PID is left
|
||||||
left alone (could be an unrelated mine running concurrently from
|
alone (it could belong to an unrelated mine that just claimed the
|
||||||
a different worktree / session).
|
same slot via a stale-reclaim race).
|
||||||
"""
|
"""
|
||||||
try:
|
pid_file_env = os.environ.get("MEMPALACE_MINE_PID_FILE", "")
|
||||||
from .hooks_cli import _MINE_PID_FILE
|
if not pid_file_env:
|
||||||
except Exception:
|
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
if not _MINE_PID_FILE.exists():
|
pid_file = Path(pid_file_env)
|
||||||
|
if not pid_file.exists():
|
||||||
return
|
return
|
||||||
recorded = _MINE_PID_FILE.read_text().strip()
|
recorded = pid_file.read_text().strip()
|
||||||
if recorded and recorded.isdigit() and int(recorded) == os.getpid():
|
if recorded and recorded.isdigit() and int(recorded) == os.getpid():
|
||||||
_MINE_PID_FILE.unlink()
|
pid_file.unlink()
|
||||||
except OSError:
|
except OSError:
|
||||||
# Best-effort cleanup; never fail the mine over PID bookkeeping.
|
# Best-effort cleanup; never fail the mine over PID bookkeeping.
|
||||||
pass
|
pass
|
||||||
|
|||||||
+196
-35
@@ -3,6 +3,7 @@ import io
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
@@ -441,7 +442,7 @@ def test_maybe_auto_ingest_with_env(tmp_path):
|
|||||||
mempal_dir.mkdir()
|
mempal_dir.mkdir()
|
||||||
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
||||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"):
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
|
||||||
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||||
_maybe_auto_ingest()
|
_maybe_auto_ingest()
|
||||||
mock_popen.assert_called_once()
|
mock_popen.assert_called_once()
|
||||||
@@ -463,7 +464,7 @@ def test_maybe_auto_ingest_uses_mempalace_python(tmp_path):
|
|||||||
mempal_dir.mkdir()
|
mempal_dir.mkdir()
|
||||||
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
||||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"):
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
|
||||||
with patch(
|
with patch(
|
||||||
"mempalace.hooks_cli._mempalace_python", return_value="/fake/venv/python"
|
"mempalace.hooks_cli._mempalace_python", return_value="/fake/venv/python"
|
||||||
):
|
):
|
||||||
@@ -513,7 +514,7 @@ def test_maybe_auto_ingest_ignores_transcript_arg_path(tmp_path):
|
|||||||
transcript.write_text("")
|
transcript.write_text("")
|
||||||
with patch.dict("os.environ", {}, clear=True):
|
with patch.dict("os.environ", {}, clear=True):
|
||||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"):
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
|
||||||
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||||
_maybe_auto_ingest()
|
_maybe_auto_ingest()
|
||||||
mock_popen.assert_not_called()
|
mock_popen.assert_not_called()
|
||||||
@@ -543,21 +544,38 @@ def test_maybe_auto_ingest_oserror(tmp_path):
|
|||||||
mempal_dir.mkdir()
|
mempal_dir.mkdir()
|
||||||
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
||||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"):
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
|
||||||
with patch("mempalace.hooks_cli.subprocess.Popen", side_effect=OSError("fail")):
|
with patch("mempalace.hooks_cli.subprocess.Popen", side_effect=OSError("fail")):
|
||||||
_maybe_auto_ingest() # should not raise
|
_maybe_auto_ingest() # should not raise
|
||||||
|
|
||||||
|
|
||||||
def test_maybe_auto_ingest_skips_when_mine_running(tmp_path):
|
def test_maybe_auto_ingest_skips_when_mine_running(tmp_path):
|
||||||
"""Does not spawn a new mine process if one is already running."""
|
"""Does not spawn a new mine process if a mine for the same target is alive."""
|
||||||
mempal_dir = tmp_path / "project"
|
mempal_dir = tmp_path / "project"
|
||||||
mempal_dir.mkdir()
|
mempal_dir.mkdir()
|
||||||
|
pid_dir = tmp_path / "mine_pids"
|
||||||
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
with patch.dict("os.environ", {"MEMPAL_DIR": str(mempal_dir)}):
|
||||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
with patch("mempalace.hooks_cli._mine_already_running", return_value=True):
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
|
||||||
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
# Pre-populate the per-target slot with a live PID (our own).
|
||||||
_maybe_auto_ingest()
|
from mempalace.hooks_cli import _pid_file_for_cmd
|
||||||
mock_popen.assert_not_called()
|
|
||||||
|
cmd = [
|
||||||
|
sys.executable,
|
||||||
|
"-m",
|
||||||
|
"mempalace",
|
||||||
|
"mine",
|
||||||
|
str(mempal_dir.resolve()),
|
||||||
|
"--mode",
|
||||||
|
"projects",
|
||||||
|
]
|
||||||
|
pid_file = _pid_file_for_cmd(cmd)
|
||||||
|
pid_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
pid_file.write_text(str(os.getpid()))
|
||||||
|
with patch("mempalace.hooks_cli._mempalace_python", return_value=sys.executable):
|
||||||
|
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||||
|
_maybe_auto_ingest()
|
||||||
|
mock_popen.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
# --- _detached_popen_kwargs ---
|
# --- _detached_popen_kwargs ---
|
||||||
@@ -604,7 +622,7 @@ def test_detached_popen_kwargs_windows(monkeypatch):
|
|||||||
def test_spawn_mine_uses_detached_kwargs(tmp_path):
|
def test_spawn_mine_uses_detached_kwargs(tmp_path):
|
||||||
"""_spawn_mine forwards detached kwargs so the hook can exit cleanly."""
|
"""_spawn_mine forwards detached kwargs so the hook can exit cleanly."""
|
||||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"):
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
|
||||||
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||||
mock_popen.return_value.pid = 9999
|
mock_popen.return_value.pid = 9999
|
||||||
from mempalace.hooks_cli import _spawn_mine
|
from mempalace.hooks_cli import _spawn_mine
|
||||||
@@ -617,52 +635,195 @@ def test_spawn_mine_uses_detached_kwargs(tmp_path):
|
|||||||
assert kwargs.get("close_fds") is True
|
assert kwargs.get("close_fds") is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_spawn_mine_skips_when_target_running(tmp_path):
|
||||||
|
"""A second spawn for the same cmd target while the first is alive must skip."""
|
||||||
|
pid_dir = tmp_path / "mine_pids"
|
||||||
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
|
||||||
|
from mempalace.hooks_cli import _pid_file_for_cmd, _spawn_mine
|
||||||
|
|
||||||
|
cmd = ["mempalace", "mine", "/tmp/proj", "--mode", "projects"]
|
||||||
|
pid_file = _pid_file_for_cmd(cmd)
|
||||||
|
pid_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
pid_file.write_text(str(os.getpid())) # live PID
|
||||||
|
|
||||||
|
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||||
|
_spawn_mine(cmd)
|
||||||
|
mock_popen.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
def test_spawn_mine_distinct_targets_dont_block_each_other(tmp_path):
|
||||||
|
"""Two spawn calls for *different* targets both proceed."""
|
||||||
|
pid_dir = tmp_path / "mine_pids"
|
||||||
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
|
||||||
|
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||||
|
from mempalace.hooks_cli import _spawn_mine
|
||||||
|
|
||||||
|
mock_popen.return_value.pid = 1111
|
||||||
|
_spawn_mine(["mempalace", "mine", "/tmp/a", "--mode", "projects"])
|
||||||
|
mock_popen.return_value.pid = 2222
|
||||||
|
_spawn_mine(["mempalace", "mine", "/tmp/b", "--mode", "projects"])
|
||||||
|
assert mock_popen.call_count == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_spawn_mine_reclaims_stale_slot(tmp_path):
|
||||||
|
"""A slot pointing at a dead PID is reclaimed silently."""
|
||||||
|
pid_dir = tmp_path / "mine_pids"
|
||||||
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
|
||||||
|
from mempalace.hooks_cli import _pid_file_for_cmd, _spawn_mine
|
||||||
|
|
||||||
|
cmd = ["mempalace", "mine", "/tmp/proj", "--mode", "projects"]
|
||||||
|
pid_file = _pid_file_for_cmd(cmd)
|
||||||
|
pid_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
pid_file.write_text("999999999") # dead PID
|
||||||
|
|
||||||
|
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||||
|
mock_popen.return_value.pid = 4242
|
||||||
|
_spawn_mine(cmd)
|
||||||
|
mock_popen.assert_called_once()
|
||||||
|
# New PID is recorded in the reclaimed slot.
|
||||||
|
assert pid_file.read_text().strip() == "4242"
|
||||||
|
|
||||||
|
|
||||||
|
def test_spawn_mine_releases_slot_on_oserror(tmp_path):
|
||||||
|
"""If Popen raises OSError, the claimed slot must be released."""
|
||||||
|
pid_dir = tmp_path / "mine_pids"
|
||||||
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
|
||||||
|
from mempalace.hooks_cli import _pid_file_for_cmd, _spawn_mine
|
||||||
|
|
||||||
|
cmd = ["mempalace", "mine", "/tmp/proj", "--mode", "projects"]
|
||||||
|
pid_file = _pid_file_for_cmd(cmd)
|
||||||
|
|
||||||
|
with patch("mempalace.hooks_cli.subprocess.Popen", side_effect=OSError("spawn fail")):
|
||||||
|
with pytest.raises(OSError):
|
||||||
|
_spawn_mine(cmd)
|
||||||
|
assert (
|
||||||
|
not pid_file.exists()
|
||||||
|
), "slot must be released so the next hook fire isn't permanently blocked"
|
||||||
|
|
||||||
|
|
||||||
|
def test_spawn_mine_passes_pid_file_env_var(tmp_path):
|
||||||
|
"""The child inherits MEMPALACE_MINE_PID_FILE so its cleanup hook can find the slot."""
|
||||||
|
pid_dir = tmp_path / "mine_pids"
|
||||||
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
|
||||||
|
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||||
|
mock_popen.return_value.pid = 5555
|
||||||
|
from mempalace.hooks_cli import _pid_file_for_cmd, _spawn_mine
|
||||||
|
|
||||||
|
cmd = ["mempalace", "mine", "/tmp/x", "--mode", "projects"]
|
||||||
|
_spawn_mine(cmd)
|
||||||
|
child_env = mock_popen.call_args.kwargs.get("env", {})
|
||||||
|
expected = str(_pid_file_for_cmd(cmd))
|
||||||
|
assert child_env.get("MEMPALACE_MINE_PID_FILE") == expected
|
||||||
|
|
||||||
|
|
||||||
def test_ingest_transcript_uses_detached_kwargs(tmp_path):
|
def test_ingest_transcript_uses_detached_kwargs(tmp_path):
|
||||||
"""_ingest_transcript spawns the convos mine with detach kwargs."""
|
"""_ingest_transcript spawns the convos mine with detach kwargs."""
|
||||||
transcript = tmp_path / "session.jsonl"
|
transcript = tmp_path / "session.jsonl"
|
||||||
transcript.write_text("x" * 200) # > 100 byte gate
|
transcript.write_text("x" * 200) # > 100 byte gate
|
||||||
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
|
||||||
from mempalace.hooks_cli import _ingest_transcript
|
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||||
|
from mempalace.hooks_cli import _ingest_transcript
|
||||||
|
|
||||||
_ingest_transcript(str(transcript))
|
_ingest_transcript(str(transcript))
|
||||||
assert mock_popen.called
|
assert mock_popen.called
|
||||||
kwargs = mock_popen.call_args.kwargs
|
kwargs = mock_popen.call_args.kwargs
|
||||||
assert kwargs.get("stdin") is subprocess.DEVNULL
|
assert kwargs.get("stdin") is subprocess.DEVNULL
|
||||||
assert kwargs.get("close_fds") is True
|
assert kwargs.get("close_fds") is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_ingest_transcript_skips_when_target_running(tmp_path):
|
||||||
|
"""Repeated transcript ingests for the same transcript should dedup."""
|
||||||
|
transcript = tmp_path / "session.jsonl"
|
||||||
|
transcript.write_text("x" * 200)
|
||||||
|
pid_dir = tmp_path / "mine_pids"
|
||||||
|
with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
|
||||||
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
|
||||||
|
with patch("mempalace.hooks_cli._mempalace_python", return_value=sys.executable):
|
||||||
|
from mempalace.hooks_cli import _ingest_transcript, _pid_file_for_cmd
|
||||||
|
|
||||||
|
expected_cmd = [
|
||||||
|
sys.executable,
|
||||||
|
"-m",
|
||||||
|
"mempalace",
|
||||||
|
"mine",
|
||||||
|
str(transcript.parent),
|
||||||
|
"--mode",
|
||||||
|
"convos",
|
||||||
|
"--wing",
|
||||||
|
"sessions",
|
||||||
|
]
|
||||||
|
pid_file = _pid_file_for_cmd(expected_cmd)
|
||||||
|
pid_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
pid_file.write_text(str(os.getpid())) # live target
|
||||||
|
|
||||||
|
with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
|
||||||
|
_ingest_transcript(str(transcript))
|
||||||
|
mock_popen.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
# --- _mine_already_running ---
|
# --- _mine_already_running ---
|
||||||
|
|
||||||
|
|
||||||
|
def _seed_slot(pid_dir, cmd, body: str):
|
||||||
|
"""Write ``body`` into the per-target slot for ``cmd`` under ``pid_dir``."""
|
||||||
|
from mempalace.hooks_cli import _pid_file_for_cmd
|
||||||
|
|
||||||
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
|
||||||
|
slot = _pid_file_for_cmd(cmd)
|
||||||
|
slot.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
slot.write_text(body)
|
||||||
|
return slot
|
||||||
|
|
||||||
|
|
||||||
def test_mine_already_running_no_file(tmp_path):
|
def test_mine_already_running_no_file(tmp_path):
|
||||||
"""Returns False when no PID file exists."""
|
"""Returns False when no per-target slot exists."""
|
||||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", tmp_path / "mine.pid"):
|
cmd = ["mempalace", "mine", "/tmp/x", "--mode", "projects"]
|
||||||
assert _mine_already_running() is False
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", tmp_path / "mine_pids"):
|
||||||
|
assert _mine_already_running(cmd) is False
|
||||||
|
|
||||||
|
|
||||||
def test_mine_already_running_dead_pid(tmp_path):
|
def test_mine_already_running_dead_pid(tmp_path):
|
||||||
"""Returns False when PID file contains a PID that no longer exists."""
|
"""Returns False when the slot's recorded PID is no longer alive."""
|
||||||
pid_file = tmp_path / "mine.pid"
|
pid_dir = tmp_path / "mine_pids"
|
||||||
pid_file.write_text("999999999") # almost certainly not a real PID
|
cmd = ["mempalace", "mine", "/tmp/x", "--mode", "projects"]
|
||||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file):
|
_seed_slot(pid_dir, cmd, "999999999") # almost certainly not a real PID
|
||||||
assert _mine_already_running() is False
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
|
||||||
|
assert _mine_already_running(cmd) is False
|
||||||
|
|
||||||
|
|
||||||
def test_mine_already_running_live_pid(tmp_path):
|
def test_mine_already_running_live_pid(tmp_path):
|
||||||
"""Returns True when PID file contains the current process's own PID."""
|
"""Returns True when the slot's recorded PID is alive."""
|
||||||
pid_file = tmp_path / "mine.pid"
|
pid_dir = tmp_path / "mine_pids"
|
||||||
pid_file.write_text(str(os.getpid())) # current process is definitely alive
|
cmd = ["mempalace", "mine", "/tmp/x", "--mode", "projects"]
|
||||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file):
|
_seed_slot(pid_dir, cmd, str(os.getpid())) # current process is alive
|
||||||
assert _mine_already_running() is True
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
|
||||||
|
assert _mine_already_running(cmd) is True
|
||||||
|
|
||||||
|
|
||||||
def test_mine_already_running_corrupt_file(tmp_path):
|
def test_mine_already_running_corrupt_file(tmp_path):
|
||||||
"""Returns False when PID file contains non-integer content."""
|
"""Returns False when the slot contains non-integer content."""
|
||||||
pid_file = tmp_path / "mine.pid"
|
pid_dir = tmp_path / "mine_pids"
|
||||||
pid_file.write_text("not-a-pid")
|
cmd = ["mempalace", "mine", "/tmp/x", "--mode", "projects"]
|
||||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file):
|
_seed_slot(pid_dir, cmd, "not-a-pid")
|
||||||
assert _mine_already_running() is False
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
|
||||||
|
assert _mine_already_running(cmd) is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_mine_already_running_distinct_cmds_independent(tmp_path):
|
||||||
|
"""Slots are keyed per cmd; an alive entry for cmd A doesn't shadow cmd B."""
|
||||||
|
pid_dir = tmp_path / "mine_pids"
|
||||||
|
cmd_a = ["mempalace", "mine", "/tmp/a", "--mode", "projects"]
|
||||||
|
cmd_b = ["mempalace", "mine", "/tmp/b", "--mode", "projects"]
|
||||||
|
_seed_slot(pid_dir, cmd_a, str(os.getpid()))
|
||||||
|
with patch("mempalace.hooks_cli._MINE_PID_DIR", pid_dir):
|
||||||
|
assert _mine_already_running(cmd_a) is True
|
||||||
|
assert _mine_already_running(cmd_b) is False
|
||||||
|
|
||||||
|
|
||||||
# --- _get_mine_targets ---
|
# --- _get_mine_targets ---
|
||||||
|
|||||||
+11
-9
@@ -777,7 +777,7 @@ def test_mine_arbitrary_exception_prints_summary_and_reraises(tmp_path, capsys):
|
|||||||
|
|
||||||
|
|
||||||
def test_mine_cleans_up_pid_file_on_interrupt(tmp_path):
|
def test_mine_cleans_up_pid_file_on_interrupt(tmp_path):
|
||||||
"""Our own PID entry in mine.pid is removed in the finally clause."""
|
"""Our own per-target PID slot is removed in the finally clause."""
|
||||||
import pytest
|
import pytest
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
@@ -786,14 +786,16 @@ def test_mine_cleans_up_pid_file_on_interrupt(tmp_path):
|
|||||||
_make_minable_project(project_root, n_files=2)
|
_make_minable_project(project_root, n_files=2)
|
||||||
palace_path = project_root / "palace"
|
palace_path = project_root / "palace"
|
||||||
|
|
||||||
pid_file = tmp_path / "mine.pid"
|
pid_file = tmp_path / "mine_abc.pid"
|
||||||
pid_file.write_text(str(os.getpid()))
|
pid_file.write_text(str(os.getpid()))
|
||||||
|
|
||||||
def fake_process_file(*args, **kwargs):
|
def fake_process_file(*args, **kwargs):
|
||||||
raise KeyboardInterrupt
|
raise KeyboardInterrupt
|
||||||
|
|
||||||
|
# The mine subprocess receives its slot path via env var; the cleanup
|
||||||
|
# hook in miner.py reads that var and removes the slot if it matches.
|
||||||
with (
|
with (
|
||||||
patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file),
|
patch.dict(os.environ, {"MEMPALACE_MINE_PID_FILE": str(pid_file)}),
|
||||||
patch("mempalace.miner.process_file", side_effect=fake_process_file),
|
patch("mempalace.miner.process_file", side_effect=fake_process_file),
|
||||||
):
|
):
|
||||||
with pytest.raises(SystemExit):
|
with pytest.raises(SystemExit):
|
||||||
@@ -803,7 +805,7 @@ def test_mine_cleans_up_pid_file_on_interrupt(tmp_path):
|
|||||||
|
|
||||||
|
|
||||||
def test_mine_cleans_up_pid_file_on_clean_exit(tmp_path):
|
def test_mine_cleans_up_pid_file_on_clean_exit(tmp_path):
|
||||||
"""Successful mine also removes its own PID entry in the finally clause."""
|
"""Successful mine also removes its own per-target PID slot."""
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
project_root = tmp_path / "proj"
|
project_root = tmp_path / "proj"
|
||||||
@@ -811,17 +813,17 @@ def test_mine_cleans_up_pid_file_on_clean_exit(tmp_path):
|
|||||||
_make_minable_project(project_root, n_files=1)
|
_make_minable_project(project_root, n_files=1)
|
||||||
palace_path = project_root / "palace"
|
palace_path = project_root / "palace"
|
||||||
|
|
||||||
pid_file = tmp_path / "mine.pid"
|
pid_file = tmp_path / "mine_abc.pid"
|
||||||
pid_file.write_text(str(os.getpid()))
|
pid_file.write_text(str(os.getpid()))
|
||||||
|
|
||||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file):
|
with patch.dict(os.environ, {"MEMPALACE_MINE_PID_FILE": str(pid_file)}):
|
||||||
mine(str(project_root), str(palace_path))
|
mine(str(project_root), str(palace_path))
|
||||||
|
|
||||||
assert not pid_file.exists()
|
assert not pid_file.exists()
|
||||||
|
|
||||||
|
|
||||||
def test_mine_does_not_remove_other_processes_pid_file(tmp_path):
|
def test_mine_does_not_remove_other_processes_pid_file(tmp_path):
|
||||||
"""A PID file pointing at someone else's PID is left untouched."""
|
"""A PID slot pointing at someone else's PID is left untouched."""
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
project_root = tmp_path / "proj"
|
project_root = tmp_path / "proj"
|
||||||
@@ -830,10 +832,10 @@ def test_mine_does_not_remove_other_processes_pid_file(tmp_path):
|
|||||||
palace_path = project_root / "palace"
|
palace_path = project_root / "palace"
|
||||||
|
|
||||||
other_pid = os.getpid() + 999_999 # a PID that isn't us
|
other_pid = os.getpid() + 999_999 # a PID that isn't us
|
||||||
pid_file = tmp_path / "mine.pid"
|
pid_file = tmp_path / "mine_abc.pid"
|
||||||
pid_file.write_text(str(other_pid))
|
pid_file.write_text(str(other_pid))
|
||||||
|
|
||||||
with patch("mempalace.hooks_cli._MINE_PID_FILE", pid_file):
|
with patch.dict(os.environ, {"MEMPALACE_MINE_PID_FILE": str(pid_file)}):
|
||||||
mine(str(project_root), str(palace_path))
|
mine(str(project_root), str(palace_path))
|
||||||
|
|
||||||
assert pid_file.exists(), "Foreign PID entries must not be removed"
|
assert pid_file.exists(), "Foreign PID entries must not be removed"
|
||||||
|
|||||||
Reference in New Issue
Block a user