feat(convo): parse Claude Code conversation dirs into project entities

Claude Code stores sessions under `~/.claude/projects/<slug>/<id>.jsonl`
where `<slug>` is the original CWD with `/` replaced by `-`. That
encoding is lossy — can't distinguish `foo-bar` (one segment) from
`foo/bar` (two) — so slug-decoding alone produces wrong names for any
hyphenated project.

Fortunately, every message record carries a `cwd` field with the true
path. This scanner reads one record per session to recover the
accurate project name deterministically, falling back to slug-decoding
only if the JSONL is malformed or empty.

Output shape matches project_scanner.ProjectInfo so the discover
orchestrator can union results across sources. Session count doubles
as a density signal for ranking.

22 unit tests cover: root detection, cwd extraction with malformed
input tolerance, fallback slug decoding, name resolution using the
newest session (so renames win), and dedup when two encoded dirs
resolve to the same project.
This commit is contained in:
Igor Lins e Silva
2026-04-24 00:46:31 -03:00
parent 14d7444abe
commit c7bd2cd8e4
2 changed files with 351 additions and 0 deletions
+199
View File
@@ -0,0 +1,199 @@
"""Tests for mempalace.convo_scanner."""
import json
from mempalace.convo_scanner import (
_decode_slug_fallback,
_extract_cwd_from_session,
_resolve_project_name,
is_claude_projects_root,
scan_claude_projects,
)
# ── is_claude_projects_root ─────────────────────────────────────────────
def test_is_claude_projects_root_true(tmp_path):
project_dir = tmp_path / "-home-user-dev-foo"
project_dir.mkdir()
(project_dir / "abc.jsonl").write_text("{}\n")
assert is_claude_projects_root(tmp_path)
def test_is_claude_projects_root_false_no_dash_prefix(tmp_path):
project_dir = tmp_path / "normal-folder"
project_dir.mkdir()
(project_dir / "abc.jsonl").write_text("{}\n")
assert not is_claude_projects_root(tmp_path)
def test_is_claude_projects_root_false_no_jsonl(tmp_path):
project_dir = tmp_path / "-home-user-foo"
project_dir.mkdir()
(project_dir / "other.txt").write_text("hello")
assert not is_claude_projects_root(tmp_path)
def test_is_claude_projects_root_false_empty(tmp_path):
assert not is_claude_projects_root(tmp_path)
def test_is_claude_projects_root_false_nonexistent(tmp_path):
assert not is_claude_projects_root(tmp_path / "does-not-exist")
# ── cwd extraction ──────────────────────────────────────────────────────
def test_extract_cwd_from_session(tmp_path):
f = tmp_path / "session.jsonl"
lines = [
json.dumps({"type": "file-history-snapshot", "messageId": "x"}),
json.dumps({"type": "user", "cwd": "/home/user/dev/myproj", "content": "hi"}),
]
f.write_text("\n".join(lines) + "\n")
assert _extract_cwd_from_session(f) == "/home/user/dev/myproj"
def test_extract_cwd_from_session_skips_malformed(tmp_path):
f = tmp_path / "session.jsonl"
f.write_text(
"{not valid json\n" + json.dumps({"type": "user", "cwd": "/home/user/dev/good"}) + "\n"
)
assert _extract_cwd_from_session(f) == "/home/user/dev/good"
def test_extract_cwd_from_session_none_if_absent(tmp_path):
f = tmp_path / "session.jsonl"
f.write_text(json.dumps({"type": "x", "messageId": "y"}) + "\n")
assert _extract_cwd_from_session(f) is None
def test_extract_cwd_from_session_none_if_file_missing(tmp_path):
assert _extract_cwd_from_session(tmp_path / "missing.jsonl") is None
# ── slug fallback ───────────────────────────────────────────────────────
def test_decode_slug_fallback_last_segment():
assert _decode_slug_fallback("-home-user-dev-foo") == "foo"
def test_decode_slug_fallback_double_dash():
assert _decode_slug_fallback("-home-user--bentokit") == "bentokit"
def test_decode_slug_fallback_empty():
assert _decode_slug_fallback("") == ""
def test_decode_slug_fallback_only_dashes():
assert _decode_slug_fallback("---") == "---"
# ── _resolve_project_name ───────────────────────────────────────────────
def test_resolve_project_name_uses_cwd(tmp_path):
pdir = tmp_path / "-home-user-dev-coolproj"
pdir.mkdir()
session = pdir / "a.jsonl"
session.write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/cool-proj-real"}) + "\n")
assert _resolve_project_name(pdir) == "cool-proj-real"
def test_resolve_project_name_falls_back_when_no_cwd(tmp_path):
pdir = tmp_path / "-home-user-dev-foo"
pdir.mkdir()
(pdir / "a.jsonl").write_text(json.dumps({"type": "x"}) + "\n")
assert _resolve_project_name(pdir) == "foo"
def test_resolve_project_name_prefers_newer_session(tmp_path):
"""Newest session's cwd wins — covers the case where user renamed the
project directory between sessions."""
pdir = tmp_path / "-home-user-dev-old"
pdir.mkdir()
old = pdir / "old.jsonl"
old.write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/old"}) + "\n")
# Ensure distinguishable mtimes
old_mtime = old.stat().st_mtime - 100
import os
os.utime(old, (old_mtime, old_mtime))
new = pdir / "new.jsonl"
new.write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/new-name"}) + "\n")
assert _resolve_project_name(pdir) == "new-name"
# ── scan_claude_projects ────────────────────────────────────────────────
def test_scan_claude_projects_empty_dir(tmp_path):
assert scan_claude_projects(tmp_path) == []
def test_scan_claude_projects_not_a_projects_root(tmp_path):
"""Returns empty list if the dir doesn't look like .claude/projects/."""
(tmp_path / "some-folder").mkdir()
(tmp_path / "some-folder" / "readme.md").write_text("hi")
assert scan_claude_projects(tmp_path) == []
def test_scan_claude_projects_finds_projects(tmp_path):
p1 = tmp_path / "-home-user-dev-alpha"
p1.mkdir()
(p1 / "a.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/alpha"}) + "\n")
(p1 / "b.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/alpha"}) + "\n")
p2 = tmp_path / "-home-user-dev-beta"
p2.mkdir()
(p2 / "x.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/beta"}) + "\n")
result = scan_claude_projects(tmp_path)
names = [p.name for p in result]
assert "alpha" in names
assert "beta" in names
# alpha has 2 sessions, beta has 1 — alpha ranks higher
alpha = next(p for p in result if p.name == "alpha")
beta = next(p for p in result if p.name == "beta")
assert alpha.user_commits == 2
assert beta.user_commits == 1
def test_scan_claude_projects_ignores_dirs_without_jsonl(tmp_path):
empty_proj = tmp_path / "-home-user-dev-empty"
empty_proj.mkdir()
(empty_proj / "notes.md").write_text("hi")
assert scan_claude_projects(tmp_path) == []
def test_scan_claude_projects_marks_as_mine(tmp_path):
p = tmp_path / "-home-user-dev-owned"
p.mkdir()
(p / "s.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/dev/owned"}) + "\n")
result = scan_claude_projects(tmp_path)
assert len(result) == 1
assert result[0].is_mine is True
def test_scan_claude_projects_dedup_by_name(tmp_path):
"""Two encoded dirs resolving to the same project name collapse to one."""
p1 = tmp_path / "-home-user-a-proj"
p1.mkdir()
(p1 / "s.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/a/proj"}) + "\n")
(p1 / "t.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/a/proj"}) + "\n")
p2 = tmp_path / "-home-user-b-proj"
p2.mkdir()
(p2 / "u.jsonl").write_text(json.dumps({"type": "user", "cwd": "/home/user/b/proj"}) + "\n")
result = scan_claude_projects(tmp_path)
# Both decode to "proj"; only one remains — the one with more sessions wins
assert len(result) == 1
assert result[0].name == "proj"
assert result[0].user_commits == 2