feat(graph): cross-wing tunnels by shared topics (#1180)

When two wings have one or more confirmed TOPIC labels in common, the
miner now drops a symmetric tunnel between them at mine time so the
palace graph reflects shared themes (frameworks, vendors, recurring
concepts).

- llm_refine: TOPIC label routes to a dedicated `topics` bucket so the
  signal survives confirmation instead of getting collapsed into
  `uncertain` and dropped.
- entity_detector / project_scanner: bucket plumbed through the
  detection pipeline; `confirm_entities` returns confirmed topics
  alongside people/projects.
- miner.add_to_known_entities: optional `wing` parameter records the
  confirmed topics under `topics_by_wing` in
  `~/.mempalace/known_entities.json`. Wing names do NOT leak into the
  flat known-name set used by drawer-tagging.
- palace_graph: `compute_topic_tunnels` and `topic_tunnels_for_wing`
  create symmetric tunnels via the existing `create_tunnel` API so they
  share dedup and persistence with explicit tunnels.
- miner.mine: post-file-loop pass calls `topic_tunnels_for_wing` for
  the freshly-mined wing. Failures are logged but never abort the mine.
- config: `topic_tunnel_min_count` knob (env
  `MEMPALACE_TOPIC_TUNNEL_MIN_COUNT` or `~/.mempalace/config.json`),
  default 1.

Tests cover topic persistence through init->mine, tunnel creation when
wings share a topic, no tunnel below threshold, cross-wing tunnel
retrieval via `list_tunnels`, dedup on recompute, case-insensitive
overlap, and the end-to-end mine-time wiring.

Out of scope for this PR (called out in the PR body): manifest-
dependency overlap, per-topic allow/deny lists, search-result surfacing.
This commit is contained in:
Igor Lins e Silva
2026-04-24 19:19:58 -03:00
parent ed2ba726c9
commit fe051adc73
14 changed files with 678 additions and 28 deletions
+138
View File
@@ -499,3 +499,141 @@ def follow_tunnels(wing: str, room: str, col=None, config=None):
pass
return connections
# =============================================================================
# TOPIC TUNNELS — auto-link wings that share confirmed TOPIC labels
# =============================================================================
# When two wings have one or more confirmed topics in common (e.g. both
# discuss "Angular" or "OpenAPI"), drop a symmetric tunnel between them.
# Topics come from the LLM-refined ``TOPIC`` bucket in the per-project
# ``entities.json`` and are persisted by wing in
# ``~/.mempalace/known_entities.json`` under ``topics_by_wing``.
#
# Tunnels are created via the existing ``create_tunnel`` API so they share
# storage and dedup with explicit tunnels. The room is the topic name —
# this matches the "two wings share an idea" mental model and keeps the
# graph homogeneous.
def _normalize_topic(name: str) -> str:
"""Lowercase + strip topics for case-insensitive overlap detection."""
return str(name).strip().lower()
def compute_topic_tunnels(
topics_by_wing: dict,
min_count: int = 1,
label_prefix: str = "shared topic",
) -> list[dict]:
"""Create tunnels for every pair of wings that share >= ``min_count`` topics.
Args:
topics_by_wing: ``{wing_name: [topic_name, ...]}`` mapping. Topic
names are compared case-insensitively; the first observed
casing is used for the tunnel room name.
min_count: minimum number of overlapping topics required to drop
any tunnel between a wing pair. ``1`` means a single shared
topic is enough; bumping to e.g. ``2`` requires multiple
overlaps and filters out coincidental single-topic links.
label_prefix: human-readable string prefixed to the tunnel label.
Returns:
List of tunnel dicts as returned by ``create_tunnel`` — one per
(wing_a, wing_b, topic) triple that crossed the threshold. A
wing-pair below ``min_count`` produces no tunnels at all (not
even for its single shared topic).
No-op semantics:
- empty/None ``topics_by_wing`` returns ``[]``.
- wings whose topic list is empty are skipped.
- ``min_count <= 0`` is clamped to 1.
"""
if not topics_by_wing:
return []
min_count = max(1, int(min_count))
# Build a normalized-topic -> first-seen casing map per wing so we
# preserve display casing while still doing case-insensitive overlap.
wing_topics: dict[str, dict[str, str]] = {}
for wing, names in topics_by_wing.items():
if not isinstance(wing, str) or not wing.strip():
continue
if not isinstance(names, (list, tuple)):
continue
bucket: dict[str, str] = {}
for n in names:
if not isinstance(n, str):
continue
key = _normalize_topic(n)
if not key:
continue
bucket.setdefault(key, n.strip())
if bucket:
wing_topics[wing.strip()] = bucket
wings = sorted(wing_topics.keys())
created: list[dict] = []
for i, wa in enumerate(wings):
topics_a = wing_topics[wa]
for wb in wings[i + 1 :]:
topics_b = wing_topics[wb]
shared_keys = set(topics_a.keys()) & set(topics_b.keys())
if len(shared_keys) < min_count:
continue
# Stable sort for deterministic tunnel ordering across runs.
for key in sorted(shared_keys):
# Prefer the casing from whichever wing sorts first — both
# are valid; this just keeps the displayed room consistent.
room = topics_a[key] if topics_a[key] else topics_b[key]
tunnel = create_tunnel(
source_wing=wa,
source_room=room,
target_wing=wb,
target_room=room,
label=f"{label_prefix}: {room}",
)
created.append(tunnel)
return created
def topic_tunnels_for_wing(
wing: str,
topics_by_wing: dict,
min_count: int = 1,
label_prefix: str = "shared topic",
) -> list[dict]:
"""Compute topic tunnels involving a single wing.
Used by the miner to incrementally update tunnels for the wing that
just finished mining without recomputing pairs that don't involve it.
Returns the list of tunnels created or refreshed.
"""
if not topics_by_wing or not isinstance(wing, str) or not wing.strip():
return []
wing = wing.strip()
own = topics_by_wing.get(wing)
if not isinstance(own, (list, tuple)) or not own:
return []
# Restrict the pair-wise computation to (wing, other) pairs only by
# building a 2-wing slice for each other wing. Reusing
# ``compute_topic_tunnels`` keeps the threshold and casing logic in
# one place.
created: list[dict] = []
for other, other_topics in topics_by_wing.items():
if not isinstance(other, str) or not other.strip() or other == wing:
continue
if not isinstance(other_topics, (list, tuple)) or not other_topics:
continue
slice_map = {wing: list(own), other: list(other_topics)}
created.extend(
compute_topic_tunnels(
slice_map,
min_count=min_count,
label_prefix=label_prefix,
)
)
return created