Files
2026-06-19 23:03:06 -05:00

319 lines
13 KiB
Bash
Executable File

#!/usr/bin/env bash
# vault-lint.sh — mechanically assert goldbrain vault invariants.
#
# Catches the recurring "invariant violation" bugs that prose rules can't enforce:
# folder<->status drift, duplicate slugs, wikilinks leaking into frontmatter,
# duplicate "## Agent Log" headings, stale active projects, aging inbox captures,
# impossible dates, bad status values, missing frontmatter, broken source_notes, and
# paths that no route in routing.json permits. Invoked by the monthly Vault Health
# pass (see SKILL.md), but safe to run any time — it is READ-ONLY.
#
# Exit status: 0 = clean, 1 = violations found, 2 = vault unreachable,
# 3 = vault not bootstrapped (marker missing).
#
# Config (env overrides):
# GB_BASE (default https://goldbrainapi.mpm.to)
# GB_KEY (default the plugin's bearer token)
# GB_TODAY (default the machine date) — pass the conversation's currentDate so
# stale/aging math uses the SAME clock the agent writes with (YYYY-MM-DD)
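# STALE_DAYS (default 30) INBOX_DAYS (default 14)
# SCOPE_STALE_SESSIONS (default 3) — number of session logs dated after scope_updated
# at which the recorded scope is flagged as possibly stale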
#
# routing.json (canonical route manifest) is read from this script's own directory.
# Strict mode: abort on command failure, on use of an unset variable, and on a failed
# pipeline stage.
set -euo pipefail
# Directory containing this script; routing.json is looked up next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Each setting keeps a caller-supplied (non-empty) environment value, else the default.
GB_BASE="${GB_BASE:-https://goldbrainapi.mpm.to}"
# NOTE(review): the fallback below is a bearer token hard-coded in the script, readable by
# anyone who can read this file. Consider requiring GB_KEY from the environment (or a
# secrets file) and rotating this value.
GB_KEY="${GB_KEY:-fb72065a05fabb28ae87c45880cc3b7aba4fd3f58e70297934145cef974e8ed8}"
STALE_DAYS="${STALE_DAYS:-30}"
INBOX_DAYS="${INBOX_DAYS:-14}"
SCOPE_STALE_SESSIONS="${SCOPE_STALE_SESSIONS:-3}"
# Defaults to the machine's local date in YYYY-MM-DD form.
GB_TODAY="${GB_TODAY:-$(date +%Y-%m-%d)}"
# Pass the resolved settings to the embedded Python program via its environment (prefix
# assignments apply to this one command). The quoted 'PY' delimiter stops the shell from
# expanding anything inside the heredoc body.
GB_BASE="$GB_BASE" GB_KEY="$GB_KEY" STALE_DAYS="$STALE_DAYS" INBOX_DAYS="$INBOX_DAYS" \
SCOPE_STALE_SESSIONS="$SCOPE_STALE_SESSIONS" \
GB_TODAY="$GB_TODAY" ROUTING_JSON="$SCRIPT_DIR/routing.json" \
python3 - <<'PY'
import os, sys, json, re, datetime, urllib.request, urllib.error
# Runtime configuration. Every value is read from the environment prepared by the shell
# wrapper; a missing variable raises KeyError and a malformed number/date raises ValueError.
_env = os.environ
BASE = _env["GB_BASE"].rstrip("/")  # no trailing slash, so URLs can be built as BASE + "/vault/..."
KEY = _env["GB_KEY"]
STALE_DAYS, INBOX_DAYS, SCOPE_STALE_SESSIONS = (
    int(_env[name]) for name in ("STALE_DAYS", "INBOX_DAYS", "SCOPE_STALE_SESSIONS")
)
TODAY = datetime.date.fromisoformat(_env["GB_TODAY"])
ROUTING_JSON = _env["ROUTING_JSON"]
# Project lifecycle folders under projects/.
LIFECYCLES = ["active", "incubating", "on-hold", "archived"]
# File names that are never linted as notes.
SKIP = {"README.md", "project-template.md", "decision-template.md"}
# Frontmatter keys a note with frontmatter must carry.
REQUIRED_FM = ("type", "created")
# Project status vocabulary IS enforced (status must equal the lifecycle folder) by the
# folder/status check below. Other note kinds (decisions/concepts) carry free-form status
# vocab (accepted, shipped, reference, ...), so there is no global status allow-list.
# optional real YAML parser; fall back to a tolerant line parser
# Probe for PyYAML; HAVE_YAML records whether the import worked so the frontmatter
# parser can choose between yaml.safe_load and its built-in line parser.
try:
    import yaml  # type: ignore
except Exception:
    HAVE_YAML = False
else:
    HAVE_YAML = True
# Every finding is appended here as a (check_id, message) pair and reported at the end.
violations = []


def flag(check, msg):
    """Record one violation message under the given check id."""
    violations.append((check, msg))
def get(path):
    """GET /vault/<path> and return the response body as text.

    The path is percent-encoded before being placed in the URL ('/' is kept as the
    separator). Without this, an entry whose name contains a space, '#', '?' or a
    non-ASCII character produces a malformed or truncated request URL.

    Returns the body decoded as UTF-8 (undecodable bytes replaced), or None when the
    server answers 404. Any other HTTPError is re-raised; other failures propagate.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    encoded = quote(path, safe="/")
    req = urllib.request.Request(f"{BASE}/vault/{encoded}",
                                 headers={"Authorization": f"Bearer {KEY}"})
    try:
        with urllib.request.urlopen(req, timeout=20) as r:
            return r.read().decode("utf-8", "replace")
    except urllib.error.HTTPError as e:
        if e.code == 404:
            return None
        raise
def list_dir(path):
    """Return (files, folders) for a vault directory; folder names carry no trailing '/'.

    Root is '' (or '/') and maps to /vault/. Directories may arrive either in a
    'folders' key or as 'files' entries ending in '/'. Everything under 'folders' is
    treated as a folder whether or not it ends in '/', so a slash-less folder name is
    no longer mistaken for a file.

    Best-effort: an HTTPError from get(), a 404, a body that is not JSON, or JSON that
    is not an object all yield ([], []). Null or non-list values and non-string entries
    are ignored rather than raising.
    """
    p = "" if path in ("", "/") else (path if path.endswith("/") else path + "/")
    try:
        body = get(p)
    except urllib.error.HTTPError:
        return [], []
    if body is None:
        return [], []
    try:
        j = json.loads(body)
    except json.JSONDecodeError:
        return [], []
    if not isinstance(j, dict):
        return [], []

    def _names(key):
        # Tolerate a missing key, a null value, or stray non-string entries.
        val = j.get(key)
        return [e for e in val if isinstance(e, str)] if isinstance(val, list) else []

    files, folders = [], []
    for e in _names("files"):
        if e.endswith("/"):
            folders.append(e[:-1])
        else:
            files.append(e)
    for e in _names("folders"):
        folders.append(e[:-1] if e.endswith("/") else e)
    # A folder reported under both keys must be listed (and later walked) only once.
    folders = list(dict.fromkeys(folders))
    return files, folders
def walk(prefix=""):
    """Recursively yield the path of every file below prefix.

    prefix is '' for the vault root or a directory path ending in '/'. Files of a
    directory are yielded before any of its sub-folders are descended into.
    """
    names, subdirs = list_dir(prefix)
    yield from (prefix + name for name in names)
    for sub in subdirs:
        yield from walk(prefix + sub + "/")
def split_frontmatter(text):
    """Split a note into (raw_yaml_str, body).

    Frontmatter is recognised when the first line is '---' (surrounding whitespace
    ignored) and a later line closes it with another '---'. A leading UTF-8 byte-order
    mark is skipped before that test: it is not whitespace, so it would otherwise hide
    the opening delimiter and the note would be treated as having no frontmatter.

    Returns ('', '') for empty/None input, and ('', text) when there is no frontmatter
    or the block is never terminated.
    """
    if not text:
        return "", ""
    # Only the delimiter scan ignores the BOM; the no-frontmatter return keeps text as given.
    lines = text.lstrip("\ufeff").splitlines()
    if not lines or lines[0].strip() != "---":
        return "", text
    for i in range(1, len(lines)):
        if lines[i].strip() == "---":
            return "\n".join(lines[1:i]), "\n".join(lines[i + 1:])
    return "", text  # unterminated block -> treat as no frontmatter
def parse_fm(text):
    """Return (raw_yaml_str, dict) for a note's frontmatter.

    Uses yaml.safe_load when PyYAML was imported; if that raises, or PyYAML is absent,
    a tolerant line parser is used instead. Both paths represent an empty value
    ('key:') as "" so callers that stringify a field never see the text "None".
    Returns ('', {}) when there is no frontmatter, and (raw, {}) when the YAML parses
    to something other than a mapping.

    The line parser understands top-level 'key: scalar', 'key: [a, b]' and block lists
    ('key:' followed by '- item' lines). Nested mappings are ignored.
    """
    raw, _ = split_frontmatter(text)
    if not raw:
        return "", {}
    if HAVE_YAML:
        try:
            d = yaml.safe_load(raw)
        except Exception:
            pass  # malformed YAML: fall through to the tolerant parser below
        else:
            if not isinstance(d, dict):
                return raw, {}
            # YAML null (empty value) -> "" to match the fallback parser's result.
            return raw, {k: ("" if v is None else v) for k, v in d.items()}
    # fallback: scalar + simple list lines (keys may contain digits, _, -)
    fields = {}
    pending = None  # key with an empty value that may be followed by '- item' lines
    for line in raw.splitlines():
        m = re.match(r"^([A-Za-z_][\w-]*):\s*(.*)$", line)
        if m:
            key, v = m.group(1), m.group(2).strip()
            if v.startswith("[") and v.endswith("]"):
                v = [x.strip().strip('"').strip("'") for x in v[1:-1].split(",") if x.strip()]
            else:
                v = v.strip('"').strip("'")
            fields[key] = v
            pending = key if v == "" else None
            continue
        item = re.match(r"^\s*-\s+(.*)$", line)
        if item and pending is not None:
            if not isinstance(fields[pending], list):
                fields[pending] = []
            fields[pending].append(item.group(1).strip().strip('"').strip("'"))
        elif line.strip():
            pending = None  # any other content ends the block list
    return raw, fields
def parse_date(s):
    """Return the datetime.date encoded by a leading YYYY-MM-DD in str(s), else None.

    Falsy input, text that does not start with that pattern, and digit groups that do
    not form a real calendar date all give None.
    """
    found = re.match(r"(\d{4}-\d{2}-\d{2})", str(s or ""))
    if found is None:
        return None
    try:
        parsed = datetime.date.fromisoformat(found.group(1))
    except ValueError:
        parsed = None
    return parsed
def as_list(v):
    """Coerce a frontmatter value to a list.

    A list is returned unchanged, None and "" become [], and any other value is
    wrapped in a one-element list.
    """
    if isinstance(v, list):
        return v
    if v is None or v == "":
        return []
    return [v]
# ---- Reachability + bootstrap probe (M2: do NOT silently report clean) -------
# Fetch the bootstrap marker first: a failure to reach the vault exits 2 and a missing
# marker exits 3, so neither case can fall through to a "clean" report.
try:
    marker = get("_agent/goldbrain-vault.md")
except Exception as e:
    print(f"vault-lint: vault unreachable ({e}).", file=sys.stderr)
    sys.exit(2)
if marker is None:
    print("vault-lint: marker missing — vault not bootstrapped (run bootstrap.sh).", file=sys.stderr)
    sys.exit(3)
# ---- Load canonical routing manifest (S3) ------------------------------------
# ROUTES: [(route_id, compiled_pattern)]; RETIRED: [(compiled_pattern, replacement)].
# Both stay empty unless the whole manifest loads, so one bad entry (for example an
# invalid retired pattern) cannot leave ROUTES populated while the message below says
# the path checks were skipped.
ROUTES, RETIRED = [], []
try:
    # The encoding is explicit so the result does not depend on the locale default.
    with open(ROUTING_JSON, encoding="utf-8") as fh:
        rj = json.load(fh)
    _routes = [(r["id"], re.compile(r["pattern"])) for r in rj.get("routes", [])]
    _retired = [(re.compile(r["pattern"]), r.get("replacement", "")) for r in rj.get("retired", [])]
except Exception as e:
    flag("routing-manifest", f"could not load routing.json ({e}) — path checks skipped")
else:
    ROUTES, RETIRED = _routes, _retired
# ---- Single full walk feeds every path-level check ---------------------------
# One recursive listing of the whole vault, reused by every path-based check below.
all_files = list(walk())


def route_for(path):
    """Return the id of the first route in ROUTES whose pattern matches path, else None."""
    return next((rid for rid, rx in ROUTES if rx.match(path)), None)
# Path membership + retired-path detection (S3)
# Without any loaded routes there is nothing to compare against, so the check is skipped.
if ROUTES:
    for path in all_files:
        if route_for(path) is not None:
            continue
        # No route matches: report a retired location if one applies, else an unknown path.
        replacement = None
        for rx, repl in RETIRED:
            if rx.match(path):
                replacement = repl
                break
        if replacement is None:
            flag("unknown-path", f"{path}: matches no route in routing.json")
        else:
            flag("retired-path", f"{path}: retired location — should be {replacement}")
# ---- Per-note frontmatter checks (M5) ----------------------------------------
# Matches anything under a templates/ folder and any file named *-template.md.
TEMPLATE_RE = re.compile(r"(^|/)(templates/|.*-template\.md$)")


def _fm_blank(value):
    """True when a frontmatter value is absent, null, or only whitespace.

    A YAML parser yields None for 'key:' with no value; stringifying that gives the
    non-empty text "None", which must still count as missing.
    """
    return value is None or not str(value).strip()


for path in all_files:
    base = path.rsplit("/", 1)[-1]
    if base in SKIP or TEMPLATE_RE.search(path) or not path.endswith(".md"):
        continue
    text = get(path)
    if text is None:
        continue
    raw, fm = parse_fm(text)
    # wikilinks anywhere in frontmatter (widened sweep — all folders)
    if "[[" in raw:
        flag("frontmatter-wikilink", f"{path}: '[[...]]' inside frontmatter")
    # missing required frontmatter (only for notes that have some frontmatter)
    missing = [k for k in REQUIRED_FM if _fm_blank(fm.get(k))]
    if fm and missing:
        flag("missing-frontmatter", f"{path}: missing {', '.join(missing)}")
    # impossible dates: updated < created, or updated later than today
    c, u = parse_date(fm.get("created")), parse_date(fm.get("updated"))
    if c and u and u < c:
        flag("date-order", f"{path}: updated {u} is before created {c}")
    if u and u > TODAY:
        flag("future-date", f"{path}: updated {u} is in the future (today {TODAY})")
    # source_notes hygiene: entries must be plain paths, so any wikilink is flagged.
    # (Only the wikilink test is implemented here; self-references are not checked.)
    for sn in as_list(fm.get("source_notes")):
        s = str(sn)
        if "[[" in s:
            flag("source-notes-wikilink", f"{path}: source_notes contains a wikilink '{s}'")
# ---- Projects: folder<->status, stale active, duplicate slugs ----------------
# slug -> lifecycle folders in which a note with that slug was found.
slug_homes = {}
for lc in LIFECYCLES:
    files, _ = list_dir(f"projects/{lc}")
    for fn in files:
        # Only real project notes: skip templates/README and anything that is not .md.
        if fn in SKIP or not fn.endswith(".md"):
            continue
        slug = fn[:-3]
        slug_homes.setdefault(slug, []).append(lc)
        text = get(f"projects/{lc}/{fn}")
        if text is None:
            continue
        _, fm = parse_fm(text)
        # An empty 'status:' may parse to None; treat it as "no status" rather than
        # stringifying it to 'None' and reporting a bogus folder/status mismatch.
        raw_status = fm.get("status")
        status = "" if raw_status is None else str(raw_status).strip().strip('"').strip("'")
        if status and status != lc:
            flag("folder/status", f"projects/{lc}/{fn}: status='{status}' but folder='{lc}'")
        if lc == "active":
            # Active projects not updated within STALE_DAYS are surfaced as stale.
            d = parse_date(fm.get("updated"))
            if d and (TODAY - d).days > STALE_DAYS:
                flag("stale-active", f"projects/active/{fn}: updated {d} ({(TODAY-d).days}d ago) — consider on-hold/")
# A slug present in more than one lifecycle folder is a duplicate.
for slug, homes in slug_homes.items():
    if len(homes) > 1:
        flag("duplicate-slug", f"'{slug}' exists in {', '.join(homes)}")
# ---- Daily notes: duplicate "## Agent Log" headings --------------------------
# Each daily note under journal/daily/ may contain at most one "## Agent Log" heading line.
for path in all_files:
    if re.match(r"^journal/daily/.*\.md$", path) is None:
        continue
    text = get(path) or ""
    headings = re.findall(r"(?m)^## Agent Log\s*$", text)
    n = len(headings)
    if n > 1:
        flag("duplicate-agent-log", f"{path}: {n} '## Agent Log' headings")
# ---- Inbox: captures aging past INBOX_DAYS -----------------------------------
# Inbox captures are list items that begin with a YYYY-MM-DD date; report those whose
# date is more than INBOX_DAYS before TODAY. A missing inbox file counts as empty.
inbox = get("inbox/captures/inbox.md") or ""
for line in inbox.splitlines():
    m = re.match(r"^\s*-\s*(\d{4}-\d{2}-\d{2})\b", line)
    if not m:
        continue
    d = parse_date(m.group(1))
    if not d:
        continue  # digits that do not form a real calendar date
    age = (TODAY - d).days
    if age > INBOX_DAYS:
        flag("aging-inbox", f"inbox capture {d} ({age}d): {line.strip()[:80]}")
# ---- Scope freshness (drift detector) ----------------------------------------
# Scope is the most churn-prone state (Bryan runs several sessions/day across topics).
# It has no natural staleness signal, so drift is otherwise invisible. Rule: if N+ session
# logs are dated AFTER current-context's scope_updated, the recorded scope may no longer
# reflect current work — surface it for a human glance (advisory, like every health finding).
# Compare current-context's scope_updated date with the dates in session-log paths.
# Nothing is checked when the current-context note does not exist.
cc = get("_agent/context/current-context.md")
if cc is not None:
    _, ccfm = parse_fm(cc)
    su = parse_date(ccfm.get("scope_updated"))
    if su is not None:
        # Collect session logs whose path date is strictly later than scope_updated.
        since = []
        for p in all_files:
            hit = re.match(r"^_agent/sessions/(\d{4}-\d{2}-\d{2})", p)
            if not hit:
                continue
            logged = parse_date(hit.group(1))
            if logged and logged > su:
                since.append(p)
        if len(since) >= SCOPE_STALE_SESSIONS:
            flag("scope-stale",
                 f"scope set {su}; {len(since)} session(s) logged since without a switch — confirm it still reflects current work (or run `goldbrain.sh scope set`)")
    else:
        flag("scope-no-timestamp",
             "_agent/context/current-context.md: no scope_updated frontmatter — scope drift cannot be detected; add it (bootstrap.sh repair) and switch scope via `goldbrain.sh scope set`")
# ---- Report ------------------------------------------------------------------
# Final report: exit 0 with a one-line message when nothing was flagged; otherwise print
# the findings grouped by check (in order of first occurrence) and exit 1.
if not violations:
    print("vault-lint: clean — all invariants hold.")
    sys.exit(0)

# Human-readable section titles; a check id without an entry is printed as-is.
labels = {
    "folder/status": "Folder <-> status mismatch",
    "duplicate-slug": "Duplicate slug across lifecycle folders",
    "frontmatter-wikilink": "Wikilink in frontmatter (breaks reading view)",
    "duplicate-agent-log": "Duplicate '## Agent Log' heading",
    "stale-active": f"Stale active project (updated > {STALE_DAYS}d)",
    "aging-inbox": f"Inbox capture aging (> {INBOX_DAYS}d)",
    "unknown-path": "Path matches no route in routing.json",
    "retired-path": "Write to a retired/dead path",
    "missing-frontmatter": "Missing required frontmatter field",
    "date-order": "updated earlier than created",
    "future-date": "updated date is in the future",
    "source-notes-wikilink": "Wikilink in source_notes (must be plain paths)",
    "routing-manifest": "routing.json problem",
    "scope-no-timestamp": "current-context has no scope_updated (drift undetectable)",
    "scope-stale": f"Scope may have drifted (>= {SCOPE_STALE_SESSIONS} sessions since last switch)",
}

grouped = {}
for check, msg in violations:
    if check not in grouped:
        grouped[check] = []
    grouped[check].append(msg)

print(f"vault-lint: {len(violations)} violation(s) found\n")
for check in grouped:
    heading = labels.get(check, check)
    print(f"## {heading}")
    for entry in grouped[check]:
        print(f" - {entry}")
    print()
sys.exit(1)
PY