This commit is contained in:
Jason Stedwell
2026-06-19 21:12:14 -05:00
parent a860819bfb
commit 2fc3a0a80b
26 changed files with 1559 additions and 89 deletions
+177 -59
View File
@@ -1,27 +1,37 @@
#!/usr/bin/env bash
# vault-lint.sh — mechanically assert ECHO vault invariants.
#
# Catches the recurring "invariant violation" bugs that prose rules can't enforce
# on their own: folder<->status drift, duplicate slugs, wikilinks leaking into
# frontmatter, duplicate "## Agent Log" headings, stale active projects, and
# aging inbox captures. Invoked by the monthly Vault Health pass (see SKILL.md),
# but safe to run any time — it is READ-ONLY and never modifies the vault.
# Catches the recurring "invariant violation" bugs that prose rules can't enforce:
# folder<->status drift, duplicate slugs, wikilinks leaking into frontmatter,
# duplicate "## Agent Log" headings, stale active projects, aging inbox captures,
# impossible dates, missing required frontmatter fields, wikilinks in source_notes, and
# paths that no route in routing.json permits. Invoked by the monthly Vault Health
# pass (see SKILL.md), but safe to run any time — it is READ-ONLY.
#
# Exit status: 0 = clean, 1 = violations found, 2 = vault unreachable.
# Exit status: 0 = clean, 1 = violations found, 2 = vault unreachable,
# 3 = vault not bootstrapped (marker missing).
#
# Config is hardcoded to match the rest of the plugin; override via env if needed:
# Config (env overrides):
# ECHO_BASE (default https://echoapi.alwisp.com)
# ECHO_KEY (default the plugin's bearer token)
# ECHO_TODAY (default the machine date) — pass the conversation's currentDate so
# stale/aging math uses the SAME clock the agent writes with (YYYY-MM-DD)
# STALE_DAYS (default 30) INBOX_DAYS (default 14)
#
# routing.json (canonical route manifest) is read from this script's own directory.
# Abort on any failing command, any unset variable, and any failing pipeline stage.
set -euo pipefail
# Absolute directory holding this script; routing.json is looked up next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ECHO_BASE="${ECHO_BASE:-https://echoapi.alwisp.com}"
# NOTE(review): the fallback below is a credential-looking literal committed to the
# repository. Consider requiring ECHO_KEY from the environment instead — confirm with
# the plugin owner before changing, since callers may rely on the default.
ECHO_KEY="${ECHO_KEY:-241265fbe6830934a9a4ad3e69335f64a42153b663aa5b0017cb1ea1217b2bab}"
STALE_DAYS="${STALE_DAYS:-30}"
INBOX_DAYS="${INBOX_DAYS:-14}"
# Falls back to the machine's date when the caller does not pin one.
ECHO_TODAY="${ECHO_TODAY:-$(date +%Y-%m-%d)}"
# Pass the resolved settings to the embedded Python program through its environment.
# The quoted 'PY' delimiter stops the shell from expanding anything inside the heredoc.
ECHO_BASE="$ECHO_BASE" ECHO_KEY="$ECHO_KEY" STALE_DAYS="$STALE_DAYS" INBOX_DAYS="$INBOX_DAYS" \
ECHO_TODAY="$ECHO_TODAY" ROUTING_JSON="$SCRIPT_DIR/routing.json" \
python3 - <<'PY'
import os, sys, json, re, datetime, urllib.request, urllib.error
@@ -29,9 +39,21 @@ BASE = os.environ["ECHO_BASE"].rstrip("/")
KEY = os.environ["ECHO_KEY"]
STALE_DAYS = int(os.environ["STALE_DAYS"])
INBOX_DAYS = int(os.environ["INBOX_DAYS"])
TODAY = datetime.date.today()
TODAY = datetime.date.fromisoformat(os.environ["ECHO_TODAY"])
ROUTING_JSON = os.environ["ROUTING_JSON"]
LIFECYCLES = ["active", "incubating", "on-hold", "archived"]
SKIP = {"README.md", "project-template.md", "decision-template.md"}
REQUIRED_FM = ("type", "created")
# Project status vocabulary IS enforced (status must equal the lifecycle folder) by the
# folder/status check below. Other note kinds (decisions/concepts) carry free-form status
# vocab (accepted, shipped, reference, ...), so there is no global status allow-list.
# optional real YAML parser; fall back to a tolerant line parser
try:
import yaml # type: ignore
HAVE_YAML = True
except Exception:
HAVE_YAML = False
violations = []
def flag(check, msg): violations.append((check, msg))
@@ -48,32 +70,72 @@ def get(path):
return None
raise
def listdir(path):
body = get(path if path.endswith("/") else path + "/")
if body is None:
return []
def list_dir(path):
"""Return (files, folders) for a vault directory. Directories may arrive either in a
'folders' key OR as 'files' entries ending in '/'; handle both. Root is '' -> /vault/.
Tolerates non-404 errors (e.g. a 400 on an odd path) by returning empty."""
p = "" if path in ("", "/") else (path if path.endswith("/") else path + "/")
try:
return json.loads(body).get("files", [])
body = get(p)
except urllib.error.HTTPError:
return [], []
if body is None:
return [], []
try:
j = json.loads(body)
except json.JSONDecodeError:
return []
return [], []
entries = list(j.get("files", [])) + list(j.get("folders", []))
files = [e for e in entries if not e.endswith("/")]
folders = [e[:-1] for e in entries if e.endswith("/")]
return files, folders
def frontmatter(text):
"""Return (raw_frontmatter_str, dict_of_scalar_fields). Empty if no block."""
if not text or not text.startswith("---"):
def walk(prefix=""):
    """Lazily yield the path of every file at or below ``prefix``.

    ``prefix`` is either '' (vault root) or a directory path ending in '/'.
    Traversal is depth-first: a directory's own files come first, then each
    sub-directory is fully explored in the order list_dir() reported it.
    """
    pending = [prefix]
    while pending:
        current = pending.pop()
        names, subdirs = list_dir(current)
        for name in names:
            yield current + name
        # Push in reverse so the first-listed sub-directory is popped next.
        pending.extend(f"{current}{sub}/" for sub in reversed(subdirs))
def split_frontmatter(text):
    """Split a note into its YAML frontmatter and the remaining body.

    The block must open with a '---' line as the very first line and close with
    the next '---' line; surrounding whitespace on a delimiter line is ignored.
    A leading UTF-8 byte-order mark is skipped so it cannot hide the opener.

    Returns:
        (raw_yaml, body) when a terminated block is found, with both parts
        re-joined using '\\n'. ("", "") for empty/None input. ("", text) when
        there is no opening delimiter or the block is never closed.
    """
    if not text:
        return "", ""
    # A BOM that survived decoding would make the first line "\ufeff---" and the
    # note would be treated as having no frontmatter, skipping every check on it.
    lines = text.lstrip("\ufeff").splitlines()
    if not lines or lines[0].strip() != "---":
        return "", text
    for i, line in enumerate(lines[1:], start=1):
        if line.strip() == "---":
            return "\n".join(lines[1:i]), "\n".join(lines[i + 1:])
    return "", text  # unterminated block -> treat as no frontmatter
def parse_fm(text):
"""Return (raw_yaml_str, dict). Uses PyYAML when available, else a tolerant parser."""
raw, _ = split_frontmatter(text)
if not raw:
return "", {}
end = text.find("\n---", 3)
if end == -1:
return "", {}
raw = text[3:end]
if HAVE_YAML:
try:
d = yaml.safe_load(raw)
return raw, (d if isinstance(d, dict) else {})
except Exception:
pass
# fallback: scalar + simple inline-list lines (keys may contain digits, _, -)
fields = {}
for line in raw.splitlines():
m = re.match(r"^([A-Za-z_]+):\s*(.*)$", line)
m = re.match(r"^([A-Za-z_][\w-]*):\s*(.*)$", line)
if m:
fields[m.group(1)] = m.group(2).strip()
v = m.group(2).strip()
if v.startswith("[") and v.endswith("]"):
v = [x.strip().strip('"').strip("'") for x in v[1:-1].split(",") if x.strip()]
else:
v = v.strip('"').strip("'")
fields[m.group(1)] = v
return raw, fields
def parse_date(s):
m = re.match(r"(\d{4}-\d{2}-\d{2})", s or "")
m = re.match(r"(\d{4}-\d{2}-\d{2})", str(s or ""))
if not m:
return None
try:
@@ -81,66 +143,115 @@ def parse_date(s):
except ValueError:
return None
# Reachability probe
def as_list(v):
    """Normalise a frontmatter value to a list.

    A list is returned as-is (same object), None and the empty string become an
    empty list, and any other value is wrapped in a one-element list.
    """
    if isinstance(v, list):
        return v
    if v is None or v == "":
        return []
    return [v]
# ---- Reachability + bootstrap probe (M2: do NOT silently report clean) -------
try:
if get("_agent/echo-vault.md") is None:
print("vault-lint: marker missing — vault may not be bootstrapped.", file=sys.stderr)
print("vault-lint: marker missing — vault not bootstrapped (run bootstrap.sh).", file=sys.stderr)
sys.exit(3)
except Exception as e:
print(f"vault-lint: vault unreachable ({e}).", file=sys.stderr)
sys.exit(2)
# ---- Projects: folder<->status, stale active, wikilinks-in-frontmatter, dup slugs
# ---- Load canonical routing manifest (S3) ------------------------------------
# ROUTES:  list of (route id, compiled pattern) built from the manifest's "routes".
# RETIRED: list of (compiled pattern, replacement text) built from "retired".
# Both stay empty when the manifest cannot be loaded, which disables path checks.
ROUTES, RETIRED = [], []
try:
    # JSON is UTF-8 by specification; do not depend on the platform default encoding.
    with open(ROUTING_JSON, encoding="utf-8") as fh:
        rj = json.load(fh)
    _routes = [(r["id"], re.compile(r["pattern"])) for r in rj.get("routes", [])]
    _retired = [(re.compile(r["pattern"]), r.get("replacement", "")) for r in rj.get("retired", [])]
except Exception as e:
    # Deliberately broad: an unreadable, malformed or oddly-shaped manifest must
    # surface as a lint finding, never as a crash of the whole run.
    flag("routing-manifest", f"could not load routing.json ({e}) — path checks skipped")
else:
    # Publish both tables together so a failure part-way through (e.g. a bad
    # pattern under "retired") cannot leave ROUTES populated while the message
    # above claims the path checks were skipped.
    ROUTES, RETIRED = _routes, _retired
# ---- Single full walk feeds every path-level check ---------------------------
all_files = list(walk())
def route_for(path):
    """Return the id of the first route in ROUTES whose pattern matches ``path``
    (via ``re.match``, i.e. anchored at the start), or None when none does."""
    return next((rid for rid, rx in ROUTES if rx.match(path)), None)
# Path membership + retired-path detection (S3)
# Every walked path must match some route. When no routes were loaded the check
# is a no-op; otherwise an unmatched path is reported either as a known retired
# location (with its replacement) or as unknown.
for path in all_files:
    if not ROUTES or route_for(path) is not None:
        continue
    hit = next((repl for rx, repl in RETIRED if rx.match(path)), None)
    if hit is None:
        flag("unknown-path", f"{path}: matches no route in routing.json")
    else:
        flag("retired-path", f"{path}: retired location — should be {hit}")
# ---- Per-note frontmatter checks (M5) ----------------------------------------
# Templates are exempt: anything under a templates/ directory or named *-template.md.
TEMPLATE_RE = re.compile(r"(^|/)(templates/|.*-template\.md$)")
for path in all_files:
    base = path.rsplit("/", 1)[-1]
    if base in SKIP or TEMPLATE_RE.search(path) or not path.endswith(".md"):
        continue
    text = get(path)
    if text is None:
        continue
    raw, fm = parse_fm(text)
    # wikilinks anywhere in frontmatter (widened sweep — all folders)
    if "[[" in raw:
        flag("frontmatter-wikilink", f"{path}: '[[...]]' inside frontmatter")
    # missing required frontmatter — only judged when a frontmatter block parsed to
    # at least one field. A key that is present but empty counts as missing: a YAML
    # parser loads a bare `created:` as None, and str(None) == "None" would otherwise
    # pass for a real value (the fallback line parser yields "" for the same input).
    missing = [k for k in REQUIRED_FM if fm.get(k) is None or not str(fm[k]).strip()]
    if fm and missing:
        flag("missing-frontmatter", f"{path}: missing {', '.join(missing)}")
    # impossible dates: updated earlier than created, or updated later than TODAY
    c, u = parse_date(fm.get("created")), parse_date(fm.get("updated"))
    if c and u and u < c:
        flag("date-order", f"{path}: updated {u} is before created {c}")
    if u and u > TODAY:
        flag("future-date", f"{path}: updated {u} is in the future (today {TODAY})")
    # source_notes hygiene: entries must be plain paths, never wikilinks.
    # (Only the wikilink rule is enforced here; self-references are not detected.)
    for sn in as_list(fm.get("source_notes")):
        s = str(sn)
        if "[[" in s:
            flag("source-notes-wikilink", f"{path}: source_notes contains a wikilink '{s}'")
# ---- Projects: folder<->status, stale active, duplicate slugs ----------------
slug_homes = {}
for lc in LIFECYCLES:
for fn in listdir(f"projects/{lc}"):
if fn.endswith("/") or fn in SKIP:
files, _ = list_dir(f"projects/{lc}")
for fn in files:
if fn.endswith("/") or fn in SKIP or not fn.endswith(".md"):
continue
slug = fn[:-3] if fn.endswith(".md") else fn
slug = fn[:-3]
slug_homes.setdefault(slug, []).append(lc)
text = get(f"projects/{lc}/{fn}")
if text is None:
continue
raw, fm = frontmatter(text)
status = fm.get("status", "").strip().strip('"').strip("'")
_, fm = parse_fm(text)
status = str(fm.get("status", "")).strip().strip('"').strip("'")
if status and status != lc:
flag("folder/status", f"projects/{lc}/{fn}: status='{status}' but folder='{lc}'")
if "[[" in raw:
flag("frontmatter-wikilink", f"projects/{lc}/{fn}: '[[...]]' inside frontmatter")
if lc == "active":
d = parse_date(fm.get("updated", ""))
d = parse_date(fm.get("updated"))
if d and (TODAY - d).days > STALE_DAYS:
flag("stale-active", f"projects/active/{fn}: updated {d} ({(TODAY-d).days}d ago) — consider on-hold/")
for slug, homes in slug_homes.items():
if len(homes) > 1:
flag("duplicate-slug", f"'{slug}' exists in {', '.join(homes)}")
# ---- Wikilinks-in-frontmatter for other high-churn notes
extra = ["_agent/context/current-context.md",
"_agent/memory/semantic/operator-preferences.md"]
for fn in listdir("resources/people"):
if fn.endswith(".md") and fn not in SKIP:
extra.append(f"resources/people/{fn}")
for fn in listdir("_agent/memory/semantic"):
if fn.endswith(".md") and fn not in SKIP:
extra.append(f"_agent/memory/semantic/{fn}")
for path in extra:
text = get(path)
if text is None:
# ---- Daily notes: duplicate "## Agent Log" headings --------------------------
for path in all_files:
if not re.match(r"^journal/daily/.*\.md$", path):
continue
raw, _ = frontmatter(text)
if "[[" in raw:
flag("frontmatter-wikilink", f"{path}: '[[...]]' inside frontmatter")
# ---- Daily notes: duplicate "## Agent Log" headings
for fn in listdir("journal/daily"):
if not fn.endswith(".md") or fn in SKIP:
continue
text = get(f"journal/daily/{fn}") or ""
text = get(path) or ""
n = len(re.findall(r"(?m)^## Agent Log\s*$", text))
if n > 1:
flag("duplicate-agent-log", f"journal/daily/{fn}: {n} '## Agent Log' headings")
flag("duplicate-agent-log", f"{path}: {n} '## Agent Log' headings")
# ---- Inbox: captures aging past INBOX_DAYS
# ---- Inbox: captures aging past INBOX_DAYS -----------------------------------
inbox = get("inbox/captures/inbox.md") or ""
for line in inbox.splitlines():
m = re.match(r"^\s*-\s*(\d{4}-\d{2}-\d{2})\b", line)
@@ -149,7 +260,7 @@ for line in inbox.splitlines():
if d and (TODAY - d).days > INBOX_DAYS:
flag("aging-inbox", f"inbox capture {d} ({(TODAY-d).days}d): {line.strip()[:80]}")
# ---- Report
# ---- Report ------------------------------------------------------------------
if not violations:
print("vault-lint: clean — all invariants hold.")
sys.exit(0)
@@ -165,6 +276,13 @@ labels = {
"duplicate-agent-log": "Duplicate '## Agent Log' heading",
"stale-active": f"Stale active project (updated > {STALE_DAYS}d)",
"aging-inbox": f"Inbox capture aging (> {INBOX_DAYS}d)",
"unknown-path": "Path matches no route in routing.json",
"retired-path": "Write to a retired/dead path",
"missing-frontmatter": "Missing required frontmatter field",
"date-order": "updated earlier than created",
"future-date": "updated date is in the future",
"source-notes-wikilink": "Wikilink in source_notes (must be plain paths)",
"routing-manifest": "routing.json problem",
}
for check, msgs in by.items():
print(f"## {labels.get(check, check)}")