security: harden inputs, fix shell injection, optimize DB access

- Fix command injection in hook script (pass paths via sys.argv)
- Add sanitize_name/sanitize_content validators in config.py
- Add 10MB file size guard + symlink skip in miners
- Fix SQLite connection leak in knowledge_graph.py (reuse connection)
- Use `with conn:` for proper transaction handling
- Consolidate shared palace operations into palace.py
- Add write-ahead log for audit trail on writes/deletes
- Add metadata cache with 30s TTL for status/taxonomy calls
- Upgrade md5 → sha256 for drawer/triple IDs
- Harden file permissions (0o700/0o600)
- Pin chromadb>=0.5.0,<0.7

Based on PR #252 by @anthonyonazure with lint fixes applied.

Co-Authored-By: anthonyonazure <anthonyonazure@users.noreply.github.com>
2026-04-09 08:06:30 -07:00
parent 963c04cf45
commit 1d19dfc9d5
8 changed files with 389 additions and 203 deletions
@@ -64,13 +64,20 @@ MEMPAL_DIR=""
 # Read JSON input from stdin
 INPUT=$(cat)

-# Parse fields from Claude Code's JSON
-SESSION_ID=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('session_id','unknown'))" 2>/dev/null)
-# Sanitize SESSION_ID to prevent path traversal (only allow alnum, dash, underscore)
-SESSION_ID=$(echo "$SESSION_ID" | tr -cd 'a-zA-Z0-9_-')
-[ -z "$SESSION_ID" ] && SESSION_ID="unknown"
-STOP_HOOK_ACTIVE=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('stop_hook_active', False))" 2>/dev/null)
-TRANSCRIPT_PATH=$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('transcript_path',''))" 2>/dev/null)
+# Parse all fields in one Python call (one interpreter start-up instead of three).
+# SECURITY: every value is shlex.quote()d before eval, so no JSON field can inject shell code.
+eval "$(echo "$INPUT" | python3 -c "
+import json, re, shlex, sys
+data = json.load(sys.stdin)
+# SESSION_ID: keep only alnum, dash, underscore (no '/' or '.') to prevent path traversal
+sid = re.sub(r'[^a-zA-Z0-9_-]', '', str(data.get('session_id', 'unknown'))) or 'unknown'
+sha = str(data.get('stop_hook_active', False))
+tp = str(data.get('transcript_path', ''))
+print('SESSION_ID=' + shlex.quote(sid))
+print('STOP_HOOK_ACTIVE=' + shlex.quote(sha))
+print('TRANSCRIPT_PATH=' + shlex.quote(tp))
+" 2>/dev/null)"
+[ -z "$SESSION_ID" ] && SESSION_ID="unknown"

 # Expand ~ in path
 TRANSCRIPT_PATH="${TRANSCRIPT_PATH/#\~/$HOME}"
@@ -83,6 +90,7 @@ if [ "$STOP_HOOK_ACTIVE" = "True" ] || [ "$STOP_HOOK_ACTIVE" = "true" ]; then
 fi

 # Count human messages in the JSONL transcript
+# SECURITY: Pass transcript path as sys.argv to avoid shell injection via crafted paths
 if [ -f "$TRANSCRIPT_PATH" ]; then
    EXCHANGE_COUNT=$(python3 - "$TRANSCRIPT_PATH" <<'PYEOF'
 import json, sys
@@ -94,7 +102,6 @@ with open(sys.argv[1]) as f:
            msg = entry.get('message', {})
            if isinstance(msg, dict) and msg.get('role') == 'user':
                content = msg.get('content', '')
-                # Skip system/command messages — only count real human input
                if isinstance(content, str) and '<command-message>' in content:
                    continue
                count += 1