From 4f36145c2e51aa53c705c67a3f692174230bbcf9 Mon Sep 17 00:00:00 2001 From: Arnold Wender Date: Sun, 26 Apr 2026 13:01:55 +0200 Subject: [PATCH 1/2] fix(entity_registry): atomic write to prevent partial corruption on crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EntityRegistry.save() called Path.write_text() directly, which truncates the target file and then writes — so a crash mid-write (power loss, OOM, filesystem-full mid-flush) leaves an empty or half-written entity_registry.json. The whole people/projects map is lost; the system falls back to an empty registry on next load. Switch to the standard atomic-write pattern: serialize to a sibling .tmp file in the same directory (so os.replace stays on one filesystem), fsync, chmod 0o600, then os.replace over the target. The replace is atomic on POSIX and Windows, so any crash leaves the previous registry intact instead of a truncated file. Tests cover: no leftover .tmp on success, and previous content preserved when os.replace itself raises mid-save. --- mempalace/entity_registry.py | 15 ++++++++++-- tests/test_entity_registry.py | 46 +++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/mempalace/entity_registry.py b/mempalace/entity_registry.py index 78d8a8b..d77b6c1 100644 --- a/mempalace/entity_registry.py +++ b/mempalace/entity_registry.py @@ -16,6 +16,7 @@ Usage: """ import json +import os import re import urllib.request import urllib.parse @@ -320,11 +321,21 @@ class EntityRegistry: self._path.parent.chmod(0o700) except (OSError, NotImplementedError): pass - self._path.write_text(json.dumps(self._data, indent=2), encoding="utf-8") + # Atomic write: serialize to a sibling temp file in the same dir + # (so os.replace stays on one filesystem), fsync, then rename over + # the target. A crash mid-write leaves the previous registry intact + # instead of a half-written file or an empty file from the truncate. + payload = json.dumps(self._data, indent=2) + tmp_path = self._path.with_name(self._path.name + ".tmp") + with open(tmp_path, "w", encoding="utf-8") as f: + f.write(payload) + f.flush() + os.fsync(f.fileno()) try: - self._path.chmod(0o600) + tmp_path.chmod(0o600) except (OSError, NotImplementedError): pass + os.replace(tmp_path, self._path) @staticmethod def _empty() -> dict: diff --git a/tests/test_entity_registry.py b/tests/test_entity_registry.py index c857a07..a5f237c 100644 --- a/tests/test_entity_registry.py +++ b/tests/test_entity_registry.py @@ -2,6 +2,8 @@ from unittest.mock import patch +import pytest + from mempalace.entity_registry import ( COMMON_ENGLISH_WORDS, PERSON_CONTEXT_PATTERNS, @@ -71,6 +73,50 @@ def test_save_creates_file(tmp_path): assert (tmp_path / "entity_registry.json").exists() +def test_save_is_atomic_does_not_leave_tmp(tmp_path): + # Atomic write must not leave the .tmp sidecar file after a successful save. + registry = EntityRegistry.load(config_dir=tmp_path) + registry.save() + leftover = list(tmp_path.glob("entity_registry.json.tmp*")) + assert leftover == [], f"atomic write leaked tmp file(s): {leftover}" + + +def test_save_preserves_previous_on_serialization_failure(tmp_path, monkeypatch): + # If serialization fails mid-write, the previous registry must remain + # intact — this is the whole point of atomic write vs truncating in place. + registry = EntityRegistry.load(config_dir=tmp_path) + registry.seed( + mode="personal", + people=[{"name": "Alice", "relationship": "friend", "context": "personal"}], + projects=[], + ) + registry.save() + target = tmp_path / "entity_registry.json" + original = target.read_text(encoding="utf-8") + + # Force os.replace to raise — simulates filesystem full / permission flip + # AFTER the temp file is written but BEFORE the rename completes. + import os as _os + + real_replace = _os.replace + + def boom(src, dst): + raise OSError("simulated rename failure") + + monkeypatch.setattr(_os, "replace", boom) + with pytest.raises(OSError): + registry.seed( + mode="personal", + people=[{"name": "Bob", "relationship": "friend", "context": "personal"}], + projects=[], + ) + registry.save() + + # Restore os.replace before reading so the assertion can rely on it. + monkeypatch.setattr(_os, "replace", real_replace) + assert target.read_text(encoding="utf-8") == original + + # ── seed ──────────────────────────────────────────────────────────────── From 2e441d17a22f9e14dcb9d8576551ea867e56cda8 Mon Sep 17 00:00:00 2001 From: Arnold Wender Date: Tue, 28 Apr 2026 09:43:27 +0200 Subject: [PATCH 2/2] fix(entity_registry): fsync parent dir after rename for ext4 durability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this, on ext4 (and similar) filesystems the rename ack does not guarantee durability across power loss — a crash can revert to a state where the temp file is present and the target is at the old version. Suggested by @jphein on #1215. --- mempalace/entity_registry.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/mempalace/entity_registry.py b/mempalace/entity_registry.py index d77b6c1..c8ac517 100644 --- a/mempalace/entity_registry.py +++ b/mempalace/entity_registry.py @@ -336,6 +336,20 @@ class EntityRegistry: except (OSError, NotImplementedError): pass os.replace(tmp_path, self._path) + # On ext4 (and similar) the rename's durability across power loss + # requires an additional fsync on the parent directory. Without it, + # the kernel can ack the rename and a crash reverts to the state + # where the temp file is present and the target is at the old version. + try: + dir_fd = os.open(str(self._path.parent), os.O_RDONLY) + try: + os.fsync(dir_fd) + finally: + os.close(dir_fd) + except OSError: + # Windows and some special filesystems reject directory fds — they + # have different durability semantics on rename anyway. + pass @staticmethod def _empty() -> dict: