From 9b9daa9b4b99ca5c74a037d47ced6597f354169f Mon Sep 17 00:00:00 2001 From: ac-opensource Date: Tue, 7 Apr 2026 22:26:06 +0800 Subject: [PATCH 1/2] fix: respect .gitignore during project mining --- mempalace/miner.py | 129 +++++++++++++++++++++++++++++++++++++++++++- tests/test_miner.py | 48 ++++++++++++++++- 2 files changed, 175 insertions(+), 2 deletions(-) diff --git a/mempalace/miner.py b/mempalace/miner.py index ecd313d..b7ac628 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -10,6 +10,7 @@ Stores verbatim chunks as drawers. No summaries. Ever. import os import sys import hashlib +import fnmatch from pathlib import Path from datetime import datetime from collections import defaultdict @@ -58,6 +59,122 @@ CHUNK_OVERLAP = 100 # overlap between chunks MIN_CHUNK_SIZE = 50 # skip tiny chunks +# ============================================================================= +# IGNORE MATCHING +# ============================================================================= + + +class GitignoreMatcher: + """Lightweight matcher for a project's root .gitignore patterns.""" + + def __init__(self, rules: list): + self.rules = rules + self.has_negations = any(rule["negated"] for rule in rules) + + @classmethod + def from_project(cls, project_path: Path): + gitignore_path = project_path / ".gitignore" + if not gitignore_path.exists(): + return cls([]) + + try: + lines = gitignore_path.read_text(encoding="utf-8", errors="replace").splitlines() + except Exception: + return cls([]) + + rules = [] + for raw_line in lines: + line = raw_line.strip() + if not line or line.startswith("#"): + continue + + negated = line.startswith("!") + if negated: + line = line[1:] + + anchored = line.startswith("/") + if anchored: + line = line.lstrip("/") + + dir_only = line.endswith("/") + if dir_only: + line = line.rstrip("/") + + if not line: + continue + + rules.append( + { + "pattern": line, + "anchored": anchored, + "dir_only": dir_only, + "negated": negated, + } + ) + + return cls(rules) + + def matches(self, path: Path, project_path: Path, is_dir: bool = None) -> bool: + if not self.rules: + return False + + try: + relative = path.relative_to(project_path).as_posix().strip("/") + except ValueError: + return False + + if not relative: + return False + + if is_dir is None: + is_dir = path.is_dir() + + ignored = False + for rule in self.rules: + if self._rule_matches(rule, relative, is_dir): + ignored = not rule["negated"] + return ignored + + def _rule_matches(self, rule: dict, relative: str, is_dir: bool) -> bool: + pattern = rule["pattern"] + parts = relative.split("/") + pattern_parts = pattern.split("/") + + if rule["dir_only"]: + target_parts = parts if is_dir else parts[:-1] + if not target_parts: + return False + if rule["anchored"] or len(pattern_parts) > 1: + return self._match_from_root(target_parts, pattern_parts) + return any(fnmatch.fnmatch(part, pattern) for part in target_parts) + + if rule["anchored"] or len(pattern_parts) > 1: + return self._match_from_root(parts, pattern_parts) + + return any(fnmatch.fnmatch(part, pattern) for part in parts) + + def _match_from_root(self, target_parts: list, pattern_parts: list) -> bool: + def matches(path_index: int, pattern_index: int) -> bool: + if pattern_index == len(pattern_parts): + return True + + if path_index == len(target_parts): + return all(part == "**" for part in pattern_parts[pattern_index:]) + + pattern_part = pattern_parts[pattern_index] + if pattern_part == "**": + return matches(path_index, pattern_index + 1) or matches( + path_index + 1, pattern_index + ) + + if not fnmatch.fnmatch(target_parts[path_index], pattern_part): + return False + + return matches(path_index + 1, pattern_index + 1) + + return matches(0, 0) + + # ============================================================================= # CONFIG # ============================================================================= @@ -287,11 +404,21 @@ def process_file( def scan_project(project_dir: str) -> list: """Return list of all readable file paths.""" project_path = Path(project_dir).expanduser().resolve() + gitignore_matcher = GitignoreMatcher.from_project(project_path) files = [] for root, dirs, filenames in os.walk(project_path): + root_path = Path(root) dirs[:] = [d for d in dirs if d not in SKIP_DIRS] + if not gitignore_matcher.has_negations: + dirs[:] = [ + d + for d in dirs + if not gitignore_matcher.matches(root_path / d, project_path, is_dir=True) + ] for filename in filenames: - filepath = Path(root) / filename + filepath = root_path / filename + if gitignore_matcher.matches(filepath, project_path, is_dir=False): + continue if filepath.suffix.lower() in READABLE_EXTENSIONS: # Skip config files if filename in ( diff --git a/tests/test_miner.py b/tests/test_miner.py index b4d0c3a..f5b3933 100644 --- a/tests/test_miner.py +++ b/tests/test_miner.py @@ -3,7 +3,9 @@ import tempfile import shutil import yaml import chromadb -from mempalace.miner import mine +from pathlib import Path + +from mempalace.miner import mine, scan_project def test_project_mining(): @@ -34,3 +36,47 @@ def test_project_mining(): assert col.count() > 0 shutil.rmtree(tmpdir) + + +def test_scan_project_respects_gitignore(): + tmpdir = tempfile.mkdtemp() + try: + project_root = Path(tmpdir).resolve() + os.makedirs(project_root / "src") + os.makedirs(project_root / "generated") + + (project_root / ".gitignore").write_text("ignored.py\ngenerated/\n", encoding="utf-8") + (project_root / "src" / "app.py").write_text("print('hello')\n" * 20, encoding="utf-8") + (project_root / "ignored.py").write_text("print('ignore me')\n" * 20, encoding="utf-8") + (project_root / "generated" / "artifact.py").write_text( + "print('ignore this dir')\n" * 20, + encoding="utf-8", + ) + + files = scan_project(str(project_root)) + relative_files = sorted(path.relative_to(project_root).as_posix() for path in files) + + assert relative_files == ["src/app.py"] + finally: + shutil.rmtree(tmpdir) + + +def test_scan_project_handles_gitignore_negation(): + tmpdir = tempfile.mkdtemp() + try: + project_root = Path(tmpdir).resolve() + os.makedirs(project_root / "generated") + + (project_root / ".gitignore").write_text( + "generated/\n!generated/keep.py\n", + encoding="utf-8", + ) + (project_root / "generated" / "drop.py").write_text("print('drop')\n" * 20, encoding="utf-8") + (project_root / "generated" / "keep.py").write_text("print('keep')\n" * 20, encoding="utf-8") + + files = scan_project(str(project_root)) + relative_files = sorted(path.relative_to(project_root).as_posix() for path in files) + + assert relative_files == ["generated/keep.py"] + finally: + shutil.rmtree(tmpdir) From c8c220d789a1d909c6593796e9f5d44fdb0f0c0a Mon Sep 17 00:00:00 2001 From: ac-opensource Date: Wed, 8 Apr 2026 00:02:21 +0800 Subject: [PATCH 2/2] fix: support nested .gitignore rules during mining --- mempalace/cli.py | 16 ++++ mempalace/miner.py | 197 +++++++++++++++++++++++++++++++------- tests/test_miner.py | 228 ++++++++++++++++++++++++++++++++++---------- 3 files changed, 355 insertions(+), 86 deletions(-) diff --git a/mempalace/cli.py b/mempalace/cli.py index d0f097e..627da34 100644 --- a/mempalace/cli.py +++ b/mempalace/cli.py @@ -65,6 +65,9 @@ def cmd_init(args): def cmd_mine(args): palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path + include_ignored = [] + for raw in args.include_ignored or []: + include_ignored.extend(part.strip() for part in raw.split(",") if part.strip()) if args.mode == "convos": from .convo_miner import mine_convos @@ -88,6 +91,8 @@ def cmd_mine(args): agent=args.agent, limit=args.limit, dry_run=args.dry_run, + respect_gitignore=not args.no_gitignore, + include_ignored=include_ignored, ) @@ -288,6 +293,17 @@ def main(): help="Ingest mode: 'projects' for code/docs (default), 'convos' for chat exports", ) p_mine.add_argument("--wing", default=None, help="Wing name (default: directory name)") + p_mine.add_argument( + "--no-gitignore", + action="store_true", + help="Don't respect .gitignore files when scanning project files", + ) + p_mine.add_argument( + "--include-ignored", + action="append", + default=[], + help="Always scan these project-relative paths even if ignored; repeat or pass comma-separated paths", + ) p_mine.add_argument( "--agent", default="mempalace", diff --git a/mempalace/miner.py b/mempalace/miner.py index b7ac628..4d3ca76 100644 --- a/mempalace/miner.py +++ b/mempalace/miner.py @@ -52,6 +52,27 @@ SKIP_DIRS = { ".next", "coverage", ".mempalace", + ".ruff_cache", + ".mypy_cache", + ".pytest_cache", + ".cache", + ".tox", + ".nox", + ".idea", + ".vscode", + ".ipynb_checkpoints", + ".eggs", + "htmlcov", + "target", +} + +SKIP_FILENAMES = { + "mempalace.yaml", + "mempalace.yml", + "mempal.yaml", + "mempal.yml", + ".gitignore", + "package-lock.json", } CHUNK_SIZE = 800 # chars per drawer @@ -65,27 +86,32 @@ MIN_CHUNK_SIZE = 50 # skip tiny chunks class GitignoreMatcher: - """Lightweight matcher for a project's root .gitignore patterns.""" + """Lightweight matcher for one directory's .gitignore patterns.""" - def __init__(self, rules: list): + def __init__(self, base_dir: Path, rules: list): + self.base_dir = base_dir self.rules = rules - self.has_negations = any(rule["negated"] for rule in rules) @classmethod - def from_project(cls, project_path: Path): - gitignore_path = project_path / ".gitignore" - if not gitignore_path.exists(): - return cls([]) + def from_dir(cls, dir_path: Path): + gitignore_path = dir_path / ".gitignore" + if not gitignore_path.is_file(): + return None try: lines = gitignore_path.read_text(encoding="utf-8", errors="replace").splitlines() except Exception: - return cls([]) + return None rules = [] for raw_line in lines: line = raw_line.strip() - if not line or line.startswith("#"): + if not line: + continue + + if line.startswith("\\#") or line.startswith("\\!"): + line = line[1:] + elif line.startswith("#"): continue negated = line.startswith("!") @@ -112,24 +138,24 @@ class GitignoreMatcher: } ) - return cls(rules) + if not rules: + return None - def matches(self, path: Path, project_path: Path, is_dir: bool = None) -> bool: - if not self.rules: - return False + return cls(dir_path, rules) + def matches(self, path: Path, is_dir: bool = None): try: - relative = path.relative_to(project_path).as_posix().strip("/") + relative = path.relative_to(self.base_dir).as_posix().strip("/") except ValueError: - return False + return None if not relative: - return False + return None if is_dir is None: is_dir = path.is_dir() - ignored = False + ignored = None for rule in self.rules: if self._rule_matches(rule, relative, is_dir): ignored = not rule["negated"] @@ -175,6 +201,75 @@ class GitignoreMatcher: return matches(0, 0) +def load_gitignore_matcher(dir_path: Path, cache: dict): + """Load and cache one directory's .gitignore matcher.""" + if dir_path not in cache: + cache[dir_path] = GitignoreMatcher.from_dir(dir_path) + return cache[dir_path] + + +def is_gitignored(path: Path, matchers: list, is_dir: bool = False) -> bool: + """Apply active .gitignore matchers in ancestor order; last match wins.""" + ignored = False + for matcher in matchers: + decision = matcher.matches(path, is_dir=is_dir) + if decision is not None: + ignored = decision + return ignored + + +def should_skip_dir(dirname: str) -> bool: + """Skip known generated/cache directories before gitignore matching.""" + return dirname in SKIP_DIRS or dirname.endswith(".egg-info") + + +def normalize_include_paths(include_ignored: list) -> set: + """Normalize comma-parsed include paths into project-relative POSIX strings.""" + normalized = set() + for raw_path in include_ignored or []: + candidate = str(raw_path).strip().strip("/") + if candidate: + normalized.add(Path(candidate).as_posix()) + return normalized + + +def is_exact_force_include(path: Path, project_path: Path, include_paths: set) -> bool: + """Return True when a path exactly matches an explicit include override.""" + if not include_paths: + return False + + try: + relative = path.relative_to(project_path).as_posix().strip("/") + except ValueError: + return False + + return relative in include_paths + + +def is_force_included(path: Path, project_path: Path, include_paths: set) -> bool: + """Return True when a path or one of its ancestors/descendants was explicitly included.""" + if not include_paths: + return False + + try: + relative = path.relative_to(project_path).as_posix().strip("/") + except ValueError: + return False + + if not relative: + return False + + for include_path in include_paths: + if relative == include_path: + return True + if relative.startswith(f"{include_path}/"): + return True + if include_path.startswith(f"{relative}/"): + return True + + return False + + # ============================================================================= # CONFIG # ============================================================================= @@ -401,36 +496,58 @@ def process_file( # ============================================================================= -def scan_project(project_dir: str) -> list: +def scan_project( + project_dir: str, + respect_gitignore: bool = True, + include_ignored: list = None, +) -> list: """Return list of all readable file paths.""" project_path = Path(project_dir).expanduser().resolve() - gitignore_matcher = GitignoreMatcher.from_project(project_path) files = [] + active_matchers = [] + matcher_cache = {} + include_paths = normalize_include_paths(include_ignored) + for root, dirs, filenames in os.walk(project_path): root_path = Path(root) - dirs[:] = [d for d in dirs if d not in SKIP_DIRS] - if not gitignore_matcher.has_negations: + + if respect_gitignore: + active_matchers = [ + matcher + for matcher in active_matchers + if root_path == matcher.base_dir or matcher.base_dir in root_path.parents + ] + current_matcher = load_gitignore_matcher(root_path, matcher_cache) + if current_matcher is not None: + active_matchers.append(current_matcher) + + dirs[:] = [ + d + for d in dirs + if is_force_included(root_path / d, project_path, include_paths) + or not should_skip_dir(d) + ] + if respect_gitignore and active_matchers: dirs[:] = [ d for d in dirs - if not gitignore_matcher.matches(root_path / d, project_path, is_dir=True) + if is_force_included(root_path / d, project_path, include_paths) + or not is_gitignored(root_path / d, active_matchers, is_dir=True) ] + for filename in filenames: filepath = root_path / filename - if gitignore_matcher.matches(filepath, project_path, is_dir=False): + force_include = is_force_included(filepath, project_path, include_paths) + exact_force_include = is_exact_force_include(filepath, project_path, include_paths) + + if not force_include and filename in SKIP_FILENAMES: continue - if filepath.suffix.lower() in READABLE_EXTENSIONS: - # Skip config files - if filename in ( - "mempalace.yaml", - "mempalace.yml", - "mempal.yaml", - "mempal.yml", - ".gitignore", - "package-lock.json", - ): + if filepath.suffix.lower() not in READABLE_EXTENSIONS and not exact_force_include: + continue + if respect_gitignore and active_matchers and not force_include: + if is_gitignored(filepath, active_matchers, is_dir=False): continue - files.append(filepath) + files.append(filepath) return files @@ -446,6 +563,8 @@ def mine( agent: str = "mempalace", limit: int = 0, dry_run: bool = False, + respect_gitignore: bool = True, + include_ignored: list = None, ): """Mine a project directory into the palace.""" @@ -455,7 +574,11 @@ def mine( wing = wing_override or config["wing"] rooms = config.get("rooms", [{"name": "general", "description": "All project files"}]) - files = scan_project(project_dir) + files = scan_project( + project_dir, + respect_gitignore=respect_gitignore, + include_ignored=include_ignored, + ) if limit > 0: files = files[:limit] @@ -468,6 +591,10 @@ def mine( print(f" Palace: {palace_path}") if dry_run: print(" DRY RUN — nothing will be filed") + if not respect_gitignore: + print(" .gitignore: DISABLED") + if include_ignored: + print(f" Include: {', '.join(sorted(normalize_include_paths(include_ignored)))}") print(f"{'─' * 55}\n") if not dry_run: diff --git a/tests/test_miner.py b/tests/test_miner.py index f5b3933..337e949 100644 --- a/tests/test_miner.py +++ b/tests/test_miner.py @@ -1,82 +1,208 @@ import os -import tempfile import shutil -import yaml -import chromadb +import tempfile from pathlib import Path +import chromadb +import yaml + from mempalace.miner import mine, scan_project +def write_file(path: Path, content: str): + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content, encoding="utf-8") + + +def scanned_files(project_root: Path, **kwargs): + files = scan_project(str(project_root), **kwargs) + return sorted(path.relative_to(project_root).as_posix() for path in files) + + def test_project_mining(): tmpdir = tempfile.mkdtemp() - # Create a mini project - os.makedirs(os.path.join(tmpdir, "backend")) - with open(os.path.join(tmpdir, "backend", "app.py"), "w") as f: - f.write("def main():\n print('hello world')\n" * 20) - # Create config - with open(os.path.join(tmpdir, "mempalace.yaml"), "w") as f: - yaml.dump( - { - "wing": "test_project", - "rooms": [ - {"name": "backend", "description": "Backend code"}, - {"name": "general", "description": "General"}, - ], - }, - f, + try: + project_root = Path(tmpdir).resolve() + os.makedirs(project_root / "backend") + + write_file( + project_root / "backend" / "app.py", "def main():\n print('hello world')\n" * 20 ) + with open(project_root / "mempalace.yaml", "w") as f: + yaml.dump( + { + "wing": "test_project", + "rooms": [ + {"name": "backend", "description": "Backend code"}, + {"name": "general", "description": "General"}, + ], + }, + f, + ) - palace_path = os.path.join(tmpdir, "palace") - mine(tmpdir, palace_path) + palace_path = project_root / "palace" + mine(str(project_root), str(palace_path)) - # Verify - client = chromadb.PersistentClient(path=palace_path) - col = client.get_collection("mempalace_drawers") - assert col.count() > 0 - - shutil.rmtree(tmpdir) + client = chromadb.PersistentClient(path=str(palace_path)) + col = client.get_collection("mempalace_drawers") + assert col.count() > 0 + finally: + shutil.rmtree(tmpdir) def test_scan_project_respects_gitignore(): tmpdir = tempfile.mkdtemp() try: project_root = Path(tmpdir).resolve() - os.makedirs(project_root / "src") - os.makedirs(project_root / "generated") - (project_root / ".gitignore").write_text("ignored.py\ngenerated/\n", encoding="utf-8") - (project_root / "src" / "app.py").write_text("print('hello')\n" * 20, encoding="utf-8") - (project_root / "ignored.py").write_text("print('ignore me')\n" * 20, encoding="utf-8") - (project_root / "generated" / "artifact.py").write_text( - "print('ignore this dir')\n" * 20, - encoding="utf-8", - ) + write_file(project_root / ".gitignore", "ignored.py\ngenerated/\n") + write_file(project_root / "src" / "app.py", "print('hello')\n" * 20) + write_file(project_root / "ignored.py", "print('ignore me')\n" * 20) + write_file(project_root / "generated" / "artifact.py", "print('artifact')\n" * 20) - files = scan_project(str(project_root)) - relative_files = sorted(path.relative_to(project_root).as_posix() for path in files) - - assert relative_files == ["src/app.py"] + assert scanned_files(project_root) == ["src/app.py"] finally: shutil.rmtree(tmpdir) -def test_scan_project_handles_gitignore_negation(): +def test_scan_project_respects_nested_gitignore(): tmpdir = tempfile.mkdtemp() try: project_root = Path(tmpdir).resolve() - os.makedirs(project_root / "generated") - (project_root / ".gitignore").write_text( - "generated/\n!generated/keep.py\n", - encoding="utf-8", - ) - (project_root / "generated" / "drop.py").write_text("print('drop')\n" * 20, encoding="utf-8") - (project_root / "generated" / "keep.py").write_text("print('keep')\n" * 20, encoding="utf-8") + write_file(project_root / ".gitignore", "*.log\n") + write_file(project_root / "subrepo" / ".gitignore", "tasks/\n") + write_file(project_root / "subrepo" / "src" / "main.py", "print('main')\n" * 20) + write_file(project_root / "subrepo" / "tasks" / "task.py", "print('task')\n" * 20) + write_file(project_root / "subrepo" / "debug.log", "debug\n" * 20) - files = scan_project(str(project_root)) - relative_files = sorted(path.relative_to(project_root).as_posix() for path in files) - - assert relative_files == ["generated/keep.py"] + assert scanned_files(project_root) == ["subrepo/src/main.py"] + finally: + shutil.rmtree(tmpdir) + + +def test_scan_project_allows_nested_gitignore_override(): + tmpdir = tempfile.mkdtemp() + try: + project_root = Path(tmpdir).resolve() + + write_file(project_root / ".gitignore", "*.csv\n") + write_file(project_root / "subrepo" / ".gitignore", "!keep.csv\n") + write_file(project_root / "drop.csv", "a,b,c\n" * 20) + write_file(project_root / "subrepo" / "keep.csv", "a,b,c\n" * 20) + + assert scanned_files(project_root) == ["subrepo/keep.csv"] + finally: + shutil.rmtree(tmpdir) + + +def test_scan_project_allows_gitignore_negation_when_parent_dir_is_visible(): + tmpdir = tempfile.mkdtemp() + try: + project_root = Path(tmpdir).resolve() + + write_file(project_root / ".gitignore", "generated/*\n!generated/keep.py\n") + write_file(project_root / "generated" / "drop.py", "print('drop')\n" * 20) + write_file(project_root / "generated" / "keep.py", "print('keep')\n" * 20) + + assert scanned_files(project_root) == ["generated/keep.py"] + finally: + shutil.rmtree(tmpdir) + + +def test_scan_project_does_not_reinclude_file_from_ignored_directory(): + tmpdir = tempfile.mkdtemp() + try: + project_root = Path(tmpdir).resolve() + + write_file(project_root / ".gitignore", "generated/\n!generated/keep.py\n") + write_file(project_root / "generated" / "drop.py", "print('drop')\n" * 20) + write_file(project_root / "generated" / "keep.py", "print('keep')\n" * 20) + + assert scanned_files(project_root) == [] + finally: + shutil.rmtree(tmpdir) + + +def test_scan_project_can_disable_gitignore(): + tmpdir = tempfile.mkdtemp() + try: + project_root = Path(tmpdir).resolve() + + write_file(project_root / ".gitignore", "data/\n") + write_file(project_root / "data" / "stuff.csv", "a,b,c\n" * 20) + + assert scanned_files(project_root, respect_gitignore=False) == ["data/stuff.csv"] + finally: + shutil.rmtree(tmpdir) + + +def test_scan_project_can_include_ignored_directory(): + tmpdir = tempfile.mkdtemp() + try: + project_root = Path(tmpdir).resolve() + + write_file(project_root / ".gitignore", "docs/\n") + write_file(project_root / "docs" / "guide.md", "# Guide\n" * 20) + + assert scanned_files(project_root, include_ignored=["docs"]) == ["docs/guide.md"] + finally: + shutil.rmtree(tmpdir) + + +def test_scan_project_can_include_specific_ignored_file(): + tmpdir = tempfile.mkdtemp() + try: + project_root = Path(tmpdir).resolve() + + write_file(project_root / ".gitignore", "generated/\n") + write_file(project_root / "generated" / "drop.py", "print('drop')\n" * 20) + write_file(project_root / "generated" / "keep.py", "print('keep')\n" * 20) + + assert scanned_files(project_root, include_ignored=["generated/keep.py"]) == [ + "generated/keep.py" + ] + finally: + shutil.rmtree(tmpdir) + + +def test_scan_project_can_include_exact_file_without_known_extension(): + tmpdir = tempfile.mkdtemp() + try: + project_root = Path(tmpdir).resolve() + + write_file(project_root / ".gitignore", "README\n") + write_file(project_root / "README", "hello\n" * 20) + + assert scanned_files(project_root, include_ignored=["README"]) == ["README"] + finally: + shutil.rmtree(tmpdir) + + +def test_scan_project_include_override_beats_skip_dirs(): + tmpdir = tempfile.mkdtemp() + try: + project_root = Path(tmpdir).resolve() + + write_file(project_root / ".pytest_cache" / "cache.py", "print('cache')\n" * 20) + + assert scanned_files( + project_root, + respect_gitignore=False, + include_ignored=[".pytest_cache"], + ) == [".pytest_cache/cache.py"] + finally: + shutil.rmtree(tmpdir) + + +def test_scan_project_skip_dirs_still_apply_without_override(): + tmpdir = tempfile.mkdtemp() + try: + project_root = Path(tmpdir).resolve() + + write_file(project_root / ".pytest_cache" / "cache.py", "print('cache')\n" * 20) + write_file(project_root / "main.py", "print('main')\n" * 20) + + assert scanned_files(project_root, respect_gitignore=False) == ["main.py"] finally: shutil.rmtree(tmpdir)