Merge pull request #895 from MemPalace/bench/v3.3.0-verification

benchmarks: v3.3.0 reproduction results + Ollama rerank backend
This commit is contained in:
Igor Lins e Silva
2026-04-14 22:06:30 -03:00
committed by GitHub
11 changed files with 331421 additions and 65 deletions
+508
View File
@@ -0,0 +1,508 @@
{
"dev": [
"cc06de0d",
"f9e8c073",
"b320f3f8",
"a89d7624",
"311778f1",
"gpt4_59c863d7",
"bbf86515",
"099778bb",
"e831120c",
"dcfa8644",
"8fb83627",
"e66b632c",
"gpt4_7fce9456",
"55241a1f",
"352ab8bd",
"f4f1d8a4",
"830ce83f",
"2311e44b",
"09ba9854",
"gpt4_a1b77f9c",
"07741c45",
"gpt4_70e84552",
"b46e15ee",
"6071bd76",
"6f9b354f",
"1d4da289",
"gpt4_8279ba02",
"6456829e_abs",
"0db4c65d",
"d6062bb9",
"60bf93ed_abs",
"d3ab962e",
"87f22b4a",
"e01b8e2f",
"gpt4_7ddcf75f",
"8ebdbe50",
"26bdc477",
"29f2956b_abs",
"2311e44b_abs",
"75f70248",
"852ce960",
"f0e564bc",
"fca70973",
"3c1045c8",
"18bc8abd",
"afdc33df",
"54026fce",
"b9cfe692",
"6456829e",
"e6041065"
],
"held_out": [
"gpt4_15e38248",
"gpt4_2ba83207",
"2133c1b5_abs",
"gpt4_8279ba03",
"76d63226",
"1192316e",
"gpt4_fa19884d",
"gpt4_372c3eed_abs",
"1a8a66a6",
"gpt4_fe651585",
"e25c3b8d",
"945e3d21",
"86b68151",
"1c0ddc50",
"1e043500",
"d682f1a2",
"gpt4_b5700ca0",
"91b15a6e",
"ce6d2d27",
"f523d9fe",
"7024f17c",
"8752c811",
"gpt4_f420262d",
"d01c6aa8",
"4b24c848",
"7e974930",
"3fdac837",
"gpt4_b4a80587",
"c18a7dc8",
"80ec1f4f_abs",
"7527f7e2",
"6ade9755",
"89941a94",
"gpt4_1d80365e",
"2133c1b5",
"06db6396",
"gpt4_88806d6e",
"88432d0a",
"3ba21379",
"0862e8bf",
"aae3761f",
"5025383b",
"gpt4_e061b84f",
"73d42213",
"4bc144e2",
"gpt4_5501fe77",
"00ca467f",
"dfde3500",
"01493427",
"b6025781",
"a96c20ee_abs",
"982b5123_abs",
"gpt4_fa19884c",
"gpt4_1a1dc16d",
"28dc39ac",
"gpt4_2d58bcd6",
"51c32626",
"c4ea545c",
"1da05512",
"gpt4_385a5000",
"577d4d32",
"72e3ee87",
"f4f1d8a4_abs",
"9d25d4e0",
"b29f3365",
"b759caee",
"10e09553",
"1d4e3b97",
"d52b4f67",
"gpt4_e072b769",
"58ef2f1c",
"6e984301",
"41275add",
"gpt4_59149c77",
"2ebe6c90",
"1cea1afa",
"gpt4_1e4a8aec",
"6c49646a",
"8a2466db",
"gpt4_65aabe59",
"gpt4_93159ced",
"51a45a95",
"af8d2e46",
"561fabcd",
"370a8ff4",
"gpt4_d84a3211",
"gpt4_7a0daae1",
"2a1811e2",
"gpt4_78cf46a3",
"1568498a",
"6b7dfb22",
"6ae235be",
"bc8a6e93_abs",
"681a1674",
"06878be2",
"1a1907b4",
"0e4e4c46",
"gpt4_85da3956",
"gpt4_f420262c",
"2bf43736",
"bc149d6b",
"09d032c9",
"5c40ec5b",
"eac54adc",
"993da5e2",
"71a3fd6b",
"gpt4_0b2f1d21",
"ad7109d1",
"4c36ccef",
"c8c3f81d",
"edced276_abs",
"0bc8ad92",
"gpt4_468eb064",
"2ebe6c92",
"cc6d1ec1",
"4dfccbf8",
"95228167",
"ba358f49",
"45dc21b6",
"db467c8c",
"720133ac",
"67e0d0f2",
"cc5ded98",
"726462e0",
"4100d0a0",
"3a704032",
"gpt4_7ca326fa",
"ec81a493",
"618f13b2",
"58470ed2",
"gpt4_4fc4f797",
"60036106",
"157a136e",
"6222b6eb",
"69fee5aa",
"19b5f2b3_abs",
"gpt4_d12ceb0e",
"51b23612",
"2318644b",
"3fe836c9",
"gpt4_7de946e7",
"71017277",
"f0853d11",
"dc439ea3",
"gpt4_2f91af09",
"9a707b81",
"bc8a6e93",
"c14c00dd",
"8979f9ec",
"cf22b7bf",
"gpt4_ec93e27f",
"gpt4_468eb063",
"41698283",
"1de5cff2",
"21d02d0d",
"c7cf7dfd",
"gpt4_ab202e7f",
"dccbc061",
"078150f1",
"e3038f8c",
"gpt4_c27434e8_abs",
"2698e78f",
"031748ae_abs",
"gpt4_59149c78",
"c8f1aeed",
"184da446",
"gpt4_b5700ca9",
"89527b6b",
"0977f2af",
"853b0a1d",
"a346bb18",
"3249768e",
"gpt4_2f8be40d",
"gpt4_93159ced_abs",
"eeda8a6d",
"7a8d0b71",
"95bcc1c8",
"gpt4_2487a7cb",
"85fa3a3f",
"7e00a6cb",
"e3fc4d6e",
"59524333",
"37f165cf",
"0ddfec37",
"60bf93ed",
"d7c942c3",
"80ec1f4f",
"ceb54acb",
"9aaed6a3",
"gpt4_4929293a",
"ed4ddc30",
"545bd2b5",
"2788b940",
"ef9cf60a",
"gpt4_7f6b06db",
"0ea62687",
"3d86fd0a",
"3e321797",
"d24813b1",
"38146c39",
"efc3f7c2",
"7401057b",
"5809eb10",
"28bcfaac",
"1903aded",
"gpt4_194be4b3",
"gpt4_e414231f",
"0ddfec37_abs",
"c2ac3c61",
"gpt4_4ef30696",
"1f2b8d4f",
"0f05491a",
"8550ddae",
"8077ef71",
"b86304ba",
"e61a7584",
"8cf51dda",
"gpt4_2f584639",
"08e075c7",
"5d3d2817",
"7405e8b1",
"a3045048",
"gpt4_731e37d7",
"c8090214_abs",
"36580ce8",
"ba358f49_abs",
"gpt4_d6585ce8",
"e56a43b9",
"2c63a862",
"gpt4_5438fa52",
"07b6f563",
"gpt4_31ff4165",
"0bb5a684",
"71315a70",
"gpt4_cd90e484",
"gpt4_8c8961ae",
"gpt4_fe651585_abs",
"36b9f61e",
"gpt4_b0863698",
"gpt4_1d4ab0c9",
"15745da0_abs",
"0862e8bf_abs",
"bcbe585f",
"a2f3aa27",
"gpt4_6dc9b45b",
"ccb36322",
"f685340e",
"9ea5eabc",
"gpt4_372c3eed",
"37d43f65",
"bf659f65",
"b0479f84",
"gpt4_213fd887",
"e4e14d04",
"f8c5f88b",
"gpt4_18c2b244",
"a11281a2",
"gpt4_2655b836",
"e47becba",
"gpt4_74aed68e",
"gpt4_af6db32f",
"6cb6f249",
"77eafa52",
"gpt4_93f6379c",
"e8a79c70",
"7a87bd0c",
"gpt4_6ed717ea",
"d6233ab6",
"c19f7a0b",
"gpt4_61e13b3c",
"d23cf73b",
"gpt4_1e4a8aeb",
"ba61f0b9",
"118b2229",
"488d3006",
"c4a1ceb8",
"8e91e7d9",
"42ec0761",
"65240037",
"fea54f57",
"c8090214",
"b01defab",
"6aeb4375_abs",
"faba32e5",
"c5e8278d",
"gpt4_e414231e",
"eeda8a6d_abs",
"gpt4_8e165409",
"af082822",
"22d2cb42",
"92a0aa75",
"1c549ce4",
"25e5aa4f",
"gpt4_68e94288",
"4baee567",
"18dcd5a5",
"dad224aa",
"gpt4_f2262a51",
"29f2956b",
"21436231",
"19b5f2b3",
"gpt4_1916e0ea",
"gpt4_45189cb4",
"0a995998",
"b6019101",
"9bbe84a2",
"61f8c8f8",
"9a707b82",
"8cf4d046",
"eac54add",
"75832dbd",
"gpt4_98f46fc6",
"d596882b",
"88432d0a_abs",
"16c90bf4",
"f685340e_abs",
"b5ef892d",
"gpt4_f49edff3",
"gpt4_483dd43c",
"bb7c3b45",
"gpt4_7abb270c",
"gpt4_9a159967",
"07741c44",
"4d6b87c8",
"6aeb4375",
"gpt4_d6585ce9",
"60472f9c",
"caf9ead2",
"32260d93",
"60159905",
"0a34ad58",
"a40e080f",
"10d9b85a",
"a06e4cfe",
"4f54b7c9",
"6613b389",
"70b3e69b",
"gpt4_7bc6cf22",
"gpt4_0a05b494",
"778164c6",
"195a1a1b",
"8464fc84",
"b46e15ed",
"603deb26",
"eaca4986",
"2698e78f_abs",
"gpt4_21adecb5",
"2e6d26dc",
"5831f84d",
"08f4fc43",
"3f1e9474",
"c9f37c46",
"gpt4_2f56ae70",
"1b9b7252",
"35a27287",
"gpt4_d31cdae3",
"129d1232",
"4adc0475",
"27016adc",
"46a3abf7",
"9ee3ecd6",
"982b5123",
"09ba9854_abs",
"0e5e2d1a",
"e9327a54",
"86f00804",
"e982271f",
"7161e7e2",
"57f827a0",
"6a27ffc2",
"edced276",
"gpt4_d9af6064",
"75499fd8",
"60d45044",
"gpt4_70e84552_abs",
"2ce6a0f2",
"gpt4_4929293b",
"a1cc6108",
"gpt4_5dcc0aab",
"a3838d2b",
"c7dc5443",
"505af2f5",
"gpt4_68e94287",
"15745da0",
"0100672e",
"a82c026e",
"5e1b23de",
"71017276",
"89941a93",
"6b168ec8",
"affe2881",
"0edc2aef",
"gpt4_2312f94c",
"a4996e51",
"c6853660",
"ef66a6e5",
"8a137a7f",
"a96c20ee",
"fca762bc",
"ac031881",
"d905b33f",
"e493bb7c",
"a9f6b44c",
"dd2973ad",
"8aef76bc",
"f35224e0",
"8b9d4367",
"gpt4_c27434e8",
"gpt4_a56e767c",
"eace081b",
"5a4f22c0",
"58bf7951",
"c4f10528",
"50635ada",
"06f04340",
"0bc8ad93",
"e5ba910e_abs",
"5a7937c8",
"a3332713",
"4388e9dd",
"8c18457d",
"gpt4_2c50253f",
"6a1eabeb",
"b3c15d39",
"gpt4_e061b84g",
"3b6f954b",
"gpt4_76048e76",
"4dfccbf7",
"2b8f3739",
"d851d5ba",
"4fd1909e",
"94f70d80",
"66f24dbb",
"a08a253f",
"6e984302",
"001be529",
"gpt4_a2d1d1f6",
"cc539528",
"e48988bc",
"gpt4_4cd9eba1",
"8e9d538c",
"a1eacc2a",
"6d550036",
"gpt4_e05b82a6",
"81507db6",
"caf03d32",
"031748ae",
"c960da58",
"1faac195",
"gpt4_4edbafa2"
],
"seed": 42,
"dev_size": 50
}
+69 -23
View File
@@ -510,11 +510,20 @@ def palace_assign_rooms(sessions, sample_id, api_key, cache, model="claude-haiku
def llm_rerank_locomo(
question, retrieved_ids, retrieved_docs, api_key, top_k=10, model="claude-sonnet-4-6"
question,
retrieved_ids,
retrieved_docs,
api_key,
top_k=10,
model="claude-sonnet-4-6",
backend="anthropic",
base_url="",
):
"""
Ask LLM to pick the single most relevant document for this question.
Returns reordered retrieved_ids with the best candidate first.
Supports backend="anthropic" (default) or "ollama" (OpenAI-compat endpoint).
"""
candidates = retrieved_ids[:top_k]
candidate_docs = retrieved_docs[:top_k]
@@ -522,7 +531,6 @@ def llm_rerank_locomo(
if len(candidates) <= 1:
return retrieved_ids
# Build numbered list of candidates
lines = []
for i, (cid, doc) in enumerate(zip(candidates, candidate_docs), 1):
snippet = doc[:300].replace("\n", " ")
@@ -534,35 +542,51 @@ def llm_rerank_locomo(
f"Reply with just the number (1-{len(candidates)}).\n\n" + "\n".join(lines)
)
payload = json.dumps(
{
"model": model,
"max_tokens": 8,
"messages": [{"role": "user", "content": prompt}],
}
).encode("utf-8")
req = urllib.request.Request(
"https://api.anthropic.com/v1/messages",
data=payload,
headers={
if backend == "ollama":
url = (base_url or "http://localhost:11434").rstrip("/") + "/v1/chat/completions"
payload = json.dumps(
{
"model": model,
"messages": [{"role": "user", "content": prompt}],
"max_tokens": 1024,
"temperature": 0.0,
}
).encode("utf-8")
headers = {"content-type": "application/json"}
if api_key:
headers["authorization"] = f"Bearer {api_key}"
else:
url = "https://api.anthropic.com/v1/messages"
payload = json.dumps(
{
"model": model,
"max_tokens": 8,
"messages": [{"role": "user", "content": prompt}],
}
).encode("utf-8")
headers = {
"x-api-key": api_key,
"anthropic-version": "2023-06-01",
"content-type": "application/json",
},
method="POST",
)
}
req = urllib.request.Request(url, data=payload, headers=headers, method="POST")
import socket as _socket
for _attempt in range(3):
try:
with urllib.request.urlopen(req, timeout=30) as resp:
with urllib.request.urlopen(req, timeout=120 if backend == "ollama" else 30) as resp:
result = json.loads(resp.read())
raw = result["content"][0]["text"].strip()
m = re.search(r"\b(\d+)\b", raw)
if backend == "ollama":
msg = result["choices"][0]["message"]
raw = (msg.get("content") or "").strip() or (msg.get("reasoning") or "").strip()
else:
raw = result["content"][0]["text"].strip()
# Take LAST integer — reasoning models often count candidates first
m = re.search(r"\b(\d+)\b", raw[::-1])
if m:
pick = int(m.group(1))
pick = int(m.group(1)[::-1])
if 1 <= pick <= len(candidates):
chosen_id = candidates[pick - 1]
reordered = [chosen_id] + [cid for cid in retrieved_ids if cid != chosen_id]
@@ -608,6 +632,8 @@ def run_benchmark(
palace_cache_file=None,
palace_model="claude-haiku-4-5-20251001",
embed_model="default",
llm_backend="anthropic",
llm_base_url="",
):
"""Run LoCoMo retrieval benchmark."""
with open(data_file) as f:
@@ -619,8 +645,12 @@ def run_benchmark(
api_key = ""
if llm_rerank_enabled or mode == "palace":
api_key = _load_api_key(llm_key)
if not api_key:
print(f"ERROR: --mode {mode} requires an API key (--llm-key or ANTHROPIC_API_KEY).")
# Ollama backend doesn't require an Anthropic key. Palace mode still does
# (it uses Anthropic for room-assignment indexing) — so only relax the
# requirement when rerank is the ONLY llm use and backend is ollama.
needs_key = mode == "palace" or (llm_rerank_enabled and llm_backend == "anthropic")
if needs_key and not api_key:
print(f"ERROR: --mode {mode} / --llm-rerank (anthropic) requires an API key.")
sys.exit(1)
# Palace mode: load or create room assignment cache
@@ -888,6 +918,8 @@ def run_benchmark(
api_key,
top_k=rerank_pool,
model=llm_model,
backend=llm_backend,
base_url=llm_base_url,
)
# Compute recall
@@ -1013,6 +1045,18 @@ if __name__ == "__main__":
help="Model for LLM rerank (default: claude-sonnet-4-6)",
)
parser.add_argument("--llm-key", default="", help="API key (or set ANTHROPIC_API_KEY env var)")
parser.add_argument(
"--llm-backend",
choices=["anthropic", "ollama"],
default="anthropic",
help="Which API for --llm-rerank. 'anthropic' (default) or 'ollama' "
"(OpenAI-compat /v1/chat/completions — works for local + Ollama Cloud).",
)
parser.add_argument(
"--llm-base-url",
default="",
help="Override base URL for --llm-backend ollama. Default: http://localhost:11434.",
)
parser.add_argument(
"--hybrid-weight",
type=float,
@@ -1049,4 +1093,6 @@ if __name__ == "__main__":
palace_cache_file=args.palace_cache,
palace_model=args.palace_model,
embed_model=args.embed_model,
llm_backend=args.llm_backend,
llm_base_url=args.llm_base_url,
)
+101 -42
View File
@@ -2763,7 +2763,15 @@ def build_palace_and_retrieve_diary(
def llm_rerank(
question, rankings, corpus, corpus_ids, api_key, top_k=10, model="claude-haiku-4-5-20251001"
question,
rankings,
corpus,
corpus_ids,
api_key,
top_k=10,
model="claude-haiku-4-5-20251001",
backend="anthropic",
base_url="",
):
"""
Use an LLM to re-rank the top-k retrieved sessions.
@@ -2772,19 +2780,22 @@ def llm_rerank(
which single session is most relevant to the question. That session
is promoted to rank 1; the rest stay in their existing order.
This closes the gap for "preference" and jargon-dense "assistant"
failures where the right session is in top-10 semantically but not
top-5 — because the semantic gap (battery life ↔ phone hardware) is
too large for embeddings to bridge.
Supports two backends:
- "anthropic": hits https://api.anthropic.com/v1/messages with x-api-key.
- "ollama": hits {base_url}/v1/chat/completions (OpenAI-compat) —
works for local Ollama (default http://localhost:11434)
and Ollama Cloud (:cloud model tags).
Args:
question: The benchmark question string
rankings: Current ranked list of corpus indices (from any mode)
corpus: List of document strings
corpus_ids: List of corpus IDs (parallel to corpus)
api_key: Anthropic API key string
top_k: How many top sessions to send to LLM (default: 10)
model: Claude model ID for reranking (default: haiku)
question: The benchmark question string
rankings: Current ranked list of corpus indices (from any mode)
corpus: List of document strings
corpus_ids: List of corpus IDs (parallel to corpus)
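api_key: API key. Required for backend="anthropic" (sent as x-api-key); optional for backend="ollama", where a non-empty value is sent as an "authorization: Bearer" header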
top_k: How many top sessions to send to LLM (default: 10)
model: Model id (Claude model for anthropic, e.g. "minimax-m2.7:cloud" for ollama)
backend: "anthropic" or "ollama"
base_url: Override base URL (ollama default: http://localhost:11434)
Returns:
Reordered rankings list with LLM's best pick promoted to rank 1.
@@ -2796,7 +2807,6 @@ def llm_rerank(
if not candidates:
return rankings
# Format sessions for the prompt — first 500 chars each, labelled 1..N
session_blocks = []
for rank, idx in enumerate(candidates):
text = corpus[idx][:500].replace("\n", " ").strip()
@@ -2813,49 +2823,68 @@ def llm_rerank(
f"Most relevant session number:"
)
payload = json.dumps(
{
"model": model,
"max_tokens": 8,
"messages": [{"role": "user", "content": prompt}],
}
).encode("utf-8")
req = urllib.request.Request(
"https://api.anthropic.com/v1/messages",
data=payload,
headers={
if backend == "ollama":
url = (base_url or "http://localhost:11434").rstrip("/") + "/v1/chat/completions"
payload = json.dumps(
{
"model": model,
"messages": [{"role": "user", "content": prompt}],
"max_tokens": 1024,
"temperature": 0.0,
}
).encode("utf-8")
headers = {"content-type": "application/json"}
if api_key:
headers["authorization"] = f"Bearer {api_key}"
else:
url = "https://api.anthropic.com/v1/messages"
payload = json.dumps(
{
"model": model,
"max_tokens": 8,
"messages": [{"role": "user", "content": prompt}],
}
).encode("utf-8")
headers = {
"x-api-key": api_key,
"anthropic-version": "2023-06-01",
"content-type": "application/json",
},
method="POST",
)
}
req = urllib.request.Request(url, data=payload, headers=headers, method="POST")
import socket as _socket
for _attempt in range(3):
try:
with urllib.request.urlopen(req, timeout=20) as resp:
with urllib.request.urlopen(req, timeout=120 if backend == "ollama" else 20) as resp:
result = json.loads(resp.read())
raw = result["content"][0]["text"].strip()
# Parse just the first integer from Haiku's response
m = re.search(r"\b(\d+)\b", raw)
if backend == "ollama":
msg = result["choices"][0]["message"]
# Reasoning models (e.g. minimax-m2.7) may emit final answer in "content"
# or embed it in "reasoning". Try content first, fall back to reasoning.
raw = (msg.get("content") or "").strip()
if not raw:
raw = (msg.get("reasoning") or "").strip()
else:
raw = result["content"][0]["text"].strip()
m = re.search(
r"\b(\d+)\b", raw[::-1]
) # take LAST integer (rerank models often reason first)
if m:
pick = int(m.group(1))
pick = int(m.group(1)[::-1])
if 1 <= pick <= len(candidates):
chosen_idx = candidates[pick - 1]
reordered = [chosen_idx] + [i for i in rankings if i != chosen_idx]
return reordered
break # Got a response, even if unparseable — don't retry
break
except (_socket.timeout, TimeoutError):
if _attempt < 2:
import time as _time
_time.sleep(3) # brief pause then retry
# else fall through to return rankings
_time.sleep(3)
except (urllib.error.URLError, KeyError, ValueError, IndexError, OSError):
break # Non-timeout error — fall back immediately
break
return rankings
@@ -2919,6 +2948,8 @@ def run_benchmark(
skip_precompute=False,
split_file=None,
split_subset=None,
llm_backend="anthropic",
llm_base_url="",
):
"""Run the full benchmark.
@@ -2947,10 +2978,14 @@ def run_benchmark(
api_key = ""
if llm_rerank_enabled or mode == "diary":
api_key = _load_api_key(llm_key)
if not api_key:
# Ollama backend doesn't require an Anthropic API key; a local/cloud Ollama
# daemon with the requested model pulled is enough. Diary mode is always anthropic.
needs_key = (llm_backend == "anthropic") or (mode == "diary")
if needs_key and not api_key:
print(
"ERROR: --llm-rerank / --mode diary requires an API key. "
"Set ANTHROPIC_API_KEY or use --llm-key."
"ERROR: --llm-rerank (anthropic backend) / --mode diary requires an API key. "
"Set ANTHROPIC_API_KEY or use --llm-key. For ollama backend, pass "
"--llm-backend ollama."
)
sys.exit(1)
@@ -3100,7 +3135,15 @@ def run_benchmark(
if llm_rerank_enabled:
rerank_pool = 20 if mode in ("hybrid_v3", "hybrid_v4", "palace") else 10
rankings = llm_rerank(
question, rankings, corpus, corpus_ids, api_key, top_k=rerank_pool, model=llm_model
question,
rankings,
corpus,
corpus_ids,
api_key,
top_k=rerank_pool,
model=llm_model,
backend=llm_backend,
base_url=llm_base_url,
)
# Evaluate at session level
@@ -3276,7 +3319,21 @@ if __name__ == "__main__":
default="claude-haiku-4-5-20251001",
help="Model for LLM re-ranking and diary ingest "
"(default: claude-haiku-4-5-20251001). "
"Use 'claude-sonnet-4-6' for Sonnet comparison.",
"Use 'claude-sonnet-4-6' for Sonnet comparison. "
"With --llm-backend ollama, use an Ollama model tag like 'minimax-m2.7:cloud'.",
)
parser.add_argument(
"--llm-backend",
choices=["anthropic", "ollama"],
default="anthropic",
help="Which API to hit for --llm-rerank. 'anthropic' (default) uses Anthropic's "
"/v1/messages endpoint. 'ollama' uses Ollama's OpenAI-compatible "
"/v1/chat/completions endpoint (works with local Ollama and Ollama Cloud).",
)
parser.add_argument(
"--llm-base-url",
default="",
help="Override base URL for --llm-backend ollama. Defaults to http://localhost:11434.",
)
parser.add_argument(
"--diary-cache",
@@ -3380,4 +3437,6 @@ if __name__ == "__main__":
args.skip_precompute,
split_file=args.split_file,
split_subset=split_subset,
llm_backend=args.llm_backend,
llm_base_url=args.llm_base_url,
)
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long