From 2f30ac4e0588e7b87b0a9309827b2e254959a024 Mon Sep 17 00:00:00 2001 From: JohannesBOT Date: Sat, 6 Jun 2026 20:18:11 +0200 Subject: [PATCH] matches double key fix --- src/MatchesCache.py | 105 +++++++++++++++++++++++++++++++++++++------ src/MatchesWebApp.py | 20 ++++++--- 2 files changed, 105 insertions(+), 20 deletions(-) diff --git a/src/MatchesCache.py b/src/MatchesCache.py index b225ec4..23afa1e 100644 --- a/src/MatchesCache.py +++ b/src/MatchesCache.py @@ -2,14 +2,15 @@ matches_cache.py ================ -Persistent JSON cache that maps a Suwayomi/series search title to the +Persistent JSON cache that maps a normalised (lowercase) search title to the MangaBaka series it was matched against. Structure on disk:: { "matches": { - "": { + "": { + "folderTitle": "Original Folder Name", "mangabakaId": "12345", "mangabakaName": "One-Punch Man", "imageUrl": "https://.../cover.jpg", @@ -19,6 +20,11 @@ Structure on disk:: } } +Keys are always stored lowercase so that folder names differing only in +capitalisation (e.g. "[Oshi No Ko]" vs "[oshi no ko]") are treated as +identical entries. The original casing is preserved in the ``folderTitle`` +field and is used for display purposes (e.g. the web UI title link). + The cache is consulted by ComicInfoBuilder before issuing a MangaBaka search request, and is written back to disk on every mutation so a crash does not lose matches that were resolved in the current run. @@ -32,6 +38,11 @@ import time from pathlib import Path +def _norm_key(title: str) -> str: + """Normalises a cache key to lowercase for case-insensitive deduplication.""" + return title.lower() + + class MatchesCache: def __init__(self, path): self._path = Path(path) @@ -44,7 +55,7 @@ class MatchesCache: # ------------------------------------------------------------------ def get(self, title: str) -> "dict | None": with self._lock: - entry = self._data["matches"].get(title) + entry = self._data["matches"].get(_norm_key(title)) return dict(entry) if entry else None def add(self, title: str, *, @@ -52,13 +63,14 @@ class MatchesCache: mangabaka_name: str, image_url: "str | None") -> dict: entry = { + "folderTitle": title, "mangabakaId": str(mangabaka_id) if mangabaka_id is not None else "", "mangabakaName": mangabaka_name or "", "imageUrl": image_url or "", "firstMatchTime": int(time.time()), } with self._lock: - self._data["matches"][title] = entry + self._data["matches"][_norm_key(title)] = entry self._save_unlocked() return dict(entry) @@ -67,16 +79,19 @@ class MatchesCache: mangabaka_name=None, image_url=None, first_match_time=None) -> dict: + norm = _norm_key(title) with self._lock: - entry = self._data["matches"].get(title) + entry = self._data["matches"].get(norm) if entry is None: entry = { + "folderTitle": title, "mangabakaId": "", "mangabakaName": "", "imageUrl": "", "firstMatchTime": int(time.time()), } - self._data["matches"][title] = entry + self._data["matches"][norm] = entry + # folderTitle is only set on creation; preserve original casing on updates. if mangabaka_id is not None: entry["mangabakaId"] = str(mangabaka_id) if mangabaka_name is not None: @@ -92,21 +107,25 @@ class MatchesCache: return dict(entry) def rename(self, old_title: str, new_title: str) -> bool: - if not new_title or old_title == new_title: + old_norm = _norm_key(old_title) + new_norm = _norm_key(new_title) + if not new_title or old_norm == new_norm: return False with self._lock: - entry = self._data["matches"].pop(old_title, None) + entry = self._data["matches"].pop(old_norm, None) if entry is None: return False - self._data["matches"][new_title] = entry + entry["folderTitle"] = new_title + self._data["matches"][new_norm] = entry self._save_unlocked() return True def remove(self, title: str) -> bool: + norm = _norm_key(title) with self._lock: - existed = title in self._data["matches"] + existed = norm in self._data["matches"] if existed: - del self._data["matches"][title] + del self._data["matches"][norm] self._save_unlocked() return existed @@ -128,8 +147,68 @@ class MatchesCache: print(f"[MatchesCache] failed to load {self._path}: {exc}", flush=True) return - if isinstance(loaded, dict) and isinstance(loaded.get("matches"), dict): - self._data = loaded + if not isinstance(loaded, dict) or not isinstance(loaded.get("matches"), dict): + return + + normalized, changed = self._normalize_on_load(loaded["matches"]) + loaded["matches"] = normalized + self._data = loaded + if changed: + print(f"[MatchesCache] migrated {changed} entr{'y' if changed == 1 else 'ies'} " + f"(lowercase keys / folderTitle), saving", flush=True) + self._save_unlocked() + + @staticmethod + def _normalize_on_load(raw: dict) -> "tuple[dict, int]": + """ + Normalises the raw matches dict loaded from disk. + + - Keys are lowercased. + - ``folderTitle`` is added from the original key when missing. + - Duplicate keys (same normalised form) are merged by keeping the + entry with the higher ``firstMatchTime``. + + Returns (normalised_dict, number_of_changed_entries). + """ + result: dict = {} + changed = 0 + + for orig_key, entry in raw.items(): + if not isinstance(entry, dict): + continue + norm = _norm_key(orig_key) + entry = dict(entry) + + # Add folderTitle if absent + if "folderTitle" not in entry: + entry["folderTitle"] = orig_key + changed += 1 + + if norm != orig_key: + changed += 1 + + # Merge duplicates: keep data from the more recent entry, but + # prefer the folderTitle that contains uppercase letters (= the + # original folder name) regardless of which entry is newer. + if norm in result: + existing = result[norm] + if entry.get("firstMatchTime", 0) > existing.get("firstMatchTime", 0): + # Newer entry wins for data; preserve better-cased folderTitle + existing_ft = existing.get("folderTitle", norm) + new_ft = entry.get("folderTitle", norm) + if existing_ft != existing_ft.lower() and new_ft == new_ft.lower(): + entry["folderTitle"] = existing_ft + result[norm] = entry + else: + # Existing entry stays; but adopt new folderTitle if it has casing + existing_ft = existing.get("folderTitle", norm) + new_ft = entry.get("folderTitle", norm) + if new_ft != new_ft.lower() and existing_ft == existing_ft.lower(): + existing["folderTitle"] = new_ft + else: + result[norm] = entry + + return result, changed def _save_unlocked(self) -> None: self._path.parent.mkdir(parents=True, exist_ok=True) diff --git a/src/MatchesWebApp.py b/src/MatchesWebApp.py index 5ed47f7..b4734c3 100644 --- a/src/MatchesWebApp.py +++ b/src/MatchesWebApp.py @@ -115,15 +115,17 @@ function updateDirtyCount() { function makeRow(title, e) { const tr = document.createElement("tr"); tr.dataset.title = title; + const displayTitle = e.folderTitle || title; + tr.dataset.folderTitle = displayTitle; - // Title — link only, not editable + // Title — link only, not editable; shows folderTitle (original casing) const titleTd = document.createElement("td"); titleTd.className = "title"; const titleLink = document.createElement("a"); - titleLink.href = searchUrl(title); + titleLink.href = searchUrl(displayTitle); titleLink.target = "_blank"; titleLink.rel = "noopener"; - titleLink.textContent = title; + titleLink.textContent = displayTitle; titleTd.appendChild(titleLink); tr.appendChild(titleTd); @@ -186,7 +188,7 @@ function makeRow(title, e) { async function saveRow(tr) { const title = tr.dataset.title; const newId = tr._idInp.value.trim(); - setStatus("Saving " + title + "…"); + setStatus("Saving " + (tr.dataset.folderTitle || title) + "…"); try { const r = await fetch("/api/matches", { method: "POST", @@ -203,7 +205,7 @@ async function saveRow(tr) { tr._img.src = entry.imageUrl || ""; tr.classList.remove("dirty"); updateDirtyCount(); - setStatus("Saved " + title); + setStatus("Saved " + (tr.dataset.folderTitle || title)); return true; } catch (err) { setStatus("Save failed (" + title + "): " + err.message); @@ -250,7 +252,11 @@ function sortedTitles() { const titles = Object.keys(matchesData); const dir = currentSort.asc ? 1 : -1; if (currentSort.col === "title") { - return titles.sort((a, b) => a.localeCompare(b) * dir); + return titles.sort((a, b) => { + const fa = (matchesData[a].folderTitle || a).toLowerCase(); + const fb = (matchesData[b].folderTitle || b).toLowerCase(); + return fa.localeCompare(fb) * dir; + }); } if (currentSort.col === "firstMatchTime") { return titles.sort((a, b) => { @@ -298,7 +304,7 @@ async function load() { function applyFilter() { const q = document.getElementById("filter").value.toLowerCase(); for (const tr of document.querySelectorAll("#rows tr")) { - const t = tr.dataset.title.toLowerCase(); + const t = (tr.dataset.folderTitle || tr.dataset.title).toLowerCase(); tr.style.display = t.includes(q) ? "" : "none"; } }