matches double key fix

2026-06-06 20:18:11 +02:00
parent 97e4b10ac8
commit 2f30ac4e05
2 changed files with 105 additions and 20 deletions
@@ -2,14 +2,15 @@
 matches_cache.py
 ================

-Persistent JSON cache that maps a Suwayomi/series search title to the
+Persistent JSON cache that maps a normalised (lowercase) search title to the
 MangaBaka series it was matched against.

 Structure on disk::

    {
      "matches": {
-        "<search title>": {
+        "<normalised lowercase key>": {
+          "folderTitle":    "Original Folder Name",
          "mangabakaId":    "12345",
          "mangabakaName":  "One-Punch Man",
          "imageUrl":       "https://.../cover.jpg",
@@ -19,6 +20,11 @@ Structure on disk::
      }
    }

+Keys are always stored lowercase so that folder names differing only in
+capitalisation (e.g. "[Oshi No Ko]" vs "[oshi no ko]") are treated as
+identical entries.  The original casing is preserved in the ``folderTitle``
+field and is used for display purposes (e.g. the web UI title link).
+
 The cache is consulted by ComicInfoBuilder before issuing a MangaBaka
 search request, and is written back to disk on every mutation so a crash
 does not lose matches that were resolved in the current run.
@@ -32,6 +38,11 @@ import time
 from pathlib import Path


+def _norm_key(title: str) -> str:
+    """Return the canonical cache key for *title*.
+
+    The key is the title converted to lowercase, so titles that differ only
+    in capitalisation map to the same cache entry.
+    """
+    canonical = title.lower()
+    return canonical
+
+
 class MatchesCache:
    def __init__(self, path):
        self._path = Path(path)
@@ -44,7 +55,7 @@ class MatchesCache:
    # ------------------------------------------------------------------
     def get(self, title: str) -> "dict | None":
+        """Return a shallow copy of the entry cached for ``title``.
+
+        The lookup uses the normalised key, so it is case-insensitive.
+        Returns None when no entry is stored or the stored entry is empty.
+        """
         with self._lock:
-            entry = self._data["matches"].get(title)
+            entry = self._data["matches"].get(_norm_key(title))
             return dict(entry) if entry else None

    def add(self, title: str, *,
@@ -52,13 +63,14 @@ class MatchesCache:
            mangabaka_name: str,
            image_url: "str | None") -> dict:
        entry = {
+            "folderTitle":    title,
            "mangabakaId":    str(mangabaka_id) if mangabaka_id is not None else "",
            "mangabakaName":  mangabaka_name or "",
            "imageUrl":       image_url or "",
            "firstMatchTime": int(time.time()),
        }
        with self._lock:
-            self._data["matches"][title] = entry
+            self._data["matches"][_norm_key(title)] = entry
            self._save_unlocked()
        return dict(entry)

@@ -67,16 +79,19 @@ class MatchesCache:
               mangabaka_name=None,
               image_url=None,
               first_match_time=None) -> dict:
+        norm = _norm_key(title)
        with self._lock:
-            entry = self._data["matches"].get(title)
+            entry = self._data["matches"].get(norm)
            if entry is None:
                entry = {
+                    "folderTitle":    title,
                    "mangabakaId":    "",
                    "mangabakaName":  "",
                    "imageUrl":       "",
                    "firstMatchTime": int(time.time()),
                }
-                self._data["matches"][title] = entry
+                self._data["matches"][norm] = entry
+            # folderTitle is only set on creation; preserve original casing on updates.
            if mangabaka_id is not None:
                entry["mangabakaId"] = str(mangabaka_id)
            if mangabaka_name is not None:
@@ -92,21 +107,25 @@ class MatchesCache:
            return dict(entry)

    def rename(self, old_title: str, new_title: str) -> bool:
-        if not new_title or old_title == new_title:
+        old_norm = _norm_key(old_title)
+        new_norm = _norm_key(new_title)
+        if not new_title or old_norm == new_norm:
            return False
        with self._lock:
-            entry = self._data["matches"].pop(old_title, None)
+            entry = self._data["matches"].pop(old_norm, None)
            if entry is None:
                return False
-            self._data["matches"][new_title] = entry
+            entry["folderTitle"] = new_title
+            self._data["matches"][new_norm] = entry
            self._save_unlocked()
            return True

     def remove(self, title: str) -> bool:
+        """Delete the entry for ``title``, matched by its normalised key.
+
+        ``_save_unlocked()`` is called only when an entry was actually
+        removed.  Returns True if an entry existed, False otherwise.
+        """
+        norm = _norm_key(title)
         with self._lock:
-            existed = title in self._data["matches"]
+            existed = norm in self._data["matches"]
             if existed:
-                del self._data["matches"][title]
+                del self._data["matches"][norm]
                 self._save_unlocked()
             return existed

@@ -128,8 +147,68 @@ class MatchesCache:
            print(f"[MatchesCache] failed to load {self._path}: {exc}",
                  flush=True)
            return
-        if isinstance(loaded, dict) and isinstance(loaded.get("matches"), dict):
+        if not isinstance(loaded, dict) or not isinstance(loaded.get("matches"), dict):
+            return
+
+        normalized, changed = self._normalize_on_load(loaded["matches"])
+        loaded["matches"] = normalized
        self._data = loaded
+        if changed:
+            print(f"[MatchesCache] migrated {changed} entr{'y' if changed == 1 else 'ies'} "
+                  f"(lowercase keys / folderTitle), saving", flush=True)
+            self._save_unlocked()
+
+    @staticmethod
+    def _normalize_on_load(raw: dict) -> "tuple[dict, int]":
+        """
+        Normalises the raw matches dict loaded from disk.
+
+        - Values that are not dicts are dropped.
+        - Keys are lowercased.
+        - ``folderTitle`` is set to the original key when it is missing or
+          not a string.
+        - Entries whose keys share a normalised form are merged: the one
+          with the higher ``firstMatchTime`` supplies the data, and a
+          ``folderTitle`` containing uppercase letters is preferred over an
+          all-lowercase one regardless of which entry is newer.
+
+        Returns (normalised_dict, number_of_changed_entries).  An entry that
+        needed both a new key and a ``folderTitle`` is counted once.
+        """
+
+        def match_time(item: dict) -> float:
+            # A non-numeric value (e.g. null or a string in a hand-edited
+            # file) would make the ``>`` comparison below raise; rank such
+            # an entry as oldest instead.
+            stamp = item.get("firstMatchTime", 0)
+            if isinstance(stamp, bool) or not isinstance(stamp, (int, float)):
+                return 0
+            return stamp
+
+        def has_casing(text: str) -> bool:
+            return text != text.lower()
+
+        result: dict = {}
+        changed = 0
+
+        for orig_key, entry in raw.items():
+            if not isinstance(entry, dict):
+                continue
+            norm = _norm_key(orig_key)
+            entry = dict(entry)  # never mutate the caller's dict
+
+            # A non-string folderTitle is treated like a missing one so the
+            # casing checks below can rely on it being a str.
+            needs_title = not isinstance(entry.get("folderTitle"), str)
+            if needs_title:
+                entry["folderTitle"] = orig_key
+
+            # Count each migrated entry once, even if both fixes applied.
+            if needs_title or norm != orig_key:
+                changed += 1
+
+            existing = result.get(norm)
+            if existing is None:
+                result[norm] = entry
+                continue
+
+            # Merge duplicates: the strictly newer entry wins for data (the
+            # first-seen entry wins ties), but keep the folderTitle that
+            # still carries the original casing.
+            if match_time(entry) > match_time(existing):
+                winner, loser = entry, existing
+            else:
+                winner, loser = existing, entry
+            if has_casing(loser["folderTitle"]) and not has_casing(winner["folderTitle"]):
+                winner["folderTitle"] = loser["folderTitle"]
+            result[norm] = winner
+
+        return result, changed

    def _save_unlocked(self) -> None:
        self._path.parent.mkdir(parents=True, exist_ok=True)
@@ -115,15 +115,17 @@ function updateDirtyCount() {
 function makeRow(title, e) {
  const tr = document.createElement("tr");
  tr.dataset.title = title;
+  const displayTitle = e.folderTitle || title;
+  tr.dataset.folderTitle = displayTitle;

-  // Title — link only, not editable
+  // Title — link only, not editable; shows folderTitle (original casing)
  const titleTd = document.createElement("td");
  titleTd.className = "title";
  const titleLink = document.createElement("a");
-  titleLink.href = searchUrl(title);
+  titleLink.href = searchUrl(displayTitle);
  titleLink.target = "_blank";
  titleLink.rel = "noopener";
-  titleLink.textContent = title;
+  titleLink.textContent = displayTitle;
  titleTd.appendChild(titleLink);
  tr.appendChild(titleTd);

@@ -186,7 +188,7 @@ function makeRow(title, e) {
 async function saveRow(tr) {
  const title = tr.dataset.title;
  const newId = tr._idInp.value.trim();
-  setStatus("Saving " + title + "…");
+  setStatus("Saving " + (tr.dataset.folderTitle || title) + "…");
  try {
    const r = await fetch("/api/matches", {
      method: "POST",
@@ -203,7 +205,7 @@ async function saveRow(tr) {
    tr._img.src = entry.imageUrl || "";
    tr.classList.remove("dirty");
    updateDirtyCount();
-    setStatus("Saved " + title);
+    setStatus("Saved " + (tr.dataset.folderTitle || title));
    return true;
  } catch (err) {
    setStatus("Save failed (" + title + "): " + err.message);
@@ -250,7 +252,11 @@ function sortedTitles() {
  const titles = Object.keys(matchesData);
  const dir = currentSort.asc ? 1 : -1;
  if (currentSort.col === "title") {
-    return titles.sort((a, b) => a.localeCompare(b) * dir);
+    return titles.sort((a, b) => {
+      const fa = (matchesData[a].folderTitle || a).toLowerCase();
+      const fb = (matchesData[b].folderTitle || b).toLowerCase();
+      return fa.localeCompare(fb) * dir;
+    });
  }
  if (currentSort.col === "firstMatchTime") {
    return titles.sort((a, b) => {
@@ -298,7 +304,7 @@ async function load() {
 function applyFilter() {
+  // Hide every row under #rows whose displayed title (folderTitle, falling
+  // back to the cache key) does not contain the filter text; the match is
+  // case-insensitive.
   const q = document.getElementById("filter").value.toLowerCase();
   for (const tr of document.querySelectorAll("#rows tr")) {
-    const t = tr.dataset.title.toLowerCase();
+    const t = (tr.dataset.folderTitle || tr.dataset.title).toLowerCase();
     tr.style.display = t.includes(q) ? "" : "none";
   }
 }