manga matching and WebApp

2026-05-26 20:20:24 +02:00
parent 12edb8a5d7
commit 615bd1b468
9 changed files with 665 additions and 56 deletions
@@ -23,43 +23,43 @@ jobs:
      - name: Push Image
        run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest
-#  deploy:
+  deploy:
-#    needs: build
+    needs: build
-#    runs-on: ubuntu-latest
+    runs-on: ubuntu-latest
-#    steps:
+    steps:
-#      - name: Checkout
+      - name: Checkout
-#        uses: actions/checkout@v4
+        uses: actions/checkout@v4
-#
+
-#      - name: Create deployment directory
+      - name: Create deployment directory
-#        uses: appleboy/ssh-action@v1.0.3
+        uses: appleboy/ssh-action@v1.0.3
-#        with:
+        with:
-#          host: ${{ secrets.SSH_HOST }}
+          host: ${{ secrets.SSH_HOST }}
-#          username: ${{ secrets.SSH_USER }}
+          username: ${{ secrets.SSH_USER }}
-#          password: ${{ secrets.SSH_PASSWORD }}
+          password: ${{ secrets.SSH_PASSWORD }}
-#          port: ${{ secrets.SSH_PORT || 22 }}
+          port: ${{ secrets.SSH_PORT || 22 }}
-#          script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
+          script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
-#
+
-#      - name: Copy docker-compose via SCP
+      - name: Copy docker-compose via SCP
-#        uses: appleboy/scp-action@v0.1.7
+        uses: appleboy/scp-action@v0.1.7
-#        with:
+        with:
-#          host: ${{ secrets.SSH_HOST }}
+          host: ${{ secrets.SSH_HOST }}
-#          username: ${{ secrets.SSH_USER }}
+          username: ${{ secrets.SSH_USER }}
-#          password: ${{ secrets.SSH_PASSWORD }}
+          password: ${{ secrets.SSH_PASSWORD }}
-#          port: ${{ secrets.SSH_PORT || 22 }}
+          port: ${{ secrets.SSH_PORT || 22 }}
-#          source: "docker-compose.prod.yml"
+          source: "docker-compose.prod.yml"
-#          target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector"
+          target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector"
-#
+
-#      - name: Deploy via SSH
+      - name: Deploy via SSH
-#        uses: appleboy/ssh-action@v1.0.3
+        uses: appleboy/ssh-action@v1.0.3
-#        with:
+        with:
-#          host: ${{ secrets.SSH_HOST }}
+          host: ${{ secrets.SSH_HOST }}
-#          username: ${{ secrets.SSH_USER }}
+          username: ${{ secrets.SSH_USER }}
-#          password: ${{ secrets.SSH_PASSWORD }}
+          password: ${{ secrets.SSH_PASSWORD }}
-#          port: ${{ secrets.SSH_PORT || 22 }}
+          port: ${{ secrets.SSH_PORT || 22 }}
-#          script: |
+          script: |
-#            cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
+            cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
-#            mv docker-compose.prod.yml docker-compose.yml
+            mv docker-compose.prod.yml docker-compose.yml
-#            echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
+            echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
-#            sudo docker compose pull
+            sudo docker compose pull
-#            sudo docker compose up -d --remove-orphans
+            sudo docker compose up -d --remove-orphans
-#            sudo docker image prune -f
+            sudo docker image prune -f
@@ -18,6 +18,8 @@ ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1
 # Mount points used by main.py defaults
-VOLUME ["/mnt/suwayomi", "/mnt/kavita"]
+VOLUME ["/mnt/suwayomi", "/mnt/kavita", "/config"]
 EXPOSE 8080
 CMD ["python", "/app/main.py"]
@@ -9,6 +9,11 @@ services:
      LANGUAGE:       "${LANGUAGE:-en}"
      SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
      DELETE_SOURCE:  "${DELETE_SOURCE:-true}"
      MATCH_PATH:     "${MATCH_PATH:-/config/matches.json}"
      WEB_PORT:       "${WEB_PORT:-8080}"
    ports:
      # WEB_PORT (the entry above) is also handed to the container, and the
      # app binds its web UI to that value. The container side of the mapping
      # therefore has to follow WEB_PORT too; a fixed 8080 would point at a
      # closed port as soon as WEB_PORT is overridden.
      - "${WEB_PORT:-8080}:${WEB_PORT:-8080}"
    volumes:
      - "${HOST_SUWAYOMI_PATH}:/mnt/suwayomi"
      - "${HOST_KAVITA_PATH}:/mnt/kavita"
      - "${HOST_CONFIG_PATH}:/config"
@@ -24,6 +24,9 @@ Environment variables
    SETTLE_SECONDS      default 600   (10-minute quiet window)
    REQUEST_TIMEOUT     default 30
    DELETE_SOURCE       default true  (delete source folders after pack)
    MATCH_PATH          default /config/matches.json
    WEB_PORT            default 8080  (Flask web UI for matches.json)
    WEB_HOST            default 0.0.0.0
 """
 from __future__ import annotations
@@ -38,6 +41,8 @@ sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
 from src.SuwayomiMover import SuwayomiMover                       # noqa: E402
 from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher       # noqa: E402
 from src.MatchesCache import MatchesCache                          # noqa: E402
 from src.MatchesWebApp import MatchesWebApp                        # noqa: E402
 def _env_str(name: str, default: "str | None" = None,
@@ -77,6 +82,9 @@ def main() -> int:
    settle_seconds  = _env_int("SETTLE_SECONDS",  600)
    request_timeout = _env_int("REQUEST_TIMEOUT",  30)
    delete_source   = _env_bool("DELETE_SOURCE",  True)
    match_path      = _env_str("MATCH_PATH", "/config/matches.json")
    web_host        = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
    web_port        = _env_int("WEB_PORT", 8080)
    print(f"[main] suwayomi  = {suwayomi_path}",  flush=True)
    print(f"[main] kavita    = {kavita_path}",    flush=True)
@@ -84,6 +92,10 @@ def main() -> int:
    print(f"[main] settle    = {settle_seconds}s", flush=True)
    print(f"[main] language  = {language}",       flush=True)
    print(f"[main] delete src= {delete_source}",  flush=True)
    print(f"[main] match path= {match_path}",     flush=True)
    print(f"[main] web       = {web_host}:{web_port}", flush=True)
    matches_cache = MatchesCache(match_path)
    mover = SuwayomiMover(
        suwayomi_path, kavita_path,
@@ -92,20 +104,23 @@ def main() -> int:
        language=language,
        request_timeout=request_timeout,
        delete_source=delete_source,
        matches_cache=matches_cache,
    )
-    watcher = SuwayomiFolderWatcher(
+    # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
        suwayomi_path, mover, settle_seconds=settle_seconds)
-    def shutdown(signum, _frame):
+    web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port)
-        print(f"[main] received signal {signum}", flush=True)
+    web_app.start()
        watcher.stop()
-    signal.signal(signal.SIGTERM, shutdown)
+    # def shutdown(signum, _frame):
-    signal.signal(signal.SIGINT,  shutdown)
+    #     print(f"[main] received signal {signum}", flush=True)
-
+    #     watcher.stop()
-    watcher.start()
+    #
-    watcher.wait()   # blocks until stop() is called via a signal
+    # signal.signal(signal.SIGTERM, shutdown)
    # signal.signal(signal.SIGINT,  shutdown)
    #
    # watcher.start()
    # watcher.wait()   # blocks until stop() is called via a signal
    return 0
@@ -1,3 +1,4 @@
 requests>=2.31
 Pillow>=10.0
 watchdog>=4.0
 Flask>=3.0
@@ -48,6 +48,7 @@ from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from MatchesCache import MatchesCache
 try:
    from PIL import Image
@@ -170,7 +171,8 @@ class ComicInfoBuilder:
                 volume_resolver: "MangaDexVolumeResolver | None" = None,
                 works_resolver: "MangaBakaWorksResolver | None" = None,
                 mal_resolver: "MALResolver | None" = None,
-                 al_resolver: "AniListResolver | None" = None):
+                 al_resolver: "AniListResolver | None" = None,
                 matches_cache: "MatchesCache | None" = None):
        if not manga_title or not str(manga_title).strip():
            raise ValueError("manga_title must not be empty.")
@@ -197,6 +199,7 @@ class ComicInfoBuilder:
            request_timeout=request_timeout)
        self._al_resolver = al_resolver or AniListResolver(
            request_timeout=request_timeout)
        self._matches_cache = matches_cache
        self._metadata: "dict | None" = None
        self._pages: list[dict] = []
@@ -353,14 +356,43 @@ class ComicInfoBuilder:
        return series
    def _search_best_series(self, title: str):
-        """Searches for `title` and returns the best matching series entry."""
+        """
        Resolves `title` to a MangaBaka series.
        Lookup order:
          1. matches.json cache (if attached) — uses the stored series ID
             to fetch the full series, skipping the search step entirely.
          2. Fresh MangaBaka search — top hit. The match is persisted to
             matches.json before being returned so it survives a crash.
        """
        if self._matches_cache is not None:
            cached = self._matches_cache.get(title)
            if cached and cached.get("mangabakaId"):
                try:
                    return self._fetch_series_by_id(cached["mangabakaId"])
                except Exception as exc:
                    print(f"[ComicInfoBuilder] cached id "
                          f"{cached['mangabakaId']} for {title!r} failed "
                          f"({exc}); falling back to fresh search",
                          flush=True)
        url = f"{self.api_base_url}/series/search"
        resp = self._session.get(
            url, params={"q": title, "page": 1, "limit": 1},
            timeout=self.request_timeout)
        resp.raise_for_status()
        data = resp.json().get("data") or []
-        return data[0] if data else None
+        series = data[0] if data else None
        if series and self._matches_cache is not None:
            self._matches_cache.add(
                title,
                mangabaka_id=series.get("id"),
                mangabaka_name=series.get("title") or "",
                image_url=_pick_cover_url(series.get("cover")),
            )
        return series
    def _fetch_series_by_id(self, series_id) -> dict:
        url = f"{self.api_base_url}/series/{series_id}"
@@ -0,0 +1,139 @@
 """
 MatchesCache.py
 ===============
 Persistent JSON cache that maps a Suwayomi/series search title to the
 MangaBaka series it was matched against.
 Structure on disk::
    {
      "matches": {
        "<search title>": {
          "mangabakaId":    "12345",
          "mangabakaName":  "One-Punch Man",
          "imageUrl":       "https://.../cover.jpg",
          "firstMatchTime": 1700000000
        },
        ...
      }
    }
 The cache is consulted by ComicInfoBuilder before issuing a MangaBaka
 search request, and is written back to disk on every mutation so a crash
 does not lose matches that were resolved in the current run.
 """
 from __future__ import annotations
 import json
 import threading
 import time
 from pathlib import Path
 class MatchesCache:
    """
    JSON-backed map from a search title to the MangaBaka series it matched.

    All state lives in ``self._data`` (``{"matches": {title: entry}}``) and
    every access goes through one re-entrant lock. Each mutating call
    rewrites the file at `path` before it returns. Entries handed out by
    the public API are shallow copies, so callers cannot change the cache
    by editing a returned dict.
    """

    def __init__(self, path):
        """
        Args:
            path: Location of the JSON file. It is read once here if it
                exists; missing parent directories are created on the
                first save.
        """
        self._path = Path(path)
        self._lock = threading.RLock()
        self._data: dict = {"matches": {}}
        self._load()

    # ------------------------------------------------------------------
    # Public lookup / mutation API
    # ------------------------------------------------------------------
    def get(self, title: str) -> "dict | None":
        """Returns a copy of the entry stored under `title`, or None."""
        with self._lock:
            entry = self._data["matches"].get(title)
            return dict(entry) if entry else None

    def add(self, title: str, *,
            mangabaka_id,
            mangabaka_name: str,
            image_url: "str | None") -> dict:
        """
        Stores a fresh entry under `title`, replacing any existing one
        (including its ``firstMatchTime``, which is set to "now").

        `mangabaka_id` is stored as a string; None / empty values are
        stored as "". Returns a copy of the stored entry.
        """
        entry = {
            "mangabakaId":    str(mangabaka_id) if mangabaka_id is not None else "",
            "mangabakaName":  mangabaka_name or "",
            "imageUrl":       image_url or "",
            "firstMatchTime": int(time.time()),
        }
        with self._lock:
            self._data["matches"][title] = entry
            self._save_unlocked()
        return dict(entry)

    def upsert(self, title: str, *,
               mangabaka_id=None,
               mangabaka_name=None,
               image_url=None,
               first_match_time=None) -> dict:
        """
        Creates the entry for `title` if needed, then overwrites only the
        fields whose argument is not None.

        A `first_match_time` that cannot be converted with ``int()`` is
        ignored and the stored value is kept. Returns a copy of the
        resulting entry.
        """
        with self._lock:
            entry = self._data["matches"].get(title)
            if entry is None:
                entry = {
                    "mangabakaId":    "",
                    "mangabakaName":  "",
                    "imageUrl":       "",
                    "firstMatchTime": int(time.time()),
                }
                self._data["matches"][title] = entry
            if mangabaka_id is not None:
                entry["mangabakaId"] = str(mangabaka_id)
            if mangabaka_name is not None:
                entry["mangabakaName"] = mangabaka_name
            if image_url is not None:
                entry["imageUrl"] = image_url
            if first_match_time is not None:
                try:
                    entry["firstMatchTime"] = int(first_match_time)
                except (TypeError, ValueError):
                    pass
            self._save_unlocked()
            return dict(entry)

    def rename(self, old_title: str, new_title: str) -> bool:
        """
        Moves the entry from `old_title` to `new_title`.

        Returns False (and changes nothing) when `new_title` is empty,
        equals `old_title`, or `old_title` is not cached. An entry already
        stored under `new_title` is replaced.
        """
        if not new_title or old_title == new_title:
            return False
        with self._lock:
            entry = self._data["matches"].pop(old_title, None)
            if entry is None:
                return False
            self._data["matches"][new_title] = entry
            self._save_unlocked()
            return True

    def remove(self, title: str) -> bool:
        """Deletes the entry for `title`; returns whether it existed."""
        with self._lock:
            existed = title in self._data["matches"]
            if existed:
                del self._data["matches"][title]
                self._save_unlocked()
            return existed

    def all(self) -> dict:
        """Returns ``{"matches": {...}}`` with every entry copied."""
        with self._lock:
            return {"matches": {k: dict(v)
                                for k, v in self._data["matches"].items()}}

    # ------------------------------------------------------------------
    # Internal IO
    # ------------------------------------------------------------------
    def _load(self) -> None:
        """
        Reads the file into memory. An unreadable, undecodable or
        unparsable file leaves the cache empty instead of raising.
        """
        if not self._path.is_file():
            return
        try:
            with self._path.open("r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError *and* UnicodeDecodeError
            # (a file with invalid UTF-8 bytes must not abort start-up).
            print(f"[MatchesCache] failed to load {self._path}: {exc}",
                  flush=True)
            return
        if not (isinstance(loaded, dict)
                and isinstance(loaded.get("matches"), dict)):
            print(f"[MatchesCache] ignoring {self._path}: "
                  f"unexpected structure", flush=True)
            return
        # get() / all() copy entries with dict(entry); anything that is not
        # a mapping would make them raise, so drop such entries up front.
        usable = {k: v for k, v in loaded["matches"].items()
                  if isinstance(v, dict)}
        dropped = len(loaded["matches"]) - len(usable)
        if dropped:
            print(f"[MatchesCache] skipped {dropped} malformed "
                  f"entr{'y' if dropped == 1 else 'ies'} in {self._path}",
                  flush=True)
        loaded["matches"] = usable
        self._data = loaded

    def _save_unlocked(self) -> None:
        """
        Writes the cache to a sibling ``.tmp`` file and then swaps it over
        the real file. The caller must already hold ``self._lock``.
        """
        self._path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self._path.with_suffix(self._path.suffix + ".tmp")
        try:
            with tmp.open("w", encoding="utf-8") as f:
                json.dump(self._data, f, ensure_ascii=False, indent=2)
            tmp.replace(self._path)
        except BaseException:
            # Do not leave a half-written temp file behind; the previous
            # matches file (if any) is still intact at this point.
            try:
                tmp.unlink()
            except OSError:
                pass
            raise
@@ -0,0 +1,341 @@
 """
 MatchesWebApp.py
 ================
 Flask web UI for inspecting and editing the matches.json file produced by
 MatchesCache.
 Routes
 ------
 GET  /                       HTML table view (one row per cached match)
 GET  /api/matches            JSON dump of the full cache
 POST /api/matches            Upsert / rename an entry
                             body: {originalTitle?, title, mangabakaId,
                                    mangabakaName, imageUrl, firstMatchTime?}
 POST /api/matches/delete     Remove an entry      body: {title}
 POST /api/build              Trigger a full re-scan via SuwayomiMover.build_matches_only
                             (only available if a mover is wired in)
 The Title cell is rendered as a link to MangaBaka's search page, restricted
 to the manga / manhwa / manhua types.
 """
 from __future__ import annotations
 import threading
 from urllib.parse import quote_plus
 from flask import Flask, jsonify, request, Response
 from MatchesCache import MatchesCache
 _INDEX_HTML = """<!doctype html>
 <html lang="en">
 <head>
  <meta charset="utf-8">
  <title>MangaBaka matches</title>
  <style>
    body  { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
    h1    { margin: 0 0 1rem; font-size: 1.4rem; }
    .bar  { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
    .bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
    button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
    button.primary { background:#2563eb; border-color:#2563eb; color:white; }
    button.danger  { background:#7f1d1d; border-color:#7f1d1d; color:white; }
    table { border-collapse: collapse; width: 100%; }
    th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
    th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
    tr:nth-child(even) td { background: #161616; }
    td.image img { max-width: 90px; max-height: 130px; display:block; }
    td input { width: 100%; padding: .25rem; background:#222; color:#eee; border:1px solid #444; }
    td.title a { color: #60a5fa; text-decoration: none; }
    td.title a:hover { text-decoration: underline; }
    td.actions { white-space: nowrap; }
    .status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
    .dirty td { background: #1f2937 !important; }
  </style>
 </head>
 <body>
 <h1>MangaBaka matches</h1>
 <div class="bar">
  <input id="filter" type="search" placeholder="Filter by title…">
  <button id="reload">Reload</button>
  <button id="build"  class="primary">Build all (rescan)</button>
  <span class="status" id="status"></span>
 </div>
 <table>
  <thead>
    <tr>
      <th>Title</th>
      <th>mangabakaId</th>
      <th>mangabakaName</th>
      <th>firstMatchTime</th>
      <th>Image</th>
      <th></th>
    </tr>
  </thead>
  <tbody id="rows"></tbody>
 </table>
 <script>
 const TYPES = "&type=manhwa&type=manhua&type=manga";
 function fmtTime(unix) {
  if (!unix) return "";
  const d = new Date(unix * 1000);
  return d.toLocaleString();
 }
 function searchUrl(title) {
  return "https://mangabaka.org/search?q=" + encodeURIComponent(title) + TYPES;
 }
 function setStatus(msg) { document.getElementById("status").textContent = msg; }
 function makeRow(title, e) {
  const tr = document.createElement("tr");
  tr.dataset.originalTitle = title;
  const titleTd = document.createElement("td");
  titleTd.className = "title";
  const titleLink = document.createElement("a");
  titleLink.href = searchUrl(title);
  titleLink.target = "_blank";
  titleLink.rel = "noopener";
  titleLink.textContent = title;
  const titleInput = document.createElement("input");
  titleInput.value = title;
  titleInput.style.marginTop = ".25rem";
  titleInput.addEventListener("input", () => {
    titleLink.textContent = titleInput.value;
    titleLink.href = searchUrl(titleInput.value);
    tr.classList.add("dirty");
  });
  titleTd.append(titleLink, titleInput);
  tr.appendChild(titleTd);
  function field(value) {
    const td = document.createElement("td");
    const inp = document.createElement("input");
    inp.value = value || "";
    inp.addEventListener("input", () => tr.classList.add("dirty"));
    td.appendChild(inp);
    return [td, inp];
  }
  const [idTd,   idInp]   = field(e.mangabakaId);
  const [nameTd, nameInp] = field(e.mangabakaName);
  const [urlTd,  urlInp]  = field(e.imageUrl);
  tr.appendChild(idTd);
  tr.appendChild(nameTd);
  const timeTd = document.createElement("td");
  timeTd.textContent = fmtTime(e.firstMatchTime);
  tr.appendChild(timeTd);
  const imgTd = document.createElement("td");
  imgTd.className = "image";
  const img = document.createElement("img");
  img.src = e.imageUrl || "";
  img.alt = "";
  img.loading = "lazy";
  urlInp.addEventListener("input", () => { img.src = urlInp.value; });
  imgTd.append(img, urlInp);
  tr.appendChild(imgTd);
  const actTd = document.createElement("td");
  actTd.className = "actions";
  const save = document.createElement("button");
  save.textContent = "Save";
  save.className = "primary";
  save.addEventListener("click", async () => {
    save.disabled = true;
    setStatus("Saving " + titleInput.value + "…");
    const body = {
      originalTitle: tr.dataset.originalTitle,
      title:         titleInput.value,
      mangabakaId:   idInp.value,
      mangabakaName: nameInp.value,
      imageUrl:      urlInp.value,
    };
    try {
      const r = await fetch("/api/matches", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(body),
      });
      if (!r.ok) throw new Error(await r.text());
      tr.dataset.originalTitle = titleInput.value;
      tr.classList.remove("dirty");
      setStatus("Saved " + titleInput.value);
    } catch (err) {
      setStatus("Save failed: " + err.message);
    } finally {
      save.disabled = false;
    }
  });
  const del = document.createElement("button");
  del.textContent = "Delete";
  del.className = "danger";
  del.style.marginLeft = ".25rem";
  del.addEventListener("click", async () => {
    if (!confirm("Delete " + tr.dataset.originalTitle + "?")) return;
    setStatus("Deleting " + tr.dataset.originalTitle + "…");
    try {
      const r = await fetch("/api/matches/delete", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ title: tr.dataset.originalTitle }),
      });
      if (!r.ok) throw new Error(await r.text());
      tr.remove();
      setStatus("Deleted");
    } catch (err) {
      setStatus("Delete failed: " + err.message);
    }
  });
  actTd.append(save, del);
  tr.appendChild(actTd);
  return tr;
 }
 async function load() {
  setStatus("Loading…");
  const tbody = document.getElementById("rows");
  tbody.innerHTML = "";
  try {
    const r = await fetch("/api/matches");
    const data = await r.json();
    const matches = data.matches || {};
    const titles = Object.keys(matches).sort((a,b)=>a.localeCompare(b));
    for (const t of titles) tbody.appendChild(makeRow(t, matches[t]));
    setStatus(titles.length + " entries");
    applyFilter();
  } catch (err) {
    setStatus("Load failed: " + err.message);
  }
 }
 function applyFilter() {
  const q = document.getElementById("filter").value.toLowerCase();
  for (const tr of document.querySelectorAll("#rows tr")) {
    const t = tr.dataset.originalTitle.toLowerCase();
    tr.style.display = t.includes(q) ? "" : "none";
  }
 }
 document.getElementById("filter").addEventListener("input", applyFilter);
 document.getElementById("reload").addEventListener("click", load);
 document.getElementById("build").addEventListener("click", async () => {
  if (!confirm("Run full scan? This may take several minutes.")) return;
  setStatus("Building… (running on the server)");
  try {
    const r = await fetch("/api/build", { method: "POST" });
    if (!r.ok) throw new Error(await r.text());
    setStatus("Build finished");
    load();
  } catch (err) {
    setStatus("Build failed: " + err.message);
  }
 });
 load();
 </script>
 </body>
 </html>
 """
 class MatchesWebApp:
    """
    Flask app exposing the MatchesCache.

    `mover` is optional. If provided, POST /api/build calls
    ``mover.build_matches_only()`` directly inside the request handler, so
    the HTTP response is only sent once the scan has finished. Without a
    mover the route answers 503. Only one build runs at a time; a second
    request made while one is in progress gets 409.
    """

    def __init__(self, cache: MatchesCache, *,
                 mover=None,
                 host: str = "0.0.0.0",
                 port: int = 8080):
        """
        Args:
            cache: The cache that all routes read from and write to.
            mover: Object providing ``build_matches_only()``, or None.
            host:  Interface the server binds to.
            port:  TCP port the server binds to.
        """
        self._cache = cache
        self._mover = mover
        self._host = host
        self._port = port
        # Held for the duration of a build so overlapping scans are refused.
        self._build_lock = threading.Lock()
        self._app = Flask(__name__)
        self._register_routes()

    @property
    def app(self) -> Flask:
        """The underlying Flask application object."""
        return self._app

    def start(self) -> threading.Thread:
        """Starts the Flask server on a daemon thread and returns it."""
        thread = threading.Thread(
            target=self._app.run,
            kwargs={"host": self._host, "port": self._port,
                    "debug": False, "use_reloader": False,
                    "threaded": True},
            name="MatchesWebApp",
            daemon=True,
        )
        thread.start()
        print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
              flush=True)
        return thread

    # ------------------------------------------------------------------
    # Routes
    # ------------------------------------------------------------------
    def _register_routes(self) -> None:
        """Attaches all HTTP routes to the Flask app."""
        app = self._app
        cache = self._cache

        def json_object() -> dict:
            # A syntactically valid body can still be a list / string /
            # number; treat anything that is not an object as "no fields"
            # so the handlers answer 400 instead of failing on .get().
            body = request.get_json(silent=True)
            return body if isinstance(body, dict) else {}

        def text_field(body: dict, key: str) -> str:
            # Non-string values (numbers, lists, null) count as missing.
            value = body.get(key)
            return value.strip() if isinstance(value, str) else ""

        @app.get("/")
        def index() -> Response:
            # Pass the bare mimetype: the response class appends the
            # charset parameter for text/* types itself.
            return Response(_INDEX_HTML, mimetype="text/html")

        @app.get("/api/matches")
        def api_list():
            return jsonify(cache.all())

        @app.post("/api/matches")
        def api_upsert():
            body = json_object()
            title = text_field(body, "title")
            if not title:
                return Response("title is required", status=400)
            original = text_field(body, "originalTitle") or title
            if original != title:
                # Moves the entry; an entry already stored under `title`
                # is replaced by the renamed one.
                cache.rename(original, title)
            entry = cache.upsert(
                title,
                mangabaka_id=body.get("mangabakaId"),
                mangabaka_name=body.get("mangabakaName"),
                image_url=body.get("imageUrl"),
                first_match_time=body.get("firstMatchTime"),
            )
            return jsonify({"title": title, "entry": entry})

        @app.post("/api/matches/delete")
        def api_delete():
            body = json_object()
            title = text_field(body, "title")
            if not title:
                return Response("title is required", status=400)
            removed = cache.remove(title)
            return jsonify({"removed": removed, "title": title})

        @app.post("/api/build")
        def api_build():
            if self._mover is None:
                return Response("no mover configured", status=503)
            # Non-blocking acquire: refuse instead of queueing a second scan.
            if not self._build_lock.acquire(blocking=False):
                return Response("build already running", status=409)
            try:
                result = self._mover.build_matches_only()
            finally:
                self._build_lock.release()
            return jsonify(result)
@@ -51,12 +51,13 @@ from pathlib import Path
 import requests
-from ComicInfoBuilder import ComicInfoBuilder
+from ComicInfoBuilder import ComicInfoBuilder, _pick_cover_url
 from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from KavitaPersonUpdater import KavitaPersonUpdater
 from MatchesCache import MatchesCache
 _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
@@ -287,12 +288,16 @@ class SuwayomiMover:
                 kavita_api_key: "str | None" = None,
                 language: str = "en",
                 request_timeout: int = 30,
-                 delete_source: bool = True):
+                 delete_source: bool = True,
                 matches_cache: "MatchesCache | None" = None,
                 api_base_url: str = "https://api.mangabaka.dev/v1"):
        self._src = Path(suwayomi_path)
        self._dst = Path(kavita_path)
        self._language = language
        self._timeout = request_timeout
        self._delete_source = delete_source
        self._matches_cache = matches_cache
        self._api_base_url = api_base_url.rstrip("/")
        # Shared HTTP session and resolvers — reused across all series/chapters
        # to maximise cache hits and minimise API round-trips.
@@ -357,6 +362,73 @@ class SuwayomiMover:
        raise FileNotFoundError(
            f"No Suwayomi directory found for '{manga_title}' under {self._src}")
    def build_matches_only(self) -> dict:
        """
        Resolves every series below the Suwayomi root to a MangaBaka match
        without moving or packing anything.

        The root is expected to contain one directory per source, each
        holding one directory per series. For each series directory:
          - The title is the ``Series`` field of the first chapter directory
            (in chapter order) that has one; if none does, the directory
            name is used.
          - The title is cleaned with ``_clean_suwayomi_title``.
          - Titles already present in the matches cache are skipped.
          - Otherwise the top hit of a MangaBaka search is added to the
            cache. A failed or empty search is only logged; the scan
            continues with the next series.

        Returns:
            The full cache contents, as produced by ``MatchesCache.all()``.

        Raises:
            RuntimeError: if this mover was created without a matches cache.
        """
        cache = self._matches_cache
        if cache is None:
            raise RuntimeError(
                "build_matches_only requires a MatchesCache instance")

        endpoint = f"{self._api_base_url}/series/search"

        # source dirs -> series dirs, both in sorted order, files skipped
        series_dirs = (
            candidate
            for source in sorted(self._src.iterdir()) if source.is_dir()
            for candidate in sorted(source.iterdir()) if candidate.is_dir()
        )

        for series_dir in series_dirs:
            ordered_entries = sorted(
                series_dir.iterdir(),
                key=lambda entry: _chapter_sort_key(entry.name))
            # Lazily evaluated: chapters are only read until one of them
            # yields a non-empty Series value.
            declared_names = (
                _read_suwayomi_fields(chapter).get("Series")
                for chapter in ordered_entries if chapter.is_dir()
            )
            raw_series = next(
                (name for name in declared_names if name), series_dir.name)

            builder_title = _clean_suwayomi_title(raw_series)
            if cache.get(builder_title):
                print(f"[matches] {builder_title} — cached")
                continue

            print(f"[matches] {builder_title} — searching")
            try:
                response = self._session.get(
                    endpoint,
                    params={"q": builder_title, "page": 1, "limit": 1},
                    timeout=self._timeout)
                response.raise_for_status()
                hits = response.json().get("data") or []
                if hits:
                    top_hit = hits[0]
                    cache.add(
                        builder_title,
                        mangabaka_id=top_hit.get("id"),
                        mangabaka_name=top_hit.get("title") or "",
                        image_url=_pick_cover_url(top_hit.get("cover")),
                    )
                else:
                    print(f"  [warn] no MangaBaka match for {builder_title!r}")
            except Exception as exc:
                print(f"  [warn] search failed for {builder_title!r}: {exc}")

        return cache.all()
    # ------------------------------------------------------------------
    # Internal: series
    # ------------------------------------------------------------------
@@ -396,6 +468,7 @@ class SuwayomiMover:
        # One builder per series — metadata fetched once, reused for all chapters.
        builder = ComicInfoBuilder(
            builder_title, chapter=1,
            api_base_url=self._api_base_url,
            language=self._language,
            request_timeout=self._timeout,
            session=self._session,
@@ -403,6 +476,7 @@ class SuwayomiMover:
            works_resolver=self._works_resolver,
            mal_resolver=self._mal,
            al_resolver=self._al,
            matches_cache=self._matches_cache,
        )
        # Fetch MangaBaka metadata now to get the canonical title and MAL ID.