manga matching and WebApp

2026-05-26 20:20:24 +02:00
parent 12edb8a5d7
commit 615bd1b468
9 changed files with 665 additions and 56 deletions
@@ -23,43 +23,43 @@ jobs:
      - name: Push Image
        run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest

-#  deploy:
-#    needs: build
-#    runs-on: ubuntu-latest
-#    steps:
-#      - name: Checkout
-#        uses: actions/checkout@v4
-#
-#      - name: Create deployment directory
-#        uses: appleboy/ssh-action@v1.0.3
-#        with:
-#          host: ${{ secrets.SSH_HOST }}
-#          username: ${{ secrets.SSH_USER }}
-#          password: ${{ secrets.SSH_PASSWORD }}
-#          port: ${{ secrets.SSH_PORT || 22 }}
-#          script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
-#
-#      - name: Copy docker-compose via SCP
-#        uses: appleboy/scp-action@v0.1.7
-#        with:
-#          host: ${{ secrets.SSH_HOST }}
-#          username: ${{ secrets.SSH_USER }}
-#          password: ${{ secrets.SSH_PASSWORD }}
-#          port: ${{ secrets.SSH_PORT || 22 }}
-#          source: "docker-compose.prod.yml"
-#          target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector"
-#
-#      - name: Deploy via SSH
-#        uses: appleboy/ssh-action@v1.0.3
-#        with:
-#          host: ${{ secrets.SSH_HOST }}
-#          username: ${{ secrets.SSH_USER }}
-#          password: ${{ secrets.SSH_PASSWORD }}
-#          port: ${{ secrets.SSH_PORT || 22 }}
-#          script: |
-#            cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
-#            mv docker-compose.prod.yml docker-compose.yml
-#            echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
-#            sudo docker compose pull
-#            sudo docker compose up -d --remove-orphans
-#            sudo docker image prune -f
+  # Runs after the build job and rolls the stack out on the remote host over
+  # SSH: create the target directory, copy docker-compose.prod.yml into it,
+  # then log in to the registry, pull, and restart with docker compose.
+  deploy:
+    needs: build
+    runs-on: ubuntu-latest
+    steps:
+      # Presumably needed so docker-compose.prod.yml exists in the workspace
+      # for the SCP step below.
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Create deployment directory
+        uses: appleboy/ssh-action@v1.0.3
+        with:
+          host: ${{ secrets.SSH_HOST }}
+          username: ${{ secrets.SSH_USER }}
+          password: ${{ secrets.SSH_PASSWORD }}
+          port: ${{ secrets.SSH_PORT || 22 }}
+          script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
+
+      - name: Copy docker-compose via SCP
+        uses: appleboy/scp-action@v0.1.7
+        with:
+          host: ${{ secrets.SSH_HOST }}
+          username: ${{ secrets.SSH_USER }}
+          password: ${{ secrets.SSH_PASSWORD }}
+          port: ${{ secrets.SSH_PORT || 22 }}
+          source: "docker-compose.prod.yml"
+          target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector"
+
+      - name: Deploy via SSH
+        uses: appleboy/ssh-action@v1.0.3
+        with:
+          host: ${{ secrets.SSH_HOST }}
+          username: ${{ secrets.SSH_USER }}
+          password: ${{ secrets.SSH_PASSWORD }}
+          port: ${{ secrets.SSH_PORT || 22 }}
+          # NOTE(review): the registry user and password are substituted into
+          # the script text itself, so a value containing quotes or `$` would
+          # change the command the remote shell runs — confirm this is safe.
+          script: |
+            cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
+            mv docker-compose.prod.yml docker-compose.yml
+            echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
+            sudo docker compose pull
+            sudo docker compose up -d --remove-orphans
+            sudo docker image prune -f
@@ -18,6 +18,8 @@ ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

 # Mount points used by main.py defaults
-VOLUME ["/mnt/suwayomi", "/mnt/kavita"]
+VOLUME ["/mnt/suwayomi", "/mnt/kavita", "/config"]
+
+EXPOSE 8080

 CMD ["python", "/app/main.py"]
@@ -9,6 +9,11 @@ services:
      LANGUAGE:       "${LANGUAGE:-en}"
      SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
      DELETE_SOURCE:  "${DELETE_SOURCE:-true}"
+      MATCH_PATH:     "${MATCH_PATH:-/config/matches.json}"
+      WEB_PORT:       "${WEB_PORT:-8080}"
+    ports:
+      # The app binds to WEB_PORT inside the container (it is passed in via
+      # the environment block), so the same value must be used on both sides
+      # of the mapping. A fixed container port of 8080 publishes a port that
+      # nothing listens on as soon as WEB_PORT is overridden.
+      - "${WEB_PORT:-8080}:${WEB_PORT:-8080}"
    volumes:
      - "${HOST_SUWAYOMI_PATH}:/mnt/suwayomi"
      - "${HOST_KAVITA_PATH}:/mnt/kavita"
+      - "${HOST_CONFIG_PATH}:/config"
@@ -24,6 +24,9 @@ Environment variables
    SETTLE_SECONDS      default 600   (10-minute quiet window)
    REQUEST_TIMEOUT     default 30
    DELETE_SOURCE       default true  (delete source folders after pack)
+    MATCH_PATH          default /config/matches.json
+    WEB_PORT            default 8080  (Flask web UI for matches.json)
+    WEB_HOST            default 0.0.0.0
 """

 from __future__ import annotations
@@ -38,6 +41,8 @@ sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))

 from src.SuwayomiMover import SuwayomiMover                       # noqa: E402
 from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher       # noqa: E402
+from src.MatchesCache import MatchesCache                          # noqa: E402
+from src.MatchesWebApp import MatchesWebApp                        # noqa: E402


 def _env_str(name: str, default: "str | None" = None,
@@ -77,6 +82,9 @@ def main() -> int:
    settle_seconds  = _env_int("SETTLE_SECONDS",  600)
    request_timeout = _env_int("REQUEST_TIMEOUT",  30)
    delete_source   = _env_bool("DELETE_SOURCE",  True)
+    match_path      = _env_str("MATCH_PATH", "/config/matches.json")
+    web_host        = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
+    web_port        = _env_int("WEB_PORT", 8080)

    print(f"[main] suwayomi  = {suwayomi_path}",  flush=True)
    print(f"[main] kavita    = {kavita_path}",    flush=True)
@@ -84,6 +92,10 @@ def main() -> int:
    print(f"[main] settle    = {settle_seconds}s", flush=True)
    print(f"[main] language  = {language}",       flush=True)
    print(f"[main] delete src= {delete_source}",  flush=True)
+    print(f"[main] match path= {match_path}",     flush=True)
+    print(f"[main] web       = {web_host}:{web_port}", flush=True)
+
+    matches_cache = MatchesCache(match_path)

    mover = SuwayomiMover(
        suwayomi_path, kavita_path,
@@ -92,20 +104,23 @@ def main() -> int:
        language=language,
        request_timeout=request_timeout,
        delete_source=delete_source,
+        matches_cache=matches_cache,
    )

-    watcher = SuwayomiFolderWatcher(
-        suwayomi_path, mover, settle_seconds=settle_seconds)
+    # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)

-    def shutdown(signum, _frame):
-        print(f"[main] received signal {signum}", flush=True)
-        watcher.stop()
+    web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port)
+    # start() serves Flask from a daemon thread. With the folder watcher
+    # disabled nothing else blocks main(), so without the join() below the
+    # function returns immediately, the interpreter exits, and the daemon
+    # thread (the whole web UI) is killed right after start-up.
+    web_thread = web_app.start()
+
+    def _exit_on_signal(signum, _frame):
+        # Raising SystemExit from the handler unwinds the join() below so the
+        # process stops promptly on `docker stop` / Ctrl-C.
+        print(f"[main] received signal {signum}", flush=True)
+        sys.exit(0)
+
+    signal.signal(signal.SIGTERM, _exit_on_signal)
+    signal.signal(signal.SIGINT,  _exit_on_signal)
+    web_thread.join()   # blocks until a signal arrives or the server thread dies

-    signal.signal(signal.SIGTERM, shutdown)
-    signal.signal(signal.SIGINT,  shutdown)
-
-    watcher.start()
-    watcher.wait()   # blocks until stop() is called via a signal
+    # def shutdown(signum, _frame):
+    #     print(f"[main] received signal {signum}", flush=True)
+    #     watcher.stop()
+    #
+    # signal.signal(signal.SIGTERM, shutdown)
+    # signal.signal(signal.SIGINT,  shutdown)
+    #
+    # watcher.start()
+    # watcher.wait()   # blocks until stop() is called via a signal
    return 0


@@ -1,3 +1,4 @@
 requests>=2.31
 Pillow>=10.0
 watchdog>=4.0
+Flask>=3.0
@@ -48,6 +48,7 @@ from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
+from MatchesCache import MatchesCache

 try:
    from PIL import Image
@@ -170,7 +171,8 @@ class ComicInfoBuilder:
                 volume_resolver: "MangaDexVolumeResolver | None" = None,
                 works_resolver: "MangaBakaWorksResolver | None" = None,
                 mal_resolver: "MALResolver | None" = None,
-                 al_resolver: "AniListResolver | None" = None):
+                 al_resolver: "AniListResolver | None" = None,
+                 matches_cache: "MatchesCache | None" = None):
        if not manga_title or not str(manga_title).strip():
            raise ValueError("manga_title must not be empty.")

@@ -197,6 +199,7 @@ class ComicInfoBuilder:
            request_timeout=request_timeout)
        self._al_resolver = al_resolver or AniListResolver(
            request_timeout=request_timeout)
+        self._matches_cache = matches_cache

        self._metadata: "dict | None" = None
        self._pages: list[dict] = []
@@ -353,14 +356,43 @@ class ComicInfoBuilder:
        return series

    def _search_best_series(self, title: str):
-        """Searches for `title` and returns the best matching series entry."""
+        """
+        Resolves `title` to a MangaBaka series.
+
+        Lookup order:
+          1. matches.json cache (if attached) — uses the stored series ID
+             to fetch the full series, skipping the search step entirely.
+          2. Fresh MangaBaka search — top hit. The match is persisted to
+             matches.json before being returned so it survives a crash.
+        """
+        if self._matches_cache is not None:
+            cached = self._matches_cache.get(title)
+            if cached and cached.get("mangabakaId"):
+                try:
+                    return self._fetch_series_by_id(cached["mangabakaId"])
+                except Exception as exc:
+                    print(f"[ComicInfoBuilder] cached id "
+                          f"{cached['mangabakaId']} for {title!r} failed "
+                          f"({exc}); falling back to fresh search",
+                          flush=True)
+
        url = f"{self.api_base_url}/series/search"
        resp = self._session.get(
            url, params={"q": title, "page": 1, "limit": 1},
            timeout=self.request_timeout)
        resp.raise_for_status()
        data = resp.json().get("data") or []
-        return data[0] if data else None
+        series = data[0] if data else None
+
+        if series and self._matches_cache is not None:
+            # Persist the search hit only when no usable id is stored yet.
+            # This point is also reached when fetching a cached id failed
+            # (for example a transient network error); that entry may be a
+            # manual correction made through the web UI, so the top search
+            # hit must not silently replace it.
+            existing = self._matches_cache.get(title)
+            if not (existing and existing.get("mangabakaId")):
+                self._matches_cache.add(
+                    title,
+                    mangabaka_id=series.get("id"),
+                    mangabaka_name=series.get("title") or "",
+                    image_url=_pick_cover_url(series.get("cover")),
+                )
+
+        return series

    def _fetch_series_by_id(self, series_id) -> dict:
        url = f"{self.api_base_url}/series/{series_id}"
@@ -0,0 +1,139 @@
+"""
+MatchesCache.py
+===============
+
+Persistent JSON cache that maps a Suwayomi/series search title to the
+MangaBaka series it was matched against.
+
+Structure on disk::
+
+    {
+      "matches": {
+        "<search title>": {
+          "mangabakaId":    "12345",
+          "mangabakaName":  "One-Punch Man",
+          "imageUrl":       "https://.../cover.jpg",
+          "firstMatchTime": 1700000000
+        },
+        ...
+      }
+    }
+
+The cache is consulted by ComicInfoBuilder before issuing a MangaBaka
+search request, and is written back to disk on every mutation so a crash
+does not lose matches that were resolved in the current run.
+"""
+
+from __future__ import annotations
+
+import json
+import threading
+import time
+from pathlib import Path
+
+
+class MatchesCache:
+    """
+    JSON-backed store mapping a search title to its MangaBaka match.
+
+    Every public method holds ``self._lock`` while it touches the data, and
+    every mutation rewrites the file at `path` before returning. Lookups
+    hand out copies, so callers cannot alter cached entries in place.
+    """
+
+    def __init__(self, path):
+        """Binds the cache to `path` and loads existing matches, if any."""
+        self._path = Path(path)
+        self._lock = threading.RLock()
+        self._data: dict = {"matches": {}}
+        self._load()
+
+    # ------------------------------------------------------------------
+    # Public lookup / mutation API
+    # ------------------------------------------------------------------
+    def get(self, title: str) -> "dict | None":
+        """Returns a copy of the entry stored for `title`, or None."""
+        with self._lock:
+            entry = self._data["matches"].get(title)
+            return dict(entry) if entry else None
+
+    def add(self, title: str, *,
+            mangabaka_id,
+            mangabaka_name: str,
+            image_url: "str | None") -> dict:
+        """
+        Stores a fresh entry for `title`, replacing any existing one, and
+        saves the file. The id is stored as a string, missing values as "",
+        and firstMatchTime is set to the current Unix time.
+
+        Returns a copy of the stored entry.
+        """
+        entry = {
+            "mangabakaId":    str(mangabaka_id) if mangabaka_id is not None else "",
+            "mangabakaName":  mangabaka_name or "",
+            "imageUrl":       image_url or "",
+            "firstMatchTime": int(time.time()),
+        }
+        with self._lock:
+            self._data["matches"][title] = entry
+            self._save_unlocked()
+        return dict(entry)
+
+    def upsert(self, title: str, *,
+               mangabaka_id=None,
+               mangabaka_name=None,
+               image_url=None,
+               first_match_time=None) -> dict:
+        """
+        Creates the entry for `title` if needed and overwrites only the
+        fields whose argument is not None, then saves the file.
+
+        A `first_match_time` that cannot be converted to int is ignored.
+        Returns a copy of the resulting entry.
+        """
+        with self._lock:
+            entry = self._data["matches"].get(title)
+            if entry is None:
+                entry = {
+                    "mangabakaId":    "",
+                    "mangabakaName":  "",
+                    "imageUrl":       "",
+                    "firstMatchTime": int(time.time()),
+                }
+                self._data["matches"][title] = entry
+            if mangabaka_id is not None:
+                entry["mangabakaId"] = str(mangabaka_id)
+            if mangabaka_name is not None:
+                entry["mangabakaName"] = mangabaka_name
+            if image_url is not None:
+                entry["imageUrl"] = image_url
+            if first_match_time is not None:
+                try:
+                    entry["firstMatchTime"] = int(first_match_time)
+                except (TypeError, ValueError):
+                    # Keep the existing timestamp rather than reject the edit.
+                    pass
+            self._save_unlocked()
+            return dict(entry)
+
+    def rename(self, old_title: str, new_title: str) -> bool:
+        """
+        Moves the entry stored under `old_title` to `new_title` and saves.
+
+        Returns False (and changes nothing) when `new_title` is empty, equal
+        to `old_title`, or `old_title` is unknown. An entry already stored
+        under `new_title` is replaced.
+        """
+        if not new_title or old_title == new_title:
+            return False
+        with self._lock:
+            entry = self._data["matches"].pop(old_title, None)
+            if entry is None:
+                return False
+            self._data["matches"][new_title] = entry
+            self._save_unlocked()
+            return True
+
+    def remove(self, title: str) -> bool:
+        """Deletes the entry for `title`; returns whether it existed."""
+        with self._lock:
+            existed = title in self._data["matches"]
+            if existed:
+                del self._data["matches"][title]
+                self._save_unlocked()
+            return existed
+
+    def all(self) -> dict:
+        """Returns ``{"matches": {...}}`` with a copy of every entry."""
+        with self._lock:
+            return {"matches": {k: dict(v)
+                                for k, v in self._data["matches"].items()}}
+
+    # ------------------------------------------------------------------
+    # Internal IO
+    # ------------------------------------------------------------------
+    def _load(self) -> None:
+        """
+        Reads the cache file into memory, if it exists.
+
+        An unreadable file, invalid JSON / invalid UTF-8, or a document
+        without a ``"matches"`` object leaves the cache empty (with a log
+        line). Entries whose value is not a JSON object are dropped: get()
+        and all() copy entries with dict(), which raises on strings and
+        numbers, so a single hand-edited bad entry would otherwise break
+        every listing.
+        """
+        if not self._path.is_file():
+            return
+        try:
+            with self._path.open("r", encoding="utf-8") as f:
+                loaded = json.load(f)
+        except (OSError, ValueError) as exc:
+            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+            print(f"[MatchesCache] failed to load {self._path}: {exc}",
+                  flush=True)
+            return
+        matches = loaded.get("matches") if isinstance(loaded, dict) else None
+        if not isinstance(matches, dict):
+            print(f'[MatchesCache] {self._path} has no "matches" object; '
+                  f"starting with an empty cache", flush=True)
+            return
+        malformed = [t for t, e in matches.items() if not isinstance(e, dict)]
+        for title in malformed:
+            print(f"[MatchesCache] dropping malformed entry {title!r}",
+                  flush=True)
+            del matches[title]
+        self._data = loaded
+
+    def _save_unlocked(self) -> None:
+        """
+        Writes the whole cache to disk; the caller must hold ``self._lock``.
+
+        The data goes to a sibling ``.tmp`` file first and is then moved
+        over the real path, so readers never see a half-written file.
+        """
+        self._path.parent.mkdir(parents=True, exist_ok=True)
+        tmp = self._path.with_suffix(self._path.suffix + ".tmp")
+        with tmp.open("w", encoding="utf-8") as f:
+            json.dump(self._data, f, ensure_ascii=False, indent=2)
+        tmp.replace(self._path)
@@ -0,0 +1,341 @@
+"""
+MatchesWebApp.py
+================
+
+Flask web UI for inspecting and editing the matches.json file produced by
+MatchesCache.
+
+Routes
+------
+GET  /                       HTML table view (one row per cached match)
+GET  /api/matches            JSON dump of the full cache
+POST /api/matches            Upsert / rename an entry
+                             body: {originalTitle?, title, mangabakaId,
+                                    mangabakaName, imageUrl, firstMatchTime?}
+POST /api/matches/delete     Remove an entry      body: {title}
+POST /api/build              Trigger a full re-scan via SuwayomiMover.build_matches_only
+                             (only available if a mover is wired in)
+
+The Title cell is rendered as a link to MangaBaka's search page, restricted
+to the manga / manhwa / manhua types.
+"""
+
+from __future__ import annotations
+
+import threading
+from urllib.parse import quote_plus
+
+from flask import Flask, jsonify, request, Response
+
+from MatchesCache import MatchesCache
+
+
+# Single-page UI served at "/": one static HTML/CSS/JS document whose script
+# talks to /api/matches, /api/matches/delete and /api/build via fetch().
+_INDEX_HTML = """<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <title>MangaBaka matches</title>
+  <style>
+    body  { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
+    h1    { margin: 0 0 1rem; font-size: 1.4rem; }
+    .bar  { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
+    .bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
+    button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
+    button.primary { background:#2563eb; border-color:#2563eb; color:white; }
+    button.danger  { background:#7f1d1d; border-color:#7f1d1d; color:white; }
+    table { border-collapse: collapse; width: 100%; }
+    th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
+    th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
+    tr:nth-child(even) td { background: #161616; }
+    td.image img { max-width: 90px; max-height: 130px; display:block; }
+    td input { width: 100%; padding: .25rem; background:#222; color:#eee; border:1px solid #444; }
+    td.title a { color: #60a5fa; text-decoration: none; }
+    td.title a:hover { text-decoration: underline; }
+    td.actions { white-space: nowrap; }
+    .status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
+    .dirty td { background: #1f2937 !important; }
+  </style>
+</head>
+<body>
+<h1>MangaBaka matches</h1>
+<div class="bar">
+  <input id="filter" type="search" placeholder="Filter by title…">
+  <button id="reload">Reload</button>
+  <button id="build"  class="primary">Build all (rescan)</button>
+  <span class="status" id="status"></span>
+</div>
+
+<table>
+  <thead>
+    <tr>
+      <th>Title</th>
+      <th>mangabakaId</th>
+      <th>mangabakaName</th>
+      <th>firstMatchTime</th>
+      <th>Image</th>
+      <th></th>
+    </tr>
+  </thead>
+  <tbody id="rows"></tbody>
+</table>
+
+<script>
+const TYPES = "&type=manhwa&type=manhua&type=manga";
+
+function fmtTime(unix) {
+  if (!unix) return "";
+  const d = new Date(unix * 1000);
+  return d.toLocaleString();
+}
+
+function searchUrl(title) {
+  return "https://mangabaka.org/search?q=" + encodeURIComponent(title) + TYPES;
+}
+
+function setStatus(msg) { document.getElementById("status").textContent = msg; }
+
+function makeRow(title, e) {
+  const tr = document.createElement("tr");
+  tr.dataset.originalTitle = title;
+
+  const titleTd = document.createElement("td");
+  titleTd.className = "title";
+  const titleLink = document.createElement("a");
+  titleLink.href = searchUrl(title);
+  titleLink.target = "_blank";
+  titleLink.rel = "noopener";
+  titleLink.textContent = title;
+  const titleInput = document.createElement("input");
+  titleInput.value = title;
+  titleInput.style.marginTop = ".25rem";
+  titleInput.addEventListener("input", () => {
+    titleLink.textContent = titleInput.value;
+    titleLink.href = searchUrl(titleInput.value);
+    tr.classList.add("dirty");
+  });
+  titleTd.append(titleLink, titleInput);
+  tr.appendChild(titleTd);
+
+  function field(value) {
+    const td = document.createElement("td");
+    const inp = document.createElement("input");
+    inp.value = value || "";
+    inp.addEventListener("input", () => tr.classList.add("dirty"));
+    td.appendChild(inp);
+    return [td, inp];
+  }
+
+  const [idTd,   idInp]   = field(e.mangabakaId);
+  const [nameTd, nameInp] = field(e.mangabakaName);
+  const [urlTd,  urlInp]  = field(e.imageUrl);
+  tr.appendChild(idTd);
+  tr.appendChild(nameTd);
+
+  const timeTd = document.createElement("td");
+  timeTd.textContent = fmtTime(e.firstMatchTime);
+  tr.appendChild(timeTd);
+
+  const imgTd = document.createElement("td");
+  imgTd.className = "image";
+  const img = document.createElement("img");
+  img.src = e.imageUrl || "";
+  img.alt = "";
+  img.loading = "lazy";
+  urlInp.addEventListener("input", () => { img.src = urlInp.value; });
+  imgTd.append(img, urlInp);
+  tr.appendChild(imgTd);
+
+  const actTd = document.createElement("td");
+  actTd.className = "actions";
+  const save = document.createElement("button");
+  save.textContent = "Save";
+  save.className = "primary";
+  save.addEventListener("click", async () => {
+    save.disabled = true;
+    setStatus("Saving " + titleInput.value + "…");
+    const body = {
+      originalTitle: tr.dataset.originalTitle,
+      title:         titleInput.value,
+      mangabakaId:   idInp.value,
+      mangabakaName: nameInp.value,
+      imageUrl:      urlInp.value,
+    };
+    try {
+      const r = await fetch("/api/matches", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify(body),
+      });
+      if (!r.ok) throw new Error(await r.text());
+      tr.dataset.originalTitle = titleInput.value;
+      tr.classList.remove("dirty");
+      setStatus("Saved " + titleInput.value);
+    } catch (err) {
+      setStatus("Save failed: " + err.message);
+    } finally {
+      save.disabled = false;
+    }
+  });
+  const del = document.createElement("button");
+  del.textContent = "Delete";
+  del.className = "danger";
+  del.style.marginLeft = ".25rem";
+  del.addEventListener("click", async () => {
+    if (!confirm("Delete " + tr.dataset.originalTitle + "?")) return;
+    setStatus("Deleting " + tr.dataset.originalTitle + "…");
+    try {
+      const r = await fetch("/api/matches/delete", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ title: tr.dataset.originalTitle }),
+      });
+      if (!r.ok) throw new Error(await r.text());
+      tr.remove();
+      setStatus("Deleted");
+    } catch (err) {
+      setStatus("Delete failed: " + err.message);
+    }
+  });
+  actTd.append(save, del);
+  tr.appendChild(actTd);
+  return tr;
+}
+
+async function load() {
+  setStatus("Loading…");
+  const tbody = document.getElementById("rows");
+  tbody.innerHTML = "";
+  try {
+    const r = await fetch("/api/matches");
+    const data = await r.json();
+    const matches = data.matches || {};
+    const titles = Object.keys(matches).sort((a,b)=>a.localeCompare(b));
+    for (const t of titles) tbody.appendChild(makeRow(t, matches[t]));
+    setStatus(titles.length + " entries");
+    applyFilter();
+  } catch (err) {
+    setStatus("Load failed: " + err.message);
+  }
+}
+
+function applyFilter() {
+  const q = document.getElementById("filter").value.toLowerCase();
+  for (const tr of document.querySelectorAll("#rows tr")) {
+    const t = tr.dataset.originalTitle.toLowerCase();
+    tr.style.display = t.includes(q) ? "" : "none";
+  }
+}
+
+document.getElementById("filter").addEventListener("input", applyFilter);
+document.getElementById("reload").addEventListener("click", load);
+document.getElementById("build").addEventListener("click", async () => {
+  if (!confirm("Run full scan? This may take several minutes.")) return;
+  setStatus("Building… (running on the server)");
+  try {
+    const r = await fetch("/api/build", { method: "POST" });
+    if (!r.ok) throw new Error(await r.text());
+    setStatus("Build finished");
+    load();
+  } catch (err) {
+    setStatus("Build failed: " + err.message);
+  }
+});
+
+load();
+</script>
+</body>
+</html>
+"""
+
+
+class MatchesWebApp:
+    """
+    Flask app exposing the MatchesCache. `mover` is optional — if provided,
+    POST /api/build calls its build_matches_only() synchronously inside the
+    request handler. A non-blocking lock answers 409 to a second build
+    request while one is still running.
+    """
+
+    def __init__(self, cache: MatchesCache, *,
+                 mover=None,
+                 host: str = "0.0.0.0",
+                 port: int = 8080):
+        """Stores the collaborators and registers all routes on a new Flask app."""
+        self._cache = cache
+        self._mover = mover
+        self._host = host
+        self._port = port
+        self._build_lock = threading.Lock()
+        self._app = Flask(__name__)
+        self._register_routes()
+
+    @property
+    def app(self) -> Flask:
+        """The underlying Flask application."""
+        return self._app
+
+    def start(self) -> threading.Thread:
+        """Starts the Flask server on a daemon thread and returns it."""
+        thread = threading.Thread(
+            target=self._app.run,
+            kwargs={"host": self._host, "port": self._port,
+                    "debug": False, "use_reloader": False,
+                    "threaded": True},
+            name="MatchesWebApp",
+            daemon=True,
+        )
+        thread.start()
+        print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
+              flush=True)
+        return thread
+
+    # ------------------------------------------------------------------
+    # Routes
+    # ------------------------------------------------------------------
+    def _register_routes(self) -> None:
+        """Attaches all HTTP routes to the Flask app as closures."""
+        app = self._app
+        cache = self._cache
+
+        def json_object() -> dict:
+            # get_json(silent=True) yields None for a missing or invalid body
+            # and a list/str/number for valid non-object JSON. Only a dict
+            # supports the .get() lookups in the handlers, so anything else
+            # is treated as an empty body and answered with 400, not a 500.
+            body = request.get_json(silent=True)
+            return body if isinstance(body, dict) else {}
+
+        def text_field(body: dict, key: str) -> str:
+            # Non-string values (numbers, null, nested objects) count as
+            # missing instead of crashing on .strip().
+            value = body.get(key)
+            return value.strip() if isinstance(value, str) else ""
+
+        @app.get("/")
+        def index() -> Response:
+            return Response(_INDEX_HTML, mimetype="text/html; charset=utf-8")
+
+        @app.get("/api/matches")
+        def api_list():
+            return jsonify(cache.all())
+
+        @app.post("/api/matches")
+        def api_upsert():
+            body = json_object()
+            title = text_field(body, "title")
+            if not title:
+                return Response("title is required", status=400)
+            # A differing originalTitle means the row was renamed in the UI:
+            # move the entry first, then apply the field edits to the new key.
+            original = text_field(body, "originalTitle") or title
+            if original != title:
+                cache.rename(original, title)
+            entry = cache.upsert(
+                title,
+                mangabaka_id=body.get("mangabakaId"),
+                mangabaka_name=body.get("mangabakaName"),
+                image_url=body.get("imageUrl"),
+                first_match_time=body.get("firstMatchTime"),
+            )
+            return jsonify({"title": title, "entry": entry})
+
+        @app.post("/api/matches/delete")
+        def api_delete():
+            body = json_object()
+            title = text_field(body, "title")
+            if not title:
+                return Response("title is required", status=400)
+            removed = cache.remove(title)
+            return jsonify({"removed": removed, "title": title})
+
+        @app.post("/api/build")
+        def api_build():
+            if self._mover is None:
+                return Response("no mover configured", status=503)
+            # Non-blocking acquire: a concurrent request gets 409 right away
+            # instead of queueing a second scan behind the running one.
+            if not self._build_lock.acquire(blocking=False):
+                return Response("build already running", status=409)
+            try:
+                result = self._mover.build_matches_only()
+            finally:
+                self._build_lock.release()
+            return jsonify(result)
@@ -51,12 +51,13 @@ from pathlib import Path

 import requests

-from ComicInfoBuilder import ComicInfoBuilder
+from ComicInfoBuilder import ComicInfoBuilder, _pick_cover_url
 from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from KavitaPersonUpdater import KavitaPersonUpdater
+from MatchesCache import MatchesCache


 _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
@@ -287,12 +288,16 @@ class SuwayomiMover:
                 kavita_api_key: "str | None" = None,
                 language: str = "en",
                 request_timeout: int = 30,
-                 delete_source: bool = True):
+                 delete_source: bool = True,
+                 matches_cache: "MatchesCache | None" = None,
+                 api_base_url: str = "https://api.mangabaka.dev/v1"):
        self._src = Path(suwayomi_path)
        self._dst = Path(kavita_path)
        self._language = language
        self._timeout = request_timeout
        self._delete_source = delete_source
+        self._matches_cache = matches_cache
+        self._api_base_url = api_base_url.rstrip("/")

        # Shared HTTP session and resolvers — reused across all series/chapters
        # to maximise cache hits and minimise API round-trips.
@@ -357,6 +362,73 @@ class SuwayomiMover:
        raise FileNotFoundError(
            f"No Suwayomi directory found for '{manga_title}' under {self._src}")

+    def build_matches_only(self) -> dict:
+        """
+        Walks every series under the Suwayomi root and resolves each one
+        to a MangaBaka match — nothing else.
+
+        For every series:
+          - Reads the first chapter's ComicInfo.xml to obtain the canonical
+            Series name (falls back to the folder name).
+          - Cleans the name (strips source labels) the same way the real
+            move pipeline does.
+          - If the title is already in the matches cache, skips it.
+          - Otherwise issues a MangaBaka search and adds the top hit to
+            the cache (which is persisted to disk immediately).
+
+        Titles with no search hit, or whose search request fails, are
+        logged and skipped; they do not abort the scan.
+
+        Returns the full cache contents as a Python dict.
+        Raises RuntimeError if the mover was created without a MatchesCache.
+        """
+        if self._matches_cache is None:
+            raise RuntimeError(
+                "build_matches_only requires a MatchesCache instance")
+
+        search_url = f"{self._api_base_url}/series/search"
+
+        # Directory layout walked here: <root>/<source>/<manga>/<chapter>.
+        for source_dir in sorted(self._src.iterdir()):
+            if not source_dir.is_dir():
+                continue
+            for manga_dir in sorted(source_dir.iterdir()):
+                if not manga_dir.is_dir():
+                    continue
+
+                # Default to the folder name; the first chapter directory (in
+                # _chapter_sort_key order) that yields a Series field wins.
+                raw_series = manga_dir.name
+                for chapter_dir in sorted(manga_dir.iterdir(),
+                                          key=lambda p: _chapter_sort_key(p.name)):
+                    if chapter_dir.is_dir():
+                        fields = _read_suwayomi_fields(chapter_dir)
+                        if fields.get("Series"):
+                            raw_series = fields["Series"]
+                            break
+
+                builder_title = _clean_suwayomi_title(raw_series)
+
+                # Any stored entry counts as cached here, including one whose
+                # mangabakaId is empty.
+                if self._matches_cache.get(builder_title):
+                    print(f"[matches] {builder_title} — cached")
+                    continue
+
+                print(f"[matches] {builder_title} — searching")
+                try:
+                    resp = self._session.get(
+                        search_url,
+                        params={"q": builder_title, "page": 1, "limit": 1},
+                        timeout=self._timeout)
+                    resp.raise_for_status()
+                    data = resp.json().get("data") or []
+                    if not data:
+                        print(f"  [warn] no MangaBaka match for {builder_title!r}")
+                        continue
+                    # limit=1 above, so the only hit is taken as the match.
+                    series = data[0]
+                    self._matches_cache.add(
+                        builder_title,
+                        mangabaka_id=series.get("id"),
+                        mangabaka_name=series.get("title") or "",
+                        image_url=_pick_cover_url(series.get("cover")),
+                    )
+                except Exception as exc:
+                    # Best effort: log the failure and move on to the next series.
+                    print(f"  [warn] search failed for {builder_title!r}: {exc}")
+
+        return self._matches_cache.all()
+
    # ------------------------------------------------------------------
    # Internal: series
    # ------------------------------------------------------------------
@@ -396,6 +468,7 @@ class SuwayomiMover:
        # One builder per series — metadata fetched once, reused for all chapters.
        builder = ComicInfoBuilder(
            builder_title, chapter=1,
+            api_base_url=self._api_base_url,
            language=self._language,
            request_timeout=self._timeout,
            session=self._session,
@@ -403,6 +476,7 @@ class SuwayomiMover:
            works_resolver=self._works_resolver,
            mal_resolver=self._mal,
            al_resolver=self._al,
+            matches_cache=self._matches_cache,
        )

        # Fetch MangaBaka metadata now to get the canonical title and MAL ID.