manga matching and WebApp

2026-05-26 20:20:24 +02:00
parent 12edb8a5d7
commit 615bd1b468
9 changed files with 665 additions and 56 deletions
@@ -51,12 +51,13 @@ from pathlib import Path

 import requests

-from ComicInfoBuilder import ComicInfoBuilder
+from ComicInfoBuilder import ComicInfoBuilder, _pick_cover_url
 from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from KavitaPersonUpdater import KavitaPersonUpdater
+from MatchesCache import MatchesCache


 _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
@@ -287,12 +288,16 @@ class SuwayomiMover:
                 kavita_api_key: "str | None" = None,
                 language: str = "en",
                 request_timeout: int = 30,
-                 delete_source: bool = True):
+                 delete_source: bool = True,
+                 matches_cache: "MatchesCache | None" = None,
+                 api_base_url: str = "https://api.mangabaka.dev/v1"):
        self._src = Path(suwayomi_path)
        self._dst = Path(kavita_path)
        self._language = language
        self._timeout = request_timeout
        self._delete_source = delete_source
+        self._matches_cache = matches_cache
+        self._api_base_url = api_base_url.rstrip("/")

        # Shared HTTP session and resolvers — reused across all series/chapters
        # to maximise cache hits and minimise API round-trips.
@@ -357,6 +362,73 @@ class SuwayomiMover:
        raise FileNotFoundError(
            f"No Suwayomi directory found for '{manga_title}' under {self._src}")

+    def build_matches_only(self) -> dict:
+        """
+        Resolve every series under the Suwayomi root to a MangaBaka match.
+
+        Walks <root>/<source>/<series>/<chapter>, skipping non-directories.
+        For every series:
+          - Take the "Series" field from the first chapter directory (in
+            chapter sort order) that has one; else use the folder name.
+          - Clean the name with _clean_suwayomi_title.
+          - Skip the series if the cleaned title is already cached.
+          - Otherwise query "<api_base_url>/series/search" (limit 1) and
+            add the top hit to the cache; a miss or any exception only
+            prints a warning and moves on to the next series.
+
+        Returns the cache's all() result; raises RuntimeError without a cache.
+        """
+        if self._matches_cache is None:
+            raise RuntimeError(
+                "build_matches_only requires a MatchesCache instance")
+
+        search_url = f"{self._api_base_url}/series/search"
+
+        for source_dir in sorted(self._src.iterdir()):
+            if not source_dir.is_dir():
+                continue
+            for manga_dir in sorted(source_dir.iterdir()):
+                if not manga_dir.is_dir():
+                    continue
+
+                raw_series = manga_dir.name  # fallback when no chapter yields a "Series" field
+                for chapter_dir in sorted(manga_dir.iterdir(),
+                                          key=lambda p: _chapter_sort_key(p.name)):
+                    if chapter_dir.is_dir():
+                        fields = _read_suwayomi_fields(chapter_dir)
+                        if fields.get("Series"):
+                            raw_series = fields["Series"]
+                            break  # first chapter (in sort order) with a Series value wins
+
+                builder_title = _clean_suwayomi_title(raw_series)
+
+                if self._matches_cache.get(builder_title):  # truthy lookup is treated as "already matched"
+                    print(f"[matches] {builder_title} — cached")
+                    continue
+
+                print(f"[matches] {builder_title} — searching")
+                try:
+                    resp = self._session.get(
+                        search_url,
+                        params={"q": builder_title, "page": 1, "limit": 1},
+                        timeout=self._timeout)
+                    resp.raise_for_status()
+                    data = resp.json().get("data") or []  # missing/None "data" is treated as no results
+                    if not data:
+                        print(f"  [warn] no MangaBaka match for {builder_title!r}")
+                        continue
+                    series = data[0]
+                    self._matches_cache.add(  # NOTE(review): earlier doc said add() persists to disk at once — confirm in MatchesCache
+                        builder_title,
+                        mangabaka_id=series.get("id"),
+                        mangabaka_name=series.get("title") or "",
+                        image_url=_pick_cover_url(series.get("cover")),
+                    )
+                except Exception as exc:  # best-effort: report and continue with the next series
+                    print(f"  [warn] search failed for {builder_title!r}: {exc}")
+
+        return self._matches_cache.all()
+
    # ------------------------------------------------------------------
    # Internal: series
    # ------------------------------------------------------------------
@@ -396,6 +468,7 @@ class SuwayomiMover:
        # One builder per series — metadata fetched once, reused for all chapters.
        builder = ComicInfoBuilder(
            builder_title, chapter=1,
+            api_base_url=self._api_base_url,
            language=self._language,
            request_timeout=self._timeout,
            session=self._session,
@@ -403,6 +476,7 @@ class SuwayomiMover:
            works_resolver=self._works_resolver,
            mal_resolver=self._mal,
            al_resolver=self._al,
+            matches_cache=self._matches_cache,
        )

        # Fetch MangaBaka metadata now to get the canonical title and MAL ID.