manga matching and WebApp
Build and Deploy / build (push) Successful in 32s
Build and Deploy / deploy (push) Successful in 25s

This commit is contained in:
2026-05-26 20:20:24 +02:00
parent 12edb8a5d7
commit 615bd1b468
9 changed files with 665 additions and 56 deletions
+76 -2
View File
@@ -51,12 +51,13 @@ from pathlib import Path
import requests
from ComicInfoBuilder import ComicInfoBuilder
from ComicInfoBuilder import ComicInfoBuilder, _pick_cover_url
from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver
from AniListResolver import AniListResolver
from KavitaPersonUpdater import KavitaPersonUpdater
from MatchesCache import MatchesCache
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
@@ -287,12 +288,16 @@ class SuwayomiMover:
kavita_api_key: "str | None" = None,
language: str = "en",
request_timeout: int = 30,
delete_source: bool = True):
delete_source: bool = True,
matches_cache: "MatchesCache | None" = None,
api_base_url: str = "https://api.mangabaka.dev/v1"):
self._src = Path(suwayomi_path)
self._dst = Path(kavita_path)
self._language = language
self._timeout = request_timeout
self._delete_source = delete_source
self._matches_cache = matches_cache
self._api_base_url = api_base_url.rstrip("/")
# Shared HTTP session and resolvers — reused across all series/chapters
# to maximise cache hits and minimise API round-trips.
@@ -357,6 +362,73 @@ class SuwayomiMover:
raise FileNotFoundError(
f"No Suwayomi directory found for '{manga_title}' under {self._src}")
def build_matches_only(self) -> dict:
    """
    Populate the matches cache for every series found under the Suwayomi
    root, without moving or converting anything.

    The root is expected to be laid out as ``<source>/<manga>/<chapter>``.
    For each manga directory:
      - The lookup title starts as the folder name; if the first chapter
        directory (in ``_chapter_sort_key`` order) yields a non-empty
        "Series" field from ``_read_suwayomi_fields``, that value is used
        instead.
      - The title is normalised with ``_clean_suwayomi_title``.
      - Titles already present in the matches cache are skipped.
      - Otherwise one MangaBaka search request (limit 1) is issued and
        the top hit is handed to the matches cache via ``add``.

    Any exception raised while searching or caching a single series is
    reported on stdout and does not abort the walk.

    Returns whatever ``self._matches_cache.all()`` returns.

    Raises RuntimeError when no matches cache was configured.
    """
    cache = self._matches_cache
    if cache is None:
        raise RuntimeError(
            "build_matches_only requires a MatchesCache instance")

    endpoint = f"{self._api_base_url}/series/search"

    def _series_dirs():
        # Lazily yield every <source>/<manga> directory in sorted order.
        for provider_dir in sorted(self._src.iterdir()):
            if provider_dir.is_dir():
                yield from (entry for entry in sorted(provider_dir.iterdir())
                            if entry.is_dir())

    for manga_dir in _series_dirs():
        # Only the first chapter directory is consulted for the Series name.
        chapters = sorted(manga_dir.iterdir(),
                          key=lambda p: _chapter_sort_key(p.name))
        first_chapter = next(
            (entry for entry in chapters if entry.is_dir()), None)

        raw_series = manga_dir.name
        if first_chapter is not None:
            series_field = _read_suwayomi_fields(first_chapter).get("Series")
            if series_field:
                raw_series = series_field

        builder_title = _clean_suwayomi_title(raw_series)

        if self._matches_cache.get(builder_title):
            print(f"[matches] {builder_title} — cached")
            continue

        print(f"[matches] {builder_title} — searching")
        try:
            query = {"q": builder_title, "page": 1, "limit": 1}
            response = self._session.get(
                endpoint, params=query, timeout=self._timeout)
            response.raise_for_status()
            hits = response.json().get("data") or []
            if hits:
                top_hit = hits[0]
                self._matches_cache.add(
                    builder_title,
                    mangabaka_id=top_hit.get("id"),
                    mangabaka_name=top_hit.get("title") or "",
                    image_url=_pick_cover_url(top_hit.get("cover")),
                )
            else:
                print(f" [warn] no MangaBaka match for {builder_title!r}")
        except Exception as exc:
            # Best effort: one failing series must not stop the others.
            print(f" [warn] search failed for {builder_title!r}: {exc}")

    return self._matches_cache.all()
# ------------------------------------------------------------------
# Internal: series
# ------------------------------------------------------------------
@@ -396,6 +468,7 @@ class SuwayomiMover:
# One builder per series — metadata fetched once, reused for all chapters.
builder = ComicInfoBuilder(
builder_title, chapter=1,
api_base_url=self._api_base_url,
language=self._language,
request_timeout=self._timeout,
session=self._session,
@@ -403,6 +476,7 @@ class SuwayomiMover:
works_resolver=self._works_resolver,
mal_resolver=self._mal,
al_resolver=self._al,
matches_cache=self._matches_cache,
)
# Fetch MangaBaka metadata now to get the canonical title and MAL ID.