manga matching and WebApp
This commit is contained in:
+76
-2
@@ -51,12 +51,13 @@ from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
from ComicInfoBuilder import ComicInfoBuilder
|
||||
from ComicInfoBuilder import ComicInfoBuilder, _pick_cover_url
|
||||
from MangadexVolumeResolver import MangaDexVolumeResolver
|
||||
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
||||
from MALResolver import MALResolver
|
||||
from AniListResolver import AniListResolver
|
||||
from KavitaPersonUpdater import KavitaPersonUpdater
|
||||
from MatchesCache import MatchesCache
|
||||
|
||||
|
||||
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
|
||||
@@ -287,12 +288,16 @@ class SuwayomiMover:
|
||||
kavita_api_key: "str | None" = None,
|
||||
language: str = "en",
|
||||
request_timeout: int = 30,
|
||||
delete_source: bool = True):
|
||||
delete_source: bool = True,
|
||||
matches_cache: "MatchesCache | None" = None,
|
||||
api_base_url: str = "https://api.mangabaka.dev/v1"):
|
||||
self._src = Path(suwayomi_path)
|
||||
self._dst = Path(kavita_path)
|
||||
self._language = language
|
||||
self._timeout = request_timeout
|
||||
self._delete_source = delete_source
|
||||
self._matches_cache = matches_cache
|
||||
self._api_base_url = api_base_url.rstrip("/")
|
||||
|
||||
# Shared HTTP session and resolvers — reused across all series/chapters
|
||||
# to maximise cache hits and minimise API round-trips.
|
||||
@@ -357,6 +362,73 @@ class SuwayomiMover:
|
||||
raise FileNotFoundError(
|
||||
f"No Suwayomi directory found for '{manga_title}' under {self._src}")
|
||||
|
||||
def build_matches_only(self) -> dict:
    """
    Resolve every series under the Suwayomi root to a MangaBaka match.

    The root is walked two levels deep (``<root>/<source>/<series>``);
    entries that are not directories are ignored. For every series:

      - The first chapter directory (in ``_chapter_sort_key`` order) is
        read with ``_read_suwayomi_fields``; its ``Series`` field is used
        as the series name, falling back to the folder name when the
        field is missing or empty.
      - The name is cleaned with ``_clean_suwayomi_title``.
      - If the cleaned title is already in the matches cache it is
        skipped.
      - Otherwise one search request is issued and the top hit is added
        to the cache. A title is searched at most once per call, even
        when the same series appears under several source folders.

    A failed or empty search is reported with a ``[warn]`` line and does
    not abort the walk.

    Returns:
        Whatever ``self._matches_cache.all()`` returns after the walk.

    Raises:
        RuntimeError: if no matches cache was supplied to the mover.
    """
    if self._matches_cache is None:
        raise RuntimeError(
            "build_matches_only requires a MatchesCache instance")

    search_url = f"{self._api_base_url}/series/search"

    # Titles already searched during this call. The same series commonly
    # exists under more than one source folder; without this, every title
    # that has no match (or whose search failed) would trigger one more
    # API round-trip per additional source.
    attempted = set()

    for source_dir in sorted(self._src.iterdir()):
        if not source_dir.is_dir():
            continue
        for manga_dir in sorted(source_dir.iterdir()):
            if not manga_dir.is_dir():
                continue

            # Folder name is the fallback; the first chapter's metadata
            # wins when it carries a Series value.
            raw_series = manga_dir.name
            for chapter_dir in sorted(manga_dir.iterdir(),
                                      key=lambda p: _chapter_sort_key(p.name)):
                if chapter_dir.is_dir():
                    fields = _read_suwayomi_fields(chapter_dir)
                    if fields.get("Series"):
                        raw_series = fields["Series"]
                    # Only the first chapter directory is consulted.
                    break

            builder_title = _clean_suwayomi_title(raw_series)

            if self._matches_cache.get(builder_title):
                print(f"[matches] {builder_title} — cached")
                continue

            if builder_title in attempted:
                # Already searched (and warned about) earlier in this run.
                continue
            attempted.add(builder_title)

            print(f"[matches] {builder_title} — searching")
            try:
                resp = self._session.get(
                    search_url,
                    params={"q": builder_title, "page": 1, "limit": 1},
                    timeout=self._timeout)
                resp.raise_for_status()
                data = resp.json().get("data") or []
                if not data:
                    print(f"  [warn] no MangaBaka match for {builder_title!r}")
                    continue
                series = data[0]
                self._matches_cache.add(
                    builder_title,
                    mangabaka_id=series.get("id"),
                    mangabaka_name=series.get("title") or "",
                    image_url=_pick_cover_url(series.get("cover")),
                )
            except Exception as exc:
                # Deliberately broad: this is a best-effort bulk pass, so a
                # single bad response or cache write must not stop the walk.
                print(f"  [warn] search failed for {builder_title!r}: {exc}")

    return self._matches_cache.all()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal: series
|
||||
# ------------------------------------------------------------------
|
||||
@@ -396,6 +468,7 @@ class SuwayomiMover:
|
||||
# One builder per series — metadata fetched once, reused for all chapters.
|
||||
builder = ComicInfoBuilder(
|
||||
builder_title, chapter=1,
|
||||
api_base_url=self._api_base_url,
|
||||
language=self._language,
|
||||
request_timeout=self._timeout,
|
||||
session=self._session,
|
||||
@@ -403,6 +476,7 @@ class SuwayomiMover:
|
||||
works_resolver=self._works_resolver,
|
||||
mal_resolver=self._mal,
|
||||
al_resolver=self._al,
|
||||
matches_cache=self._matches_cache,
|
||||
)
|
||||
|
||||
# Fetch MangaBaka metadata now to get the canonical title and MAL ID.
|
||||
|
||||
Reference in New Issue
Block a user