cleanup

2026-06-11 21:31:20 +02:00
parent 4996026b91
commit 8a44b85a48
9 changed files with 276 additions and 153 deletions
@@ -17,6 +17,8 @@ services:
      # (local time, see TZ)
      UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
      UPDATER_LOG:      "${UPDATER_LOG:-/config/volume_updater.log}"
+      # Persistent cover cache.  ":-" also replaces an EMPTY value, so the
+      # temp-dir mode (empty path) needs "${COVER_CACHE_PATH-/config/covers}".
+      COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}"
      # Timezone for the cron schedule — without this 19:00 means 19:00 UTC
      TZ:               "${TZ:-Europe/Berlin}"
    ports:
@@ -32,12 +32,13 @@ Environment variables
                        default "0 19 * * 1,4" = 19:00 every Mon + Thu
                        (local time — set TZ inside the container!)
    UPDATER_LOG         default /config/volume_updater.log
+    COVER_CACHE_PATH    directory for the persistent cover cache;
+                        empty (default) = temporary cache, deleted on exit
 """

 from __future__ import annotations

 import os
-import signal
 import sys
 from pathlib import Path

@@ -94,6 +95,7 @@ def main() -> int:
    updater_enabled  = _env_bool("UPDATER_ENABLED", True)
    updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
    updater_log      = _env_str("UPDATER_LOG", "/config/volume_updater.log")
+    cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None

    print(f"[main] suwayomi  = {suwayomi_path}",  flush=True)
    print(f"[main] kavita    = {kavita_path}",    flush=True)
@@ -114,6 +116,7 @@ def main() -> int:
        request_timeout=request_timeout,
        delete_source=delete_source,
        matches_cache=matches_cache,
+        cover_cache_dir=cover_cache_path,
    )

    # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
@@ -130,6 +133,7 @@ def main() -> int:
                request_timeout=request_timeout,
                log_path=updater_log,
                schedule=updater_schedule,
+                cover_cache_dir=cover_cache_path,
            )
            updater.start()
        except ValueError as exc:
@@ -37,7 +37,6 @@ Data source notes

 from __future__ import annotations

-import difflib
 import re
 import xml.etree.ElementTree as ET
 from pathlib import Path
@@ -50,6 +49,7 @@ from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from MatchesCache import MatchesCache
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
+from CoverCache import CoverCache

 try:
    from PIL import Image
@@ -179,7 +179,8 @@ class ComicInfoBuilder:
                 works_resolver: "MangaBakaWorksResolver | None" = None,
                 mal_resolver: "MALResolver | None" = None,
                 al_resolver: "AniListResolver | None" = None,
-                 matches_cache: "MatchesCache | None" = None):
+                 matches_cache: "MatchesCache | None" = None,
+                 cover_cache: "CoverCache | None" = None):
        if not manga_title or not str(manga_title).strip():
            raise ValueError("manga_title must not be empty.")

@@ -210,6 +211,7 @@ class ComicInfoBuilder:
        self._al_resolver = al_resolver or AniListResolver(
            request_timeout=request_timeout)
        self._matches_cache = matches_cache
+        self._cover_cache = cover_cache or _default_cover_cache()

        self._metadata: "dict | None" = None
        self._pages: list[dict] = []
@@ -580,11 +582,13 @@ class ComicInfoBuilder:
    # ======================================================================
    def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None":
        """
-        Downloads the cover for the current chapter/volume.
+        Fetches the cover for the current chapter/volume and writes it into
+        `folder`.

-        If a volume is known and a volume-specific cover exists in MangaBaka
-        works, that cover is used.  Otherwise the series default cover is
-        downloaded (raw variant preferred).
+        If a volume is known and a volume-specific cover exists in MangaBaka,
+        that cover is used; otherwise the series default cover.  The image
+        itself comes from the CoverCache, so a cover shared by many chapters
+        is downloaded only once.
        """
        md = self._get_metadata()
        volume = self._determine_volume()
@@ -602,18 +606,13 @@ class ComicInfoBuilder:
        if not cover_url:
            cover_url = _pick_cover_url(md.get("cover"))

-        if not cover_url:
+        fetched = self._cover_cache.get(cover_url) if cover_url else None
+        if not fetched:
            return None

-        try:
-            resp = self._session.get(cover_url, timeout=self.request_timeout)
-            resp.raise_for_status()
-        except requests.RequestException:
-            return None
-
-        ext = _guess_extension(cover_url, resp.headers.get("Content-Type", ""))
+        data, ext = fetched
        target = folder / f"{cover_filename}{ext}"
-        target.write_bytes(resp.content)
+        target.write_bytes(data)
        return target

    # ======================================================================
@@ -656,6 +655,41 @@ class ComicInfoBuilder:
        "manhua": ("zh-latn",),
    }

+    @staticmethod
+    def _pick_best_title(titles, language_codes: tuple,
+                         prefer_trait: "str | None" = None) -> "str | None":
+        """
+        Selects one title from a MangaBaka `titles` list.
+
+        Only dict entries whose language ("language" or "lang" key,
+        lower-cased) is in `language_codes` and whose "title" is non-empty
+        are candidates.  Each candidate is ranked by the sum of:
+        `prefer_trait` present in its traits (4), "official" in its traits
+        (2), truthy "is_primary" (1).  The top-ranked candidate's title is
+        returned, the earliest one on equal rank; None if `titles` is not
+        a list or no candidate exists.
+        """
+        if not isinstance(titles, list):
+            return None
+
+        winner: "str | None" = None
+        top_rank = -1
+        for item in titles:
+            if not isinstance(item, dict):
+                continue
+            code = (item.get("language") or item.get("lang") or "").lower()
+            name = item.get("title")
+            if code not in language_codes or not name:
+                continue
+            tags = item.get("traits") or []
+            rank = sum((
+                4 if prefer_trait and prefer_trait in tags else 0,
+                2 if "official" in tags else 0,
+                1 if item.get("is_primary") else 0,
+            ))
+            # Strict ">" keeps the first candidate when ranks are equal.
+            if rank > top_rank:
+                winner, top_rank = name, rank
+        return winner
+
    @classmethod
    def _romanized_for_native(cls, md: dict) -> "str | None":
        """
@@ -686,30 +720,7 @@ class ComicInfoBuilder:
            return None

        titles = md.get("titles") or md.get("alt_titles") or []
-        if not isinstance(titles, list):
-            return None
-
-        best_score = -1
-        best_title: "str | None" = None
-        for entry in titles:
-            if not isinstance(entry, dict):
-                continue
-            lang = (entry.get("language") or entry.get("lang") or "").lower()
-            if lang not in langs:
-                continue
-            title = entry.get("title")
-            if not title:
-                continue
-            traits = entry.get("traits") or []
-            score = 0
-            if "official" in traits:
-                score += 2
-            if entry.get("is_primary"):
-                score += 1
-            if score > best_score:
-                best_score = score
-                best_title = title
-        return best_title
+        return cls._pick_best_title(titles, langs)

    def _get_sort_title(self, md: dict) -> "str | None":
        """
@@ -745,31 +756,7 @@ class ComicInfoBuilder:

        def pick(language_codes: tuple, prefer_trait: "str | None" = None
                 ) -> "str | None":
-            """Picks the best title entry for any of the given language codes."""
-            if not isinstance(titles, list):
-                return None
-            best_score = -1
-            best_title: "str | None" = None
-            for entry in titles:
-                if not isinstance(entry, dict):
-                    continue
-                lang = (entry.get("language") or entry.get("lang") or "").lower()
-                if lang not in language_codes:
-                    continue
-                title = entry.get("title")
-                if not title:
-                    continue
-                traits = entry.get("traits") or []
-                score = 0
-                if prefer_trait and prefer_trait in traits:
-                    score += 4
-                if "official" in traits:
-                    score += 2
-                if entry.get("is_primary"):
-                    score += 1
-                if score > best_score:
-                    best_score, best_title = score, title
-            return best_title
+            return self._pick_best_title(titles, language_codes, prefer_trait)

        result: dict[str, str] = {}

@@ -1080,6 +1067,18 @@ class ComicInfoBuilder:
 # generic image-block picker; _pick_cover_url is kept for backward compat.
 _pick_cover_url = _pick_image_url

+# Shared fallback CoverCache for builders constructed without an explicit
+# one (temporary directory, removed at process exit).  Created lazily so
+# importing this module never touches the filesystem.
+_shared_cover_cache: "CoverCache | None" = None
+
+
+def _default_cover_cache() -> CoverCache:
+    """Returns the shared fallback CoverCache, creating it on first call."""
+    global _shared_cover_cache
+    cache = _shared_cover_cache
+    if cache is None:
+        cache = _shared_cover_cache = CoverCache()
+    return cache
+

 def _pick_thumbnail_url(cover) -> "str | None":
    """
@@ -0,0 +1,136 @@
+"""
+CoverCache.py
+=============
+
+Disk-backed cache for downloaded cover images, keyed by URL.
+
+Why
+---
+The mover packs every chapter of a series individually, and each chapter
+needs a cover image.  Without caching, the same multi-megabyte cover is
+downloaded once per chapter (20-chapter volume = 20 identical downloads).
+This cache turns that into a single download per unique URL.
+
+Persistence
+-----------
+* ``cache_dir`` given     -> covers persist across runs in that directory.
+* ``cache_dir`` omitted   -> a temporary directory is used and removed
+                             automatically when the process exits.
+
+Files are stored as ``<sha256(url)[:32]><ext>``; the extension is derived
+from the URL / Content-Type at download time so it can be reused when
+writing the cover into a chapter folder.
+
+Thread safety: downloads are serialised per cache instance, so threads
+sharing one instance never fetch the same URL twice.  Instances are not
+coordinated with each other, even when they share a cache directory.
+
+Dependencies
+------------
+    requests    ->  pip install requests
+"""
+
+from __future__ import annotations
+
+import atexit
+import hashlib
+import shutil
+import tempfile
+import threading
+from pathlib import Path
+
+import requests
+
+
+class CoverCache:
+    """
+    URL-keyed image cache on disk.
+
+    Each cached cover is one file named ``<key><ext>`` inside the cache
+    directory, where ``key`` is derived from the URL (see ``_key``).
+    Files ending in ``.tmp`` are partial writes and never count as hits.
+
+    Parameters
+    ----------
+    cache_dir       : Directory for cached covers.  None -> temporary
+                      directory, deleted automatically at process exit.
+    session         : Optional shared requests.Session for downloads.
+    request_timeout : HTTP timeout in seconds.
+    """
+
+    def __init__(self, cache_dir=None, *,
+                 session: "requests.Session | None" = None,
+                 request_timeout: int = 30):
+        self._persistent = cache_dir is not None
+        if self._persistent:
+            self._dir = Path(cache_dir)
+            self._dir.mkdir(parents=True, exist_ok=True)
+        else:
+            self._dir = Path(tempfile.mkdtemp(prefix="cover_cache_"))
+            atexit.register(self.close)
+
+        self._session = session or requests.Session()
+        self._session.headers.setdefault("User-Agent", "CoverCache/1.0")
+        self._timeout = request_timeout
+        # Guards lookups, downloads and clear() of THIS instance only.
+        self._lock = threading.Lock()
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    def get(self, url: str) -> "tuple[bytes, str] | None":
+        """
+        Returns ``(image_bytes, extension)`` for the URL — from cache when
+        present, downloading (and caching) otherwise.  Returns None when
+        the URL is empty or the download fails.
+        """
+        if not url:
+            return None
+
+        with self._lock:
+            cached = self._find_cached(url)
+            if cached is not None:
+                try:
+                    return cached.read_bytes(), cached.suffix
+                except OSError:
+                    pass  # unreadable cache file -> re-download
+
+            return self._download(url)
+
+    def clear(self) -> None:
+        """Removes all cached covers (the directory itself is kept)."""
+        with self._lock:
+            for f in self._dir.glob("*"):
+                if f.is_file():
+                    f.unlink(missing_ok=True)
+
+    def close(self) -> None:
+        """Deletes the cache directory when it is non-persistent."""
+        if not self._persistent:
+            shutil.rmtree(self._dir, ignore_errors=True)
+
+    # ------------------------------------------------------------------
+    # Internal
+    # ------------------------------------------------------------------
+    @staticmethod
+    def _key(url: str) -> str:
+        """Returns the cache file stem: first 32 hex chars of sha256(url)."""
+        return hashlib.sha256(url.encode("utf-8")).hexdigest()[:32]
+
+    def _find_cached(self, url: str) -> "Path | None":
+        """
+        Returns the cached file for the URL, or None when there is none.
+
+        ``*.tmp`` files written by ``_download`` share the key prefix; they
+        are skipped so a partial or abandoned write is never served as a
+        cover (and never yields ".tmp" as the image extension).
+        """
+        # sorted() makes the pick deterministic if several files match.
+        for hit in sorted(self._dir.glob(self._key(url) + ".*")):
+            if hit.suffix != ".tmp" and hit.is_file():
+                return hit
+        return None
+
+    def _download(self, url: str) -> "tuple[bytes, str] | None":
+        """
+        Downloads the URL and stores it in the cache directory.
+
+        Returns ``(image_bytes, extension)``, or None when the request
+        fails or the response body is empty.  A failure to write the cache
+        file is not an error: the downloaded bytes are still returned.
+        """
+        try:
+            resp = self._session.get(url, timeout=self._timeout)
+            resp.raise_for_status()
+        except requests.RequestException:
+            return None
+
+        data = resp.content
+        if not data:
+            return None  # an empty body is not an image — never cache it
+
+        # Local import avoids a circular module dependency:
+        # ComicInfoBuilder imports CoverCache at module level.
+        from ComicInfoBuilder import _guess_extension
+        ext = _guess_extension(url, resp.headers.get("Content-Type", ""))
+
+        target = self._dir / f"{self._key(url)}{ext}"
+        # Per-thread temp name: another CoverCache instance (with its own
+        # lock) may use the same directory and must not clobber this write.
+        tmp = target.with_name(f"{target.name}.{threading.get_ident()}.tmp")
+        try:
+            tmp.write_bytes(data)
+            tmp.replace(target)  # atomic publish of the finished file
+        except OSError:
+            # Cache write failure is non-fatal — still return the bytes,
+            # but try not to leave the partial temp file behind.
+            try:
+                tmp.unlink(missing_ok=True)
+            except OSError:
+                pass
+        return data, ext
@@ -52,7 +52,7 @@ from pathlib import Path

 import requests

-from ComicInfoBuilder import (ComicInfoBuilder, _guess_extension, _IMAGE_EXTS)
+from ComicInfoBuilder import ComicInfoBuilder, _IMAGE_EXTS
 from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver
 from MALResolver import MALResolver
@@ -62,6 +62,7 @@ from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
                           _sanitize_dirname, _normalise_volume_value)
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
 from CronSchedule import CronSchedule
+from CoverCache import CoverCache

 try:
    from PIL import Image
@@ -133,6 +134,8 @@ class KavitaVolumeCoverUpdater:
                       e.g. "0 19 * * 1,4" = 19:00 every Monday and
                       Thursday.  Evaluated in local time — set the TZ env
                       var inside Docker.  Default: "0 19 * * 1,4".
+    cover_cache_dir  : Directory for the persistent cover cache.  None ->
+                       temporary cache, deleted at process exit.
    """

    def __init__(self,
@@ -143,7 +146,8 @@ class KavitaVolumeCoverUpdater:
                 request_timeout: int = 30,
                 api_base_url: str = "https://api.mangabaka.dev/v1",
                 log_path=None,
-                 schedule: str = "0 19 * * 1,4"):
+                 schedule: str = "0 19 * * 1,4",
+                 cover_cache_dir=None):
        self._dst = Path(kavita_path)
        self._matches_cache = matches_cache
        self._language = language
@@ -165,6 +169,8 @@ class KavitaVolumeCoverUpdater:
        self._works_resolver = MangaBakaWorksResolver(
            api_base_url=api_base_url,
            request_timeout=request_timeout, session=session)
+        self._cover_cache = CoverCache(
+            cover_cache_dir, session=session, request_timeout=request_timeout)

        self._stop = threading.Event()
        self._thread: "threading.Thread | None" = None
@@ -225,6 +231,12 @@ class KavitaVolumeCoverUpdater:
            print(f"[updater] kavita path missing: {self._dst}", flush=True)
            return summary

+        # The whole point of a scan is detecting volume assignments added
+        # since the previous run — start from fresh API data, not the
+        # process-lifetime resolver caches.
+        self._vol_resolver.clear_cache()
+        self._works_resolver.clear_cache()
+
        for series_dir in sorted(self._dst.iterdir()):
            if self._stop.is_set():
                break
@@ -277,6 +289,7 @@ class KavitaVolumeCoverUpdater:
            mal_resolver=self._mal,
            al_resolver=self._al,
            matches_cache=self._matches_cache,
+            cover_cache=self._cover_cache,
        )
        md = builder.fetch_metadata()
        series_id = str(md.get("id") or "")
@@ -367,7 +380,8 @@ class KavitaVolumeCoverUpdater:
    # ------------------------------------------------------------------
    def _fetch_cover(self, series_id: str, volume) -> "tuple[str, bytes] | None":
        """
-        Downloads the MangaBaka volume cover.
+        Fetches the MangaBaka volume cover via the CoverCache (one download
+        per unique URL, even across chapters sharing a volume).
        Returns ("000<ext>", bytes) or None when no cover is available.
        """
        try:
@@ -376,13 +390,11 @@ class KavitaVolumeCoverUpdater:
            url = None
        if not url:
            return None
-        try:
-            resp = self._session.get(url, timeout=self._timeout)
-            resp.raise_for_status()
-        except requests.RequestException:
+        fetched = self._cover_cache.get(url)
+        if not fetched:
            return None
-        ext = _guess_extension(url, resp.headers.get("Content-Type", ""))
-        return (f"000{ext}", resp.content)
+        data, ext = fetched
+        return (f"000{ext}", data)

    # ------------------------------------------------------------------
    # Archive update (single read + single write per archive)
@@ -119,26 +119,18 @@ class MangaBakaWorksResolver:
    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
-    def get_works(self, series_id: str) -> list[dict]:
+    def _fetch_all_pages(self, endpoint: str) -> list[dict]:
        """
-        Returns volume-level works for a series, filtered to those that have
-        a usable cover image.  Results are cached per series.
-
-        Pages through the API (limit=50) until the response returns an empty
-        page, collecting all works before applying the cover filter.
+        Pages through a MangaBaka list endpoint (limit=50 per page) and
+        returns all collected `data` items.  Network errors end the
+        pagination early; items fetched so far are returned.
        """
-        if not series_id:
-            return []
-
-        if series_id in self._cache:
-            return self._cache[series_id]
-
-        all_works: list[dict] = []
+        items: list[dict] = []
        page = 1
        try:
            while True:
                resp = self._session.get(
-                    f"{self.api_base_url}/series/{series_id}/works",
+                    f"{self.api_base_url}/series/{endpoint}",
                    params={"limit": 50, "page": page},
                    timeout=self.request_timeout,
                )
@@ -146,17 +138,35 @@ class MangaBakaWorksResolver:
                page_data = resp.json().get("data") or []
                if not page_data:
                    break
-                all_works.extend(page_data)
+                items.extend(page_data)
                if len(page_data) < 50:
                    break
                page += 1
        except requests.RequestException:
-            if not all_works:
-                return []
+            pass
+        return items
+
+    def get_works(self, series_id: str) -> list[dict]:
+        """
+        Returns volume-level works for a series, filtered to those that have
+        a usable cover image.
+
+        Non-empty results are cached per series; empty results are not, so
+        works added on MangaBaka later become visible without restarting
+        the (long-running) process.
+        """
+        if not series_id:
+            return []
+
+        if series_id in self._cache:
+            return self._cache[series_id]
+
+        all_works = self._fetch_all_pages(f"{series_id}/works")

        # Discard works that carry no usable cover
        works_with_cover = [w for w in all_works if w.get("images")]
-        self._cache[series_id] = works_with_cover
+        if works_with_cover:
+            self._cache[series_id] = works_with_cover
        return works_with_cover

    def get_work_for_volume(self, series_id: str, volume) -> "dict | None":
@@ -190,25 +200,7 @@ class MangaBakaWorksResolver:
        if series_id in self._images_cache:
            return self._images_cache[series_id]

-        raw_items: list[dict] = []
-        page = 1
-        try:
-            while True:
-                resp = self._session.get(
-                    f"{self.api_base_url}/series/{series_id}/images",
-                    params={"limit": 50, "page": page},
-                    timeout=self.request_timeout,
-                )
-                resp.raise_for_status()
-                page_data = resp.json().get("data") or []
-                if not page_data:
-                    break
-                raw_items.extend(page_data)
-                if len(page_data) < 50:
-                    break
-                page += 1
-        except requests.RequestException:
-            pass
+        raw_items = self._fetch_all_pages(f"{series_id}/images")

        # Group by normalised volume index; collect all languages per volume.
        by_volume: dict[str, dict[str, str]] = {}  # norm_vol -> {lang: url}
@@ -236,7 +228,10 @@ class MangaBakaWorksResolver:
            if url:
                result[norm] = url

-        self._images_cache[series_id] = result
+        # Empty results are not cached — covers added on MangaBaka later
+        # become visible without restarting the long-running process.
+        if result:
+            self._images_cache[series_id] = result
        return result

    def get_cover_for_volume_from_images(self, series_id: str,
@@ -43,7 +43,6 @@ Dependencies
 from __future__ import annotations

 import difflib
-import re

 import requests

@@ -29,7 +29,6 @@ from __future__ import annotations

 import queue
 import threading
-import time
 from datetime import datetime
 from pathlib import Path

@@ -52,7 +52,8 @@ from pathlib import Path

 import requests

-from ComicInfoBuilder import (ComicInfoBuilder, _pick_cover_url, _pick_thumbnail_url, _SEARCH_TYPES)
+from ComicInfoBuilder import (ComicInfoBuilder, _pick_thumbnail_url,
+                              _SEARCH_TYPES, _IMAGE_EXTS, _natural_key)
 from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver
 from MALResolver import MALResolver
@@ -60,9 +61,9 @@ from AniListResolver import AniListResolver
 from KavitaPersonUpdater import KavitaPersonUpdater
 from MatchesCache import MatchesCache
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
+from CoverCache import CoverCache


-_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
 _CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')

 # JSON file written into each Kavita series folder, listing every chapter
@@ -133,11 +134,6 @@ _SOURCE_LABEL_RE = re.compile(
 _WIN_ILLEGAL_RE = re.compile(r'[\\/*?"<>|]')


-def _natural_key(name: str) -> list:
-    return [int(p) if p.isdigit() else p.lower()
-            for p in re.split(r"(\d+)", name)]
-
-
 def _sanitize_dirname(name: str) -> str:
    """
    Makes a string safe to use as a Windows (or SMB) directory name.
@@ -192,34 +188,6 @@ def _clean_suwayomi_title(title: str) -> str:
    return _SOURCE_LABEL_RE.sub("", title).strip()


-def _mal_id_from_metadata(md: dict) -> "int | None":
-    """Extracts the MAL ID from a MangaBaka series dict's source map."""
-    for raw_key, info in (md.get("source") or {}).items():
-        if re.sub(r"[^a-z0-9]", "", raw_key.lower()) in ("myanimelist", "mal"):
-            if isinstance(info, dict):
-                mal_id = info.get("id")
-                if mal_id is not None:
-                    try:
-                        return int(mal_id)
-                    except (TypeError, ValueError):
-                        pass
-    return None
-
-
-def _al_id_from_metadata(md: dict) -> "int | None":
-    """Extracts the AniList ID from a MangaBaka series dict's source map."""
-    for raw_key, info in (md.get("source") or {}).items():
-        if re.sub(r"[^a-z0-9]", "", raw_key.lower()) == "anilist":
-            if isinstance(info, dict):
-                al_id = info.get("id")
-                if al_id is not None:
-                    try:
-                        return int(al_id)
-                    except (TypeError, ValueError):
-                        pass
-    return None
-
-
 def _chapter_image_size(chapter_dir: Path) -> int:
    """Returns the total file size of all images in a chapter folder."""
    return sum(
@@ -336,6 +304,8 @@ class SuwayomiMover:
    language        : ComicInfo LanguageISO and SeriesSort language ("en").
    request_timeout : HTTP timeout in seconds for all API / image requests.
    delete_source   : Remove the source chapter folder after successful pack.
+    cover_cache_dir : Directory for the persistent cover cache.  None ->
+                      temporary cache, deleted at process exit.
    """

    def __init__(self,
@@ -348,7 +318,8 @@ class SuwayomiMover:
                 request_timeout: int = 30,
                 delete_source: bool = True,
                 matches_cache: "MatchesCache | None" = None,
-                 api_base_url: str = "https://api.mangabaka.dev/v1"):
+                 api_base_url: str = "https://api.mangabaka.dev/v1",
+                 cover_cache_dir=None):
        self._src = Path(suwayomi_path)
        self._dst = Path(kavita_path)
        self._language = language
@@ -371,6 +342,8 @@ class SuwayomiMover:
            request_timeout=request_timeout, session=session)
        self._works_resolver = MangaBakaWorksResolver(
            request_timeout=request_timeout, session=session)
+        self._cover_cache = CoverCache(
+            cover_cache_dir, session=session, request_timeout=request_timeout)

        self._person_updater: "KavitaPersonUpdater | None" = None
        if kavita_base_url and kavita_api_key:
@@ -550,6 +523,7 @@ class SuwayomiMover:
            mal_resolver=self._mal,
            al_resolver=self._al,
            matches_cache=self._matches_cache,
+            cover_cache=self._cover_cache,
        )

        # Fetch MangaBaka metadata now to get the canonical title and MAL ID.
@@ -604,9 +578,9 @@ class SuwayomiMover:
        # AniList is used as fallback when MAL returns no characters/staff.
        person_result: "dict | None" = None
        if self._person_updater:
-            mal_id = (_mal_id_from_metadata(md) if md else None
+            mal_id = ((ComicInfoBuilder._mal_id_from_source(md) if md else None)
                      or self._mal.find_mal_id(builder_title))
-            al_id  = _al_id_from_metadata(md) if md else None
+            al_id  = ComicInfoBuilder._al_id_from_source(md) if md else None
            if mal_id or al_id:
                try:
                    person_result = self._person_updater.update_for_manga(
@@ -661,11 +635,14 @@ class SuwayomiMover:
 # Usage example
 # --------------------------------------------------------------------------
 if __name__ == "__main__":
+    import os
+
    # Local (no-Docker) smoke test.  Adjust paths to your environment.
+    # Set the KAVITA_API_KEY env var — never commit API keys to the repo.
    SUWAYOMI_PATH = r"M:\config\downloads\mangas"
    KAVITA_PATH   = r"\\192.168.2.2\root\ServerData\Kavita\test"
    KAVITA_URL    = "http://192.168.2.2:5000"
-    KAVITA_KEY    = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA"
+    KAVITA_KEY    = os.environ.get("KAVITA_API_KEY", "")

    # matches.json lives next to this script during local testing.
    MATCHES_PATH  = Path(__file__).resolve().parent.parent / "matches.json"