time measurement

merged ln metadata into manga mover
2026-06-15 11:23:37 +02:00 · 2026-06-15 11:23:20 +02:00 · 2026-06-14 10:47:47 +02:00 · 2026-06-11 21:31:20 +02:00 · 2026-06-11 20:02:06 +02:00 · 2026-06-11 19:57:11 +02:00
32 changed files with 5606 additions and 899 deletions
@@ -0,0 +1,23 @@
 # Shared
 KAVITA_URL=http://192.168.1.100:5000
 KAVITA_API_KEY=your-api-key-here
 LANGUAGE=en
 TZ=Europe/Berlin
 # Manga container (manga-mover-and-metadata-collector)
 HOST_SUWAYOMI_PATH=/path/to/suwayomi/downloads
 HOST_KAVITA_PATH=/path/to/kavita/library
 HOST_MANGA_CONFIG_PATH=/path/to/manga-config
 MANGA_WEB_PORT=8080
 SETTLE_SECONDS=600
 DELETE_SOURCE=true
 UPDATER_ENABLED=true
 UPDATER_SCHEDULE=0 19 * * 1,4
 COVER_CACHE_PATH=/config/covers
 PERF_PATH=/config/perf_stats.json
 # Light-novel container (kavita-lightnovel-metadata-fetcher)
 HOST_LN_CONFIG_PATH=/path/to/ln-config
 LN_WEB_PORT=8081
 LN_LIBRARY_IDS=3,5
@@ -5,6 +5,11 @@ on:
    branches:
      - master
 env:
  REGISTRY: gitea.johannesbot.de/johannesbot
  MANGA_IMAGE: manga-mover-and-metadata-collector
  LN_IMAGE: kavita-lightnovel-metadata-fetcher
 jobs:
  build:
    runs-on: ubuntu-latest
@@ -17,11 +22,16 @@ jobs:
          echo "${{ secrets.REGISTRY_PASSWORD }}" | \
          docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
-      - name: Build Image
+      - name: Build Manga Image
-        run: docker build -t gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest .
+        run: docker build --build-arg APP=manga -t ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:latest .
-      - name: Push Image
+      - name: Build LN Image
-        run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest
+        run: docker build --build-arg APP=ln -t ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:latest .
      - name: Push Images
        run: |
          docker push ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:latest
          docker push ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:latest
  deploy:
    needs: build
@@ -37,7 +47,7 @@ jobs:
          username: ${{ secrets.SSH_USER }}
          password: ${{ secrets.SSH_PASSWORD }}
          port: ${{ secrets.SSH_PORT || 22 }}
-          script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
+          script: mkdir -p /home/${{ secrets.SSH_USER }}/kavita-metadata-collector
      - name: Copy docker-compose via SCP
        uses: appleboy/scp-action@v0.1.7
@@ -47,7 +57,7 @@ jobs:
          password: ${{ secrets.SSH_PASSWORD }}
          port: ${{ secrets.SSH_PORT || 22 }}
          source: "docker-compose.prod.yml"
-          target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector"
+          target: "/home/${{ secrets.SSH_USER }}/kavita-metadata-collector"
      - name: Deploy via SSH
        uses: appleboy/ssh-action@v1.0.3
@@ -57,7 +67,7 @@ jobs:
          password: ${{ secrets.SSH_PASSWORD }}
          port: ${{ secrets.SSH_PORT || 22 }}
          script: |
-            cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
+            cd /home/${{ secrets.SSH_USER }}/kavita-metadata-collector
            mv docker-compose.prod.yml docker-compose.yml
            echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
            sudo docker compose pull
@@ -0,0 +1,38 @@
 # Release pipeline: builds both image variants from the same build context
 # and pushes them tagged with the version taken from the pushed git tag.
 name: Build Release
 on:
  push:
    tags:
      # Only tags starting with "v" trigger this workflow.
      - 'v*'
 env:
  # Registry namespace and image names; combined below into the full
  # image references <REGISTRY>/<IMAGE>:<VERSION>.
  REGISTRY: gitea.johannesbot.de/johannesbot
  MANGA_IMAGE: manga-mover-and-metadata-collector
  LN_IMAGE: kavita-lightnovel-metadata-fetcher
 jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # The registry password is piped via stdin (--password-stdin) instead
      # of being passed as a command-line argument.
      - name: Login to Gitea Registry
        run: |
          echo "${{ secrets.REGISTRY_PASSWORD }}" | \
          docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
      # Strips the leading "refs/tags/v" from GITHUB_REF and exposes the
      # remainder as the step output VERSION (steps.tag.outputs.VERSION).
      - name: Extract Tag
        id: tag
        run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT"
      # Both builds use the same build context (.); the build arg APP is the
      # only difference between the two docker build invocations.
      - name: Build Manga Image
        run: docker build --build-arg APP=manga -t ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:${{ steps.tag.outputs.VERSION }} .
      - name: Build LN Image
        run: docker build --build-arg APP=ln -t ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:${{ steps.tag.outputs.VERSION }} .
      # Pushing happens only after both builds have succeeded.
      - name: Push Images
        run: |
          docker push ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:${{ steps.tag.outputs.VERSION }}
          docker push ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:${{ steps.tag.outputs.VERSION }}
@@ -1,8 +1,18 @@
 # One Dockerfile, two images: the build arg APP selects the entry point.
 #
 #   docker build --build-arg APP=manga -t .../manga-mover-and-metadata-collector .
 #   docker build --build-arg APP=ln    -t .../kavita-lightnovel-metadata-fetcher .
 #
 # Both variants share src/; the variant-specific code lives in
 # src/manga/ resp. src/ln/ and is selected by the entry point.
 FROM python:3.12-slim
 ARG APP=manga
 WORKDIR /app
-# System deps for Pillow (image dimensions); kept minimal.
+# System deps for Pillow (image dimensions, manga variant); kept minimal.
 RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        libjpeg62-turbo \
@@ -12,14 +22,16 @@ COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY src/          /app/src/
-COPY main.py  /app/main.py
+COPY main_manga.py main_ln.py /app/
 ENV PYTHONUNBUFFERED=1 \
-    PYTHONDONTWRITEBYTECODE=1
+    PYTHONDONTWRITEBYTECODE=1 \
    APP_VARIANT=${APP}
-# Mount points used by main.py defaults
+# /config is used by both variants; the manga variant additionally mounts
-VOLUME ["/mnt/suwayomi", "/mnt/kavita", "/config"]
+# /mnt/suwayomi and /mnt/kavita (see docker-compose.prod.yml).
 VOLUME ["/config"]
 EXPOSE 8080
-CMD ["python", "/app/main.py"]
+CMD python /app/main_${APP_VARIANT}.py
@@ -1,5 +1,8 @@
 services:
-  manga-mover:
+  # ------------------------------------------------------------------
  # Manga: Suwayomi -> Kavita mover + metadata enrichment
  # ------------------------------------------------------------------
  manga-mover-and-metadata-collector:
    image: gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest
    container_name: manga-mover-and-metadata-collector
    restart: unless-stopped
@@ -9,11 +12,41 @@ services:
      LANGUAGE:       "${LANGUAGE:-en}"
      SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
      DELETE_SOURCE:  "${DELETE_SOURCE:-true}"
-      MATCH_PATH:     "${MATCH_PATH:-/config/matches.json}"
+      MATCH_PATH:     "/config/matches.json"
-      WEB_PORT:       "${WEB_PORT:-8080}"
+      # Volume/cover back-fill updater
      UPDATER_ENABLED:  "${UPDATER_ENABLED:-true}"
      # Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday
      # (local time, see TZ)
      UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
      UPDATER_LOG:      "/config/volume_updater.log"
      # Persistent cover cache (empty = temp dir, deleted on container stop)
      COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}"
      # Per-step move timing stats (viewable at /perf); empty disables it
      PERF_PATH:        "${PERF_PATH:-/config/perf_stats.json}"
      # Timezone for the cron schedule — without this 19:00 means 19:00 UTC
      TZ:               "${TZ:-Europe/Berlin}"
    ports:
-      - "${WEB_PORT:-8080}:${WEB_PORT:-8080}"
+      - "${MANGA_WEB_PORT:-8080}:8080"
    volumes:
      - "${HOST_SUWAYOMI_PATH}:/mnt/suwayomi"
      - "${HOST_KAVITA_PATH}:/mnt/kavita"
-      - "${HOST_CONFIG_PATH}:/config"
+      - "${HOST_MANGA_CONFIG_PATH}:/config"
  # ------------------------------------------------------------------
  # Light novels: Kavita metadata fetcher (HTTP only, no file mover)
  # ------------------------------------------------------------------
  kavita-lightnovel-metadata-fetcher:
    image: gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:latest
    container_name: kavita-lightnovel-metadata-fetcher
    restart: unless-stopped
    environment:
      KAVITA_URL:     "${KAVITA_URL}"
      KAVITA_API_KEY: "${KAVITA_API_KEY}"
      LIBRARY_IDS:    "${LN_LIBRARY_IDS}"
      LANGUAGE:       "${LANGUAGE:-en}"
      MATCH_PATH:     "/config/matches.json"
      TZ:             "${TZ:-Europe/Berlin}"
    ports:
      - "${LN_WEB_PORT:-8081}:8080"
    volumes:
      - "${HOST_LN_CONFIG_PATH}:/config"
@@ -0,0 +1,129 @@
 """
 main_ln.py
 ==========
 Container entry point for the **light-novel** variant (Kavita metadata
 fetcher).  The manga variant has its own entry point (main_manga.py);
 both share the modules in src/ and add their variant-specific code from
 src/ln/ and src/manga/ respectively.
 Reads configuration from environment variables, starts the orchestrator
 and exposes the Flask WebApp on WEB_HOST:WEB_PORT.  Everything happens
 through HTTP — there is no folder watcher and no file mover (Kavita is
 the source of truth for the library content; this service only writes
 metadata back to it).
 Environment variables
 ---------------------
  Required:
    KAVITA_URL          base URL of the Kavita server, e.g. http://kavita:5000
    KAVITA_API_KEY      Kavita API key (Settings -> User -> API key)
  Optional:
    LIBRARY_IDS         comma-separated default library ids (e.g. "3,5").
                        Empty = user picks in the WebUI each time.
    LANGUAGE            default "en"
    REQUEST_TIMEOUT     default 30
    MATCH_PATH          default /config/matches.json
    WEB_PORT            default 8080
    WEB_HOST            default 0.0.0.0
 """
 from __future__ import annotations
 import os
 import sys
 from pathlib import Path
 try:
    from dotenv import load_dotenv
    load_dotenv()
 except ImportError:
    pass
 # Shared code in src/, LN-specific code in src/ln/.  Modules are imported
 # by their plain names so src-internal imports resolve to the same module
 # objects (a `src.X` import would load everything twice).
 _BASE = Path(__file__).resolve().parent
 sys.path.insert(0, str(_BASE / "src"))
 sys.path.insert(0, str(_BASE / "src" / "ln"))
 from MatchesCache import MatchesCache                       # noqa: E402
 from LightNovelOrchestrator import LightNovelOrchestrator   # noqa: E402
 from MatchesWebApp import MatchesWebApp                     # noqa: E402
 def _env_str(name: str, default: "str | None" = None,
             required: bool = False) -> "str | None":
    value = os.environ.get(name, default)
    if required and not value:
        print(f"[main] missing required env var: {name}", flush=True)
        sys.exit(2)
    return value
 def _env_int(name: str, default: int) -> int:
    raw = os.environ.get(name)
    if raw is None or raw == "":
        return default
    try:
        return int(raw)
    except ValueError:
        print(f"[main] {name}={raw!r} is not a valid integer; "
              f"falling back to {default}", flush=True)
        return default
 def _env_int_list(name: str) -> list[int]:
    raw = os.environ.get(name) or ""
    out: list[int] = []
    for part in raw.split(","):
        part = part.strip()
        if not part:
            continue
        try:
            out.append(int(part))
        except ValueError:
            print(f"[main] {name}: ignoring non-integer value {part!r}",
                  flush=True)
    return out
 def main() -> int:
    """
    Entry point of the light-novel variant.

    Reads the configuration from the environment, prints a short summary,
    wires MatchesCache, LightNovelOrchestrator and MatchesWebApp together,
    starts the web app and blocks in ``wait()``.  Returns 0 once ``wait()``
    returns.
    """
    # The read order is kept stable: the required variables come first so a
    # missing one aborts before anything else is reported.
    url          = _env_str("KAVITA_URL",     required=True)
    api_key      = _env_str("KAVITA_API_KEY", required=True)
    lang         = _env_str("LANGUAGE", "en") or "en"
    timeout      = _env_int("REQUEST_TIMEOUT", 30)
    matches_file = _env_str("MATCH_PATH", "/config/matches.json")
    host         = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
    port         = _env_int("WEB_PORT", 8080)
    default_libs = _env_int_list("LIBRARY_IDS")

    # Startup summary (the API key is deliberately not part of it).
    summary = (
        f"[main] kavita url    = {url}",
        f"[main] language      = {lang}",
        f"[main] match path    = {matches_file}",
        f"[main] libraries     = {default_libs or '(picked in WebUI)'}",
        f"[main] web           = {host}:{port}",
    )
    for line in summary:
        print(line, flush=True)

    matches = MatchesCache(matches_file)
    fetcher = LightNovelOrchestrator(
        kavita_url=url,
        kavita_api_key=api_key,
        matches_cache=matches,
        language=lang,
        request_timeout=timeout,
    )
    web = MatchesWebApp(
        matches,
        orchestrator=fetcher,
        default_library_ids=default_libs,
        host=host,
        port=port,
    )
    web.start()
    web.wait()
    return 0
 if __name__ == "__main__":
    sys.exit(main())
@@ -1,10 +1,11 @@
 """
-main.py
+main_manga.py
-=======
+=============
-Container entry point.  Watches the mounted Suwayomi download directory
+Container entry point for the **manga** variant (Suwayomi -> Kavita mover
-and, after a quiet period, triggers SuwayomiMover (which also runs the
+plus metadata enrichment).  The light-novel variant has its own entry
-Kavita person sync for every processed series).
+point (main_ln.py); both share the modules in src/ and add their
 variant-specific code from src/manga/ resp. src/ln/.
 Mount points (Docker)
 ---------------------
@@ -15,7 +16,7 @@ Environment variables
 ---------------------
  Required:
    KAVITA_URL          base URL of the Kavita server, e.g. http://kavita:5000
-    KAVITA_API_KEY      Kavita API key (Settings → User → API key)
+    KAVITA_API_KEY      Kavita API key (Settings -> User -> API key)
  Optional:
    SUWAYOMI_PATH       default /mnt/suwayomi
@@ -27,22 +28,42 @@ Environment variables
    MATCH_PATH          default /config/matches.json
    WEB_PORT            default 8080  (Flask web UI for matches.json)
    WEB_HOST            default 0.0.0.0
    UPDATER_ENABLED     default true  (volume/cover back-fill cron)
    UPDATER_SCHEDULE    cron expression for the updater scans,
                        default "0 19 * * 1,4" = 19:00 every Mon + Thu
                        (local time — set TZ inside the container!)
    UPDATER_LOG         default /config/volume_updater.log
    COVER_CACHE_PATH    directory for the persistent cover cache;
                        empty (default) = temporary cache, deleted on exit
    PERF_PATH           JSON file for per-step move timing stats;
                        empty disables profiling. Default /config/perf_stats.json
 """
 from __future__ import annotations
 import os
 import signal
 import sys
 from pathlib import Path
-# Make src/ importable when running as `python main.py`.
+try:
-sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
+    from dotenv import load_dotenv
    load_dotenv()
 except ImportError:
    pass
-from src.SuwayomiMover import SuwayomiMover                       # noqa: E402
+# Shared code in src/, manga-specific code in src/manga/.  Modules are
-from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher       # noqa: E402
+# imported by their plain names so src-internal imports resolve to the
-from src.MatchesCache import MatchesCache                          # noqa: E402
+# same module objects (a `src.X` import would load everything twice).
-from src.MatchesWebApp import MatchesWebApp                        # noqa: E402
+_BASE = Path(__file__).resolve().parent
 sys.path.insert(0, str(_BASE / "src"))
 sys.path.insert(0, str(_BASE / "src" / "manga"))
 from SuwayomiMover import SuwayomiMover                        # noqa: E402
 from SuwayomiFolderWatcher import SuwayomiFolderWatcher        # noqa: E402,F401
 from MatchesCache import MatchesCache                          # noqa: E402
 from MatchesWebApp import MatchesWebApp                        # noqa: E402
 from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater  # noqa: E402
 from PerfStats import PerfStats                                # noqa: E402
 def _env_str(name: str, default: "str | None" = None,
@@ -76,8 +97,8 @@ def _env_bool(name: str, default: bool) -> bool:
 def main() -> int:
    suwayomi_path   = _env_str("SUWAYOMI_PATH", "/mnt/suwayomi")
    kavita_path     = _env_str("KAVITA_PATH",   "/mnt/kavita")
-    kavita_url      = _env_str("KAVITA_URL",     required=True)
+    kavita_url      = _env_str("KAVITA_URL",     "http://kavita:5000")
-    kavita_api_key  = _env_str("KAVITA_API_KEY", required=True)
+    kavita_api_key  = _env_str("KAVITA_API_KEY",  "")
    language        = _env_str("LANGUAGE", "en") or "en"
    settle_seconds  = _env_int("SETTLE_SECONDS",  600)
    request_timeout = _env_int("REQUEST_TIMEOUT",  30)
@@ -85,6 +106,11 @@ def main() -> int:
    match_path      = _env_str("MATCH_PATH", "/config/matches.json")
    web_host        = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
    web_port        = _env_int("WEB_PORT", 8080)
    updater_enabled  = _env_bool("UPDATER_ENABLED", True)
    updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
    updater_log      = _env_str("UPDATER_LOG", "/config/volume_updater.log")
    cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
    perf_path        = _env_str("PERF_PATH", "/config/perf_stats.json") or None
    print(f"[main] suwayomi  = {suwayomi_path}",  flush=True)
    print(f"[main] kavita    = {kavita_path}",    flush=True)
@@ -96,6 +122,7 @@ def main() -> int:
    print(f"[main] web       = {web_host}:{web_port}", flush=True)
    matches_cache = MatchesCache(match_path)
    perf_stats = PerfStats(perf_path)
    mover = SuwayomiMover(
        suwayomi_path, kavita_path,
@@ -105,20 +132,34 @@ def main() -> int:
        request_timeout=request_timeout,
        delete_source=delete_source,
        matches_cache=matches_cache,
        cover_cache_dir=cover_cache_path,
        perf_stats=perf_stats,
    )
    # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
-    web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port)
+    web_app = MatchesWebApp(matches_cache, mover=mover, perf_stats=perf_stats,
                            host=web_host, port=web_port)
    web_app.start()
-    # def shutdown(signum, _frame):
+    if updater_enabled:
-    #     print(f"[main] received signal {signum}", flush=True)
+        try:
-    #     watcher.stop()
+            updater = KavitaVolumeCoverUpdater(
-    #
+                kavita_path,
-    # signal.signal(signal.SIGTERM, shutdown)
+                matches_cache=matches_cache,
-    # signal.signal(signal.SIGINT,  shutdown)
+                language=language,
-    #
+                request_timeout=request_timeout,
                log_path=updater_log,
                schedule=updater_schedule,
                cover_cache_dir=cover_cache_path,
            )
            updater.start()
        except ValueError as exc:
            # Invalid cron expression — keep the service up, just without
            # the updater, and make the config error obvious in the logs.
            print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
                  f"volume/cover updater DISABLED", flush=True)
    # watcher.start()
    # watcher.wait()   # blocks until stop() is called via a signal
    web_app.wait()     # keep process alive while the watcher is disabled
@@ -2,3 +2,4 @@ requests>=2.31
 Pillow>=10.0
 watchdog>=4.0
 Flask>=3.0
 python-dotenv>=1.0
@@ -32,27 +32,35 @@ Dependencies
 from __future__ import annotations
 import datetime
 import difflib
 import time
 import requests
 from MediaResolver import MediaResolver
 from TextUtils import best_similarity
 # --------------------------------------------------------------------------
 # GraphQL query strings
 # --------------------------------------------------------------------------
-_SEARCH_MANGA = """
+# AniList models both manga and light novels as type MANGA; the format
 # clause decides which of the two a search returns.  The placeholder is
 # substituted at construction time (see `media_format`).
 _SEARCH_MANGA_TEMPLATE = """
 query ($search: String) {
  Page(page: 1, perPage: 5) {
-    media(search: $search, type: MANGA, format_not_in: [NOVEL]) {
+    media(search: $search, type: MANGA, __FORMAT_CLAUSE__) {
      id title { romaji english native } siteUrl
    }
  }
 }
 """
 _FORMAT_CLAUSES = {
    "manga": "format_not_in: [NOVEL]",
    "novel": "format_in: [NOVEL]",
 }
 _MANGA_STATS = """
 query ($id: Int) {
  Media(id: $id, type: MANGA) {
@@ -131,10 +139,24 @@ class AniListResolver(MediaResolver):
            cls._instance._initialized = False
        return cls._instance
-    def __init__(self, *, request_timeout: int = 30):
+    def __init__(self, *, request_timeout: int = 30,
                 media_format: str = "manga"):
        """
        media_format : "manga" (excludes novels) or "novel" (novels only).
                       Only the FIRST construction in the process sets it
                       (singleton); construct the resolver with the correct
                       format in the entry point / orchestrator.
        """
        if self._initialized:
            return
        if media_format not in _FORMAT_CLAUSES:
            raise ValueError(f"media_format must be one of "
                             f"{sorted(_FORMAT_CLAUSES)}, got {media_format!r}")
        self.media_format = media_format
        self._search_query = _SEARCH_MANGA_TEMPLATE.replace(
            "__FORMAT_CLAUSE__", _FORMAT_CLAUSES[media_format])
        self.request_timeout = request_timeout
        self._session = requests.Session()
@@ -178,7 +200,7 @@ class AniListResolver(MediaResolver):
            return self._id_cache[key]
        try:
-            data = self._gql(_SEARCH_MANGA, {"search": title})
+            data = self._gql(self._search_query, {"search": title})
            results = ((data.get("data") or {})
                       .get("Page", {})
                       .get("media") or [])
@@ -469,18 +491,11 @@ class AniListResolver(MediaResolver):
 def _score_title(query: str, entry: dict) -> float:
    """Returns the best title-similarity score for an AniList media entry."""
    title_obj = entry.get("title") or {}
-    candidates = [
+    return best_similarity(query, (
-        title_obj.get("romaji") or "",
+        title_obj.get("romaji"),
-        title_obj.get("english") or "",
+        title_obj.get("english"),
-        title_obj.get("native") or "",
+        title_obj.get("native"),
-    ]
+    ))
    best = 0.0
    q = query.lower()
    for t in candidates:
        if t:
            ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
            best = max(best, ratio)
    return best
 # --------------------------------------------------------------------------
@@ -0,0 +1,148 @@
 """
 cover_cache.py
 ==============
 Disk-backed cache for downloaded cover images, keyed by URL.
 Why
 ---
 The mover packs every chapter of a series individually, and each chapter
 needs a cover image.  Without caching, the same multi-megabyte cover is
 downloaded once per chapter (20-chapter volume = 20 identical downloads).
 This cache turns that into a single download per unique URL.
 Persistence
 -----------
 * ``cache_dir`` given     -> covers persist across runs in that directory.
 * ``cache_dir`` omitted   -> a temporary directory is used and removed
                             automatically when the process exits.
 Files are stored as ``<sha256(url)[:32]><ext>``; the extension is derived
 from the URL / Content-Type at download time so it can be reused when
 writing the cover into a chapter folder.
 Thread safety: downloads are serialised per cache instance, so concurrent
 mover / updater threads never fetch the same URL twice.
 Dependencies
 ------------
    requests    ->  pip install requests
 """
 from __future__ import annotations
 import atexit
 import hashlib
 import shutil
 import tempfile
 import threading
 from pathlib import Path
 import requests
 _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
 def _guess_extension(url: str, content_type: str) -> str:
    """Derives an image file extension from a URL or HTTP Content-Type."""
    url_ext = Path(url.split("?")[0]).suffix.lower()
    if url_ext in _IMAGE_EXTS:
        return url_ext
    ct = (content_type or "").lower()
    if "png"  in ct: return ".png"
    if "webp" in ct: return ".webp"
    if "gif"  in ct: return ".gif"
    return ".jpg"
 class CoverCache:
    """
    URL-keyed image cache on disk.

    Parameters
    ----------
    cache_dir       : Directory for cached covers.  None -> temporary
                      directory, deleted automatically at process exit.
    session         : Optional shared requests.Session for downloads.
    request_timeout : HTTP timeout in seconds.
    """
    # Suffix of in-flight download files.  A file carrying it may be a
    # partial write and is therefore never served from the cache.
    _TMP_SUFFIX = ".tmp"

    def __init__(self, cache_dir=None, *,
                 session: "requests.Session | None" = None,
                 request_timeout: int = 30):
        self._persistent = cache_dir is not None
        if self._persistent:
            self._dir = Path(cache_dir)
            self._dir.mkdir(parents=True, exist_ok=True)
        else:
            self._dir = Path(tempfile.mkdtemp(prefix="cover_cache_"))
            # Temporary caches clean up after themselves at interpreter exit.
            atexit.register(self.close)
        self._session = session or requests.Session()
        self._session.headers.setdefault("User-Agent", "CoverCache/1.0")
        self._timeout = request_timeout
        # One lock for lookups, downloads and clear().
        self._lock = threading.Lock()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get(self, url: str) -> "tuple[bytes, str] | None":
        """
        Returns ``(image_bytes, extension)`` for the URL — from cache when
        present, downloading (and caching) otherwise.  Returns None when
        the URL is empty or the download fails.
        """
        if not url:
            return None
        with self._lock:
            cached = self._find_cached(url)
            if cached is not None:
                try:
                    return cached.read_bytes(), cached.suffix
                except OSError:
                    pass  # unreadable cache file -> re-download
            return self._download(url)

    def clear(self) -> None:
        """Removes all cached covers (the directory itself is kept)."""
        with self._lock:
            for f in self._dir.glob("*"):
                if f.is_file():
                    f.unlink(missing_ok=True)

    def close(self) -> None:
        """Deletes the cache directory when it is non-persistent."""
        if not self._persistent:
            shutil.rmtree(self._dir, ignore_errors=True)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------
    @staticmethod
    def _key(url: str) -> str:
        """Returns the cache file stem: first 32 hex chars of sha256(url)."""
        return hashlib.sha256(url.encode("utf-8")).hexdigest()[:32]

    def _find_cached(self, url: str) -> "Path | None":
        """
        Returns the cached file for ``url`` or None.

        Leftover ``<key>.<ext>.tmp`` files from an interrupted write also
        match the ``<key>.*`` pattern; they are skipped so a partial
        download is never returned as a cover.
        """
        for path in sorted(self._dir.glob(self._key(url) + ".*")):
            if path.suffix != self._TMP_SUFFIX:
                return path
        return None

    def _download(self, url: str) -> "tuple[bytes, str] | None":
        """
        Downloads ``url`` and stores it in the cache directory.

        Returns ``(image_bytes, extension)``, or None when the request
        fails.  A failing cache write does not fail the call.
        """
        try:
            resp = self._session.get(url, timeout=self._timeout)
            resp.raise_for_status()
        except requests.RequestException:
            return None
        ext = _guess_extension(url, resp.headers.get("Content-Type", ""))
        target = self._dir / f"{self._key(url)}{ext}"
        # Write to a sibling temp file first, then rename it into place.
        tmp = target.with_name(target.name + self._TMP_SUFFIX)
        try:
            tmp.write_bytes(resp.content)
            tmp.replace(target)
        except OSError:
            # Cache write failure is non-fatal — still return the bytes,
            # but do not leave a partial temp file behind.
            try:
                tmp.unlink(missing_ok=True)
            except OSError:
                pass
        return resp.content, ext
@@ -0,0 +1,159 @@
 """
 cron_schedule.py
 ================
 Minimal cron-expression parser — no external dependency.
 Supports the classic 5-field syntax::
    ┌──────── minute        (0-59)
    │ ┌────── hour          (0-23)
    │ │ ┌──── day of month  (1-31)
    │ │ │ ┌── month         (1-12 or jan-dec)
    │ │ │ │ ┌ day of week   (0-7 or sun-sat; 0 and 7 = Sunday)
    │ │ │ │ │
    0 19 * * 1,4    ->  19:00 every Monday and Thursday
 Field syntax: ``*``, single values, ranges (``a-b``), steps (``*/n``,
 ``a-b/n``) and comma lists.  Month / weekday names (``jan``, ``mon``, …)
 are accepted case-insensitively.
 As in Vixie cron, when *both* day-of-month and day-of-week are restricted
 the job runs when **either** matches.
 Times are evaluated against the local system clock (``datetime.now()``) —
 in Docker set the ``TZ`` environment variable so "19:00" means local time.
 """
 from __future__ import annotations
 from datetime import datetime, timedelta
 _MONTH_NAMES = {"jan": 1, "feb": 2, "mar": 3, "apr": 4, "may": 5, "jun": 6,
                "jul": 7, "aug": 8, "sep": 9, "oct": 10, "nov": 11, "dec": 12}
 _DAY_NAMES   = {"sun": 0, "mon": 1, "tue": 2, "wed": 3, "thu": 4,
                "fri": 5, "sat": 6}
 def _parse_value(token: str, lo: int, hi: int,
                 names: "dict[str, int] | None") -> int:
    token = token.strip().lower()
    if names and token in names:
        return names[token]
    try:
        value = int(token)
    except ValueError:
        raise ValueError(f"invalid cron value {token!r}") from None
    if not (lo <= value <= hi):
        raise ValueError(f"cron value {value} out of range {lo}-{hi}")
    return value
 def _parse_field(field: str, lo: int, hi: int,
                  names: "dict[str, int] | None" = None) -> "set[int]":
    """
    Expands one cron field into the set of integers it matches.

    The field is a comma list; each element is ``*``, a single value or a
    range ``a-b``, optionally followed by ``/step``.  Values are converted
    with ``_parse_value`` (bounds lo..hi, optional name aliases).

    Raises ValueError for an empty element, a non-integer or < 1 step, an
    inverted range, or any value ``_parse_value`` rejects.
    """
    matched: set[int] = set()
    for element in field.split(","):
        element = element.strip()
        if element == "":
            raise ValueError(f"empty element in cron field {field!r}")
        # Everything after the first "/" is the step; without a "/" the
        # step stays 1.
        body, slash, step_text = element.partition("/")
        stride = 1
        if slash:
            try:
                stride = int(step_text)
            except ValueError:
                raise ValueError(f"invalid cron step {step_text!r}") from None
            if stride < 1:
                raise ValueError(f"cron step must be >= 1, got {stride}")
        if body == "*":
            first, last = lo, hi
        elif "-" in body:
            left, right = body.split("-", 1)
            first = _parse_value(left, lo, hi, names)
            last = _parse_value(right, lo, hi, names)
            if last < first:
                raise ValueError(f"inverted cron range {body!r}")
        else:
            first = last = _parse_value(body, lo, hi, names)
        matched.update(range(first, last + 1, stride))
    return matched
 class CronSchedule:
    """
    A parsed 5-field cron expression that can compute its next run time.

    Usage::

        cron = CronSchedule("0 19 * * mon,thu")
        run_at = cron.next_after(datetime.now())

    Raises ValueError at construction when the expression does not have
    exactly five fields or a field cannot be parsed.
    """
    def __init__(self, expression: str):
        self.expression = expression.strip()
        parts = self.expression.split()
        if len(parts) != 5:
            raise ValueError(
                f"cron expression needs 5 fields "
                f"(minute hour dom month dow), got {len(parts)}: "
                f"{expression!r}")
        minute_f, hour_f, dom_f, month_f, dow_f = parts
        self._minutes = _parse_field(minute_f, 0, 59)
        self._hours   = _parse_field(hour_f,   0, 23)
        self._dom     = _parse_field(dom_f,    1, 31)
        self._months  = _parse_field(month_f,  1, 12, _MONTH_NAMES)
        weekdays      = _parse_field(dow_f,    0, 7,  _DAY_NAMES)
        # Both 0 and 7 denote Sunday; fold 7 onto 0.
        self._dow = {value % 7 for value in weekdays}
        # A field counts as restricted unless it is exactly "*"; when both
        # day fields are restricted they are OR-combined (Vixie-cron rule).
        self._dom_restricted = dom_f != "*"
        self._dow_restricted = dow_f != "*"

    def __repr__(self) -> str:
        return f"CronSchedule({self.expression!r})"

    # ------------------------------------------------------------------
    def _day_matches(self, day: "datetime.date") -> bool:
        """Tells whether the schedule can fire at all on the given date."""
        if day.month not in self._months:
            return False
        dom_hit = day.day in self._dom
        # isoweekday(): Monday=1 … Sunday=7  ->  cron: Sunday=0 … Saturday=6
        dow_hit = (day.isoweekday() % 7) in self._dow
        if self._dom_restricted:
            return dom_hit or (self._dow_restricted and dow_hit)
        if self._dow_restricted:
            return dow_hit
        return True

    def next_after(self, dt: datetime) -> datetime:
        """
        Returns the first matching time strictly after ``dt``
        (second/microsecond precision is dropped).

        Raises ValueError when no occurrence is found within the scanned
        window of 366 * 5 days.
        """
        probe = (dt + timedelta(minutes=1)).replace(second=0, microsecond=0)
        # All (hour, minute) slots of a day in chronological order.
        slots = [(h, m)
                 for h in sorted(self._hours)
                 for m in sorted(self._minutes)]
        # Walk day by day (covers rare dom/month combos like Feb 29).
        for _ in range(366 * 5):
            if self._day_matches(probe.date()):
                earliest = (probe.hour, probe.minute)
                hit = next((slot for slot in slots if slot >= earliest), None)
                if hit is not None:
                    return probe.replace(hour=hit[0], minute=hit[1])
            # Nothing left today: continue at midnight of the next day.
            probe = (probe + timedelta(days=1)).replace(hour=0, minute=0)
        raise ValueError(
            f"cron {self.expression!r}: no occurrence within 5 years")
@@ -0,0 +1,272 @@
 """
 kavita_client.py
 ================
 Thin HTTP client for the Kavita server REST API (v0.9.x).
 Authenticates via the ``x-api-key`` header.  All series / library /
 collection / metadata reads and writes used by the light-novel updater
 go through this single client so request shaping (paging, content types,
 timeouts) is consistent.  No retry logic is applied at this layer.
 The class is intentionally state-light: no caching layer, just one
 ``requests.Session``.  Higher-level diff / update logic lives in
 KavitaSeriesUpdater, KavitaPersonUpdater and RelationshipSync.
 """
 from __future__ import annotations
 import base64
 from typing import Iterable
 import requests
 class KavitaClient:
    def __init__(self, base_url: str, api_key: str, *,
                 request_timeout: int = 30):
        """
        Prepares the two HTTP sessions used by the client.

        base_url        : Kavita server URL; trailing slashes are removed.
        api_key         : Value sent in the ``x-api-key`` header.
        request_timeout : Timeout passed to every request issued by this
                          client.
        """
        self._base = base_url.rstrip("/")
        self._timeout = request_timeout
        # API session: sends + receives JSON.
        api_headers = {
            "x-api-key":    api_key,
            "Accept":       "application/json",
            "Content-Type": "application/json",
        }
        self._session = requests.Session()
        self._session.headers.update(api_headers)
        # Separate, plain session for fetching external images (covers).
        # It deliberately does NOT carry the API headers — some CDNs refuse
        # to return image bytes when the client sends
        # Accept: application/json.
        image_headers = {"User-Agent": "KavitaLightNovelUpdater/1.0"}
        self._image_session = requests.Session()
        self._image_session.headers.update(image_headers)
    # ------------------------------------------------------------------
    # Libraries
    # ------------------------------------------------------------------
    def list_libraries(self) -> list[dict]:
        """
        Fetches GET /api/Library/libraries and returns the decoded JSON.
        An empty or null response body yields an empty list.
        """
        url = f"{self._base}/api/Library/libraries"
        reply = self._session.get(url, timeout=self._timeout)
        reply.raise_for_status()
        libraries = reply.json()
        return libraries if libraries else []
    # ------------------------------------------------------------------
    # Series
    # ------------------------------------------------------------------
    def list_series_in_library(self, library_id: int, *,
                               page_size: int = 200) -> list[dict]:
        """
        Returns all SeriesDto entries in the given library.
        Uses POST /api/Series/all-v2 with a FilterV2 that scopes by
        library id.  Pages are requested starting at page 1 and collected
        until a page comes back empty or shorter than ``page_size``.

        library_id : Library to list; sent as a string filter value.
        page_size  : Number of entries requested per page.

        Any error raised by ``raise_for_status`` on a page response
        propagates to the caller; entries collected so far are discarded.
        """
        # The filter is identical for every page, so build it once instead
        # of on every loop iteration.
        series_filter = {
            "statements": [
                {
                    "comparison": 0,     # Equal
                    "field": 19,         # Libraries field id (Kavita v0.9.x)
                    "value": str(library_id),
                }
            ],
            "combination": 1,           # And
            "sortOptions": {"isAscending": True, "sortField": 1},
            "limitTo": 0,
        }
        url = f"{self._base}/api/Series/all-v2"
        results: list[dict] = []
        page = 1
        while True:
            r = self._session.post(
                url,
                params={"PageNumber": page, "PageSize": page_size},
                json=series_filter, timeout=self._timeout)
            r.raise_for_status()
            chunk = r.json() or []
            if not chunk:
                break
            results.extend(chunk)
            # A short page means the server has nothing further; skip the
            # extra round trip that would only return an empty page.
            if len(chunk) < page_size:
                break
            page += 1
        return results
    def get_series(self, series_id: int) -> dict:
        """
        Fetches GET /api/Series/{series_id} and returns the decoded JSON
        (an empty dict when the body is empty or null).
        """
        url = f"{self._base}/api/Series/{series_id}"
        reply = self._session.get(url, timeout=self._timeout)
        reply.raise_for_status()
        series = reply.json()
        return series if series else {}
    def update_series(self, series: dict) -> None:
        """
        Posts ``series`` as JSON to /api/Series/update to change the
        series-level data (name, sortName, malId, …).
        """
        url = f"{self._base}/api/Series/update"
        reply = self._session.post(url, json=series, timeout=self._timeout)
        reply.raise_for_status()
    # ------------------------------------------------------------------
    # Series metadata
    # ------------------------------------------------------------------
    def get_series_metadata(self, series_id: int) -> dict:
        """
        Fetches GET /api/Series/metadata?seriesId=… and returns the decoded
        SeriesMetadataDto (an empty dict when the body is empty or null).
        """
        url = f"{self._base}/api/Series/metadata"
        query = {"seriesId": series_id}
        reply = self._session.get(url, params=query, timeout=self._timeout)
        reply.raise_for_status()
        metadata = reply.json()
        return metadata if metadata else {}
    def update_series_metadata(self, metadata: dict) -> None:
        """
        Posts a SeriesMetadataDto to /api/Series/metadata.
        The DTO is sent wrapped as {seriesMetadata: {...}}, which is the
        shape Kavita expects.
        """
        url = f"{self._base}/api/Series/metadata"
        wrapped = {"seriesMetadata": metadata}
        reply = self._session.post(url, json=wrapped, timeout=self._timeout)
        reply.raise_for_status()
    # ------------------------------------------------------------------
    # Related series
    # ------------------------------------------------------------------
    def get_related(self, series_id: int) -> dict:
        """
        Fetches GET /api/Series/all-related?seriesId=… and returns the
        decoded JSON with the related series grouped by relation type
        (an empty dict when the body is empty or null).
        """
        url = f"{self._base}/api/Series/all-related"
        query = {"seriesId": series_id}
        reply = self._session.get(url, params=query, timeout=self._timeout)
        reply.raise_for_status()
        related = reply.json()
        return related if related else {}
    def update_related(self, payload: dict) -> None:
        """
        Posts the related-series relationships for one series to
        /api/Series/update-related.
        Payload shape (UpdateRelatedSeriesDto):
            {seriesId, prequels, sequels, sideStories, spinOffs,
             adaptations, characters, contains, others,
             alternativeSettings, alternativeVersions, doujinshis,
             editions, annuals}
        Each relation entry is a list of target series ids (ints).
        The payload is forwarded unchanged.
        """
        url = f"{self._base}/api/Series/update-related"
        reply = self._session.post(url, json=payload, timeout=self._timeout)
        reply.raise_for_status()
    # ------------------------------------------------------------------
    # Collections
    # ------------------------------------------------------------------
    def list_collections(self) -> list[dict]:
        """
        Fetches GET /api/Collection with ownedOnly=false and
        sortByLastModified=false and returns the decoded list of collection
        tags (an empty list when the body is empty or null).
        """
        url = f"{self._base}/api/Collection"
        query = {"ownedOnly": "false", "sortByLastModified": "false"}
        reply = self._session.get(url, params=query, timeout=self._timeout)
        reply.raise_for_status()
        collections = reply.json()
        return collections if collections else []
    def add_series_to_collection(self, *, collection_id: int,
                                 title: str,
                                 series_ids: Iterable[int]) -> dict:
        """
        Attaches series to a collection via
        POST /api/Collection/update-for-series.
        Use collection_id=0 to have a new collection named `title`
        created.  For an existing collection pass its id; the API still
        requires `title`, but it acts as a no-op when the id matches.
        Returns the decoded JSON response, or an empty dict when the body
        is empty, null or not valid JSON.
        """
        request_body = {
            "collectionTagId":    int(collection_id),
            "collectionTagTitle": title,
            "seriesIds":          list(map(int, series_ids)),
        }
        url = f"{self._base}/api/Collection/update-for-series"
        reply = self._session.post(url, json=request_body,
                                   timeout=self._timeout)
        reply.raise_for_status()
        try:
            parsed = reply.json()
        except ValueError:
            # The body could not be decoded as JSON.
            return {}
        return parsed or {}
    # ------------------------------------------------------------------
    # Persons
    # ------------------------------------------------------------------
    def search_persons(self, name: str) -> list[dict]:
        """
        Runs Kavita's own person search (GET /api/Person/search with
        queryString=`name`) and returns the decoded PersonDto list
        (an empty list when the body is empty or null).
        """
        url = f"{self._base}/api/Person/search"
        query = {"queryString": name}
        reply = self._session.get(url, params=query, timeout=self._timeout)
        reply.raise_for_status()
        persons = reply.json()
        return persons if persons else []
    def update_person(self, payload: dict) -> None:
        """
        Posts a person record (malId, aniListId, description, …) as JSON
        to /api/Person/update.  The payload is forwarded unchanged.
        """
        url = f"{self._base}/api/Person/update"
        reply = self._session.post(url, json=payload, timeout=self._timeout)
        reply.raise_for_status()
    # ------------------------------------------------------------------
    # Cover uploads
    # ------------------------------------------------------------------
    def upload_series_cover(self, series_id: int, image_url: str, *,
                            lock: bool = False) -> None:
        """
        Fetches the image at ``image_url`` and sets it as the cover of the
        series ``series_id``; ``lock`` is forwarded as the cover-lock flag.
        Delegates to the shared cover-upload path.
        """
        endpoint = "/api/Upload/series"
        self._upload_cover(endpoint, series_id, image_url, lock)
    def upload_person_cover(self, person_id: int, image_url: str, *,
                            lock: bool = False) -> None:
        """
        Fetches the image at ``image_url`` and sets it as the cover of the
        person ``person_id``; ``lock`` is forwarded as the cover-lock flag.
        Delegates to the shared cover-upload path.
        """
        endpoint = "/api/Upload/person"
        self._upload_cover(endpoint, person_id, image_url, lock)
    def _upload_cover(self, endpoint: str, entity_id: int,
                      image_url: str, lock: bool) -> None:
        """
        Shared cover-upload path.  Kavita's upload endpoints accept a raw
        base64 blob (no ``data:`` prefix) in the ``url`` field — a data
        URI or the two-step upload-by-url flow are rejected with HTTP 400
        (verified against Kavita 0.9.0.2).
        """
        img = self._image_session.get(image_url, timeout=self._timeout)
        img.raise_for_status()
        b64 = base64.b64encode(img.content).decode()
        r = self._session.post(
            f"{self._base}{endpoint}",
            json={"id": entity_id, "url": b64, "lockCover": lock},
            timeout=self._timeout)
        if r.status_code >= 400:
            # Include the body excerpt — Kavita's upload errors carry the
            # actual reason there, not in the status line.
            raise requests.HTTPError(
                f"{endpoint} HTTP {r.status_code}: {_short_body(r)}",
                response=r)
    # ------------------------------------------------------------------
    # Generic GET helper (used by callers that need a response object)
    # ------------------------------------------------------------------
    def get(self, path: str, params: "dict | None" = None) -> requests.Response:
        """
        Issues a GET for ``path`` (appended to the base URL) on the API
        session and hands back the response object as-is — no status check
        and no JSON decoding is done here.
        """
        url = f"{self._base}{path}"
        return self._session.get(url, params=params, timeout=self._timeout)
    def post(self, path: str, *,
             json: "dict | list | None" = None,
             params: "dict | None" = None) -> requests.Response:
        """
        Issues a POST for ``path`` (appended to the base URL) on the API
        session, forwarding ``json`` and ``params``, and hands back the
        response object as-is — no status check is done here.
        """
        url = f"{self._base}{path}"
        return self._session.post(url, json=json, params=params,
                                  timeout=self._timeout)
 def _short_body(resp: requests.Response, limit: int = 400) -> str:
    """Returns the response body trimmed to `limit` chars for error messages."""
    try:
        text = resp.text or ""
    except Exception:
        return "<unreadable response body>"
    text = text.strip().replace("\n", " ").replace("\r", " ")
    if len(text) > limit:
        text = text[:limit] + "…"
    return text or "<empty body>"
@@ -15,46 +15,22 @@ the updater:
     an 'about' text (requires an extra Jikan request per character; only
     performed when update_descriptions=True).
-Kavita API version
+All HTTP traffic to Kavita goes through the shared :class:`KavitaClient`
------------------
+(`/api/Person/search`, `/api/Person/update`, `/api/Upload/person`).
 Tested against Kavita 0.9.0.2.
 Authentication
 --------------
 Uses the `x-api-key` header (API key from Kavita user settings).
 No JWT login is required.
 Relevant endpoints (Kavita 0.9.0.2)
 -------------------------------------
  GET  /api/Person/search       find persons by name / alias
  POST /api/Person/update       write metadata (malId, description, …)
  POST /api/Upload/person       set cover image (base64 data URI)
  POST /api/Upload/upload-by-url  download an external URL to temp storage
                                  (used as an alternative upload path)
 Cover upload flow
 -----------------
 The image is downloaded locally, base64-encoded, and sent as a data URI
 to POST /api/Upload/person.  This is more reliable than the
 upload-by-url → upload/person two-step because it avoids Kavita's temp
 file handling (which had known issues in 0.8.x – 0.9.x, GitHub #3900).
 Dependencies
 ------------
    requests    ->  pip install requests
 """
 from __future__ import annotations
 import base64
 import datetime
 import difflib
 import re
 import requests
 from KavitaClient import KavitaClient
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from TextUtils import best_similarity, paragraphs_to_html, person_name_with_id
 class KavitaPersonUpdater:
@@ -63,41 +39,22 @@ class KavitaPersonUpdater:
    Parameters
    ----------
-    kavita_base_url : Base URL of the Kavita server, e.g. "http://192.168.2.2:5000"
+    client          : Shared KavitaClient (session, auth, cover uploads)
    api_key         : Kavita API key (Settings → User → API key)
    mal_resolver    : Shared MALResolver singleton (created automatically if omitted)
-    request_timeout : HTTP timeout in seconds for both Kavita and image requests
+    al_resolver     : Shared AniListResolver singleton (created automatically if omitted)
    min_name_score  : Minimum difflib similarity ratio (0–1) required to accept a
                      Kavita person as a match for a MAL name.  Default 0.80.
    """
-    def __init__(self, kavita_base_url: str, api_key: str, *,
+    def __init__(self, client: KavitaClient, *,
                 mal_resolver: "MALResolver | None" = None,
                 al_resolver: "AniListResolver | None" = None,
                 request_timeout: int = 30,
                 min_name_score: float = 0.80):
-        self._base = kavita_base_url.rstrip("/")
+        self._client = client
        self._timeout = request_timeout
        self._min_score = min_name_score
        self._mal = mal_resolver or MALResolver()
        self._al  = al_resolver  or AniListResolver()
        # Session used for Kavita API calls.
        self._session = requests.Session()
        self._session.headers.update({
            "x-api-key": api_key,
            "Content-Type": "application/json",
            "Accept": "application/json",
        })
        # Plain session used to download external images (MAL CDN etc.).
        # Must NOT carry the Kavita API headers — Accept: application/json
        # would prevent MAL CDN from returning the image bytes.
        self._image_session = requests.Session()
        self._image_session.headers.update({
            "User-Agent": "KavitaPersonUpdater/1.0",
        })
        # Cache: normalised name -> list of PersonDto dicts (best matches first)
        self._person_search_cache: dict[str, list[dict]] = {}
@@ -195,11 +152,28 @@ class KavitaPersonUpdater:
            if not name and not raw_name:
                continue
-            # Search by the cleaned (XML-safe) name first; if Kavita stores
+            if kind == "character":
                # Characters are stored under their disambiguated name
                # ("Rem (MAL 118737)") — see person_name_with_id.  The
                # series metadata write creates the person under exactly
                # this name, so only that form is searched.
                search_names = [person_name_with_id(
                    name, mal_id=entry.get("mal_id"),
                    al_id=entry.get("al_id"))]
            else:
                # Staff: cleaned (XML-safe) name first; if Kavita stores
                # the legacy comma form, retry with the raw MAL name.
-            matches = self._find_kavita_person(name) if name else []
+                search_names = [name]
-            if not matches and raw_name and raw_name != name:
+                if raw_name and raw_name != name:
-                matches = self._find_kavita_person(raw_name)
+                    search_names.append(raw_name)
            matches: list[dict] = []
            for search_name in search_names:
                if not search_name:
                    continue
                matches = self._find_kavita_person(search_name)
                if matches:
                    break
            if not matches:
                result["not_found"] += 1
@@ -230,29 +204,17 @@ class KavitaPersonUpdater:
            return self._person_search_cache[key]
        try:
-            resp = self._session.get(
+            persons = self._client.search_persons(name)
                f"{self._base}/api/Person/search",
                params={"queryString": name},
                timeout=self._timeout,
            )
            resp.raise_for_status()
            persons: list[dict] = resp.json() or []
        except requests.RequestException:
            self._person_search_cache[key] = []
            return []
-        def score(p: dict) -> float:
+        scored = []
-            candidates = [p.get("name") or ""]
+        for p in persons:
-            candidates += [a for a in (p.get("aliases") or []) if a]
+            candidates = [p.get("name")] + list(p.get("aliases") or [])
-            best = 0.0
+            scored.append((best_similarity(key, candidates), p))
-            q = key
+        scored.sort(key=lambda pair: pair[0], reverse=True)
-            for c in candidates:
+        filtered = [p for score, p in scored if score >= self._min_score]
                r = difflib.SequenceMatcher(None, q, c.lower()).ratio()
                best = max(best, r)
            return best
        ranked = sorted(persons, key=score, reverse=True)
        filtered = [p for p in ranked if score(p) >= self._min_score]
        self._person_search_cache[key] = filtered
        return filtered
@@ -289,6 +251,20 @@ class KavitaPersonUpdater:
        current_mal_id: int = person.get("malId") or 0
        current_al_id:  int = person.get("aniListId") or 0
        # Collision guard: the Kavita person is already linked to a
        # *different* tracker entity — same display name, different
        # character/person.  Never overwrite; first writer wins.
        if ((mal_id and current_mal_id and current_mal_id != mal_id)
                or (al_id and current_al_id and current_al_id != al_id)):
            if errors is not None:
                errors.append(
                    f"conflict: '{person_name}' (#{person_id}) is linked to "
                    f"malId={current_mal_id or '-'}/aniListId={current_al_id or '-'} "
                    f"but this entry has malId={mal_id or '-'}/aniListId={al_id or '-'} "
                    f"— skipped")
            return False
        needs_mal_id = bool(mal_id and current_mal_id != mal_id)
        needs_al_id  = bool(al_id  and current_al_id  != al_id)
@@ -323,12 +299,7 @@ class KavitaPersonUpdater:
                "aniListId": al_id if needs_al_id  else (current_al_id  or None),
            }
            try:
-                resp = self._session.post(
+                self._client.update_person(payload)
                    f"{self._base}/api/Person/update",
                    json=payload,
                    timeout=self._timeout,
                )
                resp.raise_for_status()
                changed = True
            except requests.RequestException as e:
                if errors is not None:
@@ -350,88 +321,21 @@ class KavitaPersonUpdater:
                and bool(person.get("coverImage"))
            )
            if image_url and not already_uploaded:
-                if self._upload_cover(person_id, image_url,
+                try:
-                                      person_name=person_name,
+                    self._client.upload_person_cover(person_id, image_url)
                                      errors=errors):
                    changed = True
                except requests.RequestException as e:
                    if errors is not None:
                        errors.append(
                            f"cover upload failed for #{person_id} "
                            f"'{person_name}' ({image_url}): {e}")
        return changed
    # ------------------------------------------------------------------
    # Internal: cover upload
    # ------------------------------------------------------------------
    def _upload_cover(self, person_id: int, image_url: str,
                      lock: bool = False, *,
                      person_name: str = "",
                      errors: "list | None" = None) -> bool:
        """
        Uploads a cover image to a Kavita person.
        The image is downloaded with the plain (header-less) image session
        and posted to `POST /api/Upload/person` as a raw base64 string in
        the `url` field.
        Notes on protocol quirks discovered against Kavita 0.9.0.2:
          - The two-step `upload-by-url` -> `Upload/person` flow returns
            "Unable to save cover image to Person" (HTTP 400).
          - A `data:image/jpeg;base64,...` data URI is rejected with the
            same error.
          - Only the raw base64 blob (no prefix) is accepted.
        """
        label = (f"#{person_id} '{person_name}'"
                 if person_name else f"#{person_id}")
        # 1) Download the image with a clean session — the Kavita session's
        #    `Accept: application/json` header makes some CDNs refuse to
        #    return image bytes.
        try:
            img_resp = self._image_session.get(image_url,
                                               timeout=self._timeout)
            img_resp.raise_for_status()
        except requests.RequestException as e:
            if errors is not None:
                errors.append(
                    f"image download failed for {label} ({image_url}): {e}")
            return False
        b64 = base64.b64encode(img_resp.content).decode()
        # 2) POST the raw base64 blob.
        try:
            resp = self._session.post(
                f"{self._base}/api/Upload/person",
                json={"id": person_id, "url": b64, "lockCover": lock},
                timeout=self._timeout,
            )
            if resp.status_code >= 400:
                if errors is not None:
                    errors.append(
                        f"Upload/person HTTP {resp.status_code} for {label}: "
                        f"{_short_body(resp)}")
                return False
            return True
        except requests.RequestException as e:
            if errors is not None:
                errors.append(
                    f"Upload/person failed for {label}: {e}")
            return False
 # --------------------------------------------------------------------------
 # Module helpers: description builders
 # --------------------------------------------------------------------------
 def _plain_to_html(text: str) -> str:
    """Converts plain text with paragraph breaks to compact HTML (no raw \\n)."""
    if not text:
        return ""
    parts: list[str] = []
    for para in re.split(r"\n{2,}", text.strip()):
        para = para.strip()
        if para:
            parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
    return "".join(parts)
 def _format_birthday(birthday: str) -> str:
    """Converts an ISO 8601 birthday string to "D Month YYYY"."""
    if not birthday:
@@ -457,7 +361,7 @@ def _build_character_description(details: dict) -> str:
        parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>')
    about = (details.get("about") or "").strip()
    if about:
-        parts.append(_plain_to_html(about))
+        parts.append(paragraphs_to_html(about))
    return "<br>".join(parts)
@@ -501,33 +405,19 @@ def _build_person_description(details: dict) -> str:
        parts.append(f'<table>{"".join(rows)}</table>')
    about = (details.get("about") or "").strip()
    if about:
-        parts.append(_plain_to_html(about))
+        parts.append(paragraphs_to_html(about))
    return "<br>".join(parts)
 # --------------------------------------------------------------------------
 # Module helper
 # --------------------------------------------------------------------------
 def _short_body(resp: requests.Response, limit: int = 400) -> str:
    """Returns the response body trimmed to `limit` chars for error logging."""
    try:
        text = resp.text or ""
    except Exception:
        return "<unreadable response body>"
    text = text.strip().replace("\n", " ").replace("\r", " ")
    if len(text) > limit:
        text = text[:limit] + "…"
    return text or "<empty body>"
 # --------------------------------------------------------------------------
 # Usage example
 # --------------------------------------------------------------------------
 if __name__ == "__main__":
-    KAVITA_URL = "http://192.168.2.2:5000"
+    import os
    KAVITA_KEY = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA"
-    updater = KavitaPersonUpdater(KAVITA_URL, KAVITA_KEY)
+    client = KavitaClient(os.environ["KAVITA_URL"],
                          os.environ["KAVITA_API_KEY"])
    updater = KavitaPersonUpdater(client)
    mal = MALResolver()
    mal_id = mal.find_mal_id("よふかしのうた")
@@ -30,12 +30,12 @@ Dependencies
 from __future__ import annotations
 import datetime
 import difflib
 import time
 import requests
 from MediaResolver import MediaResolver
 from TextUtils import best_similarity
 class MALResolver(MediaResolver):
@@ -57,12 +57,21 @@ class MALResolver(MediaResolver):
            cls._instance._initialized = False
        return cls._instance
-    def __init__(self, *, request_timeout: int = 30):
+    def __init__(self, *, request_timeout: int = 30,
                 search_type: str = "manga"):
        """
        search_type : Jikan `type` filter for title searches — "manga" for
                      the manga container, "lightnovel" for the LN container.
                      Only the FIRST construction in the process sets it
                      (singleton); construct the resolver with the correct
                      type in the entry point / orchestrator.
        """
        if self._initialized:
            return
        self.JIKAN_BASE = "https://api.jikan.moe/v4"
        self.request_timeout = request_timeout
        self.search_type = search_type
        self._session = requests.Session()
        self._session.headers.setdefault("User-Agent", "MALResolver/1.0")
@@ -106,7 +115,7 @@ class MALResolver(MediaResolver):
        try:
            data = self._get(f"{self.JIKAN_BASE}/manga",
-                             {"q": title, "limit": 5, "type": "manga"})
+                             {"q": title, "limit": 5, "type": self.search_type})
            results = data.get("data") or []
        except requests.RequestException:
            return None
@@ -404,19 +413,12 @@ def _clean_mal_name(name: str) -> str:
 def _score_title(query: str, entry: dict) -> float:
    """Returns the best title-similarity score for a Jikan manga entry."""
    candidates = [
-        entry.get("title") or "",
+        entry.get("title"),
-        entry.get("title_english") or "",
+        entry.get("title_english"),
-        entry.get("title_japanese") or "",
+        entry.get("title_japanese"),
    ]
-    for alt in (entry.get("titles") or []):
+    candidates += [alt.get("title") for alt in (entry.get("titles") or [])]
-        candidates.append(alt.get("title") or "")
+    return best_similarity(query, candidates)
    best = 0.0
    q = query.lower()
    for t in candidates:
        if t:
            ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
            best = max(best, ratio)
    return best
 # --------------------------------------------------------------------------
@@ -0,0 +1,92 @@
 """
 mangabaka_rate_limit.py
 =======================
 Process-wide rate limiter for the MangaBaka API.
 Apply via:
    from MangaBakaRateLimit import apply_to_session
    apply_to_session(session)
 This mounts a custom ``requests.adapters.HTTPAdapter`` on the given
 ``requests.Session`` for the ``api.mangabaka.dev`` host.  Every request
 going through that adapter is:
  * throttled so that no two requests are dispatched within
    ``_MIN_INTERVAL`` seconds of one another, and
  * retried on HTTP 429, honouring the ``Retry-After`` header when
    present, otherwise exponential backoff capped at ``_MAX_BACKOFF``.
 Throttle state is module-global, so even if several sessions exist in
 the same process they share one budget — important because they all hit
 the same upstream IP-based limit.
 """
 from __future__ import annotations
 import threading
 import time
 from requests.adapters import HTTPAdapter
 # Tune these if MangaBaka tightens or loosens limits.
 _MIN_INTERVAL = 1.1      # seconds between consecutive requests
 _MAX_RETRIES  = 6        # retries on 429 before giving up
 _MAX_BACKOFF  = 60.0     # cap on per-attempt backoff sleep
 # --- shared throttle state --------------------------------------------------
 _state_lock        = threading.Lock()
 _last_request_time = 0.0
 def _wait_for_slot() -> None:
    """
    Blocks the caller until at least ``_MIN_INTERVAL`` seconds have passed
    since the previously reserved slot, then reserves the current moment
    as the new slot.  The check-and-reserve step runs under
    ``_state_lock``; the sleep happens with the lock released.
    """
    global _last_request_time
    while True:
        with _state_lock:
            current = time.monotonic()
            elapsed = current - _last_request_time
            delay = _MIN_INTERVAL - elapsed
            slot_free = delay <= 0
            if slot_free:
                _last_request_time = current
        if slot_free:
            return
        # Not yet due: sleep for the remainder, then re-check, because
        # another caller may have reserved a slot in the meantime.
        time.sleep(delay)
 class _MangaBakaRateLimitAdapter(HTTPAdapter):
    """
    HTTP adapter that throttles every request through ``_wait_for_slot``
    and re-sends requests that were answered with HTTP 429.
    """
    def send(self, request, **kwargs):
        """
        Sends ``request``, retrying up to ``_MAX_RETRIES`` times on 429.

        Before each retry the adapter sleeps for the ``Retry-After`` value
        when the header holds a usable number of seconds, otherwise for an
        exponential backoff capped at ``_MAX_BACKOFF``.  Once the retries
        are used up, the last 429 response is returned immediately and
        still open, so the caller can read its body.
        """
        response = None
        for attempt in range(_MAX_RETRIES + 1):
            _wait_for_slot()
            response = super().send(request, **kwargs)
            if response.status_code != 429:
                return response
            if attempt == _MAX_RETRIES:
                # Retries exhausted — let the caller deal with the last
                # 429.  Do not close it and do not sleep: no further
                # request follows, and a closed response has no body left
                # to read.
                break
            backoff = min(_MAX_BACKOFF, 2.0 * (2 ** attempt))
            retry_after = response.headers.get("Retry-After")
            try:
                wait = float(retry_after) if retry_after else backoff
            except ValueError:
                # E.g. the HTTP-date form of Retry-After.
                wait = backoff
            # Negative, NaN or infinite values would make time.sleep raise
            # or block forever; fall back to the computed backoff.
            if not 0 <= wait < float("inf"):
                wait = backoff
            print(f"[MangaBaka] 429 — backing off {wait:.1f}s "
                  f"(attempt {attempt + 1}/{_MAX_RETRIES})",
                  flush=True)
            # This response is discarded in favour of the retry; release
            # its connection before sleeping.
            response.close()
            time.sleep(wait)
        return response
 def apply_to_session(session) -> None:
    """
    Installs one shared rate-limit adapter on ``session`` for both the
    https and the http prefix of ``api.mangabaka.dev``, so every MangaBaka
    call made through the session is throttled.  Calling it again simply
    mounts a fresh adapter over the earlier one for the same prefixes.
    """
    shared_adapter = _MangaBakaRateLimitAdapter()
    prefixes = (
        "https://api.mangabaka.dev/",
        "http://api.mangabaka.dev/",
    )
    for prefix in prefixes:
        session.mount(prefix, shared_adapter)
@@ -2,7 +2,7 @@
 mangabaka_works_resolver.py
 ===========================
-Fetches volume-level (work) data from the MangaBaka API.
+Fetches volume-level (work) data and volume cover images from the MangaBaka API.
 Each "work" is a physical tankobon volume and may carry:
  - volume number
@@ -11,10 +11,16 @@ Each "work" is a physical tankobon volume and may carry:
  - release date
  - cover image  (raw / default / small variants)
-Only works that have a usable cover are kept in the cache.
+Cover resolution order (per volume)
-Works without a cover are discarded at fetch time.
+------------------------------------
-If no volume is assigned for a chapter, callers fall back to the
+1. GET /v1/series/{id}/images  — covers that exist independently of a work
-default series cover from the series object itself.
+   (some series have covers but no works).  English edition preferred;
   original language used when no English cover is available.
 2. GET /v1/series/{id}/works   — physical tankobon data including covers.
   Fallback when /images returns nothing for the requested volume.
 If no volume cover is found at all, callers fall back to the series-level
 default cover from the series object itself.
 Dependencies
 ------------
@@ -26,10 +32,75 @@ from __future__ import annotations
 import requests
 # --------------------------------------------------------------------------
 # Generic image-block URL picker (shared by /images and /works responses)
 # --------------------------------------------------------------------------
 def _pick_image_url(image) -> "str | None":
    """
    Returns the best URL from a MangaBaka image block.
    Handles the common ``{raw, x150, x250, x350}`` structure used by both
    the ``cover`` field on series/work objects and the ``image`` field on
    ``/images`` endpoint items::
        {
          "raw":  {"url": "...", "size": ..., "height": ..., "width": ...},
          "x150": {"x1": "...", "x2": "...", "x3": "..."},
          "x250": {...},
          "x350": {...}
        }
    Preference: raw original > x350@x3 > x250@x3 > x150@x3 > … (falling
    through to lower densities and sizes as needed).
    """
    if not image:
        return None
    if isinstance(image, str):
        return image
    if not isinstance(image, dict):
        return None
    # 1) Raw / unscaled image
    raw = image.get("raw")
    if isinstance(raw, dict):
        url = raw.get("url")
        if isinstance(url, str) and url:
            return url
    elif isinstance(raw, str) and raw:
        return raw
    # 2) Size-keyed CDN variants, largest first, highest density first
    for size_key in ("x350", "x250", "x150"):
        variant = image.get(size_key)
        if isinstance(variant, dict):
            for density in ("x3", "x2", "x1"):
                url = variant.get(density)
                if isinstance(url, str) and url:
                    return url
        elif isinstance(variant, str) and variant:
            return variant
    # 3) Last-ditch: any HTTP URL anywhere in the structure
    for val in image.values():
        if isinstance(val, str) and val.startswith("http"):
            return val
        if isinstance(val, dict):
            for sub_val in val.values():
                if isinstance(sub_val, str) and sub_val.startswith("http"):
                    return sub_val
    return None
 class MangaBakaWorksResolver:
    """
-    Fetches and caches MangaBaka volume (work) data for a series.
+    Fetches and caches MangaBaka volume (work) data and cover images.
-    Only works that have a cover image are retained in the cache.
+
    Cover lookup order per volume
    ------------------------------
    1. ``/v1/series/{id}/images`` — edition covers (English > original).
    2. ``/v1/series/{id}/works``  — physical tankobon covers.
    Only works that carry a cover image are retained in the works cache.
    """
    def __init__(self, api_base_url: str = "https://api.mangabaka.dev/v1",
@@ -42,30 +113,24 @@ class MangaBakaWorksResolver:
        # Cache: series_id (str) -> list of work dicts (only those with covers)
        self._cache: dict[str, list[dict]] = {}
        # Cache: series_id (str) -> {norm_vol (str): url (str)}
        self._images_cache: dict[str, dict[str, str]] = {}
    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
-    def get_works(self, series_id: str) -> list[dict]:
+    def _fetch_all_pages(self, endpoint: str) -> list[dict]:
        """
-        Returns volume-level works for a series, filtered to those that have
+        Pages through a MangaBaka list endpoint (limit=50 per page) and
-        a usable cover image.  Results are cached per series.
+        returns all collected `data` items.  Network errors end the
-
+        pagination early; items fetched so far are returned.
        Pages through the API (limit=50) until the response returns an empty
        page, collecting all works before applying the cover filter.
        """
-        if not series_id:
+        items: list[dict] = []
            return []
        if series_id in self._cache:
            return self._cache[series_id]
        all_works: list[dict] = []
        page = 1
        try:
            while True:
                resp = self._session.get(
-                    f"{self.api_base_url}/series/{series_id}/works",
+                    f"{self.api_base_url}/series/{endpoint}",
                    params={"limit": 50, "page": page},
                    timeout=self.request_timeout,
                )
@@ -73,16 +138,34 @@ class MangaBakaWorksResolver:
                page_data = resp.json().get("data") or []
                if not page_data:
                    break
-                all_works.extend(page_data)
+                items.extend(page_data)
                if len(page_data) < 50:
                    break
                page += 1
        except requests.RequestException:
-            if not all_works:
+            pass
        return items
    def get_works(self, series_id: str) -> list[dict]:
        """
        Return the works of ``series_id`` that carry at least one image.

        A falsy ``series_id`` yields an empty list without any lookup.
        A cached list is returned as-is.  Otherwise all pages of
        ``{series_id}/works`` are fetched and works without a truthy
        ``images`` entry are dropped.

        Only non-empty results are stored in the cache, so a series that
        currently has no usable works is fetched again on the next call.
        """
        if not series_id:
            return []
        try:
            return self._cache[series_id]
        except KeyError:
            pass
        usable: list[dict] = []
        for work in self._fetch_all_pages(f"{series_id}/works"):
            # Keep only works that have a non-empty "images" value.
            if work.get("images"):
                usable.append(work)
        if usable:
            self._cache[series_id] = usable
        return usable
@@ -101,12 +184,85 @@ class MangaBakaWorksResolver:
                return work
        return None
-    def get_cover_for_volume(self, series_id: str, volume) -> "str | None":
+    def get_volume_covers(self, series_id: str) -> "dict[str, str]":
-        """Returns the cover URL for a specific volume, or None if not found."""
+        """
-        work = self.get_work_for_volume(series_id, volume)
+        Fetches all volume-type cover images for a series from
-        if not work:
+        ``/v1/series/{id}/images`` and returns a
        ``{normalised_volume_str: url}`` mapping.
        English-edition covers are preferred; the first available language
        is used as fallback when no English cover exists for a volume.
        Results are cached per series.
        """
        if not series_id:
            return {}
        if series_id in self._images_cache:
            return self._images_cache[series_id]
        raw_items = self._fetch_all_pages(f"{series_id}/images")
        # Group by normalised volume index; collect all languages per volume.
        by_volume: dict[str, dict[str, str]] = {}  # norm_vol -> {lang: url}
        for item in raw_items:
            if item.get("type") != "volume":
                continue
            idx = item.get("index_numeric")
            if idx is None:
                continue
            norm = _norm_vol(idx)
            lang = (item.get("language") or "").lower() or "unknown"
            url  = _pick_image_url(item.get("image"))
            if not url:
                continue
            if norm not in by_volume:
                by_volume[norm] = {}
            # First entry per language wins (API order reflects quality/rank).
            if lang not in by_volume[norm]:
                by_volume[norm][lang] = url
        # Pick best language per volume: English first, then first available.
        result: dict[str, str] = {}
        for norm, lang_map in by_volume.items():
            url = lang_map.get("en") or next(iter(lang_map.values()), None)
            if url:
                result[norm] = url
        # Empty results are not cached — covers added on MangaBaka later
        # become visible without restarting the long-running process.
        if result:
            self._images_cache[series_id] = result
        return result
    def get_cover_for_volume_from_images(self, series_id: str,
                                         volume) -> "str | None":
        """
        Returns the cover URL for a specific volume from the /images endpoint,
        or None if not available.
        """
        covers = self.get_volume_covers(series_id)
        if not covers:
            return None
-        return self._pick_cover_url(work.get("images")[0].get("image"))
+        return covers.get(_norm_vol(volume))
    def get_cover_for_volume(self, series_id: str, volume) -> "str | None":
        """
        Resolve the cover URL of one volume.

        The ``/images``-based lookup is consulted first; when it yields
        nothing, the first image of the matching work (from ``/works``) is
        used instead.  ``None`` is returned when neither source provides a
        cover for the volume.
        """
        # Preferred source: covers from the /images endpoint.
        from_images = self.get_cover_for_volume_from_images(series_id, volume)
        if from_images:
            return from_images
        # Fallback source: the work catalogued for this volume.
        work = self.get_work_for_volume(series_id, volume)
        work_images = work.get("images") if work else None
        if work_images:
            return _pick_image_url(work_images[0].get("image"))
        return None
    def get_page_counts(self, series_id: str) -> "dict[str, int]":
        """
@@ -125,59 +281,9 @@ class MangaBakaWorksResolver:
        return result
    def clear_cache(self) -> None:
-        """Clears the internal works cache."""
+        """Clears both the works cache and the images cover cache."""
        self._cache.clear()
-
+        self._images_cache.clear()
    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _pick_cover_url(cover) -> "str | None":
        """
        Selects the best cover URL from a MangaBaka cover object.
        Real API shape:
            "raw":  {"url": "...", "size": ..., "height": ..., "width": ...}
            "x150": {"x1": "...", "x2": "...", "x3": "..."}
            "x250": {...}
            "x350": {...}
        Order: raw original > x350@x3 > x250@x3 > x150@x3 ...
        """
        if not cover:
            return None
        if isinstance(cover, str):
            return cover
        if not isinstance(cover, dict):
            return None
        raw = cover.get("raw")
        if isinstance(raw, dict):
            url = raw.get("url")
            if isinstance(url, str) and url:
                return url
        elif isinstance(raw, str) and raw:
            return raw
        for size_key in ("x350", "x250", "x150"):
            variant = cover.get(size_key)
            if isinstance(variant, dict):
                for density in ("x3", "x2", "x1"):
                    url = variant.get(density)
                    if isinstance(url, str) and url:
                        return url
            elif isinstance(variant, str) and variant:
                return variant
        # Last-ditch: any HTTP URL anywhere in the structure
        for val in cover.values():
            if isinstance(val, str) and val.startswith("http"):
                return val
            if isinstance(val, dict):
                for sub_val in val.values():
                    if isinstance(sub_val, str) and sub_val.startswith("http"):
                        return sub_val
        return None
 # --------------------------------------------------------------------------
@@ -1,355 +0,0 @@
 """
 matches_web_app.py
 ==================
 Flask web UI for inspecting and editing the matches.json file produced by
 MatchesCache.
 Routes
 ------
 GET  /                       HTML table view (one row per cached match)
 GET  /api/matches            JSON dump of the full cache
 POST /api/matches            Upsert / rename an entry
                             body: {originalTitle?, title, mangabakaId,
                                    mangabakaName, imageUrl, firstMatchTime?}
 POST /api/matches/delete     Remove an entry      body: {title}
 POST /api/build              Trigger a full re-scan via SuwayomiMover.build_matches_only
                             (only available if a mover is wired in)
 The Title cell is rendered as a link to MangaBaka's search page, restricted
 to the manga / manhwa / manhua types.
 """
 from __future__ import annotations
 import threading
 from urllib.parse import quote_plus
 from flask import Flask, jsonify, request, Response
 from MatchesCache import MatchesCache
 _INDEX_HTML = """<!doctype html>
 <html lang="en">
 <head>
  <meta charset="utf-8">
  <title>MangaBaka matches</title>
  <style>
    body  { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
    h1    { margin: 0 0 1rem; font-size: 1.4rem; }
    .bar  { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
    .bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
    button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
    button.primary { background:#2563eb; border-color:#2563eb; color:white; }
    button.danger  { background:#7f1d1d; border-color:#7f1d1d; color:white; }
    table { border-collapse: collapse; width: 100%; }
    th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
    th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
    tr:nth-child(even) td { background: #161616; }
    td.image img { max-width: 90px; max-height: 130px; display:block; }
    td input { width: 100%; padding: .25rem; background:#222; color:#eee; border:1px solid #444; }
    td.title a { color: #60a5fa; text-decoration: none; }
    td.title a:hover { text-decoration: underline; }
    td.actions { white-space: nowrap; }
    .status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
    .dirty td { background: #1f2937 !important; }
  </style>
 </head>
 <body>
 <h1>MangaBaka matches</h1>
 <div class="bar">
  <input id="filter" type="search" placeholder="Filter by title…">
  <button id="reload">Reload</button>
  <button id="build"  class="primary">Build all (rescan)</button>
  <span class="status" id="status"></span>
 </div>
 <table>
  <thead>
    <tr>
      <th>Title</th>
      <th>mangabakaId</th>
      <th>mangabakaName</th>
      <th>firstMatchTime</th>
      <th>Image</th>
      <th></th>
    </tr>
  </thead>
  <tbody id="rows"></tbody>
 </table>
 <script>
 const TYPES = "&type=manhwa&type=manhua&type=manga";
 function fmtTime(unix) {
  if (!unix) return "";
  const d = new Date(unix * 1000);
  return d.toLocaleString();
 }
 function searchUrl(title) {
  return "https://mangabaka.org/search?q=" + encodeURIComponent(title) + TYPES;
 }
 function setStatus(msg) { document.getElementById("status").textContent = msg; }
 function makeRow(title, e) {
  const tr = document.createElement("tr");
  tr.dataset.originalTitle = title;
  const titleTd = document.createElement("td");
  titleTd.className = "title";
  const titleLink = document.createElement("a");
  titleLink.href = searchUrl(title);
  titleLink.target = "_blank";
  titleLink.rel = "noopener";
  titleLink.textContent = title;
  const titleInput = document.createElement("input");
  titleInput.value = title;
  titleInput.style.marginTop = ".25rem";
  titleInput.addEventListener("input", () => {
    titleLink.textContent = titleInput.value;
    titleLink.href = searchUrl(titleInput.value);
    tr.classList.add("dirty");
  });
  titleTd.append(titleLink, titleInput);
  tr.appendChild(titleTd);
  function field(value) {
    const td = document.createElement("td");
    const inp = document.createElement("input");
    inp.value = value || "";
    inp.addEventListener("input", () => tr.classList.add("dirty"));
    td.appendChild(inp);
    return [td, inp];
  }
  const [idTd,   idInp]   = field(e.mangabakaId);
  const [nameTd, nameInp] = field(e.mangabakaName);
  const [urlTd,  urlInp]  = field(e.imageUrl);
  tr.appendChild(idTd);
  tr.appendChild(nameTd);
  const timeTd = document.createElement("td");
  timeTd.textContent = fmtTime(e.firstMatchTime);
  tr.appendChild(timeTd);
  const imgTd = document.createElement("td");
  imgTd.className = "image";
  const img = document.createElement("img");
  img.src = e.imageUrl || "";
  img.alt = "";
  img.loading = "lazy";
  urlInp.addEventListener("input", () => { img.src = urlInp.value; });
  imgTd.append(img, urlInp);
  tr.appendChild(imgTd);
  const actTd = document.createElement("td");
  actTd.className = "actions";
  const save = document.createElement("button");
  save.textContent = "Save";
  save.className = "primary";
  save.addEventListener("click", async () => {
    save.disabled = true;
    setStatus("Saving " + titleInput.value + "…");
    const body = {
      originalTitle: tr.dataset.originalTitle,
      title:         titleInput.value,
      mangabakaId:   idInp.value,
      mangabakaName: nameInp.value,
      imageUrl:      urlInp.value,
    };
    try {
      const r = await fetch("/api/matches", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(body),
      });
      if (!r.ok) throw new Error(await r.text());
      tr.dataset.originalTitle = titleInput.value;
      tr.classList.remove("dirty");
      setStatus("Saved " + titleInput.value);
    } catch (err) {
      setStatus("Save failed: " + err.message);
    } finally {
      save.disabled = false;
    }
  });
  const del = document.createElement("button");
  del.textContent = "Delete";
  del.className = "danger";
  del.style.marginLeft = ".25rem";
  del.addEventListener("click", async () => {
    if (!confirm("Delete " + tr.dataset.originalTitle + "?")) return;
    setStatus("Deleting " + tr.dataset.originalTitle + "…");
    try {
      const r = await fetch("/api/matches/delete", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ title: tr.dataset.originalTitle }),
      });
      if (!r.ok) throw new Error(await r.text());
      tr.remove();
      setStatus("Deleted");
    } catch (err) {
      setStatus("Delete failed: " + err.message);
    }
  });
  actTd.append(save, del);
  tr.appendChild(actTd);
  return tr;
 }
 async function load() {
  setStatus("Loading…");
  const tbody = document.getElementById("rows");
  tbody.innerHTML = "";
  try {
    const r = await fetch("/api/matches");
    const data = await r.json();
    const matches = data.matches || {};
    const titles = Object.keys(matches).sort((a,b)=>a.localeCompare(b));
    for (const t of titles) tbody.appendChild(makeRow(t, matches[t]));
    setStatus(titles.length + " entries");
    applyFilter();
  } catch (err) {
    setStatus("Load failed: " + err.message);
  }
 }
 function applyFilter() {
  const q = document.getElementById("filter").value.toLowerCase();
  for (const tr of document.querySelectorAll("#rows tr")) {
    const t = tr.dataset.originalTitle.toLowerCase();
    tr.style.display = t.includes(q) ? "" : "none";
  }
 }
 document.getElementById("filter").addEventListener("input", applyFilter);
 document.getElementById("reload").addEventListener("click", load);
 document.getElementById("build").addEventListener("click", async () => {
  if (!confirm("Run full scan? This may take several minutes.")) return;
  setStatus("Building… (running on the server)");
  try {
    const r = await fetch("/api/build", { method: "POST" });
    if (!r.ok) throw new Error(await r.text());
    setStatus("Build finished");
    load();
  } catch (err) {
    setStatus("Build failed: " + err.message);
  }
 });
 load();
 </script>
 </body>
 </html>
 """
 class MatchesWebApp:
    """
    Flask app exposing the MatchesCache.

    `mover` is optional — if provided, POST /api/build calls
    ``mover.build_matches_only()`` inside the request handler (the server
    is started with ``threaded=True``).  Without a mover that route
    answers 503.
    """
    def __init__(self, cache: MatchesCache, *,
                 mover=None,
                 host: str = "0.0.0.0",
                 port: int = 8080):
        """
        Parameters
        ----------
        cache : MatchesCache backing all /api/matches routes.
        mover : optional object providing ``build_matches_only()``.
        host, port : bind address passed to ``Flask.run``.
        """
        self._cache = cache
        self._mover = mover
        self._host = host
        self._port = port
        # Guards /api/build so only one build runs at a time.
        self._build_lock = threading.Lock()
        self._app = Flask(__name__)
        self._thread: "threading.Thread | None" = None
        self._register_routes()
    @property
    def app(self) -> Flask:
        """The underlying Flask application object."""
        return self._app
    def start(self) -> threading.Thread:
        """
        Starts the Flask server on a background thread and returns it.
        The thread is non-daemon so the process stays alive even when the
        caller does not explicitly join() — important when this is the
        only foreground task (e.g. watcher disabled for testing).
        Calling start() while the thread is alive returns the existing
        thread instead of starting a second server.
        """
        if self._thread is not None and self._thread.is_alive():
            return self._thread
        self._thread = threading.Thread(
            target=self._app.run,
            kwargs={"host": self._host, "port": self._port,
                    "debug": False, "use_reloader": False,
                    "threaded": True},
            name="MatchesWebApp",
            daemon=False,
        )
        self._thread.start()
        print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
              flush=True)
        return self._thread
    def wait(self) -> None:
        """Blocks until the Flask thread exits (or returns immediately if not started)."""
        if self._thread is not None:
            self._thread.join()
    # ------------------------------------------------------------------
    # Request helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _json_object() -> dict:
        """
        Returns the request body as a dict.  Missing, malformed or
        non-object JSON (e.g. a top-level array) yields ``{}`` so the
        handlers can answer 400 instead of failing on ``.get``.
        """
        body = request.get_json(silent=True)
        return body if isinstance(body, dict) else {}
    @staticmethod
    def _clean_str(value) -> str:
        """Returns ``value`` stripped when it is a string, otherwise ``""``."""
        return value.strip() if isinstance(value, str) else ""
    # ------------------------------------------------------------------
    # Routes
    # ------------------------------------------------------------------
    def _register_routes(self) -> None:
        """Registers all HTTP routes on the Flask app."""
        app = self._app
        cache = self._cache
        @app.get("/")
        def index() -> Response:
            # `mimetype` must be the bare type: Werkzeug appends the
            # charset parameter for text/* itself.
            return Response(_INDEX_HTML, mimetype="text/html")
        @app.get("/api/matches")
        def api_list():
            return jsonify(cache.all())
        @app.post("/api/matches")
        def api_upsert():
            body = self._json_object()
            title = self._clean_str(body.get("title"))
            if not title:
                return Response("title is required", status=400)
            # An absent/blank originalTitle means "no rename".
            original = self._clean_str(body.get("originalTitle")) or title
            if original != title:
                cache.rename(original, title)
            entry = cache.upsert(
                title,
                mangabaka_id=body.get("mangabakaId"),
                mangabaka_name=body.get("mangabakaName"),
                image_url=body.get("imageUrl"),
                first_match_time=body.get("firstMatchTime"),
            )
            return jsonify({"title": title, "entry": entry})
        @app.post("/api/matches/delete")
        def api_delete():
            body = self._json_object()
            title = self._clean_str(body.get("title"))
            if not title:
                return Response("title is required", status=400)
            removed = cache.remove(title)
            return jsonify({"removed": removed, "title": title})
        @app.post("/api/build")
        def api_build():
            if self._mover is None:
                return Response("no mover configured", status=503)
            # Non-blocking acquire: a concurrent build request gets 409
            # instead of queueing behind the running one.
            if not self._build_lock.acquire(blocking=False):
                return Response("build already running", status=409)
            try:
                result = self._mover.build_matches_only()
            finally:
                self._build_lock.release()
            return jsonify(result)
@@ -0,0 +1,72 @@
 """
 text_utils.py
 =============
 Small text helpers shared across modules:
 * ``paragraphs_to_html`` — converts plain text with blank-line paragraph
  breaks into compact HTML (used for Kavita summary / description fields,
  which must not contain raw newlines).
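 * ``best_similarity`` — best difflib ratio between a query string and a
  list of candidate strings (used for title / person-name matching).
 * ``person_name_with_id`` — suffixes a character name with its MAL /
  AniList character id so identically named characters stay distinct.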
 """
 from __future__ import annotations
 import difflib
 import re
 from typing import Iterable
 def paragraphs_to_html(text: str) -> str:
    """
    Converts plain text with paragraph breaks to compact HTML (no raw
    \\n or \\r).

    Paragraphs are separated by one or more blank lines and wrapped in
    ``<p>…</p>``; single line breaks inside a paragraph become ``<br>``.
    Windows (``\\r\\n``) and old-Mac (``\\r``) line endings are normalised
    first so they are recognised as breaks and never leak into the output.
    Empty / ``None`` input returns ``""``.

    Note: the text is inserted verbatim — it is not HTML-escaped.
    """
    if not text:
        return ""
    # Normalise CRLF and lone CR to LF; otherwise "\r\n\r\n" would not
    # match the blank-line split below and "\r" would survive in the result.
    normalised = re.sub(r"\r\n?", "\n", text)
    parts: list[str] = []
    for para in re.split(r"\n{2,}", normalised.strip()):
        para = para.strip()
        if para:
            parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
    return "".join(parts)
 def best_similarity(query: str, candidates: Iterable[str]) -> float:
    """
    Highest difflib ``SequenceMatcher`` ratio (0..1) between `query` and
    the candidates, compared in lower case.

    Falsy candidates are skipped; the remaining ones are converted with
    ``str()`` before comparison.  A falsy `query` is treated as the empty
    string.  When nothing is left to compare the result is ``0.0``.
    """
    needle = (query or "").lower()
    scores = (
        difflib.SequenceMatcher(None, needle, str(option).lower()).ratio()
        for option in candidates
        if option
    )
    return max(scores, default=0.0)
 def person_name_with_id(name: str, *,
                        mal_id: "int | None" = None,
                        al_id:  "int | None" = None) -> str:
    """
    Appends a tracker character id to a name, e.g. "Rem (MAL 118737)".

    Purpose: Kavita Person records are global and keyed by name only, so
    two different characters sharing a name would end up in one record.
    The tracker *character* id keeps them apart while the manga and the
    light-novel version of a series still share the record (MAL/AniList
    character ids are per character, not per medium).

    Rules:
    * the name is stripped; an empty name is returned unchanged ("");
    * a truthy ``mal_id`` wins and produces the "MAL" marker;
    * otherwise a truthy ``al_id`` produces the "AL" marker, which keeps
      the two id spaces from colliding;
    * with no id the stripped name is returned as-is.

    The format must stay in sync with the manga project so both tools
    address the same Kavita person records.
    """
    cleaned = (name or "").strip()
    # Nothing to suffix: empty name, or neither id supplied.
    if not cleaned or not (mal_id or al_id):
        return cleaned
    if mal_id:
        return f"{cleaned} (MAL {mal_id})"
    return f"{cleaned} (AL {al_id})"
@@ -0,0 +1,313 @@
 """
 kavita_series_updater.py
 ========================
 Diff-based update of a single Kavita series record from a
 LightNovelMetadataBuilder output dict.
 Behaviour
 ---------
 * Locked fields in Kavita (``*Locked`` flags) are never touched, no matter
  what MangaBaka returns.
 * Scalar fields (summary, releaseYear, ageRating, publicationStatus,
  language, score, sortName, localizedName) are overwritten when the
  newly-built value differs from the value currently stored in Kavita.
 * List fields (genres, tags, characters, writers, coverArtists,
  publishers, imprints) are diff-merged: a name appearing in the new
  set but not in the current one is added (id=0 so Kavita creates the
  record); a name that is in Kavita but no longer in the new set is
  dropped.  Comparison is case-insensitive on the ``name`` field.
 * Web links are stored as a comma-separated string in Kavita; this
  updater treats them as a set and re-joins on write.
 * Series-level cover image (URL different from last time) is re-uploaded
  whenever ``coverImageLocked`` is False.  The MangaBaka cover URL is
  stamped onto matches.json as ``imageUrl`` so a subsequent run can skip
  the upload when nothing changed.
 Returns a small diff report per series
 ({field: 'changed'/'unchanged'/'skipped'/'locked'}, or 'error: …' for a
 failed cover upload) so the WebApp can surface what happened.
 """
 from __future__ import annotations
 from typing import Iterable
 from KavitaClient import KavitaClient
 # Kavita "list" fields on SeriesMetadataDto, one tuple per field:
 #   (metadata_key, lock_flag, item_key)
 # `metadata_key` is the list's key on the metadata dict, `lock_flag` the
 # boolean key that marks the list as locked, and `item_key` the dict key
 # Kavita uses for the display name on each item:
 # GenreTagDto / TagDto use "title", PersonDto uses "name".
 _LIST_FIELDS: list[tuple[str, str, str]] = [
    ("genres",       "genresLocked",      "title"),
    ("tags",         "tagsLocked",        "title"),
    ("characters",   "characterLocked",   "name"),
    ("writers",      "writerLocked",      "name"),
    ("coverArtists", "coverArtistLocked", "name"),
    ("publishers",   "publisherLocked",   "name"),
    ("imprints",     "imprintLocked",     "name"),
 ]
 def _norm(name: str) -> str:
    return (name or "").strip().lower()
 def _merge_list(
    current: list[dict],
    new_names: Iterable[str],
    item_key: str,
 ) -> "tuple[list[dict], bool]":
    """
    Diff-merges a Kavita list field with the canonical name list from
    MangaBaka.  Returns (merged_list, changed_flag).
    `item_key` is the dict key Kavita uses for the display name on each
    item ("title" for GenreTagDto/TagDto, "name" for PersonDto).
    * Items in `current` whose display value appears in `new_names` are
      kept verbatim so existing ids and ancillary fields survive.
    * New names (no matching entry in `current`) are appended with
      ``{"id": 0, <item_key>: <name>}`` — Kavita creates the record on save.
    * Items in `current` whose display value is *not* in `new_names` are
      dropped.
    Names are compared via `_norm` (trimmed, case-insensitive); blank or
    empty entries in `new_names` are ignored.  `changed_flag` is True only
    when at least one item was really added or dropped.
    """
    existing = current or []
    # Normalised key -> display form; a later duplicate overrides an
    # earlier one that normalises to the same key.
    new_index = {_norm(n): n.strip() for n in new_names if n and n.strip()}
    merged: list[dict] = []
    kept_keys: set[str] = set()
    for item in existing:
        key = _norm(item.get(item_key))
        if key in new_index:
            merged.append(item)
            kept_keys.add(key)
    # Count kept *items*, not distinct keys: two current entries sharing
    # one normalised name are both kept, so comparing against the key set
    # would report a removal on every run although nothing was dropped.
    removed = len(merged) != len(existing)
    additions = [
        {"id": 0, item_key: display}
        for key, display in new_index.items()
        if key not in kept_keys
    ]
    merged.extend(additions)
    return merged, bool(additions) or removed
 def _parse_web_links(value) -> list[str]:
    if not value:
        return []
    if isinstance(value, list):
        return [str(v).strip() for v in value if v]
    return [p.strip() for p in str(value).split(",") if p.strip()]
 def _merge_web_links(current_str, new_links: list[str]) -> "tuple[str, bool]":
    """
    Replaces Kavita's stored web links with the freshly built ones.
    Returns (comma_joined_links, changed_flag).  When `new_links` holds no
    truthy entry the current links are returned re-joined and flagged as
    unchanged.  Otherwise the result is `new_links` in its own order
    (links only present in Kavita are dropped) and the flag tells whether
    the two sides differ when compared as sets, i.e. ignoring order and
    duplicates.
    """
    existing = _parse_web_links(current_str)
    incoming = [link for link in new_links if link]
    if not incoming:
        return ",".join(existing), False
    differs = sorted(set(incoming)) != sorted(set(existing))
    return ",".join(incoming), differs
 class KavitaSeriesUpdater:
    """
    Writes freshly built metadata onto one Kavita series, touching only
    the values that differ and never a field whose lock flag is set.
    """
    def __init__(self, client: KavitaClient):
        self._client = client
    # ------------------------------------------------------------------
    # Public
    # ------------------------------------------------------------------
    def update_series(self, series_id: int, built: dict, *,
                      previous_cover_url: "str | None" = None) -> dict:
        """
        Loads the series and its metadata from Kavita, applies the diff
        against `built` and pushes each DTO back only when it changed.
        The cover is uploaded when `built` has a cover URL, the cover is
        not locked and the URL differs from `previous_cover_url`.
        Returns the per-field report dict.
        """
        series_dto = self._client.get_series(series_id)
        metadata_dto = self._client.get_series_metadata(series_id)
        report: dict = {}
        if self._diff_metadata(metadata_dto, built, report):
            self._client.update_series_metadata(metadata_dto)
        if self._diff_series(series_dto, built, report):
            self._client.update_series(series_dto)
        # Cover handling: upload / locked / nothing to do.
        cover_url = built.get("coverUrl")
        cover_locked = series_dto.get("coverImageLocked")
        if cover_url and not cover_locked and cover_url != previous_cover_url:
            try:
                self._client.upload_series_cover(series_id, cover_url)
            except Exception as exc:
                report["coverImage"] = f"error: {exc}"
            else:
                report["coverImage"] = "changed"
        elif cover_locked:
            report["coverImage"] = "locked"
        else:
            report["coverImage"] = "skipped"
        return report
    # ------------------------------------------------------------------
    # Internal: SeriesMetadataDto
    # ------------------------------------------------------------------
    def _diff_metadata(self, metadata: dict, built: dict,
                       report: dict) -> bool:
        """
        Updates `metadata` in place from `built` (scalars, web links, list
        fields), records the outcome of every field in `report` and
        returns True when anything was modified.
        """
        dirty = False
        # ----- Scalars ------------------------------------------------
        # (key, lock flag, converter, zero_means_no_data) — the key is the
        # same on the built dict and on the metadata dict.  A 0 only
        # counts as "no data" for releaseYear / ageRating;
        # publicationStatus 0 is the valid "Ongoing" status.
        scalar_specs = (
            ("summary",           "summaryLocked",           None, False),
            ("releaseYear",       "releaseYearLocked",       int,  True),
            ("ageRating",         "ageRatingLocked",         int,  True),
            ("publicationStatus", "publicationStatusLocked", int,  False),
            ("language",          "languageLocked",          None, False),
        )
        for key, lock_flag, convert, zero_means_no_data in scalar_specs:
            candidate = built.get(key)
            usable = not (candidate is None or candidate == "")
            if usable and convert is not None:
                try:
                    candidate = convert(candidate)
                except (TypeError, ValueError):
                    usable = False
            if usable and zero_means_no_data and candidate == 0:
                usable = False
            if not usable:
                report[key] = "skipped"
            elif metadata.get(lock_flag):
                report[key] = "locked"
            elif metadata.get(key) != candidate:
                metadata[key] = candidate
                dirty = True
                report[key] = "changed"
            else:
                report[key] = "unchanged"
        # ----- Web links (single comma-separated string) ---------------
        # No lock flag is consulted for webLinks — they are always synced.
        joined_links, links_differ = _merge_web_links(
            metadata.get("webLinks"), built.get("webLinks") or [])
        if links_differ:
            metadata["webLinks"] = joined_links
            dirty = True
        report["webLinks"] = "changed" if links_differ else "unchanged"
        # ----- List fields --------------------------------------------
        plain_fields = ("genres", "tags", "characters",
                        "writers", "coverArtists", "publishers")
        wanted_by_field = {field: built.get(field) for field in plain_fields}
        # The builder delivers a single imprint; Kavita stores a list.
        wanted_by_field["imprints"] = (
            [built["imprint"]] if built.get("imprint") else [])
        for field, lock_flag, item_key in _LIST_FIELDS:
            if metadata.get(lock_flag):
                report[field] = "locked"
                continue
            wanted = wanted_by_field.get(field) or []
            existing = metadata.get(field) or []
            if not wanted and not existing:
                report[field] = "unchanged"
                continue
            merged, differs = _merge_list(existing, wanted, item_key)
            if differs:
                metadata[field] = merged
                dirty = True
            report[field] = "changed" if differs else "unchanged"
        return dirty
    # ------------------------------------------------------------------
    # Internal: SeriesDto (sortName, userRating, tracker ids)
    # ------------------------------------------------------------------
    def _diff_series(self, series: dict, built: dict, report: dict) -> bool:
        """
        Updates `series` in place (sortName, localizedName, tracker ids,
        userRating) from `built`, records outcomes in `report` and returns
        True when anything was modified.
        """
        dirty = False
        # sortName / localizedName: only overwritten by a non-empty value.
        for field, lock_flag in (("sortName", "sortNameLocked"),
                                 ("localizedName", "localizedNameLocked")):
            if series.get(lock_flag):
                report[field] = "locked"
                continue
            proposed = built.get(field) or ""
            if proposed and series.get(field) != proposed:
                series[field] = proposed
                dirty = True
                report[field] = "changed"
            else:
                report[field] = "unchanged"
        # Tracker ids: built-dict key -> SeriesDto key.  Missing, empty or
        # non-numeric ids are ignored; only a change is reported.
        tracker_keys = {
            "malId":       "malId",
            "anilistId":   "aniListId",
            "mangabakaId": "mangaBakaId",
        }
        for built_key, series_key in tracker_keys.items():
            raw_id = built.get(built_key)
            if raw_id in (None, "", 0):
                continue
            try:
                tracker_id = int(raw_id)
            except (TypeError, ValueError):
                continue
            if int(series.get(series_key) or 0) == tracker_id:
                continue
            series[series_key] = tracker_id
            dirty = True
            report[series_key] = "changed"
        # userRating from the built score (0..5); a missing or
        # non-numeric score leaves the rating untouched and unreported.
        try:
            rating = float(built.get("score"))
        except (TypeError, ValueError):
            rating = None
        if rating is None:
            return dirty
        try:
            stored = float(series.get("userRating"))
        except (TypeError, ValueError):
            stored = None
        if stored != rating:
            series["userRating"] = rating
            series["hasUserRated"] = True
            dirty = True
            report["userRating"] = "changed"
        else:
            report["userRating"] = "unchanged"
        return dirty
@@ -0,0 +1,571 @@
 """
 light_novel_metadata_builder.py
 ===============================
 Fetches series-level metadata for a light novel from MangaBaka, enriches
 it with MyAnimeList / AniList tracker statistics and character data, and
 returns a structured dict ready to be diffed against Kavita's
 SeriesMetadataDto.
 Differences vs. the manga project's ComicInfoBuilder:
  - No chapter / page handling — Kavita reads volumes from the files.
  - No XML output — produces a plain dict.
  - No MangaDex resolver — light novels don't have a chapter→volume
    mapping problem.
  - MangaBaka search type is fixed to ``novel`` so only light/web novels
    are returned.
 """
 from __future__ import annotations
 import re
 import requests
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from MatchesCache import MatchesCache
 from TextUtils import paragraphs_to_html, person_name_with_id
 # MangaBaka series type for the search endpoint (sent as the ``type``
 # query parameter by ``search_series``).
 _SEARCH_TYPES = ["novel"]
 # MangaBaka content_rating  ->  Kavita AgeRating enum
 # Kavita AgeRating values (from openapi.json):
 #   0=Unknown, 3=Everyone, 8=Teen, 10=Mature17Plus, 13=AdultsOnly
 # The lookup in ``_assemble`` uses the raw content_rating value and falls
 # back to 0 for anything not listed here.
 _AGE_RATING_MAP = {
    "safe":          3,    # Everyone
    "suggestive":    8,    # Teen
    "erotica":       10,   # Mature17Plus
    "pornographic":  13,   # AdultsOnly
 }
 # MangaBaka status  ->  Kavita PublicationStatus enum
 # Kavita PublicationStatus (from openapi.json):
 #   0=OnGoing, 1=Hiatus, 2=Completed, 3=Cancelled, 4=Ended
 # The lookup in ``_assemble`` lower-cases the status first and falls back
 # to 0 (OnGoing) for a missing or unlisted status.
 _PUB_STATUS_MAP = {
    "ongoing":   0,
    "hiatus":    1,
    "completed": 2,
    "cancelled": 3,
    "ended":     4,
 }
 # External-tracker URL templates used to enrich the web-links list.
 # Keys are matched against the MangaBaka ``source`` keys after they went
 # through ``_normalise_key`` (lower-case, alphanumerics only); ``{id}`` is
 # replaced with the tracker id of the series.
 _TRACKER_URL_TEMPLATES = {
    "anilist":          "https://anilist.co/manga/{id}",
    "myanimelist":      "https://myanimelist.net/manga/{id}",
    "mal":              "https://myanimelist.net/manga/{id}",
    "mangaupdates":     "https://www.mangaupdates.com/series.html?id={id}",
    "kitsu":            "https://kitsu.app/manga/{id}",
    "animenewsnetwork": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
    "ann":              "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
    "animeplanet":      "https://www.anime-planet.com/manga/{id}",
    "shikimori":        "https://shikimori.one/mangas/{id}",
    "bookwalker":       "https://bookwalker.jp/{id}",
 }
 # Matches a backslash followed by one Markdown special character and
 # captures that character; ``_md_to_html`` uses it to resolve backslash
 # escapes.
 _MD_ESCAPE_RE = re.compile(r'\\([\\`*_{}\[\]()\#+\-.!|~])')
 # --------------------------------------------------------------------------
 # Helpers
 # --------------------------------------------------------------------------
 def _normalise_key(key) -> str:
    return re.sub(r"[^a-z0-9]", "", str(key).lower())
 def _format_term(value: str) -> str:
    return str(value).replace("_", " ").strip().title() if value else ""
 def _md_to_html(text: str) -> str:
    """
    Converts the subset of Markdown produced by MangaBaka to compact HTML.
    Handles ``[label](url)`` links, ``**bold**`` and ``*emphasis*``, then
    hands the result to `paragraphs_to_html` for paragraph / line-break
    markup.  Backslash-escaped characters (``\\*``, ``\\[`` …) are emitted
    as the literal character and never take part in link or emphasis
    matching.  Returns "" for empty input.
    """
    if not text:
        return ""
    # Park every escaped character behind a NUL-delimited placeholder
    # before converting markup.  Unescaping first would turn "\*" into a
    # bare "*" that the emphasis pattern below then treats as markup.
    literals: list[str] = []
    def _park(match) -> str:
        literals.append(match.group(1))
        return f"\x00{len(literals) - 1}\x00"
    def _restore(match) -> str:
        index = int(match.group(1))
        # Leave anything that merely looks like a placeholder untouched.
        return literals[index] if index < len(literals) else match.group(0)
    text = _MD_ESCAPE_RE.sub(_park, text)
    text = re.sub(
        r'\[([^\]]+)\]\(([^)]+)\)',
        lambda m: f'<a href="{m.group(2)}">{m.group(1)}</a>',
        text,
    )
    text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text, flags=re.DOTALL)
    text = re.sub(r'\*(.+?)\*',     r'<em>\1</em>',         text, flags=re.DOTALL)
    # Put the literal characters back now that markup matching is done.
    text = re.sub(r'\x00(\d+)\x00', _restore, text)
    return paragraphs_to_html(text)
 def pick_cover_url(cover) -> "str | None":
    """
    Chooses the best available URL from a MangaBaka cover value.
    A non-empty string is returned as-is.  For a dict the order is: the
    ``raw`` entry (its ``url`` when it is a dict, the string itself
    otherwise), then the sized variants x350 / x250 / x150 (densities
    x3, x2, x1 when the variant is a dict), and finally the first string
    starting with "http" found among the dict's values or one level
    below.  Anything else gives None.
    """
    if isinstance(cover, str):
        return cover or None
    if not cover or not isinstance(cover, dict):
        return None
    def usable(candidate) -> bool:
        return isinstance(candidate, str) and bool(candidate)
    # 1. Original-size image.
    raw = cover.get("raw")
    raw_url = raw.get("url") if isinstance(raw, dict) else raw
    if usable(raw_url):
        return raw_url
    # 2. Sized variants, largest first.
    for size in ("x350", "x250", "x150"):
        variant = cover.get(size)
        if isinstance(variant, dict):
            options = [variant.get(density) for density in ("x3", "x2", "x1")]
        else:
            options = [variant]
        for option in options:
            if usable(option):
                return option
    # 3. Last resort: anything that looks like an http(s) URL.
    for value in cover.values():
        nested = value.values() if isinstance(value, dict) else (value,)
        for candidate in nested:
            if isinstance(candidate, str) and candidate.startswith("http"):
                return candidate
    return None
 def pick_thumbnail_url(cover) -> "str | None":
    """
    Chooses a small cover variant for UI thumbnails.
    A non-empty string is returned as-is.  For a dict the sized variants
    are tried smallest first (x150, x250, x350; densities x2, x1, x3 when
    the variant is a dict); when none of them yields a URL the decision
    is delegated to `pick_cover_url`.  Anything else gives None.
    """
    if isinstance(cover, str):
        return cover or None
    if not cover or not isinstance(cover, dict):
        return None
    for size in ("x150", "x250", "x350"):
        variant = cover.get(size)
        if isinstance(variant, dict):
            choices = [variant.get(density) for density in ("x2", "x1", "x3")]
        else:
            choices = [variant]
        for choice in choices:
            if isinstance(choice, str) and choice:
                return choice
    return pick_cover_url(cover)
 def _id_from_source(md: dict, *names: str) -> "int | None":
    """
    Looks up a tracker id in ``md["source"]``.
    Source keys and `names` are compared after `_normalise_key`, so
    spelling variants of the same tracker match.  The first matching dict
    entry whose ``id`` converts to int wins; entries with a missing or
    non-numeric id are skipped.  Returns None when nothing matches.
    """
    wanted = {_normalise_key(alias) for alias in names}
    sources = md.get("source") or {}
    for source_name, entry in sources.items():
        if not isinstance(entry, dict):
            continue
        if _normalise_key(source_name) not in wanted:
            continue
        raw_id = entry.get("id")
        if raw_id is None:
            continue
        try:
            return int(raw_id)
        except (TypeError, ValueError):
            continue
    return None
 # --------------------------------------------------------------------------
 # Builder
 # --------------------------------------------------------------------------
 class LightNovelMetadataBuilder:
    """
    Resolves a light-novel series on MangaBaka and produces a structured
    metadata dict ready to be merged into Kavita.
    """
    def __init__(self, *,
                 api_base_url: str = "https://api.mangabaka.dev/v1",
                 language: str = "en",
                 request_timeout: int = 30,
                 session: "requests.Session | None" = None,
                 mal_resolver: "MALResolver | None" = None,
                 al_resolver: "AniListResolver | None" = None,
                 matches_cache: "MatchesCache | None" = None):
        """
        All collaborators are optional: a fresh ``requests.Session`` and
        novel-specific MAL / AniList resolvers are created when none are
        passed in.  The session (own or injected) gets a default
        User-Agent and the MangaBaka rate limit applied.  Without a
        `matches_cache`, `resolve` always performs a fresh search.
        """
        self.api_base_url = api_base_url.rstrip("/")
        self.language = language
        self.request_timeout = request_timeout
        self._session = session or requests.Session()
        self._session.headers.setdefault("User-Agent",
                                         "LightNovelMetadataBuilder/1.0")
        _apply_mangabaka_rate_limit(self._session)
        self._mal = mal_resolver or MALResolver(
            request_timeout=request_timeout, search_type="lightnovel")
        self._al  = al_resolver  or AniListResolver(
            request_timeout=request_timeout, media_format="novel")
        self._matches_cache = matches_cache
    # ------------------------------------------------------------------
    # MangaBaka search / fetch
    # ------------------------------------------------------------------
    def search_series(self, title: str) -> "dict | None":
        """
        Returns the top MangaBaka novel hit for `title`, or None.
        None is returned for a blank title, for a request / HTTP error,
        for a response body that is not valid JSON, and when the payload
        carries no non-empty ``data`` list.
        """
        if not title or not title.strip():
            return None
        url = f"{self.api_base_url}/series/search"
        try:
            resp = self._session.get(
                url, params={"q": title, "type": _SEARCH_TYPES,
                             "page": 1, "limit": 1},
                timeout=self.request_timeout)
            resp.raise_for_status()
        except requests.RequestException:
            return None
        # A successful status with a broken or unexpected body must not
        # crash this best-effort lookup; JSON decoding problems surface
        # as ValueError.
        try:
            payload = resp.json()
        except ValueError:
            return None
        data = payload.get("data") if isinstance(payload, dict) else None
        if isinstance(data, list) and data:
            return data[0]
        return None
    def fetch_series(self, series_id) -> "dict | None":
        """
        Returns the full MangaBaka series dict for the given id, following
        ``merged_with`` redirects.  A seen-set guards against merge cycles
        (a cycle ends the lookup with None).
        Returns None for a missing / blank id.  Unlike `search_series`,
        request and HTTP errors are not caught here and propagate to the
        caller.
        """
        if series_id is None or str(series_id).strip() == "":
            return None
        seen: set[str] = set()
        current = series_id
        while str(current) not in seen:
            seen.add(str(current))
            url = f"{self.api_base_url}/series/{current}"
            resp = self._session.get(url, timeout=self.request_timeout)
            resp.raise_for_status()
            data = resp.json().get("data")
            if data and data.get("state") == "merged" and data.get("merged_with"):
                current = data["merged_with"]
                continue
            return data
        return None
    # ------------------------------------------------------------------
    # Resolve title -> MangaBaka series (caches the match)
    # ------------------------------------------------------------------
    def resolve(self, title: str) -> "dict | None":
        """
        Returns the MangaBaka series for `title`.
        Lookup order:
          1. MatchesCache (uses stored mangabakaId, skips the search).
          2. Fresh MangaBaka search — top hit. Result is persisted to the
             cache so it survives a crash.
        """
        if self._matches_cache is not None:
            cached = self._matches_cache.get(title)
            if cached and cached.get("mangabakaId"):
                try:
                    series = self.fetch_series(cached["mangabakaId"])
                    if series:
                        return series
                except Exception:
                    # Deliberately best-effort: any failure while loading
                    # the cached id falls through to a fresh search.
                    pass
        series = self.search_series(title)
        if series and self._matches_cache is not None:
            self._matches_cache.upsert(
                title,
                mangabaka_id=series.get("id"),
                mangabaka_name=series.get("title") or "",
                image_url=pick_thumbnail_url(series.get("cover")),
            )
        return series
    # ------------------------------------------------------------------
    # Main entry point
    # ------------------------------------------------------------------
    def build(self, *, title: str = "",
              mangabaka_id=None) -> "dict | None":
        """
        Fetches and enriches metadata for one series and returns the
        normalised dict produced by `_assemble` (Kavita-oriented keys such
        as ``summary``, ``genres``, ``writers``, ``webLinks``, ``coverUrl``
        plus the raw MangaBaka record under ``raw``).
        Pass either `title` (will resolve via cache/search) or
        `mangabaka_id` (direct fetch); a non-blank `mangabaka_id` takes
        precedence.  Returns None when no series could be found.
        """
        if mangabaka_id is not None and str(mangabaka_id).strip():
            md = self.fetch_series(mangabaka_id)
        else:
            md = self.resolve(title)
        if not md:
            return None
        return self._assemble(md)
    # ------------------------------------------------------------------
    # Internal: assemble the result dict
    # ------------------------------------------------------------------
    def _assemble(self, md: dict) -> dict:
        """Builds the result dict from a MangaBaka series record `md`."""
        mal_id = _id_from_source(md, "myanimelist", "mal")
        al_id  = _id_from_source(md, "anilist")
        # Fall back to a title-based MAL lookup when the source map does
        # not carry an id — Jikan is the only tracker that ships staff
        # data we can use to enrich author / artist person records.
        if mal_id is None:
            mal_id = self._mal.find_mal_id(md.get("title") or "")
        mal_stats = self._mal.get_stats(mal_id) if mal_id else None
        # Characters / staff: MAL first, AniList only when MAL had nothing.
        characters_detailed = self._mal.get_characters_detailed(mal_id) if mal_id else []
        if not characters_detailed and al_id:
            characters_detailed = self._al.get_characters_detailed(al_id)
        staff_detailed = self._mal.get_staff_detailed(mal_id) if mal_id else []
        if not staff_detailed and al_id:
            staff_detailed = self._al.get_staff_detailed(al_id)
        # Character names for SeriesMetadata, disambiguated with the
        # tracker character id ("Rem (MAL 118737)") because Kavita person
        # records are global and keyed by name only.
        character_names = [
            person_name_with_id(c["name"],
                                mal_id=c.get("mal_id"),
                                al_id=c.get("al_id"))
            for c in characters_detailed if c.get("name")
        ]
        # Writers come from MangaBaka first (authoritative for novels)
        writers = list(md.get("authors") or [])
        # Illustrators / artists -> CoverArtists (Kavita has no dedicated
        # illustrator field, and Pencillers is the wrong semantic for
        # text-only novels).
        cover_artists = list(md.get("artists") or [])
        # Publisher: prefer English licence, else original.  When both
        # exist, the original publisher becomes the imprint.
        english_pubs  = self._publishers_by_type(md, "English")
        original_pubs = self._publishers_by_type(md, "Original")
        publishers = english_pubs or original_pubs
        imprint = original_pubs[0] if english_pubs and original_pubs else None
        # Release year
        release_year = None
        try:
            if md.get("year") is not None:
                release_year = int(md["year"])
        except (TypeError, ValueError):
            pass
        # Score: MangaBaka rating is 0..100  ->  Kavita userRating is 0..5
        score = None
        if md.get("rating") is not None:
            try:
                score = round(float(md["rating"]) / 20.0, 1)
            except (TypeError, ValueError):
                pass
        # Tags / genres come back as snake_case slugs.
        genres = [_format_term(g) for g in (md.get("genres") or []) if g]
        tags   = [_format_term(t) for t in (md.get("tags")   or []) if t]
        # Web links
        web_links = self._collect_web_links(md)
        # Summary HTML
        summary = self._build_summary(md, mal_stats)
        # Cover URL
        cover_url = pick_cover_url(md.get("cover"))
        # Title variants
        all_alt = self._collect_all_alt_titles(md)
        return {
            "mangabakaId":     str(md.get("id") or ""),
            "mangabakaTitle":  md.get("title") or "",
            "originalName":    md.get("native_title") or "",
            "localizedName":   md.get("romanized_title") or "",
            "sortName":        self._sort_title(md),
            "altTitles":       all_alt,
            "summary":         summary,
            "genres":          genres,
            "tags":            tags,
            "characters":      character_names,
            "writers":         writers,
            "coverArtists":    cover_artists,
            "publishers":      publishers,
            "imprint":         imprint,
            "releaseYear":     release_year,
            "ageRating":       _AGE_RATING_MAP.get(md.get("content_rating"), 0),
            "publicationStatus": _PUB_STATUS_MAP.get(
                (md.get("status") or "").lower(), 0),
            "language":        self.language,
            "webLinks":        web_links,
            "score":           score,
            "coverUrl":        cover_url,
            "malId":           mal_id,
            "anilistId":       al_id,
            "relationships":   list(md.get("relationships_v2") or []),
            "charactersDetailed": characters_detailed,
            "staffDetailed":   staff_detailed,
            "raw":             md,
        }
    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _publishers_by_type(md: dict, ptype: str) -> list[str]:
        """Names of all ``md["publishers"]`` entries whose ``type`` equals `ptype`."""
        return [p.get("name") for p in (md.get("publishers") or [])
                if p.get("type") == ptype and p.get("name")]
    def _sort_title(self, md: dict) -> str:
        """Title in the configured language, else the main title, else ""."""
        lang = self.language.lower()
        alts = self._collect_alt_titles(md)
        return alts.get(lang) or md.get("title") or ""
    def _collect_alt_titles(self, md: dict) -> "dict[str, str]":
        """Returns one best title per language code (en/de/jp/romaji)."""
        titles = md.get("titles") or md.get("alt_titles") or []
        def pick(language_codes: tuple, prefer_trait: "str | None" = None
                 ) -> "str | None":
            # Ranks candidates: preferred trait (+4), "official" (+2),
            # primary flag (+1); the first entry with the top score wins.
            best_score = -1
            best_title: "str | None" = None
            for entry in titles:
                if not isinstance(entry, dict):
                    continue
                lang = (entry.get("language") or entry.get("lang") or "").lower()
                if lang not in language_codes:
                    continue
                title = entry.get("title")
                if not title:
                    continue
                traits = entry.get("traits") or []
                score = 0
                if prefer_trait and prefer_trait in traits:
                    score += 4
                if "official" in traits:
                    score += 2
                if entry.get("is_primary"):
                    score += 1
                if score > best_score:
                    best_score, best_title = score, title
            return best_title
        result: dict[str, str] = {}
        kanji = pick(("ja",), prefer_trait="native") or md.get("native_title")
        if kanji:
            result["jp"] = kanji
        romaji = pick(("ja-latn", "ja-romaji"))
        if not romaji:
            # The romanized title only counts as romaji when it is pure ASCII.
            rt = md.get("romanized_title") or ""
            if rt and all(ord(c) < 128 for c in rt):
                romaji = rt
        if romaji:
            result["romaji"] = romaji
        en = pick(("en",)) or md.get("title")
        if en:
            result["en"] = en
        de = pick(("de",))
        if de:
            result["de"] = de
        return result
    @staticmethod
    def _collect_all_alt_titles(md: dict) -> "dict[str, list[str]]":
        """
        Groups every known title by language group (en, de, ja, ja-romaji,
        ko, ko-romaji, zh, zh-romaji).  Titles are stripped, de-duplicated
        per group and kept in input order; entries in other languages are
        ignored.
        """
        _GROUPS = {
            "en":        ("en",),
            "de":        ("de",),
            "ja":        ("ja",),
            "ja-romaji": ("ja-latn", "ja-romaji"),
            "ko":        ("ko",),
            "ko-romaji": ("ko-latn", "ko-romaji"),
            "zh":        ("zh", "zh-hk", "zh-tw", "zh-hans", "zh-hant"),
            "zh-romaji": ("zh-latn",),
        }
        lang_to_group = {l: g for g, ls in _GROUPS.items() for l in ls}
        result: dict[str, list[str]] = {}
        seen:   dict[str, set]       = {}
        for entry in (md.get("titles") or md.get("alt_titles") or []):
            if not isinstance(entry, dict):
                continue
            lang  = (entry.get("language") or entry.get("lang") or "").lower()
            group = lang_to_group.get(lang)
            if not group:
                continue
            title = (entry.get("title") or "").strip()
            if not title:
                continue
            result.setdefault(group, [])
            seen.setdefault(group, set())
            if title not in seen[group]:
                result[group].append(title)
                seen[group].add(title)
        return result
    def _collect_web_links(self, md: dict) -> list[str]:
        """
        Returns the series' links followed by tracker URLs derived from
        ``md["source"]``, de-duplicated with the first occurrence kept.
        """
        links: list[str] = [l for l in (md.get("links") or []) if l]
        for raw_key, info in (md.get("source") or {}).items():
            template = _TRACKER_URL_TEMPLATES.get(_normalise_key(raw_key))
            if not template or not isinstance(info, dict):
                continue
            source_id = info.get("id")
            if source_id is not None:
                links.append(template.format(id=source_id))
        seen: set[str] = set()
        unique: list[str] = []
        for link in links:
            if link not in seen:
                seen.add(link)
                unique.append(link)
        return unique
    def _build_summary(self, md: dict,
                       mal_stats: "dict | None") -> str:
        """
        Builds the HTML summary with stats table + description + alt titles.
        The three sections are optional and joined with ``<br>``.  Alt
        titles and the MyAnimeList URL are plain text from external
        sources, so they are HTML-escaped before being embedded.
        """
        from html import escape as _escape
        _TD = 'style="padding-right:1.5em"'
        parts: list[str] = []
        if mal_stats:
            url     = mal_stats.get("url", "")
            as_of   = mal_stats.get("as_of", "")
            rows: list[str] = []
            for label, key, fmt in (
                ("Score",     "score",      "{}"),
                ("Ranked",    "rank",       "#{}"),
                ("Scored by", "scored_by",  "{:,} users"),
                ("Popularity","popularity", "#{}"),
                ("Members",   "members",    "{:,}"),
                ("Favorites", "favorites",  "{:,}"),
            ):
                v = mal_stats.get(key)
                if v is None:
                    continue
                try:
                    formatted = fmt.format(v)
                except (TypeError, ValueError):
                    # e.g. a non-numeric value for a "{:,}" format.
                    formatted = str(v)
                rows.append(f"<tr><td {_TD}>{label}</td><td>{formatted}</td></tr>")
            if rows:
                if url:
                    # Attribute context: quotes must be escaped as well.
                    href = _escape(str(url), quote=True)
                    link = f'<a href="{href}" target="_blank">MyAnimeList</a>'
                else:
                    link = "MyAnimeList"
                parts.append(f"<p>{link} stats as of {as_of}:</p>"
                             f"<table>{''.join(rows)}</table>")
        desc_raw = (md.get("description") or "").strip()
        if desc_raw:
            parts.append(_md_to_html(desc_raw))
        all_alt = self._collect_all_alt_titles(md)
        if all_alt:
            label_map = {
                "en":        "EN",
                "de":        "DE",
                "ja":        "JA",
                "ja-romaji": "JA Romaji",
                "ko":        "KO",
                "ko-romaji": "KO Romaji",
                "zh":        "ZH",
                "zh-romaji": "ZH Romaji",
            }
            alt_rows: list[str] = []
            for group in ("en", "de", "ja", "ja-romaji",
                          "ko", "ko-romaji", "zh", "zh-romaji"):
                titles = all_alt.get(group)
                if not titles:
                    continue
                # Titles may contain "<", ">" or "&" (e.g. a title written
                # in angle brackets) and would otherwise be read as markup.
                cell = "<br>".join(_escape(t, quote=False) for t in titles)
                alt_rows.append(
                    f"<tr><td {_TD}>{label_map[group]}</td><td>{cell}</td></tr>")
            if alt_rows:
                parts.append(f"<table>{''.join(alt_rows)}</table>")
        return "<br>".join(parts)
@@ -0,0 +1,260 @@
 """
 light_novel_orchestrator.py
 ===========================
 High-level workflow on top of the resolvers, the Kavita client and the
 diff-based updaters.  Exposes three operations to the WebApp:
  - build_matches(library_ids):
        Scan one or more Kavita libraries, resolve every series against
        MangaBaka and persist the match in matches.json.
  - update_series(kavita_series_id):
        Re-fetch MangaBaka, MAL and AniList data for a single Kavita
        series and apply the diff (metadata + persons + relationships).
  - update_all(library_ids):
        Run update_series for every series that has a match in the
        cache and lives in the given libraries.
 A single shared HTTP session (rate-limited for MangaBaka) and shared
 resolver singletons are used across the whole run to maximise cache
 hits.
 """
 from __future__ import annotations
 import requests
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from MatchesCache import MatchesCache
 from KavitaClient import KavitaClient
 from KavitaPersonUpdater import KavitaPersonUpdater
 from KavitaSeriesUpdater import KavitaSeriesUpdater
 from LightNovelMetadataBuilder import (
    LightNovelMetadataBuilder,
    pick_thumbnail_url,
 )
 from RelationshipSync import RelationshipSync
 class LightNovelOrchestrator:
     """
     High-level workflow object used by the WebApp.
     Wires one shared, MangaBaka-rate-limited HTTP session, the MAL and
     AniList resolvers, the Kavita client, the metadata builder and the
     diff-based updaters together, and exposes matching
     (`build_matches`) and updating (`update_series`, `update_all`) on
     top of the matches cache.
     """
     def __init__(self, *,
                  kavita_url: str,
                  kavita_api_key: str,
                  matches_cache: MatchesCache,
                  language: str = "en",
                  request_timeout: int = 30,
                  api_base_url: str = "https://api.mangabaka.dev/v1"):
         """
         Builds all collaborators.
         Args:
             kavita_url:      Base URL handed to KavitaClient.
             kavita_api_key:  API key handed to KavitaClient.
             matches_cache:   Cache shared with the builder and the
                              relationship sync.
             language:        Language handed to the metadata builder.
             request_timeout: Timeout passed to every HTTP-using helper.
             api_base_url:    MangaBaka API base URL for the builder.
         """
         self._cache = matches_cache
         self._timeout = request_timeout
         session = requests.Session()
         session.headers.setdefault("User-Agent",
                                    "KavitaLightNovelOrchestrator/1.0")
         _apply_mangabaka_rate_limit(session)
         self._session = session
         # First construction in the LN container — pins the singletons to
         # light-novel search mode (manga container uses the defaults).
         self._mal = MALResolver(request_timeout=request_timeout,
                                 search_type="lightnovel")
         self._al  = AniListResolver(request_timeout=request_timeout,
                                     media_format="novel")
         self._client = KavitaClient(kavita_url, kavita_api_key,
                                     request_timeout=request_timeout)
         self._builder = LightNovelMetadataBuilder(
             api_base_url=api_base_url,
             language=language,
             request_timeout=request_timeout,
             session=session,
             mal_resolver=self._mal,
             al_resolver=self._al,
             matches_cache=matches_cache,
         )
         self._series_updater = KavitaSeriesUpdater(self._client)
         self._person_updater = KavitaPersonUpdater(
             self._client,
             mal_resolver=self._mal,
             al_resolver=self._al,
         )
         self._relation_sync = RelationshipSync(
             self._client, matches_cache, builder=self._builder)
     # ------------------------------------------------------------------
     # Library listings
     # ------------------------------------------------------------------
     def list_libraries(self) -> list[dict]:
         """Returns the Kavita libraries as reported by the client."""
         return self._client.list_libraries()
     def list_series_in_libraries(self, library_ids: list[int]) -> list[dict]:
         """
         Concatenates the series of every given library.  A library whose
         listing fails is logged and skipped (best effort) so one broken
         library does not abort the whole scan.
         """
         result: list[dict] = []
         for lib_id in library_ids:
             try:
                 result.extend(self._client.list_series_in_library(int(lib_id)))
             except Exception as exc:
                 print(f"[orchestrator] library {lib_id} list failed: {exc}",
                       flush=True)
         return result
     # ------------------------------------------------------------------
     # Matching
     # ------------------------------------------------------------------
     def build_matches(self, library_ids: list[int]) -> dict:
         """
         Resolves every series in the given libraries against MangaBaka.
         Series already present in matches.json keep their stored
         mangabakaId; the kavitaSeriesId + libraryId fields are refreshed
         in case the user moved a series between libraries.
         Returns a counter dict with the keys ``checked``, ``matched``,
         ``skipped`` (already matched) and ``missing`` (no MangaBaka hit).
         Series without a name are ignored and not counted.
         """
         stats = {"checked": 0, "matched": 0, "skipped": 0, "missing": 0}
         for series in self.list_series_in_libraries(library_ids):
             title = (series.get("name") or "").strip()
             if not title:
                 continue
             stats["checked"] += 1
             kavita_id = int(series.get("id") or 0)
             library_id = int(series.get("libraryId") or 0)
             cached = self._cache.get(title)
             if cached and cached.get("mangabakaId"):
                 # Already matched: only refresh the Kavita coordinates.
                 self._cache.upsert(
                     title,
                     kavita_series_id=kavita_id,
                     library_id=library_id,
                 )
                 stats["skipped"] += 1
                 continue
             mb_series = self._builder.search_series(title)
             if not mb_series:
                 # Keep an unmatched row so the user can enter an id by hand.
                 self._cache.upsert(
                     title,
                     kavita_series_id=kavita_id,
                     library_id=library_id,
                 )
                 stats["missing"] += 1
                 print(f"[match] {title!r}: no MangaBaka hit", flush=True)
                 continue
             self._cache.upsert(
                 title,
                 mangabaka_id=mb_series.get("id"),
                 mangabaka_name=mb_series.get("title") or "",
                 image_url=pick_thumbnail_url(mb_series.get("cover")),
                 kavita_series_id=kavita_id,
                 library_id=library_id,
             )
             stats["matched"] += 1
             print(f"[match] {title!r} -> {mb_series.get('title')!r} "
                   f"(id={mb_series.get('id')})", flush=True)
         return stats
     # ------------------------------------------------------------------
     # Updating
     # ------------------------------------------------------------------
     def update_series(self, kavita_series_id: int) -> dict:
         """
         Runs a full metadata update for a single Kavita series.
         Returns ``{"ok": False, "error": ...}`` when the series cannot be
         resolved or the series update itself fails.  On success returns
         ``ok``, ``title``, ``mangabakaId`` and the three sub-reports
         ``series``, ``persons`` and ``relationships``; a failing person
         or relationship step is reported as ``{"error": ...}`` inside its
         sub-report without failing the whole update.
         """
         ksid = int(kavita_series_id)
         hit = self._cache.get_by_kavita_id(ksid)
         if not hit:
             # Try to resolve via the Kavita series name on the fly.
             # `or {}`: do not crash on `.get` should the client answer
             # None for an unknown id.
             series = self._client.get_series(ksid) or {}
             title = (series.get("name") or "").strip()
             if not title:
                 return {"ok": False, "error": "series not in matches.json"}
             built = self._builder.build(title=title)
             if not built:
                 return {"ok": False, "error": "no MangaBaka match"}
             self._cache.upsert(
                 title,
                 mangabaka_id=built.get("mangabakaId"),
                 mangabaka_name=built.get("mangabakaTitle"),
                 image_url=built.get("coverUrl"),
                 kavita_series_id=ksid,
                 library_id=int(series.get("libraryId") or 0),
             )
             cached_title = title
             cached_entry = self._cache.get(title) or {}
         else:
             cached_title, cached_entry = hit
             mangabaka_id = cached_entry.get("mangabakaId")
             if not mangabaka_id:
                 # build_matches stores rows without a MangaBaka id for
                 # unmatched series; update_all skips those, so refuse
                 # here as well instead of querying with an empty id.
                 return {"ok": False, "error": "no MangaBaka match"}
             built = self._builder.build(mangabaka_id=mangabaka_id)
             if not built:
                 return {"ok": False, "error": "mangabaka id no longer resolvable"}
         prev_cover = cached_entry.get("imageUrl") or ""
         try:
             series_report = self._series_updater.update_series(
                 ksid, built,
                 previous_cover_url=prev_cover,
             )
         except Exception as exc:
             return {"ok": False, "error": f"series update failed: {exc}"}
         # Persons
         try:
             person_report = self._person_updater.update_for_manga(
                 built.get("malId"),
                 al_manga_id=built.get("anilistId"),
             )
         except Exception as exc:
             person_report = {"error": str(exc)}
         # Relationships + collection
         try:
             relation_report = self._relation_sync.sync(ksid, built)
         except Exception as exc:
             relation_report = {"error": str(exc)}
         # Stamp the new cover URL on the cache so the next run knows when
         # to re-upload.
         self._cache.upsert(
             cached_title,
             image_url=built.get("coverUrl") or prev_cover,
         )
         self._cache.mark_updated(cached_title)
         return {
             "ok":            True,
             "title":         cached_title,
             "mangabakaId":   built.get("mangabakaId"),
             "series":        series_report,
             "persons":       person_report,
             "relationships": relation_report,
         }
     def update_all(self, library_ids: "list[int] | None") -> dict:
         """
         Updates every cached series in the given libraries (``None``
         means the whole cache).  Entries lacking a Kavita series id or a
         MangaBaka id are skipped.  Returns ``{"ok": <count>, "failed":
         <count>, "results": [<update_series result>, ...]}``.
         """
         if library_ids is None:
             entries = self._cache.all()["matches"]
         else:
             entries = self._cache.all_in_libraries(library_ids)["matches"]
         results: list[dict] = []
         ok = fail = 0
         for title, entry in entries.items():
             ksid = int(entry.get("kavitaSeriesId") or 0)
             if not ksid or not entry.get("mangabakaId"):
                 continue
             try:
                 res = self.update_series(ksid)
             except Exception as exc:
                 res = {"ok": False, "error": str(exc)}
             # Failure results carry no title of their own; always stamp it.
             res["title"] = title
             results.append(res)
             if res.get("ok"):
                 ok += 1
             else:
                 fail += 1
             print(f"[update] {title!r}: "
                   f"{'ok' if res.get('ok') else 'FAIL ' + str(res.get('error'))}",
                   flush=True)
         return {"ok": ok, "failed": fail, "results": results}
     # ------------------------------------------------------------------
     # Direct helpers exposed to the WebApp
     # ------------------------------------------------------------------
     def fetch_series(self, mangabaka_id) -> "dict | None":
         """Fetches a single MangaBaka series through the builder."""
         return self._builder.fetch_series(mangabaka_id)
@@ -2,26 +2,30 @@
 matches_cache.py
 ================
-Persistent JSON cache that maps a Suwayomi/series search title to the
+Persistent JSON cache that maps a Kavita series title to the MangaBaka
-MangaBaka series it was matched against.
+series it was matched against, plus enough context to update the right
 Kavita record later.
 Structure on disk::
    {
      "matches": {
-        "<search title>": {
+        "<kavita series name>": {
          "mangabakaId":      "12345",
-          "mangabakaName":  "One-Punch Man",
+          "mangabakaName":    "Re:Zero",
          "imageUrl":         "https://.../cover.jpg",
-          "firstMatchTime": 1700000000
+          "kavitaSeriesId":   42,
          "libraryId":        3,
          "firstMatchTime":   1700000000,
          "lastUpdateTime":   1700100000
        },
        ...
      }
    }
-The cache is consulted by ComicInfoBuilder before issuing a MangaBaka
+The cache is the source of truth for the WebUI's matches table and is
-search request, and is written back to disk on every mutation so a crash
+written back on every mutation so a crash mid-batch does not lose
-does not lose matches that were resolved in the current run.
+matches that were resolved in the current run.
 """
 from __future__ import annotations
@@ -32,6 +36,14 @@ import time
 from pathlib import Path
 def _set_int(entry: dict, key: str, value) -> None:
    """Sets entry[key] = int(value); ignores values that don't coerce."""
    try:
        entry[key] = int(value)
    except (TypeError, ValueError):
        pass
 class MatchesCache:
    def __init__(self, path):
        self._path = Path(path)
@@ -47,26 +59,35 @@ class MatchesCache:
            entry = self._data["matches"].get(title)
            return dict(entry) if entry else None
-    def add(self, title: str, *,
+    def get_by_kavita_id(self, kavita_series_id: int) -> "tuple[str, dict] | None":
            mangabaka_id,
            mangabaka_name: str,
            image_url: "str | None") -> dict:
        entry = {
            "mangabakaId":    str(mangabaka_id) if mangabaka_id is not None else "",
            "mangabakaName":  mangabaka_name or "",
            "imageUrl":       image_url or "",
            "firstMatchTime": int(time.time()),
        }
        with self._lock:
-            self._data["matches"][title] = entry
+            for title, entry in self._data["matches"].items():
-            self._save_unlocked()
+                if entry.get("kavitaSeriesId") == kavita_series_id:
-        return dict(entry)
+                    return title, dict(entry)
        return None
    def get_by_mangabaka_id(self, mangabaka_id) -> "tuple[str, dict] | None":
        target = str(mangabaka_id) if mangabaka_id is not None else ""
        if not target:
            return None
        with self._lock:
            for title, entry in self._data["matches"].items():
                if str(entry.get("mangabakaId") or "") == target:
                    return title, dict(entry)
        return None
    def upsert(self, title: str, *,
               mangabaka_id=None,
               mangabaka_name=None,
               image_url=None,
-               first_match_time=None) -> dict:
+               kavita_series_id=None,
               library_id=None,
               first_match_time=None,
               last_update_time=None) -> dict:
        """
        Inserts or updates an entry.  Only fields passed explicitly are
        modified; the rest are preserved.
        """
        with self._lock:
            entry = self._data["matches"].get(title)
            if entry is None:
@@ -74,7 +95,10 @@ class MatchesCache:
                    "mangabakaId":    "",
                    "mangabakaName":  "",
                    "imageUrl":       "",
                    "kavitaSeriesId": 0,
                    "libraryId":      0,
                    "firstMatchTime": int(time.time()),
                    "lastUpdateTime": 0,
                }
                self._data["matches"][title] = entry
            if mangabaka_id is not None:
@@ -83,14 +107,24 @@ class MatchesCache:
                entry["mangabakaName"] = mangabaka_name
            if image_url is not None:
                entry["imageUrl"] = image_url
            if kavita_series_id is not None:
                _set_int(entry, "kavitaSeriesId", kavita_series_id)
            if library_id is not None:
                _set_int(entry, "libraryId", library_id)
            if first_match_time is not None:
-                try:
+                _set_int(entry, "firstMatchTime", first_match_time)
-                    entry["firstMatchTime"] = int(first_match_time)
+            if last_update_time is not None:
-                except (TypeError, ValueError):
+                _set_int(entry, "lastUpdateTime", last_update_time)
                    pass
            self._save_unlocked()
            return dict(entry)
    def mark_updated(self, title: str) -> None:
        with self._lock:
            entry = self._data["matches"].get(title)
            if entry is not None:
                entry["lastUpdateTime"] = int(time.time())
                self._save_unlocked()
    def rename(self, old_title: str, new_title: str) -> bool:
        if not new_title or old_title == new_title:
            return False
@@ -115,6 +149,20 @@ class MatchesCache:
            return {"matches": {k: dict(v)
                                for k, v in self._data["matches"].items()}}
    def all_in_libraries(self, library_ids: "list[int] | None") -> dict:
        """
        Returns the cache filtered to entries whose libraryId is in
        `library_ids`.  Pass None to return everything.
        """
        if library_ids is None:
            return self.all()
        ids = {int(i) for i in library_ids}
        with self._lock:
            return {"matches": {
                k: dict(v) for k, v in self._data["matches"].items()
                if int(v.get("libraryId") or 0) in ids
            }}
    # ------------------------------------------------------------------
    # Internal IO
    # ------------------------------------------------------------------
@@ -0,0 +1,764 @@
 """
 matches_web_app.py
 ==================
 Flask web UI for the Kavita light-novel metadata fetcher.
 Pages
 -----
 GET  /                          HTML UI (matches table + actions)
 Match cache (JSON)
 ------------------
 GET  /api/libraries             Lists Kavita libraries
 GET  /api/matches               Full cache, optionally filtered by libraryIds=
 POST /api/matches               Upsert a single match
                                  body: {title, mangabakaId}
 POST /api/matches/delete        Remove a match
                                  body: {title}
 Background jobs
 ---------------
 POST /api/build                 Build matches for libraries
                                  body: {libraryIds: [int, ...]}
 POST /api/update                Update a single series
                                  body: {kavitaSeriesId}
 POST /api/update-all            Update every cached series in libraries
                                  body: {libraryIds: [int, ...] | null}
 GET  /api/status                Current background job status (status, log)
 """
 from __future__ import annotations
 import threading
 import time
 from flask import Flask, jsonify, request, Response
 from MatchesCache import MatchesCache
 from LightNovelMetadataBuilder import pick_thumbnail_url
 def _int_list(values) -> list[int]:
    """Coerces an iterable of mixed values to a list of positive ints."""
    out: list[int] = []
    for v in (values or []):
        try:
            n = int(v)
        except (TypeError, ValueError):
            continue
        if n > 0:
            out.append(n)
    return out
 _INDEX_HTML = r"""<!doctype html>
 <html lang="en">
 <head>
  <meta charset="utf-8">
  <title>Kavita light-novel metadata fetcher</title>
  <style>
    body  { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
    h1    { margin: 0 0 1rem; font-size: 1.4rem; }
    .bar  { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
    .bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
    .bar select[multiple] { background:#222; color:#eee; border:1px solid #444; min-width: 14rem; min-height: 4.2rem; }
    button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
    button.primary { background:#2563eb; border-color:#2563eb; color:white; }
    button.danger  { background:#7f1d1d; border-color:#7f1d1d; color:white; }
    button.success { background:#15803d; border-color:#15803d; color:white; }
    button:disabled { opacity:.5; cursor:default; }
    table { border-collapse: collapse; width: 100%; }
    th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
    th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
    th.sortable { cursor: pointer; user-select: none; }
    th.sortable:hover { background:#252525; }
    th .arrow { display:inline-block; width:.8em; color:#9ca3af; }
    tr:nth-child(even) td { background: #161616; }
    td.image img { max-width: 90px; max-height: 130px; display:block; }
    td.id input { width: 12rem; padding: .25rem; background:#222; color:#eee; border:1px solid #444; font-family: monospace; }
    td.title a { color: #60a5fa; text-decoration: none; }
    td.title a:hover { text-decoration: underline; }
    td.actions { white-space: nowrap; }
    .status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
    .dirty td { background: #1f2937 !important; }
    .count { color:#9ca3af; font-size:.9rem; margin-left:.5rem; }
    pre.log { background:#0a0a0a; color:#9ca3af; padding:.5rem .75rem; max-height:18rem; overflow:auto; border:1px solid #333; font-size:.8rem; white-space:pre-wrap; }
    label { font-size:.9rem; color:#9ca3af; }
  </style>
 </head>
 <body>
 <h1>Kavita light-novel metadata fetcher <span id="count" class="count"></span></h1>
 <div class="bar">
  <label>Libraries
    <select id="libraries" multiple size="3"></select>
  </label>
  <button id="reload">Reload</button>
  <button id="build">Match all in libraries</button>
  <button id="updateAll" class="success">Update all in libraries</button>
  <button id="batchSave" class="primary">Save dirty (0)</button>
  <span class="status" id="status"></span>
 </div>
 <div class="bar">
  <input id="filter" type="search" placeholder="Filter by title…">
  <span class="count" id="jobStatus"></span>
 </div>
 <pre id="jobLog" class="log" hidden></pre>
 <table>
  <thead>
    <tr>
      <th class="sortable" data-col="title">Title <span class="arrow" id="arrow-title"></span></th>
      <th>mangabakaId</th>
      <th>mangabakaName</th>
      <th>library</th>
      <th class="sortable" data-col="lastUpdateTime">Last update <span class="arrow" id="arrow-lastUpdateTime"></span></th>
      <th>Image</th>
      <th></th>
    </tr>
  </thead>
  <tbody id="rows"></tbody>
 </table>
 <script>
 // Base URL of the MangaBaka search page; the URL-encoded title is appended.
 const MB_SEARCH = "https://mangabaka.org/search?q=";
 // Cache entries keyed by title, as loaded from GET /api/matches.
 let matchesData = {};
 // Library id -> library name, filled by loadLibraries().
 let librariesById = {};
 // Active table sort: column key plus direction.
 let currentSort = { col: "title", asc: true };
 // setInterval handle of the job poller; null while not polling.
 let jobPollHandle = null;
 // Formats a Unix timestamp (seconds) in the browser locale; a falsy
 // timestamp yields the empty string.
 function fmtTime(unix) {
   return unix ? new Date(unix * 1000).toLocaleString() : "";
 }
 // Shows `msg` in the toolbar's status span.
 function setStatus(msg) {
   const statusEl = document.getElementById("status");
   statusEl.textContent = msg;
 }
 // Returns the ids of the libraries currently selected in the picker,
 // parsed as base-10 integers.
 function selectedLibraryIds() {
   const picked = document.getElementById("libraries").selectedOptions;
   return Array.from(picked, (opt) => parseInt(opt.value, 10));
 }
 // Syncs the "Save dirty (n)" button with the number of rows marked
 // dirty; the button is disabled while there is nothing to save.
 function updateDirtyCount() {
   const dirtyRows = document.querySelectorAll("#rows tr.dirty").length;
   const saveBtn = document.getElementById("batchSave");
   saveBtn.disabled = dirtyRows === 0;
   saveBtn.textContent = `Save dirty (${dirtyRows})`;
 }
 // Builds the table row for cache entry `e` stored under `title`.
 // Cells, in order: title link, editable mangabakaId, mangabakaName,
 // library, last update, cover image, action buttons.  The widgets that
 // other functions need later are exposed as row._idInp, row._nameTd,
 // row._img, row._timeTd and row._update.
 function makeRow(title, e) {
   const row = document.createElement("tr");
   row.dataset.title = title;
   // Appends a <td> (optionally with a class) to the row and returns it.
   const addCell = (className) => {
     const td = document.createElement("td");
     if (className) td.className = className;
     row.appendChild(td);
     return td;
   };
   // Appends a button to `cell`; `spaced` adds the gap to its left neighbour.
   const addButton = (cell, label, className, onClick, spaced) => {
     const btn = document.createElement("button");
     btn.textContent = label;
     btn.className = className;
     if (spaced) btn.style.marginLeft = ".25rem";
     btn.addEventListener("click", onClick);
     cell.appendChild(btn);
     return btn;
   };
   // Title — links to MangaBaka search
   const link = document.createElement("a");
   link.href = MB_SEARCH + encodeURIComponent(title) + "&type=novel";
   link.target = "_blank";
   link.rel = "noopener";
   link.textContent = title;
   addCell("title").appendChild(link);
   // mangabakaId (editable); the row is dirty while the input differs
   // from the value it was rendered with.
   const storedId = e.mangabakaId || "";
   const idInput = document.createElement("input");
   idInput.value = storedId;
   idInput.dataset.original = storedId;
   idInput.addEventListener("input", () => {
     row.classList.toggle("dirty", idInput.value !== idInput.dataset.original);
     updateDirtyCount();
   });
   addCell("id").appendChild(idInput);
   // mangabakaName
   const nameCell = addCell();
   nameCell.textContent = e.mangabakaName || "";
   // library: name if known, otherwise "#<id>", blank for id 0
   const libraryId = e.libraryId || 0;
   addCell().textContent =
       librariesById[libraryId] || (libraryId ? "#" + libraryId : "");
   // lastUpdateTime
   const timeCell = addCell();
   timeCell.textContent = e.lastUpdateTime ? fmtTime(e.lastUpdateTime) : "";
   // Image
   const cover = document.createElement("img");
   cover.src = e.imageUrl || "";
   cover.alt = "";
   cover.loading = "lazy";
   addCell("image").appendChild(cover);
   // Actions
   const actions = addCell("actions");
   addButton(actions, "Save", "primary", () => saveRow(row), false);
   const updateBtn =
       addButton(actions, "Update", "success", () => updateRow(row), true);
   updateBtn.disabled = !e.kavitaSeriesId;
   updateBtn.title = e.kavitaSeriesId
     ? "Push metadata to Kavita series #" + e.kavitaSeriesId
     : "Run a Match cycle first so we know the Kavita series id";
   addButton(actions, "Delete", "danger", () => deleteRow(row), true);
   row._idInp = idInput;
   row._nameTd = nameCell;
   row._img = cover;
   row._timeTd = timeCell;
   row._update = updateBtn;
   return row;
 }
 // POSTs the row's (trimmed) mangabakaId to /api/matches and refreshes
 // the row from the entry the server sends back.  Resolves to true on
 // success and false on failure; failures are reported via setStatus.
 async function saveRow(tr) {
   const title = tr.dataset.title;
   const payload = { title: title, mangabakaId: tr._idInp.value.trim() };
   setStatus(`Saving ${title}…`);
   try {
     const resp = await fetch("/api/matches", {
       method: "POST",
       headers: { "Content-Type": "application/json" },
       body: JSON.stringify(payload),
     });
     if (!resp.ok) throw new Error(await resp.text());
     const saved = (await resp.json()).entry || {};
     matchesData[title] = saved;
     const savedId = saved.mangabakaId || "";
     tr._idInp.value = savedId;
     tr._idInp.dataset.original = savedId;
     tr._nameTd.textContent = saved.mangabakaName || "";
     tr._img.src = saved.imageUrl || "";
     tr.classList.remove("dirty");
     updateDirtyCount();
     setStatus(`Saved ${title}`);
     return true;
   } catch (err) {
     setStatus(`Save failed (${title}): ${err.message}`);
     return false;
   }
 }
 // Asks for confirmation, then removes the match for the row's title via
 // POST /api/matches/delete and drops the row from the table.  Errors
 // are reported through setStatus; the row stays in place on failure.
 async function deleteRow(tr) {
   const title = tr.dataset.title;
   if (!confirm("Delete " + title + "?")) return;
   setStatus("Deleting " + title + "…");
   try {
     const r = await fetch("/api/matches/delete", {
       method: "POST",
       headers: { "Content-Type": "application/json" },
       body: JSON.stringify({ title: title }),
     });
     if (!r.ok) throw new Error(await r.text());
     delete matchesData[title];
     tr.remove();
     // The removed row may have been dirty; keep the "Save dirty (n)"
     // button in sync with what is left in the table.
     updateDirtyCount();
     document.getElementById("count").textContent =
         "(" + Object.keys(matchesData).length + " entries)";
     setStatus("Deleted");
   } catch (err) {
     setStatus("Delete failed: " + err.message);
   }
 }
 // Triggers POST /api/update for the row's Kavita series.  The Update
 // button is disabled while the request is in flight and re-enabled
 // afterwards; on success the "Last update" cell shows the current time.
 async function updateRow(tr) {
   const title = tr.dataset.title;
   const entry = matchesData[title] || {};
   const seriesId = entry.kavitaSeriesId;
   if (!seriesId) {
     setStatus(`No kavitaSeriesId for ${title} — run match first`);
     return;
   }
   setStatus(`Updating ${title}…`);
   tr._update.disabled = true;
   try {
     const resp = await fetch("/api/update", {
       method: "POST",
       headers: { "Content-Type": "application/json" },
       body: JSON.stringify({ kavitaSeriesId: seriesId }),
     });
     if (!resp.ok) throw new Error(await resp.text());
     const outcome = await resp.json();
     if (outcome.ok) {
       setStatus("Updated " + title);
       entry.lastUpdateTime = Math.floor(Date.now() / 1000);
       tr._timeTd.textContent = fmtTime(entry.lastUpdateTime);
     } else {
       setStatus("Update failed: " + outcome.error);
     }
   } catch (err) {
     setStatus("Update failed: " + err.message);
   } finally {
     tr._update.disabled = false;
   }
 }
 // Saves every dirty row one after another (after a confirmation) and
 // reports how many saves succeeded and failed.
 async function batchSave() {
   const dirtyRows = [...document.querySelectorAll("#rows tr.dirty")];
   if (!dirtyRows.length) return;
   if (!confirm(`Save ${dirtyRows.length} changed row(s)?`)) return;
   setStatus(`Batch saving ${dirtyRows.length} rows…`);
   const tally = { ok: 0, fail: 0 };
   for (const row of dirtyRows) {
     // Sequential on purpose: each save awaits the previous one.
     if (await saveRow(row)) tally.ok++;
     else tally.fail++;
   }
   setStatus(`Batch: ${tally.ok} ok, ${tally.fail} failed`);
 }
 // Returns the titles of matchesData ordered by the active sort column
 // and direction; an unknown column leaves the key order untouched.
 function sortedTitles() {
   const sign = currentSort.asc ? 1 : -1;
   const keys = Object.keys(matchesData);
   switch (currentSort.col) {
     case "title":
       return keys.sort((x, y) => x.localeCompare(y) * sign);
     case "lastUpdateTime": {
       // Entries never updated sort as timestamp 0.
       const stamp = (k) => matchesData[k].lastUpdateTime || 0;
       return keys.sort((x, y) => (stamp(x) - stamp(y)) * sign);
     }
     default:
       return keys;
   }
 }
 // Clears all header arrows, then marks the active sort column with an
 // up (ascending) or down (descending) arrow.
 function updateSortArrows() {
   document.querySelectorAll("th .arrow").forEach((arrow) => {
     arrow.textContent = "";
   });
   const active = document.getElementById("arrow-" + currentSort.col);
   if (active) {
     active.textContent = currentSort.asc ? "▲" : "▼";
   }
 }
 // Hides every row that fails the title filter (case-insensitive
 // substring) or the library selection; no selected library means
 // "all libraries".
 function applyFilter() {
   const needle = document.getElementById("filter").value.toLowerCase();
   const wanted = new Set(selectedLibraryIds());
   document.querySelectorAll("#rows tr").forEach((row) => {
     const name = row.dataset.title;
     const record = matchesData[name] || {};
     const inTitle = name.toLowerCase().includes(needle);
     const inLibrary = wanted.size === 0 || wanted.has(record.libraryId || 0);
     row.style.display = inTitle && inLibrary ? "" : "none";
   });
 }
 // Rebuilds the whole table body from matchesData in the current sort
 // order, then re-applies arrows, filters, the dirty counter and the
 // entry count in the heading.
 function render() {
   const body = document.getElementById("rows");
   body.innerHTML = "";
   const fragment = document.createDocumentFragment();
   sortedTitles().forEach((name) => {
     fragment.appendChild(makeRow(name, matchesData[name]));
   });
   body.appendChild(fragment);
   updateSortArrows();
   applyFilter();
   updateDirtyCount();
   const total = Object.keys(matchesData).length;
   document.getElementById("count").textContent = `(${total} entries)`;
 }
 // Fetches /api/libraries, refills the library picker and the
 // id -> name map, and pre-selects the ids listed in `defaults`.
 async function loadLibraries() {
   try {
     const resp = await fetch("/api/libraries");
     const payload = await resp.json();
     const available = payload.libraries || [];
     const preselected = new Set(payload.defaults || []);
     librariesById = {};
     const picker = document.getElementById("libraries");
     picker.innerHTML = "";
     available.forEach((lib) => {
       librariesById[lib.id] = lib.name;
       const option = document.createElement("option");
       option.value = lib.id;
       option.textContent = lib.name + " (#" + lib.id + ")";
       if (preselected.has(lib.id)) {
         option.selected = true;
       }
       picker.appendChild(option);
     });
   } catch (err) {
     setStatus("Failed to load libraries: " + err.message);
   }
 }
 // Reloads the full match cache from /api/matches and re-renders the
 // table; failures are shown in the status span.
 async function load() {
   setStatus("Loading…");
   try {
     const resp = await fetch("/api/matches");
     const payload = await resp.json();
     matchesData = payload.matches || {};
     render();
     const total = Object.keys(matchesData).length;
     setStatus(total + " entries");
   } catch (err) {
     setStatus("Load failed: " + err.message);
   }
 }
 // Fetches /api/status once and mirrors it into the job status line and
 // log box.  Stops the poller when no job has ever run or when the job
 // has finished; a finished job also triggers a table reload.
 async function pollJob() {
   try {
     const resp = await fetch("/api/status");
     const state = await resp.json();
     const statusEl = document.getElementById("jobStatus");
     const logEl = document.getElementById("jobLog");
     const neverRan = !state.running && !state.lastFinished;
     if (neverRan) {
       statusEl.textContent = "";
       logEl.hidden = true;
       stopPolling();
       return;
     }
     logEl.hidden = false;
     logEl.textContent = (state.log || []).join("\n");
     // Keep the newest log line in view.
     logEl.scrollTop = logEl.scrollHeight;
     const label = state.label || "";
     if (!state.running) {
       statusEl.textContent = "Done: " + label;
       stopPolling();
       load();
       return;
     }
     statusEl.textContent = "Running: " + label;
   } catch (err) {
     /* keep polling silently */
   }
 }
 // Starts the one-second status poller unless it is already running,
 // and polls once immediately.
 function startPolling() {
   if (!jobPollHandle) {
     jobPollHandle = setInterval(pollJob, 1000);
     pollJob();
   }
 }
 // Cancels the status poller (if any) and clears its handle.
 function stopPolling() {
   if (jobPollHandle) {
     clearInterval(jobPollHandle);
   }
   jobPollHandle = null;
 }
 // Starts the "match all" job for the selected libraries via
 // POST /api/build and begins polling its status.  Requires at least
 // one selected library and a confirmation.
 async function startBuild() {
   const chosen = selectedLibraryIds();
   if (!chosen.length) {
     setStatus("Pick at least one library");
     return;
   }
   const question = "Match every series in " + chosen.length + " library(ies)?";
   if (!confirm(question)) return;
   setStatus("Build started");
   try {
     const resp = await fetch("/api/build", {
       method: "POST",
       headers: { "Content-Type": "application/json" },
       body: JSON.stringify({ libraryIds: chosen }),
     });
     if (!resp.ok) throw new Error(await resp.text());
     startPolling();
   } catch (err) {
     setStatus("Build failed: " + err.message);
   }
 }
 // Asks for confirmation, then POSTs to /api/update-all and starts polling
 // the job status.  With no library selected the request carries
 // libraryIds: null; otherwise it carries the selected ids.
 async function startUpdateAll() {
  const chosen = selectedLibraryIds();
  const question = chosen.length === 0
    ? "No libraries selected — update every cached series?"
    : "Update every cached series in " + chosen.length + " library(ies)?";
  if (!confirm(question)) return;
  setStatus("Update-all started");
  const payload = { libraryIds: chosen.length ? chosen : null };
  try {
    const resp = await fetch("/api/update-all", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });
    if (!resp.ok) throw new Error(await resp.text());
    startPolling();
  } catch (err) {
    setStatus("Update-all failed: " + err.message);
  }
 }
 // Wire the toolbar controls to their handlers.
 document.getElementById("filter").addEventListener("input", applyFilter);
 document.getElementById("libraries").addEventListener("change", applyFilter);
 document.getElementById("reload").addEventListener("click", load);
 document.getElementById("batchSave").addEventListener("click", batchSave);
 document.getElementById("build").addEventListener("click", startBuild);
 document.getElementById("updateAll").addEventListener("click", startUpdateAll);
 // Clicking a sortable header sorts by that column; clicking the active
 // column again flips the direction.
 for (const th of document.querySelectorAll("th.sortable")) {
  th.addEventListener("click", () => {
    const col = th.dataset.col;
    if (currentSort.col === col) currentSort.asc = !currentSort.asc;
    else { currentSort.col = col; currentSort.asc = true; }
    render();
  });
 }
 // Initial page load: libraries first, then the matches table.
 (async () => {
  await loadLibraries();
  await load();
  // Resume polling if there's a job running from a previous session.
  // startPolling() (not a bare pollJob()) is required here: pollJob() only
  // fetches the status once, so a still-running job would never refresh.
  // pollJob() itself stops the timer again when no job is running.
  startPolling();
 })();
 </script>
 </body>
 </html>
 """
 class _JobState:
    """Thread-safe container for the current background job's progress."""
    def __init__(self):
        self._lock = threading.Lock()
        self._running = False
        self._label = ""
        self._log: list[str] = []
        self._last_finished_at = 0
        self._thread: "threading.Thread | None" = None
    def start(self, label: str, target, *args, **kwargs) -> bool:
        with self._lock:
            if self._running:
                return False
            self._running = True
            self._label = label
            self._log = [f"[{time.strftime('%H:%M:%S')}] {label} started"]
        def runner():
            try:
                target(self, *args, **kwargs)
            except Exception as exc:
                self.append(f"FATAL: {exc}")
            finally:
                with self._lock:
                    self._running = False
                    self._last_finished_at = int(time.time())
                self.append(f"[{time.strftime('%H:%M:%S')}] finished")
        self._thread = threading.Thread(target=runner,
                                        name=f"job:{label}",
                                        daemon=True)
        self._thread.start()
        return True
    def append(self, line: str) -> None:
        with self._lock:
            self._log.append(line)
            # Cap log length so the response stays bounded.
            if len(self._log) > 1000:
                self._log = self._log[-800:]
    def snapshot(self) -> dict:
        with self._lock:
            return {
                "running":      self._running,
                "label":        self._label,
                "log":          list(self._log),
                "lastFinished": self._last_finished_at,
            }
 class MatchesWebApp:
    """
    Flask application serving the matches UI and its JSON API.

    Reads and edits entries of a MatchesCache.  When an orchestrator is
    supplied it additionally lists libraries, resolves MangaBaka ids and
    runs build / update jobs; without one those endpoints answer 503 (or
    an empty library list).
    """
    def __init__(self, cache: MatchesCache, *,
                 orchestrator=None,
                 default_library_ids: "list[int] | None" = None,
                 host: str = "0.0.0.0",
                 port: int = 8080):
        """
        Parameters
        ----------
        cache               : MatchesCache backing the /api/matches routes.
        orchestrator        : optional object providing list_libraries,
                              fetch_series, build_matches, update_series
                              and update_all.
        default_library_ids : library ids reported to the UI as "defaults".
        host, port          : address the Flask server binds to.
        """
        self._cache = cache
        self._orchestrator = orchestrator
        self._defaults = list(default_library_ids or [])
        self._host = host
        self._port = port
        self._job = _JobState()
        self._app = Flask(__name__)
        self._thread: "threading.Thread | None" = None
        self._register_routes()
    @property
    def app(self) -> Flask:
        """The underlying Flask application."""
        return self._app
    def start(self) -> threading.Thread:
        """
        Starts the Flask server on a non-daemon thread and returns that
        thread.  Calling it again while the server thread is alive returns
        the existing thread.
        """
        if self._thread is not None and self._thread.is_alive():
            return self._thread
        self._thread = threading.Thread(
            target=self._app.run,
            kwargs={"host": self._host, "port": self._port,
                    "debug": False, "use_reloader": False,
                    "threaded": True},
            name="MatchesWebApp",
            daemon=False,
        )
        self._thread.start()
        print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
              flush=True)
        return self._thread
    def wait(self) -> None:
        """Blocks until the server thread exits (no-op if never started)."""
        if self._thread is not None:
            self._thread.join()
    # ------------------------------------------------------------------
    # Routes
    # ------------------------------------------------------------------
    def _register_routes(self) -> None:
        """Registers the page route and all /api/* routes on the app."""
        app = self._app
        cache = self._cache
        @app.get("/")
        def index() -> Response:
            # `content_type` is used as-is.  Passing a value that already
            # contains a charset as `mimetype` makes Werkzeug append a
            # second "; charset=utf-8" parameter.
            return Response(_INDEX_HTML,
                            content_type="text/html; charset=utf-8")
        @app.get("/api/libraries")
        def api_libraries():
            # Always answer with the same object shape so clients can read
            # `.libraries` / `.defaults` whether or not an orchestrator is
            # configured.
            if self._orchestrator is None:
                return jsonify({"libraries": [], "defaults": self._defaults})
            try:
                libs = self._orchestrator.list_libraries()
            except Exception as exc:
                return Response(f"libraries failed: {exc}", status=502)
            return jsonify({"libraries": libs, "defaults": self._defaults})
        @app.get("/api/matches")
        def api_list():
            # Optional ?libraryIds=1,2 restricts the listing.
            raw = request.args.get("libraryIds") or ""
            lib_ids = _int_list(raw.split(","))
            if lib_ids:
                return jsonify(cache.all_in_libraries(lib_ids))
            return jsonify(cache.all())
        @app.post("/api/matches")
        def api_upsert():
            body = request.get_json(silent=True) or {}
            title = (body.get("title") or "").strip()
            if not title:
                return Response("title is required", status=400)
            new_id_raw = body.get("mangabakaId")
            new_id = str(new_id_raw).strip() if new_id_raw is not None else ""
            if not new_id:
                return Response("mangabakaId is required", status=400)
            new_name: "str | None" = None
            new_image: "str | None" = None
            # With an orchestrator the id is verified and the stored name /
            # thumbnail are taken from the resolved series.
            if self._orchestrator is not None:
                try:
                    series = self._orchestrator.fetch_series(new_id)
                except Exception as exc:
                    return Response(f"resolve failed: {exc}", status=502)
                if not series:
                    return Response(
                        f"MangaBaka has no series with id {new_id}",
                        status=404)
                new_name  = series.get("title") or ""
                new_image = pick_thumbnail_url(series.get("cover")) or ""
            entry = cache.upsert(
                title,
                mangabaka_id=new_id,
                mangabaka_name=new_name,
                image_url=new_image,
            )
            return jsonify({"title": title, "entry": entry})
        @app.post("/api/matches/delete")
        def api_delete():
            body = request.get_json(silent=True) or {}
            title = (body.get("title") or "").strip()
            if not title:
                return Response("title is required", status=400)
            removed = cache.remove(title)
            return jsonify({"removed": removed, "title": title})
        @app.post("/api/build")
        def api_build():
            if self._orchestrator is None:
                return Response("no orchestrator configured", status=503)
            body = request.get_json(silent=True) or {}
            library_ids = _int_list(body.get("libraryIds"))
            if not library_ids:
                return Response("libraryIds required", status=400)
            label = f"match libraries {library_ids}"
            def task(job: _JobState, lib_ids):
                stats = self._orchestrator.build_matches(lib_ids)
                job.append(f"matched={stats.get('matched')} "
                           f"skipped={stats.get('skipped')} "
                           f"missing={stats.get('missing')} "
                           f"checked={stats.get('checked')}")
            # Only one background job may run at a time.
            if not self._job.start(label, task, library_ids):
                return Response("a job is already running", status=409)
            return jsonify({"started": label})
        @app.post("/api/update")
        def api_update():
            if self._orchestrator is None:
                return Response("no orchestrator configured", status=503)
            body = request.get_json(silent=True) or {}
            ksid = body.get("kavitaSeriesId")
            try:
                ksid_int = int(ksid)
            except (TypeError, ValueError):
                return Response("kavitaSeriesId required", status=400)
            try:
                res = self._orchestrator.update_series(ksid_int)
            except Exception as exc:
                return Response(f"update failed: {exc}", status=500)
            return jsonify(res)
        @app.post("/api/update-all")
        def api_update_all():
            if self._orchestrator is None:
                return Response("no orchestrator configured", status=503)
            body = request.get_json(silent=True) or {}
            raw = body.get("libraryIds")
            # A missing / null "libraryIds" means "every library".
            library_ids = None if raw is None else _int_list(raw)
            label = ("update all (every library)" if library_ids is None
                     else f"update all in libraries {library_ids}")
            def task(job: _JobState, lib_ids):
                summary = self._orchestrator.update_all(lib_ids)
                job.append(f"ok={summary.get('ok')} failed={summary.get('failed')}")
                # One log line per series: the changed fields, or the error.
                for res in summary.get("results", []):
                    title = res.get("title", "?")
                    if res.get("ok"):
                        flags = []
                        sr = res.get("series") or {}
                        for k, v in sr.items():
                            if v == "changed":
                                flags.append(k)
                        job.append(
                            f"  {title}: changed=[{', '.join(flags) or '-'}]")
                    else:
                        job.append(f"  {title}: FAIL {res.get('error')}")
            if not self._job.start(label, task, library_ids):
                return Response("a job is already running", status=409)
            return jsonify({"started": label})
        @app.get("/api/status")
        def api_status():
            snap = self._job.snapshot()
            snap["defaults"] = self._defaults
            return jsonify(snap)
@@ -0,0 +1,174 @@
 """
 relationship_sync.py
 ====================
 Mirrors MangaBaka's ``relationships_v2`` graph into Kavita:
  1. Every related MangaBaka series that is *also* present in Kavita
     (resolved via MatchesCache) is added to a shared Kavita collection
     so the whole franchise can be browsed in one place.
  2. Series-level relationships (prequel / sequel / spin-off / …) are
     written via ``POST /api/Series/update-related`` so navigating
     between entries surfaces the right neighbours.
 Only relationships where both endpoints exist in Kavita are written.
 Relationships pointing to series that have not been imported yet are
 silently skipped (the next match run picks them up).
 """
 from __future__ import annotations
 from KavitaClient import KavitaClient
 from MatchesCache import MatchesCache
 # MangaBaka relation_type  ->  Kavita UpdateRelatedSeriesDto bucket.
 # Keys are matched against the lower-cased relation_type; types missing
 # from this map are filed under "others" by RelationshipSync.sync().
 _RELATION_MAP = {
    "prequel":             "prequels",
    "sequel":              "sequels",
    "side_story":          "sideStories",
    "spin_off":            "spinOffs",
    "spinoff":             "spinOffs",
    "alternative_version": "alternativeVersions",
    "alternative_story":   "alternativeVersions",
    "alternative_setting": "alternativeSettings",
    "adapted_from":        "adaptations",
    "adaptation":          "adaptations",
    "doujinshi":           "doujinshis",
    "parent":              "contains",   # the parent "contains" the child
 }
 # Every bucket key written into the update-related payload.  Buckets with
 # no resolved series are sent as empty lists.
 _ALL_BUCKETS = (
    "adaptations", "characters", "contains", "others",
    "prequels", "sequels", "sideStories", "spinOffs",
    "alternativeSettings", "alternativeVersions", "doujinshis",
    "editions", "annuals",
 )
 class RelationshipSync:
    """
    Writes the MangaBaka relationships of one series into Kavita: as
    series relations and as membership in a shared collection.
    """
    def __init__(self, client: "KavitaClient", cache: "MatchesCache", *,
                 builder=None):
        """
        Parameters
        ----------
        client  : KavitaClient for collection / relation writes.
        cache   : MatchesCache to resolve mangabakaId -> kavitaSeriesId.
        builder : optional LightNovelMetadataBuilder used to fetch parent
                  series titles when picking the collection name.
        """
        self._client = client
        self._cache = cache
        self._builder = builder
    # ------------------------------------------------------------------
    # Public
    # ------------------------------------------------------------------
    def sync(self, kavita_series_id: int, built: dict) -> dict:
        """
        Applies the relationship and collection links described by
        `built["relationships"]` (raw MangaBaka relationships_v2 list)
        for the given Kavita series.

        Returns a report dict with:
          relations      : {bucket: [kavitaSeriesId, ...]} of what was
                           written, or {"error": "..."} if the write failed.
          collection     : the collection name used, an "error: ..." string,
                           or None when no collection was written.
          missing_series : MangaBaka ids that could not be resolved to a
                           Kavita series.
        """
        report: dict = {"relations": {}, "collection": None,
                        "missing_series": []}
        relationships = built.get("relationships") or []
        if not relationships:
            return report
        # Resolve mangabakaId -> kavitaSeriesId for every related entry.
        related: dict[str, list[int]] = {b: [] for b in _ALL_BUCKETS}
        all_kavita_ids: set[int] = set()
        for rel in relationships:
            mb_id = rel.get("to_series_id")
            if mb_id is None:
                continue
            hit = self._cache.get_by_mangabaka_id(mb_id)
            if not hit:
                report["missing_series"].append(int(mb_id))
                continue
            _title, entry = hit
            ksid = int(entry.get("kavitaSeriesId") or 0)
            if not ksid:
                # Known to the cache but without a Kavita series id.
                report["missing_series"].append(int(mb_id))
                continue
            bucket = _RELATION_MAP.get((rel.get("relation_type") or "").lower(),
                                       "others")
            if ksid not in related[bucket]:
                related[bucket].append(ksid)
            all_kavita_ids.add(ksid)
        # ----- Relationships ------------------------------------------
        if any(related.values()):
            payload = {"seriesId": int(kavita_series_id)}
            for bucket in _ALL_BUCKETS:
                payload[bucket] = related[bucket]
            try:
                self._client.update_related(payload)
                report["relations"] = {k: v for k, v in related.items() if v}
            except Exception as exc:
                report["relations"] = {"error": str(exc)}
        # ----- Collection ---------------------------------------------
        # Include the current series in the collection so it shows up too.
        all_kavita_ids.add(int(kavita_series_id))
        if len(all_kavita_ids) >= 2:
            collection_name = self._collection_name(built, relationships)
            if not collection_name:
                # Without a name the write would create or update a
                # collection with an empty title; report it instead.
                report["collection"] = "error: no collection name available"
                return report
            collection_id = self._find_collection_id(collection_name)
            try:
                self._client.add_series_to_collection(
                    collection_id=collection_id,
                    title=collection_name,
                    series_ids=sorted(all_kavita_ids),
                )
                report["collection"] = collection_name
            except Exception as exc:
                report["collection"] = f"error: {exc}"
        return report
    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------
    def _find_collection_id(self, name: str) -> int:
        """Returns the id of an existing collection by title, or 0 to create.

        Titles are compared case-insensitively with surrounding whitespace
        ignored.  A failing collection listing is treated as "not found".
        """
        if not name:
            return 0
        target = name.strip().lower()
        try:
            for col in self._client.list_collections():
                if (col.get("title") or "").strip().lower() == target:
                    try:
                        return int(col.get("id") or 0)
                    except (TypeError, ValueError):
                        return 0
        except Exception:
            # Deliberate best effort: fall back to "create" (0).
            pass
        return 0
    def _collection_name(self, built: dict,
                         relationships: list[dict]) -> str:
        """
        Picks the collection name.  Uses the parent series title from
        MangaBaka if the current series has one; otherwise falls back to
        the current series' own title (`built["mangabakaTitle"]`, or ""
        when that is missing).
        """
        for rel in relationships:
            if (rel.get("relation_type") or "").lower() != "parent":
                continue
            parent_id = rel.get("to_series_id")
            if parent_id is None:
                # Nothing to look up for a parent entry without an id.
                continue
            if self._builder is not None:
                try:
                    parent_md = self._builder.fetch_series(parent_id)
                    if parent_md and parent_md.get("title"):
                        return parent_md["title"]
                except Exception:
                    # Best effort: fall through to the cache lookup.
                    pass
            # Even without a builder, the cache may know the parent.
            hit = self._cache.get_by_mangabaka_id(parent_id)
            if hit:
                _title, entry = hit
                name = entry.get("mangabakaName")
                if name:
                    return name
        return built.get("mangabakaTitle") or ""
@@ -37,18 +37,27 @@ Data source notes
 from __future__ import annotations
 import difflib
 import re
 import sys
 import xml.etree.ElementTree as ET
 from contextlib import contextmanager
 from pathlib import Path
 import requests
 # Shared modules live one level up (src/); needed when a module in this
 # folder is run directly as a script (the entry points set the path).
 if __name__ == "__main__":
    sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
 from MangadexVolumeResolver import MangaDexVolumeResolver
-from MangaBakaWorksResolver import MangaBakaWorksResolver
+from MangaBakaWorksResolver import MangaBakaWorksResolver, _pick_image_url
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from MatchesCache import MatchesCache
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
 from CoverCache import CoverCache, _IMAGE_EXTS
 from TextUtils import person_name_with_id
 try:
    from PIL import Image
@@ -57,10 +66,20 @@ except ImportError:
    _HAS_PIL = False
@contextmanager
 def _no_measure():
    """
    No-op stand-in for a perf recorder's measure() context manager.

    Yields once and does nothing else; ComicInfoBuilder._measure() returns
    it when no recorder is attached.
    """
    yield
 # --------------------------------------------------------------------------
 # Constants
 # --------------------------------------------------------------------------
-_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
+# Series types accepted by the MangaBaka search endpoint.  Light/web novels
 # are filtered out because this pipeline only handles image-based manga.
 # Passed to `requests` as a list so each value becomes its own `&type=...`
 # query parameter (MangaBaka's API expects repeated keys, not a CSV list).
 _SEARCH_TYPES = ["manga", "manhwa", "manhua"]
 _AGE_RATING_MAP = {
    "safe": "Everyone",
@@ -172,7 +191,8 @@ class ComicInfoBuilder:
                 works_resolver: "MangaBakaWorksResolver | None" = None,
                 mal_resolver: "MALResolver | None" = None,
                 al_resolver: "AniListResolver | None" = None,
-                 matches_cache: "MatchesCache | None" = None):
+                 matches_cache: "MatchesCache | None" = None,
                 cover_cache: "CoverCache | None" = None):
        if not manga_title or not str(manga_title).strip():
            raise ValueError("manga_title must not be empty.")
@@ -184,6 +204,9 @@ class ComicInfoBuilder:
        self.request_timeout = request_timeout
        self._session = session or requests.Session()
        self._session.headers.setdefault("User-Agent", "ComicInfoBuilder/1.0")
        # Throttle every call to api.mangabaka.dev (idempotent — safe even
        # when the session was already prepared by a parent class).
        _apply_mangabaka_rate_limit(self._session)
        self._volume_resolver = (volume_resolver
                                 or MangaDexVolumeResolver(
@@ -200,6 +223,13 @@ class ComicInfoBuilder:
        self._al_resolver = al_resolver or AniListResolver(
            request_timeout=request_timeout)
        self._matches_cache = matches_cache
        self._cover_cache = cover_cache or _default_cover_cache()
        # Optional performance recorder (duck-typed: any object with a
        # .measure(name) context manager).  The mover sets this per chapter;
        # when None, _measure() is a no-op so the builder stays decoupled
        # from PerfStats and works standalone (e.g. the cover updater).
        self.perf = None
        self._metadata: "dict | None" = None
        self._pages: list[dict] = []
@@ -245,6 +275,12 @@ class ComicInfoBuilder:
        self._cover_path = None
        self._suwayomi_data = {}
    def _measure(self, name: str):
        """Times a named step on the attached recorder; no-op when unset."""
        if self.perf is not None:
            return self.perf.measure(name)
        return _no_measure()
    # ======================================================================
    # Public XML functions
    # ======================================================================
@@ -288,10 +324,12 @@ class ComicInfoBuilder:
        if not folder.is_dir():
            raise NotADirectoryError(f"Folder not found: {folder}")
        with self._measure("read_comicinfo"):
            self._suwayomi_data = self._read_existing_comicinfo(folder)
        self._cover_path = None
        if download_cover:
            with self._measure("cover"):
                self._cover_path = self._download_cover(folder, cover_filename)
        cover_resolved = self._cover_path.resolve() if self._cover_path else None
@@ -312,6 +350,9 @@ class ComicInfoBuilder:
        ordered.extend((img, "Story") for img in story_images)
        self._pages = []
        # Probing every page for its pixel dimensions reads each file — on a
        # network share this is often the dominant per-chapter cost.
        with self._measure("image_dimensions"):
            for index, (img_path, page_type) in enumerate(ordered):
                width, height = self._image_dimensions(img_path)
                try:
@@ -378,7 +419,8 @@ class ComicInfoBuilder:
        url = f"{self.api_base_url}/series/search"
        resp = self._session.get(
-            url, params={"q": title, "page": 1, "limit": 1},
+            url, params={"q": title, "type": _SEARCH_TYPES,
                         "page": 1, "limit": 1},
            timeout=self.request_timeout)
        resp.raise_for_status()
        data = resp.json().get("data") or []
@@ -389,7 +431,7 @@ class ComicInfoBuilder:
                title,
                mangabaka_id=series.get("id"),
                mangabaka_name=series.get("title") or "",
-                image_url=_pick_cover_url(series.get("cover")),
+                image_url=_pick_thumbnail_url(series.get("cover")),
            )
        return series
@@ -428,8 +470,7 @@ class ComicInfoBuilder:
        # ----- Title / Series -----------------------------------------------
        add("Title", sd.get("Title") or f"Chapter {self._chapter}")
        add("Series", md.get("title") or self._manga_title)
-        add("LocalizedSeries",
+        add("LocalizedSeries", self._romanized_for_native(md))
            md.get("native_title") or md.get("romanized_title"))
        add("SeriesSort", self._get_sort_title(md))
        add("Number", sd.get("Number") or self._chapter)
        add("Count", md.get("total_chapters"))
@@ -473,9 +514,19 @@ class ComicInfoBuilder:
        add("Tags",  ", ".join(_format_term(t) for t in (md.get("tags") or [])))
        # ----- Characters — MAL first, AniList fallback ---------------------
-        characters = self._mal_resolver.get_characters(mal_id)
+        # Names are disambiguated with the tracker *character* id
-        if not characters and al_id:
+        # ("Rem (MAL 118737)") so same-named characters from different
-            characters = self._al_resolver.get_characters(al_id)
+        # series stay separate Kavita person records.  The format is shared
        # with the light-novel updater — see TextUtils.person_name_with_id.
        char_entries = self._mal_resolver.get_characters_detailed(mal_id)
        if not char_entries and al_id:
            char_entries = self._al_resolver.get_characters_detailed(al_id)
        characters = [
            person_name_with_id(e.get("name"),
                                mal_id=e.get("mal_id"),
                                al_id=e.get("al_id"))
            for e in char_entries if (e.get("name") or "").strip()
        ]
        add("Characters", ", ".join(characters) if characters else None)
        # ----- Web links ----------------------------------------------------
@@ -570,11 +621,13 @@ class ComicInfoBuilder:
    # ======================================================================
    def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None":
        """
-        Downloads the cover for the current chapter/volume.
+        Fetches the cover for the current chapter/volume and writes it into
        `folder`.
-        If a volume is known and a volume-specific cover exists in MangaBaka
+        If a volume is known and a volume-specific cover exists in MangaBaka,
-        works, that cover is used.  Otherwise the series default cover is
+        that cover is used; otherwise the series default cover.  The image
-        downloaded (raw variant preferred).
+        itself comes from the CoverCache, so a cover shared by many chapters
        is downloaded only once.
        """
        md = self._get_metadata()
        volume = self._determine_volume()
@@ -592,18 +645,13 @@ class ComicInfoBuilder:
        if not cover_url:
            cover_url = _pick_cover_url(md.get("cover"))
-        if not cover_url:
+        fetched = self._cover_cache.get(cover_url) if cover_url else None
        if not fetched:
            return None
-        try:
+        data, ext = fetched
            resp = self._session.get(cover_url, timeout=self.request_timeout)
            resp.raise_for_status()
        except requests.RequestException:
            return None
        ext = _guess_extension(cover_url, resp.headers.get("Content-Type", ""))
        target = folder / f"{cover_filename}{ext}"
-        target.write_bytes(resp.content)
+        target.write_bytes(data)
        return target
    # ======================================================================
@@ -637,6 +685,82 @@ class ComicInfoBuilder:
    # ======================================================================
    # Title helpers
    # ======================================================================
    # Mapping from series type to the matching romanized language code(s)
    # in the MangaBaka titles array.  Used to pick the correct romaji /
    # romaja / pinyin for LocalizedSeries.
    _ROMANIZED_LANG_BY_TYPE = {
        "manga":  ("ja-latn", "ja-romaji"),
        "manhwa": ("ko-latn", "ko-romaji"),
        "manhua": ("zh-latn",),
    }
    @staticmethod
    def _pick_best_title(titles, language_codes: tuple,
                         prefer_trait: "str | None" = None) -> "str | None":
        """
        Picks the highest-scoring entry from a MangaBaka `titles` list for
        any of the given language codes.
        Scoring: preferred trait (+4) > "official" trait (+2) > is_primary
        (+1); first seen wins on ties.  Returns None when no entry matches.
        """
        if not isinstance(titles, list):
            return None
        best_score = -1
        best_title: "str | None" = None
        for entry in titles:
            if not isinstance(entry, dict):
                continue
            lang = (entry.get("language") or entry.get("lang") or "").lower()
            if lang not in language_codes:
                continue
            title = entry.get("title")
            if not title:
                continue
            traits = entry.get("traits") or []
            score = 0
            if prefer_trait and prefer_trait in traits:
                score += 4
            if "official" in traits:
                score += 2
            if entry.get("is_primary"):
                score += 1
            if score > best_score:
                best_score, best_title = score, title
        return best_title
    @classmethod
    def _romanized_for_native(cls, md: dict) -> "str | None":
        """
        Picks the romanized title in the manga's original language from the
        ``titles`` array.
        The series' original language is inferred from ``type``::
            manga  -> ja-Latn  (Japanese romaji)
            manhwa -> ko-Latn  (Korean romaja)
            manhua -> zh-Latn  (Chinese pinyin)
        Among multiple entries for the matching language, the one with the
        highest "quality score" wins (``official`` trait > ``is_primary`` >
        first seen).
        The root-level ``romanized_title`` field is **deliberately not used
        as a fallback** — MangaBaka frequently stores a different language's
        romanization there (e.g. Korean romaja on a Japanese manga), which
        is exactly what this function is meant to avoid.
        Returns ``None`` when no romanized title is available for the
        inferred language.
        """
        mtype = (md.get("type") or "").lower()
        langs = cls._ROMANIZED_LANG_BY_TYPE.get(mtype)
        if not langs:
            return None
        titles = md.get("titles") or md.get("alt_titles") or []
        return cls._pick_best_title(titles, langs)
    def _get_sort_title(self, md: dict) -> "str | None":
        """
        Returns the SeriesSort title in the configured language.
@@ -671,31 +795,7 @@ class ComicInfoBuilder:
        def pick(language_codes: tuple, prefer_trait: "str | None" = None
                 ) -> "str | None":
-            """Picks the best title entry for any of the given language codes."""
+            return self._pick_best_title(titles, language_codes, prefer_trait)
            if not isinstance(titles, list):
                return None
            best_score = -1
            best_title: "str | None" = None
            for entry in titles:
                if not isinstance(entry, dict):
                    continue
                lang = (entry.get("language") or entry.get("lang") or "").lower()
                if lang not in language_codes:
                    continue
                title = entry.get("title")
                if not title:
                    continue
                traits = entry.get("traits") or []
                score = 0
                if prefer_trait and prefer_trait in traits:
                    score += 4
                if "official" in traits:
                    score += 2
                if entry.get("is_primary"):
                    score += 1
                if score > best_score:
                    best_score, best_title = score, title
            return best_title
        result: dict[str, str] = {}
@@ -956,12 +1056,14 @@ class ComicInfoBuilder:
        return unique
    @staticmethod
-    def _read_existing_comicinfo(folder: Path) -> dict:
+    def read_comicinfo_fields(xml_source) -> dict:
-        xml_path = folder / "ComicInfo.xml"
+        """
-        if not xml_path.is_file():
+        Parses ComicInfo.xml content (bytes or str) and returns the fields
-            return {}
+        relevant as supplementary Suwayomi data.  Returns {} on parse errors.
        Reusable for XML read directly from a CBZ archive (no extraction).
        """
        try:
-            root = ET.parse(xml_path).getroot()
+            root = ET.fromstring(xml_source)
        except ET.ParseError:
            return {}
@@ -975,6 +1077,16 @@ class ComicInfoBuilder:
                data[tag] = child.text.strip()
        return data
    @staticmethod
    def _read_existing_comicinfo(folder: Path) -> dict:
        """
        Loads the supplementary fields from ``folder/ComicInfo.xml``.
        Returns {} when the file is absent or cannot be read; the actual
        parsing is delegated to ``read_comicinfo_fields``.
        """
        candidate = folder / "ComicInfo.xml"
        if candidate.is_file():
            try:
                return ComicInfoBuilder.read_comicinfo_fields(
                    candidate.read_bytes())
            except OSError:
                pass
        return {}
    @staticmethod
    def _image_dimensions(path: Path):
        if not _HAS_PIL:
@@ -987,22 +1099,37 @@ class ComicInfoBuilder:
 # --------------------------------------------------------------------------
-# Module-level helpers (shared with MangaBakaWorksResolver logic)
+# Module-level helpers
 # --------------------------------------------------------------------------
-def _pick_cover_url(cover) -> "str | None":
+
 # Alias: _pick_image_url (from MangaBakaWorksResolver) is the canonical
 # generic image-block picker; _pick_cover_url is kept for backward compat.
 _pick_cover_url = _pick_image_url
 # Shared fallback CoverCache for builders constructed without an explicit
 # one (temporary directory, removed at process exit).  Created lazily so
 # importing this module never touches the filesystem.
 _shared_cover_cache: "CoverCache | None" = None
 def _default_cover_cache() -> CoverCache:
    """Returns the module-wide fallback CoverCache, creating it on first use."""
    global _shared_cover_cache
    cache = _shared_cover_cache
    if cache is None:
        cache = _shared_cover_cache = CoverCache()
    return cache
 def _pick_thumbnail_url(cover) -> "str | None":
    """
-    Selects the best cover URL from a MangaBaka cover object.
+    Picks a small cover variant suitable for a UI thumbnail.
-    Real API shape (from `GET /v1/series/{id}` and `/works`):
+    Order of preference: x150@x2 > x150@x1 > x150@x3 > x250@x2 > x250@x1 >
-        {
+    x250@x3 > x350@x2 > x350@x1 > x350@x3 > raw. x150@x2 is roughly 300px
-          "raw":  {"url": "...", "size": ..., "height": ..., "width": ...},
+    wide — sharp on HiDPI displays at the ~90px thumbnail size used in
-          "x150": {"x1": "...", "x2": "...", "x3": "..."},
+    the matches table, while still being a fraction of the raw image
-          "x250": {"x1": "...", "x2": "...", "x3": "..."},
+    weight (often 50KB vs. several MB).
          "x350": {"x1": "...", "x2": "...", "x3": "..."}
        }
-    Order of preference: raw original  >  x350@x3  >  x250@x3  >  x150@x3
+    Falls back to `_pick_cover_url` if no thumbnail variant is available.
    (falling through to lower densities and sizes as needed).
    """
    if not cover:
        return None
@@ -1011,46 +1138,17 @@ def _pick_cover_url(cover) -> "str | None":
    if not isinstance(cover, dict):
        return None
-    # 1) Preferred: the unscaled "raw" image
+    for size_key in ("x150", "x250", "x350"):
    raw = cover.get("raw")
    if isinstance(raw, dict):
        url = raw.get("url")
        if isinstance(url, str) and url:
            return url
    elif isinstance(raw, str) and raw:
        return raw
    # 2) Fallback: size-keyed variants, largest first, highest density first
    for size_key in ("x350", "x250", "x150"):
        variant = cover.get(size_key)
        if isinstance(variant, dict):
-            for density in ("x3", "x2", "x1"):
+            for density in ("x2", "x1", "x3"):
                url = variant.get(density)
                if isinstance(url, str) and url:
                    return url
        elif isinstance(variant, str) and variant:
            return variant
-    # 3) Last-ditch fallback: any http URL anywhere in the structure
+    return _pick_cover_url(cover)
    for val in cover.values():
        if isinstance(val, str) and val.startswith("http"):
            return val
        if isinstance(val, dict):
            for sub in val.values():
                if isinstance(sub, str) and sub.startswith("http"):
                    return sub
    return None
 def _guess_extension(url: str, content_type: str) -> str:
    """
    Derives an image file extension for a download.
    The URL path suffix (query string removed) wins when it is a known
    image extension; otherwise the Content-Type is searched for
    png / webp / gif, and ".jpg" is the final default.
    """
    suffix = Path(url.split("?")[0]).suffix.lower()
    if suffix in _IMAGE_EXTS:
        return suffix
    mime = (content_type or "").lower()
    for token in ("png", "webp", "gif"):
        if token in mime:
            return "." + token
    return ".jpg"
 # --------------------------------------------------------------------------
@@ -0,0 +1,554 @@
 """
 kavita_volume_cover_updater.py
 ==============================
 Periodically re-checks chapters already moved to the Kavita library whose
 volume could not be resolved at move time (``"volume": null`` in the
 series' ``chapter_index.json``).
 When MangaDex has since assigned the chapter to a volume, the updater:
  1. writes the volume into ``chapter_index.json``,
  2. updates ``<Volume>`` inside the chapter's ComicInfo.xml (in-archive),
  3. downloads the MangaBaka volume cover and swaps it in for the
     placeholder ``000.<ext>`` series cover, and
  4. refreshes the *first* chapter's ComicInfo.xml with full metadata —
     Kavita can be configured to take series metadata from the lowest
     chapter, so it must reflect the latest state.
 Host-IO policy
 --------------
 * Per series only ``chapter_index.json`` is read (no archive is opened to
  discover its contents).
 * Series without null-volume chapters are skipped before any API call.
 * An archive is read+rewritten exactly once per update (single pass,
  written to a ``.tmp`` file, then atomically replaced).
 Every updated chapter is appended to a log file (one line per update).
 Reused components
 -----------------
 * ``SuwayomiMover``            — chapter index helpers, dirname sanitizer
 * ``ComicInfoBuilder``         — metadata fetch (matches-cache ID lookup),
                                 chapter→volume resolution, XML build
 * ``MangaBakaWorksResolver``   — volume covers (/images with /works fallback)
 * ``MangaDexVolumeResolver``   — chapter→volume aggregate (shared cache)
 * ``MangaBakaRateLimit``       — process-wide API throttle
 Dependencies
 ------------
    requests    ->  pip install requests
    Pillow      ->  pip install pillow   (optional, page-0 dimensions)
 """
 from __future__ import annotations
 import io
 import sys
 import threading
 import xml.etree.ElementTree as ET
 import zipfile
 from datetime import datetime
 from pathlib import Path
 import requests
 # Shared modules live one level up (src/); needed when a module in this
 # folder is run directly as a script (the entry points set the path).
 if __name__ == "__main__":
    sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
 from ComicInfoBuilder import ComicInfoBuilder
 from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from MatchesCache import MatchesCache
 from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
                           _sanitize_dirname, _normalise_volume_value)
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
 from CronSchedule import CronSchedule
 from CoverCache import CoverCache, _IMAGE_EXTS
 try:
    from PIL import Image
    _HAS_PIL = True
 except ImportError:
    _HAS_PIL = False
 def _now() -> str:
    return datetime.now().isoformat(timespec="seconds")
 def _image_dims_from_bytes(data: bytes) -> tuple:
    """
    Measures an in-memory image.
    Returns Pillow's ``size`` (width, height) for the blob, or
    (None, None) when Pillow is not installed or the blob cannot be opened.
    """
    if _HAS_PIL:
        try:
            with Image.open(io.BytesIO(data)) as img:
                return img.size
        except Exception:
            pass
    return (None, None)
 def _chapter_sort_value(num: str) -> float:
    try:
        return float(num)
    except (TypeError, ValueError):
        return float("inf")
 def _update_page0_attrs(pages_el: "ET.Element", cover_bytes: bytes) -> None:
    """
    Updates the first child of ``pages_el`` whose ``Image`` attribute is "0".
    ``ImageSize`` is always set to the byte length of the cover;
    ``ImageWidth`` / ``ImageHeight`` only when the dimensions could be read.
    Does nothing when no such page entry exists.
    """
    target = next((p for p in pages_el if p.get("Image") == "0"), None)
    if target is None:
        return
    target.set("ImageSize", str(len(cover_bytes)))
    width, height = _image_dims_from_bytes(cover_bytes)
    if not (width and height):
        return
    target.set("ImageWidth", str(width))
    target.set("ImageHeight", str(height))
 def _serialize_tree(root: "ET.Element") -> str:
    tree = ET.ElementTree(root)
    try:
        ET.indent(tree, space="  ")
    except AttributeError:
        pass
    return ('<?xml version="1.0" encoding="UTF-8"?>\n'
            + ET.tostring(root, encoding="unicode"))
 class KavitaVolumeCoverUpdater:
    """
    Scans the Kavita library for chapters whose volume was unknown at move
    time and back-fills volume + volume cover once MangaDex / MangaBaka
    provide the data.  Runs periodically on a background thread.
    Parameters
    ----------
    kavita_path      : Root of the Kavita library (series folders inside).
    matches_cache    : MatchesCache — provides the MangaBaka series ID per
                       series (mandatory; folders without a match are skipped).
    language         : ComicInfo language (passed to ComicInfoBuilder).
    request_timeout  : HTTP timeout in seconds.
    log_path         : File that receives one line per updated chapter.
                       Default: <kavita_path>/volume_updater.log
    schedule         : Cron expression (5 fields) defining when scans run,
                       e.g. "0 19 * * 1,4" = 19:00 every Monday and
                       Thursday.  Evaluated in local time — set the TZ env
                       var inside Docker.  Default: "0 19 * * 1,4".
    cover_cache_dir  : Directory for the persistent cover cache.  None ->
                       temporary cache, deleted at process exit.
    """
    def __init__(self,
                 kavita_path,
                 *,
                 matches_cache: MatchesCache,
                 language: str = "en",
                 request_timeout: int = 30,
                 api_base_url: str = "https://api.mangabaka.dev/v1",
                 log_path=None,
                 schedule: str = "0 19 * * 1,4",
                 cover_cache_dir=None):
        """
        Stores the configuration and builds the shared HTTP session plus
        the resolver / cache objects used by every scan.
        ``api_base_url`` is the MangaBaka API root; a trailing slash is
        stripped and the stripped form is used everywhere.
        No thread is started here — call start() for periodic scans.
        """
        self._dst = Path(kavita_path)
        self._matches_cache = matches_cache
        self._language = language
        self._timeout = request_timeout
        self._api_base_url = api_base_url.rstrip("/")
        self._log_path = (Path(log_path) if log_path
                          else self._dst / "volume_updater.log")
        self._cron = CronSchedule(schedule)
        session = requests.Session()
        # A fresh requests.Session already carries a default User-Agent
        # ("python-requests/x.y"), so setdefault() would never apply ours —
        # assign it explicitly.
        session.headers["User-Agent"] = "KavitaVolumeCoverUpdater/1.0"
        _apply_mangabaka_rate_limit(session)
        self._session = session
        self._mal = MALResolver(request_timeout=request_timeout)
        self._al  = AniListResolver(request_timeout=request_timeout)
        self._vol_resolver = MangaDexVolumeResolver(
            request_timeout=request_timeout, session=session)
        # Same normalised base URL that update_series() hands to
        # ComicInfoBuilder, so both components build identical endpoints.
        self._works_resolver = MangaBakaWorksResolver(
            api_base_url=self._api_base_url,
            request_timeout=request_timeout, session=session)
        self._cover_cache = CoverCache(
            cover_cache_dir, session=session, request_timeout=request_timeout)
        self._stop = threading.Event()
        self._thread: "threading.Thread | None" = None
    # ------------------------------------------------------------------
    # Cron API (mirrors SuwayomiFolderWatcher)
    # ------------------------------------------------------------------
    def start(self) -> None:
        """
        Launches the background scan thread and returns immediately.
        A call while the previous thread is still alive is a no-op.
        """
        running = self._thread
        if running is not None and running.is_alive():
            return
        self._stop.clear()
        worker = threading.Thread(
            target=self._loop, name="KavitaVolumeCoverUpdater", daemon=True)
        self._thread = worker
        worker.start()
        print(f"[{_now()}] [updater] scanning {self._dst} "
              f"on cron '{self._cron.expression}'", flush=True)
    def stop(self) -> None:
        """Stops the scan thread (current scan finishes its series first)."""
        self._stop.set()
        if self._thread is not None:
            self._thread.join(timeout=10)
    def wait(self) -> None:
        """Blocks the calling thread until stop() is invoked."""
        self._stop.wait()
    def _loop(self) -> None:
        """
        Thread body: sleeps until the next cron occurrence, runs one full
        scan, repeats.  Returns as soon as the stop event is set; a failing
        scan is logged and does not end the loop.
        """
        while True:
            if self._stop.is_set():
                return
            next_run = self._cron.next_after(datetime.now())
            delay = (next_run - datetime.now()).total_seconds()
            if delay < 0:
                delay = 0.0
            print(f"[{_now()}] [updater] next scheduled scan: "
                  f"{next_run.isoformat(timespec='minutes')}", flush=True)
            # Event.wait() returns True when stop() fired during the sleep.
            if self._stop.wait(delay):
                return
            try:
                summary = self.update_all()
            except Exception as exc:
                print(f"[{_now()}] [updater] scan ERROR: {exc}", flush=True)
            else:
                print(f"[{_now()}] [updater] scan done: "
                      f"{summary['series_updated']} series / "
                      f"{summary['chapters_updated']} chapters updated",
                      flush=True)
    # ------------------------------------------------------------------
    # Public scan API
    # ------------------------------------------------------------------
    def update_all(self) -> dict:
        """
        Scans every series folder under the Kavita root once.
        Returns {"series_scanned": n, "series_updated": n, "chapters_updated": n}.
        """
        summary = {"series_scanned": 0, "series_updated": 0,
                   "chapters_updated": 0}
        if not self._dst.is_dir():
            print(f"[updater] kavita path missing: {self._dst}", flush=True)
            return summary
        # The whole point of a scan is detecting volume assignments added
        # since the previous run — start from fresh API data, not the
        # process-lifetime resolver caches.
        self._vol_resolver.clear_cache()
        self._works_resolver.clear_cache()
        for series_dir in sorted(self._dst.iterdir()):
            if self._stop.is_set():
                break
            if not series_dir.is_dir():
                continue
            summary["series_scanned"] += 1
            try:
                updated = self.update_series(series_dir)
            except Exception as exc:
                print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
                continue
            if updated:
                summary["series_updated"] += 1
                summary["chapters_updated"] += updated
        return summary
    def update_series(self, series_dir: Path) -> int:
        """
        Updates one series folder.  Returns the number of updated chapters.
        Only chapters listed in chapter_index.json with ``"volume": null``
        are candidates; everything else costs no further host reads.
        Flow: load the index, collect null-volume chapters, look up the
        matches.json entry for the folder, resolve volumes + covers through
        the API, rewrite the affected archives, then persist the index.
        Returns 0 without touching any archive when there is nothing to
        resolve or the folder has no match with a mangabakaId.
        """
        index = _load_chapter_index(series_dir)
        chapters: dict = index["chapter"]
        if not chapters:
            return 0
        # Candidates: index entries whose stored volume is still None.
        missing = [num for num, e in chapters.items()
                   if isinstance(e, dict) and e.get("volume") is None]
        if not missing:
            return 0
        match_key, match = self._find_match_for_folder(series_dir.name)
        if not match or not match.get("mangabakaId"):
            print(f"[updater] {series_dir.name}: no matches.json entry — skip",
                  flush=True)
            return 0
        # Builder resolves metadata via the cached MangaBaka ID and gives us
        # the exact same chapter→volume logic the mover uses.
        builder = ComicInfoBuilder(
            match_key, chapter=missing[0],
            api_base_url=self._api_base_url,
            language=self._language,
            request_timeout=self._timeout,
            session=self._session,
            volume_resolver=self._vol_resolver,
            works_resolver=self._works_resolver,
            mal_resolver=self._mal,
            al_resolver=self._al,
            matches_cache=self._matches_cache,
            cover_cache=self._cover_cache,
        )
        md = builder.fetch_metadata()
        series_id = str(md.get("id") or "")
        # Resolve volumes for all null-volume chapters first (API only).
        updates: dict[str, dict] = {}   # num -> {"volume": str, "cover": tuple|None}
        for num in sorted(missing, key=_chapter_sort_value):
            # The same builder is reused; only its chapter is switched.
            builder.chapter = num
            try:
                volume = builder._determine_volume()
            except Exception:
                # A failed lookup is treated like "still unknown".
                volume = None
            if not volume:
                continue
            updates[num] = {"volume": volume,
                            "cover": self._fetch_cover(series_id, volume)}
        if not updates:
            return 0
        # Lowest chapter of the whole index (not only of the updated ones).
        first = min(chapters, key=_chapter_sort_value)
        updated = 0
        for num, up in updates.items():
            entry = chapters[num]
            cbz = series_dir / (entry.get("archiveName") or "")
            if not entry.get("archiveName") or not cbz.is_file():
                print(f"[updater] {series_dir.name} ch.{num}: archive missing "
                      f"({entry.get('archiveName')!r}) — skip", flush=True)
                continue
            # The first chapter gets a full metadata rebuild (Kavita reads
            # series metadata from it); other chapters only a volume edit.
            ok, cover_swapped = self._apply_update(
                cbz, builder, num,
                volume=up["volume"], cover=up["cover"],
                full_rebuild=(num == first))
            if not ok:
                continue
            # The index entry is only changed after the archive rewrite
            # succeeded.
            entry["volume"] = _normalise_volume_value(up["volume"])
            updated += 1
            self._log(f"{series_dir.name} | chapter {num} -> volume "
                      f"{up['volume']} | cover "
                      f"{'replaced' if cover_swapped else 'kept'} | {cbz.name}")
        # Refresh the first chapter's metadata when any other chapter changed
        # (skip when it was already fully rebuilt in the loop above).
        if updated and first not in updates:
            first_entry = chapters.get(first) or {}
            cbz = series_dir / (first_entry.get("archiveName") or "")
            if first_entry.get("archiveName") and cbz.is_file():
                ok, _ = self._apply_update(
                    cbz, builder, first,
                    volume=None, cover=None, full_rebuild=True)
                if ok:
                    self._log(f"{series_dir.name} | chapter {first} | "
                              f"first-chapter metadata refreshed | {cbz.name}")
        # The index is written once, and only when at least one chapter changed.
        if updated:
            _save_chapter_index(series_dir, index)
        return updated
    # ------------------------------------------------------------------
    # Matching Kavita folder -> matches.json entry
    # ------------------------------------------------------------------
    def _find_match_for_folder(self, folder_name: str) -> tuple:
        """
        Looks up the matches.json entry belonging to a Kavita series folder.
        Pass 1 compares the folder name with each entry's sanitized
        ``mangabakaName``; pass 2 falls back to the sanitized
        ``folderTitle`` (or the cache key when that is empty).  Comparison
        is whitespace-trimmed and case-insensitive.
        Returns (match_key, entry) or (None, None).
        """
        wanted = folder_name.strip().casefold()
        entries = self._matches_cache.all()["matches"]

        def _same_folder(candidate: str) -> bool:
            return _sanitize_dirname(candidate).strip().casefold() == wanted

        for key, entry in entries.items():
            mb_name = entry.get("mangabakaName") or ""
            if mb_name and _same_folder(mb_name):
                return key, entry
        for key, entry in entries.items():
            if _same_folder(entry.get("folderTitle") or key):
                return key, entry
        return None, None
    # ------------------------------------------------------------------
    # Cover download
    # ------------------------------------------------------------------
    def _fetch_cover(self, series_id: str, volume) -> "tuple[str, bytes] | None":
        """
        Fetches the MangaBaka volume cover via the CoverCache (one download
        per unique URL, even across chapters sharing a volume).
        Returns ("000<ext>", bytes) or None when no cover is available.
        """
        try:
            url = self._works_resolver.get_cover_for_volume(series_id, volume)
        except Exception:
            url = None
        if not url:
            return None
        fetched = self._cover_cache.get(url)
        if not fetched:
            return None
        data, ext = fetched
        return (f"000{ext}", data)
    # ------------------------------------------------------------------
    # Archive update (single read + single write per archive)
    # ------------------------------------------------------------------
    def _apply_update(self, cbz_path: Path, builder: ComicInfoBuilder,
                      chapter_num: str, *,
                      volume, cover, full_rebuild: bool) -> tuple:
        """
        Rewrites one CBZ archive with an updated ComicInfo.xml and (when
        provided and a placeholder exists) a new cover image.
        The new archive is written to ``<name>.tmp`` next to the original
        and then moved over it; the temporary file is removed again when
        anything fails.
        ``cover`` is a (member_name, bytes) tuple or None.  The FrontCover
        page attributes in the XML are only refreshed when the cover is
        really swapped, so the XML never describes an image that is not in
        the archive.
        Returns (ok, cover_swapped); (False, False) on any error.
        """
        def _is_placeholder(member_name: str) -> bool:
            member = Path(member_name)
            return (member.stem == "000"
                    and member.suffix.lower() in _IMAGE_EXTS)

        tmp = None
        try:
            with zipfile.ZipFile(cbz_path, "r") as zin:
                infos = zin.infolist()
                # Cover is only ever *replaced*: inserting one would shift
                # every <Pages> image index in the existing XML.
                swap_cover = (cover is not None
                              and any(_is_placeholder(i.filename)
                                      for i in infos))
                # Decide this BEFORE building the XML: without a placeholder
                # the old page-0 image stays, and so must its attributes.
                xml_cover = cover if swap_cover else None
                try:
                    old_xml = zin.read("ComicInfo.xml")
                except KeyError:
                    old_xml = None
                if full_rebuild or old_xml is None:
                    new_xml = self._build_full_xml(
                        builder, chapter_num, old_xml, xml_cover)
                else:
                    new_xml = self._edit_volume_xml(old_xml, volume, xml_cover)
                    if new_xml is None:           # parse error -> full rebuild
                        new_xml = self._build_full_xml(
                            builder, chapter_num, None, xml_cover)
                tmp = cbz_path.with_suffix(cbz_path.suffix + ".tmp")
                wrote_xml = False
                wrote_cover = False
                with zipfile.ZipFile(tmp, "w", zipfile.ZIP_STORED) as zout:
                    for info in infos:
                        if swap_cover and _is_placeholder(info.filename):
                            # Write the new cover once; further "000.*"
                            # members would only create duplicate names.
                            if not wrote_cover:
                                zout.writestr(cover[0], cover[1])
                                wrote_cover = True
                        elif info.filename == "ComicInfo.xml":
                            zout.writestr("ComicInfo.xml", new_xml)
                            wrote_xml = True
                        else:
                            zout.writestr(info, zin.read(info.filename))
                    if not wrote_xml:
                        zout.writestr("ComicInfo.xml", new_xml)
            # Source archive is closed here, so the replace also works on
            # platforms that refuse to overwrite an open file.
            tmp.replace(cbz_path)
            return True, swap_cover
        except Exception as exc:
            # Do not leave a half-written "<name>.tmp" in the library.
            if tmp is not None:
                try:
                    tmp.unlink()
                except OSError:
                    pass
            print(f"[updater] {cbz_path.name}: update failed: {exc}",
                  flush=True)
            return False, False
    # ------------------------------------------------------------------
    # XML builders
    # ------------------------------------------------------------------
    def _edit_volume_xml(self, old_xml: bytes, volume,
                         cover) -> "str | None":
        """
        Writes ``volume`` into the <Volume> element of an existing
        ComicInfo.xml (creating the element when absent) and, when a cover
        tuple is given, refreshes the page-0 attributes under <Pages>.
        Returns the serialised XML, or None when ``old_xml`` cannot be parsed.
        """
        try:
            doc = ET.fromstring(old_xml)
        except ET.ParseError:
            return None
        volume_el = doc.find("Volume")
        if volume_el is None:
            volume_el = ET.SubElement(doc, "Volume")
        volume_el.text = str(volume)
        pages_el = doc.find("Pages") if cover is not None else None
        if pages_el is not None:
            _update_page0_attrs(pages_el, cover[1])
        return _serialize_tree(doc)
    def _build_full_xml(self, builder: ComicInfoBuilder, chapter_num: str,
                        old_xml: "bytes | None", cover) -> str:
        """
        Produces a complete ComicInfo.xml from ``builder`` for
        ``chapter_num``.  Fields read from the previous XML are handed to
        the builder as its Suwayomi data, and the previous <PageCount> and
        <Pages> elements are appended to the fresh tree (page-0 attributes
        refreshed when a cover tuple is given).
        """
        builder.chapter = chapter_num   # also clears builder page state
        if old_xml:
            builder._suwayomi_data = (
                ComicInfoBuilder.read_comicinfo_fields(old_xml))
        else:
            builder._suwayomi_data = {}
        new_root = builder._build_tree().getroot()
        previous = None
        if old_xml:
            try:
                previous = ET.fromstring(old_xml)
            except ET.ParseError:
                previous = None
        if previous is None:
            return _serialize_tree(new_root)
        old_pages = previous.find("Pages")
        if cover is not None and old_pages is not None:
            _update_page0_attrs(old_pages, cover[1])
        # Carry over in the original order: PageCount first, then Pages.
        for carried in (previous.find("PageCount"), old_pages):
            if carried is not None:
                new_root.append(carried)
        return _serialize_tree(new_root)
    # ------------------------------------------------------------------
    # Logging
    # ------------------------------------------------------------------
    def _log(self, msg: str) -> None:
        """
        Prints ``msg`` to stdout and appends it, timestamped, to the log
        file.  A log file that cannot be written is reported on stdout and
        otherwise ignored.
        """
        stamped = f"[{_now()}] {msg}"
        print(f"[updater] {msg}", flush=True)
        target = self._log_path
        try:
            target.parent.mkdir(parents=True, exist_ok=True)
            with target.open("a", encoding="utf-8") as handle:
                handle.write(stamped + "\n")
        except OSError as exc:
            print(f"[updater] cannot write log file {self._log_path}: {exc}",
                  flush=True)
 # --------------------------------------------------------------------------
 # Usage example
 # --------------------------------------------------------------------------
 if __name__ == "__main__":
    # Local (no-Docker) smoke test.  Adjust paths to your environment.
    # KAVITA_PATH below is a machine-specific UNC share; MATCHES_PATH is
    # resolved relative to this file (matches.json one directory up).
    KAVITA_PATH  = r"\\192.168.2.2\root\ServerData\Kavita\test"
    MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"
    # All other constructor arguments keep their defaults.
    updater = KavitaVolumeCoverUpdater(
        KAVITA_PATH,
        matches_cache=MatchesCache(MATCHES_PATH),
    )
    # One-shot scan (no cron thread):
    summary = updater.update_all()
    print(f"\n[updater] {summary}")
    # Or run on the cron schedule (default: 19:00 every Mon + Thu):
    # updater.start()
    # updater.wait()
@@ -43,7 +43,6 @@ Dependencies
 from __future__ import annotations
 import difflib
 import re
 import requests
@@ -0,0 +1,218 @@
 """
 matches_cache.py
 ================
 Persistent JSON cache that maps a normalised (lowercase) search title to the
 MangaBaka series it was matched against.
 Structure on disk::
    {
      "matches": {
        "<normalised lowercase key>": {
          "folderTitle":    "Original Folder Name",
          "mangabakaId":    "12345",
          "mangabakaName":  "One-Punch Man",
          "imageUrl":       "https://.../cover.jpg",
          "firstMatchTime": 1700000000
        },
        ...
      }
    }
 Keys are always stored lowercase so that folder names differing only in
 capitalisation (e.g. "[Oshi No Ko]" vs "[oshi no ko]") are treated as
 identical entries.  The original casing is preserved in the ``folderTitle``
 field and is used for display purposes (e.g. the web UI title link).
 The cache is consulted by ComicInfoBuilder before issuing a MangaBaka
 search request, and is written back to disk on every mutation so a crash
 does not lose matches that were resolved in the current run.
 """
 from __future__ import annotations
 import json
 import threading
 import time
 from pathlib import Path
 def _norm_key(title: str) -> str:
    """Normalises a cache key to lowercase for case-insensitive deduplication."""
    return title.lower()
 class MatchesCache:
    def __init__(self, path):
        self._path = Path(path)
        self._lock = threading.RLock()
        self._data: dict = {"matches": {}}
        self._load()
    # ------------------------------------------------------------------
    # Public lookup / mutation API
    # ------------------------------------------------------------------
    def get(self, title: str) -> "dict | None":
        """
        Returns a copy of the entry stored for ``title`` (case-insensitive),
        or None when there is no non-empty entry.
        """
        with self._lock:
            found = self._data["matches"].get(_norm_key(title))
            if not found:
                return None
            return dict(found)
    def add(self, title: str, *,
            mangabaka_id,
            mangabaka_name: str,
            image_url: "str | None") -> dict:
        """
        Stores a brand-new entry for ``title`` (replacing any entry under
        the same normalised key), stamps it with the current time, saves
        the cache and returns a copy of the stored entry.
        """
        record = {
            "folderTitle":    title,
            "mangabakaId":    "" if mangabaka_id is None else str(mangabaka_id),
            "mangabakaName":  mangabaka_name or "",
            "imageUrl":       image_url or "",
            "firstMatchTime": int(time.time()),
        }
        key = _norm_key(title)
        with self._lock:
            self._data["matches"][key] = record
            self._save_unlocked()
        return dict(record)
    def upsert(self, title: str, *,
               mangabaka_id=None,
               mangabaka_name=None,
               image_url=None,
               first_match_time=None) -> dict:
        """
        Creates the entry for ``title`` when missing, then overwrites only
        the fields whose argument is not None.  ``folderTitle`` is set on
        creation only, so the original casing survives later updates.  A
        ``first_match_time`` that cannot be converted to int is ignored.
        Saves the cache and returns a copy of the resulting entry.
        """
        key = _norm_key(title)
        with self._lock:
            matches = self._data["matches"]
            record = matches.get(key)
            if record is None:
                record = {
                    "folderTitle":    title,
                    "mangabakaId":    "",
                    "mangabakaName":  "",
                    "imageUrl":       "",
                    "firstMatchTime": int(time.time()),
                }
                matches[key] = record
            incoming = (
                ("mangabakaId",
                 None if mangabaka_id is None else str(mangabaka_id)),
                ("mangabakaName", mangabaka_name),
                ("imageUrl", image_url),
            )
            for field, value in incoming:
                if value is not None:
                    record[field] = value
            if first_match_time is not None:
                try:
                    record["firstMatchTime"] = int(first_match_time)
                except (TypeError, ValueError):
                    pass
            self._save_unlocked()
            return dict(record)
    def rename(self, old_title: str, new_title: str) -> bool:
        old_norm = _norm_key(old_title)
        new_norm = _norm_key(new_title)
        if not new_title or old_norm == new_norm:
            return False
        with self._lock:
            entry = self._data["matches"].pop(old_norm, None)
            if entry is None:
                return False
            entry["folderTitle"] = new_title
            self._data["matches"][new_norm] = entry
            self._save_unlocked()
            return True
    def remove(self, title: str) -> bool:
        """
        Deletes the entry for ``title`` and saves the cache.
        Returns True when an entry was removed, False when none existed.
        """
        key = _norm_key(title)
        with self._lock:
            matches = self._data["matches"]
            if key not in matches:
                return False
            del matches[key]
            self._save_unlocked()
            return True
    def all(self) -> dict:
        with self._lock:
            return {"matches": {k: dict(v)
                                for k, v in self._data["matches"].items()}}
    # ------------------------------------------------------------------
    # Internal IO
    # ------------------------------------------------------------------
    def _load(self) -> None:
        if not self._path.is_file():
            return
        try:
            with self._path.open("r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, json.JSONDecodeError) as exc:
            print(f"[MatchesCache] failed to load {self._path}: {exc}",
                  flush=True)
            return
        if not isinstance(loaded, dict) or not isinstance(loaded.get("matches"), dict):
            return
        normalized, changed = self._normalize_on_load(loaded["matches"])
        loaded["matches"] = normalized
        self._data = loaded
        if changed:
            print(f"[MatchesCache] migrated {changed} entr{'y' if changed == 1 else 'ies'} "
                  f"(lowercase keys / folderTitle), saving", flush=True)
            self._save_unlocked()
    @staticmethod
    def _normalize_on_load(raw: dict) -> "tuple[dict, int]":
        """
        Normalises the raw matches dict loaded from disk.
        - Keys are lowercased.
        - ``folderTitle`` is added from the original key when missing.
        - Duplicate keys (same normalised form) are merged by keeping the
          entry with the higher ``firstMatchTime``.
        - Entries that are not dicts are dropped.
        A ``firstMatchTime`` that is missing, null or not numeric counts as
        0, and a ``folderTitle`` that is not a string counts as "no casing
        information", so a hand-edited file cannot make loading fail.
        Returns (normalised_dict, number_of_changed_entries).
        """
        def _match_time(e: dict) -> float:
            try:
                return float(e.get("firstMatchTime", 0))
            except (TypeError, ValueError):
                return 0.0

        def _folder_title(e: dict, fallback: str) -> str:
            value = e.get("folderTitle", fallback)
            return value if isinstance(value, str) else fallback

        def _has_casing(text: str) -> bool:
            return text != text.lower()

        result: dict = {}
        changed = 0
        for orig_key, entry in raw.items():
            if not isinstance(entry, dict):
                continue
            norm = _norm_key(orig_key)
            entry = dict(entry)
            # Add folderTitle if absent
            if "folderTitle" not in entry:
                entry["folderTitle"] = orig_key
                changed += 1
            if norm != orig_key:
                changed += 1
            if norm not in result:
                result[norm] = entry
                continue
            # Merge duplicates: keep data from the more recent entry, but
            # prefer the folderTitle that contains uppercase letters (= the
            # original folder name) regardless of which entry is newer.
            existing = result[norm]
            existing_ft = _folder_title(existing, norm)
            new_ft = _folder_title(entry, norm)
            if _match_time(entry) > _match_time(existing):
                # Newer entry wins for data; preserve better-cased folderTitle
                if _has_casing(existing_ft) and not _has_casing(new_ft):
                    entry["folderTitle"] = existing_ft
                result[norm] = entry
            elif _has_casing(new_ft) and not _has_casing(existing_ft):
                # Existing entry stays; but adopt new folderTitle if it has casing
                existing["folderTitle"] = new_ft
        return result, changed
    def _save_unlocked(self) -> None:
        self._path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self._path.with_suffix(self._path.suffix + ".tmp")
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(self._data, f, ensure_ascii=False, indent=2)
        tmp.replace(self._path)
@@ -0,0 +1,642 @@
 """
 matches_web_app.py
 ==================
 Flask web UI for inspecting and editing the matches.json file produced by
 MatchesCache.
 Routes
 ------
 GET  /                       HTML table view (one row per cached match)
 GET  /api/matches            JSON dump of the full cache
 POST /api/matches            Update an entry's mangabakaId
                             body: {title, mangabakaId}
                             Server resolves the id against MangaBaka and
                             refreshes the mangabakaName + imageUrl fields.
 POST /api/matches/delete     Remove an entry          body: {title}
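 POST /api/build              Trigger a full re-scan via
                             SuwayomiMover.build_matches_only
 POST /api/move               Run the full move via the mover's process_all
 GET  /perf                   HTML view of the recorded move-performance runs
 GET  /api/perf               JSON dump of the recorded performance runs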
 The Title cell is rendered as a link to MangaBaka's search page restricted
 to the manga / manhwa / manhua types. Only mangabakaId is editable; title
 (folder name) and mangabakaName (info only) are read-only.
 """
 from __future__ import annotations
 import threading
 from flask import Flask, jsonify, request, Response
 from MatchesCache import MatchesCache
 from ComicInfoBuilder import _pick_thumbnail_url
 _INDEX_HTML = """<!doctype html>
 <html lang="en">
 <head>
  <meta charset="utf-8">
  <title>MangaBaka matches</title>
  <style>
    body  { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
    h1    { margin: 0 0 1rem; font-size: 1.4rem; }
    .bar  { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
    .bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
    button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
    button.primary { background:#2563eb; border-color:#2563eb; color:white; }
    button.danger  { background:#7f1d1d; border-color:#7f1d1d; color:white; }
    button:disabled { opacity:.5; cursor:default; }
    table { border-collapse: collapse; width: 100%; }
    th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
    th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
    th.sortable { cursor: pointer; user-select: none; }
    th.sortable:hover { background:#252525; }
    th .arrow { display:inline-block; width:.8em; color:#9ca3af; }
    tr:nth-child(even) td { background: #161616; }
    td.image img { max-width: 90px; max-height: 130px; display:block; }
    td.id input { width: 14rem; padding: .25rem; background:#222; color:#eee; border:1px solid #444; font-family: monospace; }
    td.title a { color: #60a5fa; text-decoration: none; }
    td.title a:hover { text-decoration: underline; }
    td.actions { white-space: nowrap; }
    .status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
    .dirty td { background: #1f2937 !important; }
    .count { color:#9ca3af; font-size:.9rem; margin-left:.5rem; }
  </style>
 </head>
 <body>
 <h1>MangaBaka matches <span id="count" class="count"></span></h1>
 <div class="bar">
  <input id="filter" type="search" placeholder="Filter by title…">
  <button id="reload">Reload</button>
  <button id="batchSave" class="primary">Save dirty (0)</button>
  <button id="build">Build all (rescan)</button>
  <button id="move">Start move</button>
  <a href="/perf" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
  <span class="status" id="status"></span>
 </div>
 <table>
  <thead>
    <tr>
      <th class="sortable" data-col="title">Title <span class="arrow" id="arrow-title"></span></th>
      <th>mangabakaId</th>
      <th>mangabakaName</th>
      <th class="sortable" data-col="firstMatchTime">firstMatchTime <span class="arrow" id="arrow-firstMatchTime"></span></th>
      <th>Image</th>
      <th></th>
    </tr>
  </thead>
  <tbody id="rows"></tbody>
 </table>
 <script>
 const TYPES = "&type=manhwa&type=manhua&type=manga";
 let matchesData = {};
 let currentSort = { col: "title", asc: true };
 function fmtTime(unix) {
  if (!unix) return "";
  const d = new Date(unix * 1000);
  return d.toLocaleString();
 }
 function searchUrl(title) {
  return "https://mangabaka.org/search?q=" + encodeURIComponent(title) + TYPES;
 }
 function setStatus(msg) { document.getElementById("status").textContent = msg; }
 function updateDirtyCount() {
  const n = document.querySelectorAll("#rows tr.dirty").length;
  const btn = document.getElementById("batchSave");
  btn.textContent = "Save dirty (" + n + ")";
  btn.disabled = n === 0;
 }
 function makeRow(title, e) {
  const tr = document.createElement("tr");
  tr.dataset.title = title;
  const displayTitle = e.folderTitle || title;
  tr.dataset.folderTitle = displayTitle;
  // Title — link only, not editable; shows folderTitle (original casing)
  const titleTd = document.createElement("td");
  titleTd.className = "title";
  const titleLink = document.createElement("a");
  titleLink.href = searchUrl(displayTitle);
  titleLink.target = "_blank";
  titleLink.rel = "noopener";
  titleLink.textContent = displayTitle;
  titleTd.appendChild(titleLink);
  tr.appendChild(titleTd);
  // mangabakaId — editable
  const idTd = document.createElement("td");
  idTd.className = "id";
  const idInp = document.createElement("input");
  idInp.value = e.mangabakaId || "";
  idInp.dataset.original = e.mangabakaId || "";
  idInp.addEventListener("input", () => {
    if (idInp.value !== idInp.dataset.original) tr.classList.add("dirty");
    else tr.classList.remove("dirty");
    updateDirtyCount();
  });
  idTd.appendChild(idInp);
  tr.appendChild(idTd);
  // mangabakaName — plain text (info only)
  const nameTd = document.createElement("td");
  nameTd.className = "name";
  nameTd.textContent = e.mangabakaName || "";
  tr.appendChild(nameTd);
  // firstMatchTime — plain text
  const timeTd = document.createElement("td");
  timeTd.textContent = fmtTime(e.firstMatchTime);
  tr.appendChild(timeTd);
  // Image
  const imgTd = document.createElement("td");
  imgTd.className = "image";
  const img = document.createElement("img");
  img.src = e.imageUrl || "";
  img.alt = "";
  img.loading = "lazy";
  imgTd.appendChild(img);
  tr.appendChild(imgTd);
  // Actions
  const actTd = document.createElement("td");
  actTd.className = "actions";
  const save = document.createElement("button");
  save.textContent = "Save";
  save.className = "primary";
  save.addEventListener("click", () => saveRow(tr));
  const del = document.createElement("button");
  del.textContent = "Delete";
  del.className = "danger";
  del.style.marginLeft = ".25rem";
  del.addEventListener("click", () => deleteRow(tr));
  actTd.append(save, del);
  tr.appendChild(actTd);
  tr._idInp = idInp;
  tr._nameTd = nameTd;
  tr._img = img;
  return tr;
 }
 async function saveRow(tr) {
  const title = tr.dataset.title;
  const newId = tr._idInp.value.trim();
  setStatus("Saving " + (tr.dataset.folderTitle || title) + "…");
  try {
    const r = await fetch("/api/matches", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ title: title, mangabakaId: newId }),
    });
    if (!r.ok) throw new Error(await r.text());
    const data = await r.json();
    const entry = data.entry || {};
    matchesData[title] = entry;
    tr._idInp.value = entry.mangabakaId || "";
    tr._idInp.dataset.original = entry.mangabakaId || "";
    tr._nameTd.textContent = entry.mangabakaName || "";
    tr._img.src = entry.imageUrl || "";
    tr.classList.remove("dirty");
    updateDirtyCount();
    setStatus("Saved " + (tr.dataset.folderTitle || title));
    return true;
  } catch (err) {
    setStatus("Save failed (" + title + "): " + err.message);
    return false;
  }
 }
 async function deleteRow(tr) {
  const title = tr.dataset.title;
  if (!confirm("Delete " + title + "?")) return;
  setStatus("Deleting " + title + "…");
  try {
    const r = await fetch("/api/matches/delete", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ title: title }),
    });
    if (!r.ok) throw new Error(await r.text());
    delete matchesData[title];
    tr.remove();
    updateDirtyCount();
    document.getElementById("count").textContent =
        "(" + Object.keys(matchesData).length + " entries)";
    setStatus("Deleted");
  } catch (err) {
    setStatus("Delete failed: " + err.message);
  }
 }
 async function batchSave() {
  const dirty = Array.from(document.querySelectorAll("#rows tr.dirty"));
  if (dirty.length === 0) return;
  if (!confirm("Save " + dirty.length + " changed row(s)?")) return;
  setStatus("Batch saving " + dirty.length + " rows…");
  let ok = 0, fail = 0;
  for (const tr of dirty) {
    const success = await saveRow(tr);
    if (success) ok++; else fail++;
  }
  setStatus("Batch: " + ok + " ok, " + fail + " failed");
 }
 function sortedTitles() {
  const titles = Object.keys(matchesData);
  const dir = currentSort.asc ? 1 : -1;
  if (currentSort.col === "title") {
    return titles.sort((a, b) => {
      const fa = (matchesData[a].folderTitle || a).toLowerCase();
      const fb = (matchesData[b].folderTitle || b).toLowerCase();
      return fa.localeCompare(fb) * dir;
    });
  }
  if (currentSort.col === "firstMatchTime") {
    return titles.sort((a, b) => {
      const av = matchesData[a].firstMatchTime || 0;
      const bv = matchesData[b].firstMatchTime || 0;
      return (av - bv) * dir;
    });
  }
  return titles;
 }
 function updateSortArrows() {
  for (const a of document.querySelectorAll("th .arrow")) a.textContent = "";
  const id = "arrow-" + currentSort.col;
  const el = document.getElementById(id);
  if (el) el.textContent = currentSort.asc ? "▲" : "▼";
 }
 function render() {
  const tbody = document.getElementById("rows");
  tbody.innerHTML = "";
  for (const t of sortedTitles()) {
    tbody.appendChild(makeRow(t, matchesData[t]));
  }
  updateSortArrows();
  applyFilter();
  updateDirtyCount();
  document.getElementById("count").textContent =
      "(" + Object.keys(matchesData).length + " entries)";
 }
 async function load() {
  setStatus("Loading…");
  try {
    const r = await fetch("/api/matches");
    const data = await r.json();
    matchesData = data.matches || {};
    render();
    setStatus(Object.keys(matchesData).length + " entries");
  } catch (err) {
    setStatus("Load failed: " + err.message);
  }
 }
 function applyFilter() {
  const q = document.getElementById("filter").value.toLowerCase();
  for (const tr of document.querySelectorAll("#rows tr")) {
    const t = (tr.dataset.folderTitle || tr.dataset.title).toLowerCase();
    tr.style.display = t.includes(q) ? "" : "none";
  }
 }
 document.getElementById("filter").addEventListener("input", applyFilter);
 document.getElementById("reload").addEventListener("click", load);
 document.getElementById("batchSave").addEventListener("click", batchSave);
 document.getElementById("build").addEventListener("click", async () => {
  if (!confirm("Run full scan? This may take several minutes.")) return;
  setStatus("Building… (running on the server)");
  try {
    const r = await fetch("/api/build", { method: "POST" });
    if (!r.ok) throw new Error(await r.text());
    setStatus("Build finished");
    load();
  } catch (err) {
    setStatus("Build failed: " + err.message);
  }
 });
 document.getElementById("move").addEventListener("click", async () => {
  if (!confirm("Start move operation? This will process all series and may take a long time.")) return;
  const btn = document.getElementById("move");
  btn.disabled = true;
  setStatus("Moving… (running on the server)");
  try {
    const r = await fetch("/api/move", { method: "POST" });
    if (!r.ok) throw new Error(await r.text());
    const data = await r.json();
    const total = Object.keys(data.results || {}).length;
    setStatus("Move finished — " + total + " series processed");
  } catch (err) {
    setStatus("Move failed: " + err.message);
  } finally {
    btn.disabled = false;
  }
 });
 for (const th of document.querySelectorAll("th.sortable")) {
  th.addEventListener("click", () => {
    const col = th.dataset.col;
    if (currentSort.col === col) currentSort.asc = !currentSort.asc;
    else { currentSort.col = col; currentSort.asc = true; }
    render();
  });
 }
 load();
 </script>
 </body>
 </html>
 """
 _PERF_HTML = """<!doctype html>
 <html lang="en">
 <head>
  <meta charset="utf-8">
  <title>Move performance</title>
  <style>
    body  { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
    h1    { margin: 0 0 1rem; font-size: 1.4rem; }
    h2    { font-size: 1.05rem; margin: 1.4rem 0 .5rem; color:#cbd5e1; }
    a     { color:#60a5fa; text-decoration:none; }
    a:hover { text-decoration:underline; }
    .bar  { display:flex; gap:.6rem; align-items:center; margin-bottom:1rem; flex-wrap:wrap; }
    select, button { padding:.35rem .6rem; background:#222; color:#eee; border:1px solid #555; }
    .summary { color:#9ca3af; margin:.3rem 0 1rem; }
    table { border-collapse: collapse; width: 100%; margin-bottom:.5rem; }
    th, td { border: 1px solid #333; padding: .35rem .6rem; text-align: left; }
    th { background:#1d1d1d; }
    td.num { text-align:right; font-variant-numeric: tabular-nums; white-space:nowrap; }
    .barcell { position:relative; }
    .barfill { position:absolute; left:0; top:0; bottom:0; background:#2563eb33; z-index:0; }
    .barcell span { position:relative; z-index:1; }
    details { margin:.3rem 0; }
    summary { cursor:pointer; padding:.25rem 0; }
    .chip { color:#9ca3af; font-size:.85rem; }
    .err  { color:#f87171; }
  </style>
 </head>
 <body>
 <h1>Move performance <a href="/" style="font-size:.9rem;">◂ back to matches</a></h1>
 <div class="bar">
  <label>Run: <select id="runSelect"></select></label>
  <button id="reload">Reload</button>
  <span class="summary" id="summary"></span>
 </div>
 <div id="content"></div>
 <script>
 let runs = [];
 function fmtSecs(s) { return (s || 0).toFixed(2) + "s"; }
 function fmtTime(unix) { return unix ? new Date(unix * 1000).toLocaleString() : ""; }
 function stepTable(totals, grandTotal) {
  const entries = Object.entries(totals || {}).sort((a, b) => b[1] - a[1]);
  if (!entries.length) return "<p class=chip>(no steps recorded)</p>";
  const max = entries[0][1] || 1;
  let rows = "";
  for (const [name, secs] of entries) {
    const pct = grandTotal ? (secs / grandTotal * 100) : 0;
    const w = (secs / max * 100);
    rows += "<tr><td>" + name + "</td>"
         + "<td class='num'>" + fmtSecs(secs) + "</td>"
         + "<td class='num'>" + pct.toFixed(1) + "%</td>"
         + "<td class='barcell'><div class='barfill' style='width:" + w + "%'></div>"
         + "<span>&nbsp;</span></td></tr>";
  }
  return "<table><thead><tr><th>Step</th><th class=num>Total</th>"
       + "<th class=num>% of run</th><th>&nbsp;</th></tr></thead><tbody>"
       + rows + "</tbody></table>";
 }
 function seriesBlock(s) {
  let chapters = "";
  // Chapters sorted slowest first to surface outliers.
  const chs = (s.chapters || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
  for (const c of chs) {
    const steps = Object.entries(c.steps || {}).sort((a, b) => b[1] - a[1])
      .map(([n, v]) => n + " " + fmtSecs(v)).join(", ");
    chapters += "<tr><td>" + c.chapter + (c.ok ? "" : " <span class=err>(failed)</span>") + "</td>"
             + "<td class='num'>" + fmtSecs(c.totalSeconds) + "</td>"
             + "<td>" + steps + "</td></tr>";
  }
  const seriesSteps = Object.entries(s.steps || {})
    .map(([n, v]) => n + " " + fmtSecs(v)).join(", ") || "—";
  return "<details><summary><b>" + s.title + "</b> "
       + "<span class=chip>" + fmtSecs(s.totalSeconds) + " · "
       + (s.chapterCount || 0) + " chapters · " + seriesSteps + "</span></summary>"
       + "<table><thead><tr><th>Chapter</th><th class=num>Total</th>"
       + "<th>Steps</th></tr></thead><tbody>" + chapters + "</tbody></table></details>";
 }
 function renderRun(run) {
  const c = document.getElementById("content");
  if (!run) { c.innerHTML = "<p class=chip>No runs recorded yet.</p>"; return; }
  document.getElementById("summary").textContent =
    fmtTime(run.startedAt) + " · " + fmtSecs(run.totalSeconds) + " · "
    + run.seriesCount + " series · " + run.chapterCount + " chapters";
  let html = "<h2>Chapter steps (summed over all chapters)</h2>"
           + stepTable(run.stepTotals, run.totalSeconds)
           + "<h2>Series steps (metadata / person sync)</h2>"
           + stepTable(run.seriesStepTotals, run.totalSeconds)
           + "<h2>Series detail</h2>";
  const series = (run.series || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
  html += series.map(seriesBlock).join("");
  c.innerHTML = html;
 }
 function renderSelect() {
  const sel = document.getElementById("runSelect");
  sel.innerHTML = "";
  runs.forEach((r, i) => {
    const o = document.createElement("option");
    o.value = i;
    o.textContent = fmtTime(r.startedAt) + "  (" + fmtSecs(r.totalSeconds) + ")";
    sel.appendChild(o);
  });
 }
 async function load() {
  const r = await fetch("/api/perf");
  const data = await r.json();
  runs = data.runs || [];
  renderSelect();
  renderRun(runs[0]);
 }
 document.getElementById("runSelect").addEventListener("change", e => {
  renderRun(runs[e.target.value]);
 });
 document.getElementById("reload").addEventListener("click", load);
 load();
 </script>
 </body>
 </html>
 """
 class MatchesWebApp:
    """
    Flask app exposing the MatchesCache over HTTP.

    Parameters
    ----------
    cache      : The MatchesCache that the routes list, update and delete from.
    mover      : Optional.  Used by POST /api/matches to resolve a new
                 mangabakaId (``fetch_series``), by POST /api/build
                 (``build_matches_only``) and by POST /api/move
                 (``process_all``).  Without it the id is stored without
                 name / image resolution and build / move answer 503.
    perf_stats : Optional object whose ``all()`` result backs GET /api/perf;
                 when None that route returns an empty run list.
    host, port : Bind address handed to ``Flask.run``.
    """
    def __init__(self, cache: MatchesCache, *,
                 mover=None,
                 perf_stats=None,
                 host: str = "0.0.0.0",
                 port: int = 8080):
        self._cache = cache
        self._mover = mover
        self._perf = perf_stats
        self._host = host
        self._port = port
        # One lock per long-running action; a second request while one is
        # running is rejected with 409 instead of queueing.
        self._build_lock = threading.Lock()
        self._move_lock  = threading.Lock()
        self._app = Flask(__name__)
        self._thread: "threading.Thread | None" = None
        self._register_routes()
    @property
    def app(self) -> Flask:
        """The underlying Flask application."""
        return self._app
    def start(self) -> threading.Thread:
        """
        Starts the Flask server on a background thread and returns it.
        The thread is non-daemon so the process stays alive even when the
        caller does not explicitly join() — important when this is the
        only foreground task (e.g. watcher disabled for testing).
        Calling start() again while the thread is alive returns that thread.
        """
        if self._thread is not None and self._thread.is_alive():
            return self._thread
        self._thread = threading.Thread(
            target=self._app.run,
            kwargs={"host": self._host, "port": self._port,
                    "debug": False, "use_reloader": False,
                    "threaded": True},
            name="MatchesWebApp",
            daemon=False,
        )
        self._thread.start()
        print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
              flush=True)
        return self._thread
    def wait(self) -> None:
        """Blocks until the Flask thread exits."""
        if self._thread is not None:
            self._thread.join()
    # ------------------------------------------------------------------
    # Routes
    # ------------------------------------------------------------------
    def _register_routes(self) -> None:
        """Registers every HTTP route on the Flask app."""
        app = self._app
        cache = self._cache
        def json_body() -> dict:
            """Parsed JSON request body; {} when missing or not an object."""
            body = request.get_json(silent=True)
            return body if isinstance(body, dict) else {}
        def title_of(body: dict) -> str:
            """Stripped ``title`` field; "" when absent or not a string."""
            raw = body.get("title")
            return raw.strip() if isinstance(raw, str) else ""
        @app.get("/")
        def index() -> Response:
            # Pass the bare mimetype: Werkzeug appends "; charset=utf-8" to
            # text/* types itself, so including it here would duplicate it.
            return Response(_INDEX_HTML, mimetype="text/html")
        @app.get("/api/matches")
        def api_list():
            return jsonify(cache.all())
        @app.post("/api/matches")
        def api_upsert():
            body = json_body()
            title = title_of(body)
            if not title:
                return Response("title is required", status=400)
            new_id_raw = body.get("mangabakaId")
            new_id = str(new_id_raw).strip() if new_id_raw is not None else ""
            if not new_id:
                return Response("mangabakaId is required", status=400)
            # Resolve the id against MangaBaka so mangabakaName + imageUrl
            # always reflect what the id actually points to.
            new_name: "str | None" = None
            new_image: "str | None" = None
            if self._mover is not None:
                try:
                    series = self._mover.fetch_series(new_id)
                except Exception as exc:
                    return Response(f"resolve failed: {exc}", status=502)
                if not series:
                    return Response(
                        f"MangaBaka has no series with id {new_id}",
                        status=404)
                new_name  = series.get("title") or ""
                new_image = _pick_thumbnail_url(series.get("cover")) or ""
            entry = cache.upsert(
                title,
                mangabaka_id=new_id,
                mangabaka_name=new_name,
                image_url=new_image,
            )
            return jsonify({"title": title, "entry": entry})
        @app.post("/api/matches/delete")
        def api_delete():
            title = title_of(json_body())
            if not title:
                return Response("title is required", status=400)
            removed = cache.remove(title)
            return jsonify({"removed": removed, "title": title})
        @app.post("/api/build")
        def api_build():
            if self._mover is None:
                return Response("no mover configured", status=503)
            # Non-blocking acquire: reject a concurrent build instead of
            # stacking several full scans behind each other.
            if not self._build_lock.acquire(blocking=False):
                return Response("build already running", status=409)
            try:
                result = self._mover.build_matches_only()
            except Exception as exc:
                return Response(f"build failed: {exc}", status=500)
            finally:
                self._build_lock.release()
            return jsonify(result)
        @app.post("/api/move")
        def api_move():
            if self._mover is None:
                return Response("no mover configured", status=503)
            if not self._move_lock.acquire(blocking=False):
                return Response("move already running", status=409)
            try:
                results = self._mover.process_all()
            except Exception as exc:
                return Response(f"move failed: {exc}", status=500)
            finally:
                self._move_lock.release()
            return jsonify({"results": results})
        @app.get("/perf")
        def perf_page() -> Response:
            return Response(_PERF_HTML, mimetype="text/html")
        @app.get("/api/perf")
        def api_perf():
            if self._perf is None:
                return jsonify({"runs": []})
            return jsonify(self._perf.all())
@@ -0,0 +1,242 @@
 """
 perf_stats.py
 =============
 Lightweight performance profiler for the Suwayomi -> Kavita move pipeline.
 It records, per move run, how long each step of every chapter takes plus
 per-series and per-run totals, so a slowdown can be traced to the step
 responsible (cover download, image-dimension probing, CBZ packing, …).
 Data model (one entry per run, newest first)::
    {
      "runs": [
        {
          "startedAt":    1700000000,        # unix seconds
          "finishedAt":   1700000123,
          "totalSeconds": 123.4,             # wall clock of the whole run
          "seriesCount":  2,
          "chapterCount": 31,
          "stepTotals":   {                  # summed over ALL chapters
            "cover": 41.2, "image_dimensions": 55.8, "pack_cbz": 18.1, ...
          },
          "seriesStepTotals": {              # summed over ALL series
            "fetch_metadata": 2.4, "person_sync": 9.7
          },
          "series": [
            {
              "title": "Call of the Night",
              "totalSeconds": 60.2,
              "chapterCount": 20,
              "steps": {"fetch_metadata": 1.2, "person_sync": 3.4},
              "chapters": [
                {"chapter": "1", "ok": true, "totalSeconds": 11.5,
                 "steps": {"cover": 1.8, "image_dimensions": 4.2, ...}}
              ]
            }
          ]
        }
      ]
    }
 Usage from the mover::
    perf = PerfStats(path)                 # path=None -> disabled (no-op)
    run = perf.begin_run()
    series = run.begin_series("Title")
    with series.measure("fetch_metadata"):
        ...
    chap = series.begin_chapter("1")
    with chap.measure("pack_cbz"):
        ...
    chap.finish(ok=True)
    series.finish()
    run.finish()                           # persists the run to disk
 When ``path`` is None every recorder is a no-op and nothing is written,
 so the profiler can be left permanently wired in with negligible cost.
 """
 from __future__ import annotations
 import json
 import threading
 import time
 from contextlib import contextmanager
 from pathlib import Path
 # Keep the JSON small: only the most recent runs are retained on disk.
 _MAX_RUNS = 30
 class _StepTimer:
    """
    Base recorder: accumulates ``{step_name: seconds}`` and tracks its own
    wall-clock lifetime.  ``enabled=False`` turns every method into a no-op.
    """
    def __init__(self, enabled: bool = True):
        self.steps: dict[str, float] = {}
        self._enabled = enabled
        self._t0 = time.monotonic()
    @contextmanager
    def measure(self, name: str):
        """Context manager timing a named step (accumulates on repeat use)."""
        if not self._enabled:
            yield
            return
        start = time.monotonic()
        try:
            yield
        finally:
            self.steps[name] = round(
                self.steps.get(name, 0.0) + (time.monotonic() - start), 4)
    def elapsed(self) -> float:
        return round(time.monotonic() - self._t0, 4)
 class ChapterRecorder(_StepTimer):
    """Times the steps of a single chapter and reports them to its series."""
    def __init__(self, series: "SeriesRecorder", chapter: str,
                 enabled: bool = True):
        super().__init__(enabled)
        self._ok = True
        self._chapter = chapter
        self._series = series
    def finish(self, *, ok: bool = True) -> None:
        """
        Stores ``ok`` and, when enabled, appends this chapter's record
        (chapter, ok, totalSeconds, steps) to the owning series' chapter list.
        """
        self._ok = ok
        if self._enabled:
            record = {
                "chapter":      self._chapter,
                "ok":           ok,
                "totalSeconds": self.elapsed(),
                "steps":        self.steps,
            }
            self._series._chapters.append(record)
 class SeriesRecorder(_StepTimer):
    """Times series-level steps and gathers the records of its chapters."""
    def __init__(self, run: "RunRecorder", title: str, enabled: bool = True):
        super().__init__(enabled)
        self._chapters: list[dict] = []
        self._title = title
        self._run = run
    def begin_chapter(self, chapter: str) -> ChapterRecorder:
        """Creates a chapter recorder that inherits this series' enabled flag."""
        recorder = ChapterRecorder(self, chapter, enabled=self._enabled)
        return recorder
    def finish(self) -> None:
        """
        When enabled, appends this series' record (title, totalSeconds,
        chapterCount, steps, chapters) to the owning run's series list.
        """
        if self._enabled:
            record = {
                "title":        self._title,
                "totalSeconds": self.elapsed(),
                "chapterCount": len(self._chapters),
                "steps":        self.steps,
                "chapters":     self._chapters,
            }
            self._run._series.append(record)
 class RunRecorder:
    """Top-level recorder for one full move run."""
    def __init__(self, stats: "PerfStats", enabled: bool = True):
        self._stats = stats
        self._enabled = enabled
        self._series: list[dict] = []
        self._started = time.time()
        self._t0 = time.monotonic()
    def begin_series(self, title: str) -> SeriesRecorder:
        return SeriesRecorder(self, title, enabled=self._enabled)
    def finish(self) -> dict | None:
        """Aggregates the run and persists it.  Returns the run dict."""
        if not self._enabled:
            return None
        step_totals: dict[str, float] = {}
        series_step_totals: dict[str, float] = {}
        chapter_count = 0
        for s in self._series:
            for step, secs in s["steps"].items():
                series_step_totals[step] = round(
                    series_step_totals.get(step, 0.0) + secs, 4)
            for ch in s["chapters"]:
                chapter_count += 1
                for step, secs in ch["steps"].items():
                    step_totals[step] = round(
                        step_totals.get(step, 0.0) + secs, 4)
        run = {
            "startedAt":         round(self._started),
            "finishedAt":        round(time.time()),
            "totalSeconds":      round(time.monotonic() - self._t0, 4),
            "seriesCount":       len(self._series),
            "chapterCount":      chapter_count,
            "stepTotals":        step_totals,
            "seriesStepTotals":  series_step_totals,
            "series":            self._series,
        }
        self._stats._append_run(run)
        return run
 class PerfStats:
    """
    Profiler facade + JSON persistence.
    Parameters
    ----------
    path : Destination JSON file.  None disables the profiler entirely
           (every recorder becomes a no-op and nothing is written).
    """
    def __init__(self, path=None):
        self._path = Path(path) if path else None
        self._lock = threading.Lock()
    @property
    def enabled(self) -> bool:
        return self._path is not None
    def begin_run(self) -> RunRecorder:
        return RunRecorder(self, enabled=self.enabled)
    # ------------------------------------------------------------------
    # Read / write
    # ------------------------------------------------------------------
    def all(self) -> dict:
        """Returns the persisted runs ({"runs": [...]}); newest first."""
        if not self._path or not self._path.is_file():
            return {"runs": []}
        try:
            with self._path.open("r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, json.JSONDecodeError):
            return {"runs": []}
        if not isinstance(data, dict) or not isinstance(data.get("runs"), list):
            return {"runs": []}
        return data
    def _append_run(self, run: dict) -> None:
        if not self._path:
            return
        with self._lock:
            data = self.all()
            runs = data["runs"]
            runs.insert(0, run)             # newest first
            del runs[_MAX_RUNS:]            # cap history
            self._path.parent.mkdir(parents=True, exist_ok=True)
            tmp = self._path.with_suffix(self._path.suffix + ".tmp")
            with tmp.open("w", encoding="utf-8") as f:
                json.dump({"runs": runs}, f, ensure_ascii=False, indent=2)
            tmp.replace(self._path)
@@ -29,7 +29,6 @@ from __future__ import annotations
 import queue
 import threading
 import time
 from datetime import datetime
 from pathlib import Path
@@ -43,26 +43,93 @@ Dependencies
 from __future__ import annotations
 import json
 import re
 import shutil
 import sys
 import xml.etree.ElementTree as ET
 import zipfile
 from pathlib import Path
 import requests
-from ComicInfoBuilder import ComicInfoBuilder, _pick_cover_url
+# Shared modules live one level up (src/); needed when a module in this
 # folder is run directly as a script (the entry points set the path).
 if __name__ == "__main__":
    sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
 from ComicInfoBuilder import (ComicInfoBuilder, _pick_thumbnail_url,
                              _SEARCH_TYPES, _natural_key)
 from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from KavitaClient import KavitaClient
 from KavitaPersonUpdater import KavitaPersonUpdater
 from MatchesCache import MatchesCache
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
 from CoverCache import CoverCache, _IMAGE_EXTS
 from PerfStats import PerfStats
 _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
 _CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
 # JSON file written into each Kavita series folder, listing every chapter
 # already moved.  Avoids opening CBZ archives to determine what is present.
 # Absence is interpreted as "folder empty" (per spec), not "scan the folder".
 _CHAPTER_INDEX_FILENAME = "chapter_index.json"
 def _normalise_volume_value(value):
    """
    Normalises a volume identifier for storage in chapter_index.json.
    Returns int when the value is a whole number, float for fractional
    volumes, None when missing.  Mirrors how the user wants volumes
    rendered (``"volume": 1`` rather than ``"volume": "1"``).
    """
    if value is None:
        return None
    text = str(value).strip()
    if not text:
        return None
    try:
        f = float(text)
        return int(f) if f.is_integer() else f
    except (TypeError, ValueError):
        return text
 def _load_chapter_index(dest_series: Path) -> dict:
    """
    Reads chapter_index.json from a Kavita series folder.
    Returns ``{"chapter": {}}`` when the file is missing or unreadable —
    per the project spec, absence means "no chapters are present yet".
    "Unreadable" includes I/O errors, invalid JSON, bytes that are not
    valid UTF-8, and a top-level structure without a ``"chapter"`` dict.
    """
    path = dest_series / _CHAPTER_INDEX_FILENAME
    if not path.is_file():
        return {"chapter": {}}
    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError is the common base of json.JSONDecodeError and
        # UnicodeDecodeError, so a file corrupted with non-UTF-8 bytes is
        # handled like any other unreadable index instead of crashing.
        print(f"  [warn] chapter_index unreadable ({path.name}): {exc} — "
              f"treating folder as empty")
        return {"chapter": {}}
    if not isinstance(data, dict) or not isinstance(data.get("chapter"), dict):
        return {"chapter": {}}
    return data
 def _save_chapter_index(dest_series: Path, index: dict) -> None:
    """
    Writes chapter_index.json atomically into a Kavita series folder.
    The index is serialised before any file is opened, so an index that
    cannot be encoded never produces a truncated temp file.  If writing or
    renaming fails, the temp file is removed and the error is re-raised.
    """
    path = dest_series / _CHAPTER_INDEX_FILENAME
    tmp = path.with_suffix(path.suffix + ".tmp")
    payload = json.dumps(index, ensure_ascii=False, indent=2)
    try:
        with tmp.open("w", encoding="utf-8") as f:
            f.write(payload)
        # Rename over the live file only once the temp file is complete.
        tmp.replace(path)
    except OSError:
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
 # Parenthetical source labels that Suwayomi appends to series names.
 # These are not part of the actual title and confuse MangaBaka searches.
 _SOURCE_LABEL_RE = re.compile(
@@ -75,11 +142,6 @@ _SOURCE_LABEL_RE = re.compile(
 _WIN_ILLEGAL_RE = re.compile(r'[\\/*?"<>|]')
 def _natural_key(name: str) -> list:
    return [int(p) if p.isdigit() else p.lower()
            for p in re.split(r"(\d+)", name)]
 def _sanitize_dirname(name: str) -> str:
    """
    Makes a string safe to use as a Windows (or SMB) directory name.
@@ -134,34 +196,6 @@ def _clean_suwayomi_title(title: str) -> str:
    return _SOURCE_LABEL_RE.sub("", title).strip()
 def _mal_id_from_metadata(md: dict) -> "int | None":
    """Extracts the MAL ID from a MangaBaka series dict's source map."""
    for raw_key, info in (md.get("source") or {}).items():
        if re.sub(r"[^a-z0-9]", "", raw_key.lower()) in ("myanimelist", "mal"):
            if isinstance(info, dict):
                mal_id = info.get("id")
                if mal_id is not None:
                    try:
                        return int(mal_id)
                    except (TypeError, ValueError):
                        pass
    return None
 def _al_id_from_metadata(md: dict) -> "int | None":
    """Extracts the AniList ID from a MangaBaka series dict's source map."""
    for raw_key, info in (md.get("source") or {}).items():
        if re.sub(r"[^a-z0-9]", "", raw_key.lower()) == "anilist":
            if isinstance(info, dict):
                al_id = info.get("id")
                if al_id is not None:
                    try:
                        return int(al_id)
                    except (TypeError, ValueError):
                        pass
    return None
 def _chapter_image_size(chapter_dir: Path) -> int:
    """Returns the total file size of all images in a chapter folder."""
    return sum(
@@ -278,6 +312,10 @@ class SuwayomiMover:
    language        : ComicInfo LanguageISO and SeriesSort language ("en").
    request_timeout : HTTP timeout in seconds for all API / image requests.
    delete_source   : Remove the source chapter folder after successful pack.
    cover_cache_dir : Directory for the persistent cover cache.  None ->
                      temporary cache, deleted at process exit.
    perf_stats      : Optional PerfStats instance for per-step timing.  None
                      (default) disables profiling.
    """
    def __init__(self,
@@ -290,7 +328,9 @@ class SuwayomiMover:
                 request_timeout: int = 30,
                 delete_source: bool = True,
                 matches_cache: "MatchesCache | None" = None,
-                 api_base_url: str = "https://api.mangabaka.dev/v1"):
+                 api_base_url: str = "https://api.mangabaka.dev/v1",
                 cover_cache_dir=None,
                 perf_stats: "PerfStats | None" = None):
        self._src = Path(suwayomi_path)
        self._dst = Path(kavita_path)
        self._language = language
@@ -298,11 +338,14 @@ class SuwayomiMover:
        self._delete_source = delete_source
        self._matches_cache = matches_cache
        self._api_base_url = api_base_url.rstrip("/")
        self._perf = perf_stats or PerfStats(None)
        # Shared HTTP session and resolvers — reused across all series/chapters
        # to maximise cache hits and minimise API round-trips.
        session = requests.Session()
        session.headers.setdefault("User-Agent", "SuwayomiMover/1.0")
        # Throttle every call to api.mangabaka.dev (>=1s gap + retry on 429).
        _apply_mangabaka_rate_limit(session)
        self._session = session
        self._mal = MALResolver(request_timeout=request_timeout)
@@ -311,14 +354,18 @@ class SuwayomiMover:
            request_timeout=request_timeout, session=session)
        self._works_resolver = MangaBakaWorksResolver(
            request_timeout=request_timeout, session=session)
        self._cover_cache = CoverCache(
            cover_cache_dir, session=session, request_timeout=request_timeout)
        self._person_updater: "KavitaPersonUpdater | None" = None
        if kavita_base_url and kavita_api_key:
-            self._person_updater = KavitaPersonUpdater(
+            kavita_client = KavitaClient(
                kavita_base_url, kavita_api_key,
                mal_resolver=self._mal,
                al_resolver=self._al,
                request_timeout=request_timeout)
            self._person_updater = KavitaPersonUpdater(
                kavita_client,
                mal_resolver=self._mal,
                al_resolver=self._al)
    # ------------------------------------------------------------------
    # Public API
@@ -334,6 +381,8 @@ class SuwayomiMover:
        dict from _process_series_dir.
        """
        results: dict = {}
        run = self._perf.begin_run()
        try:
            for source_dir in sorted(self._src.iterdir()):
                if not source_dir.is_dir():
                    continue
@@ -342,7 +391,9 @@ class SuwayomiMover:
                        continue
                    title = manga_dir.name
                    print(f"[SuwayomiMover] {title}")
-                results[title] = self._process_series_dir(manga_dir)
+                    results[title] = self._process_series_dir(manga_dir, run)
        finally:
            run.finish()
        return results
    def process_series(self, manga_title: str) -> dict:
@@ -358,10 +409,26 @@ class SuwayomiMover:
                continue
            candidate = source_dir / manga_title
            if candidate.is_dir():
-                return self._process_series_dir(candidate)
+                run = self._perf.begin_run()
                try:
                    return self._process_series_dir(candidate, run)
                finally:
                    run.finish()
        raise FileNotFoundError(
            f"No Suwayomi directory found for '{manga_title}' under {self._src}")
    def fetch_series(self, series_id) -> "dict | None":
        """
        Fetches a MangaBaka series by id via the shared (rate-limited) session.
        Surrounding whitespace in ``series_id`` is ignored.  Returns the
        inner `data` value of the JSON response, or None when the id is
        None/blank or the response has no `data` key.  HTTP error statuses
        are raised by ``raise_for_status()`` rather than mapped to None.
        """
        ident = "" if series_id is None else str(series_id).strip()
        if not ident:
            return None
        # Build the URL from the stripped id — the same value the blank
        # check validated — so " 42 " requests /series/42.
        url = f"{self._api_base_url}/series/{ident}"
        resp = self._session.get(url, timeout=self._timeout)
        resp.raise_for_status()
        return resp.json().get("data")
    def build_matches_only(self) -> dict:
        """
        Walks every series under the Suwayomi root and resolves each one
@@ -410,7 +477,8 @@ class SuwayomiMover:
                try:
                    resp = self._session.get(
                        search_url,
-                        params={"q": builder_title, "page": 1, "limit": 1},
+                        params={"q": builder_title, "type": _SEARCH_TYPES,
                                "page": 1, "limit": 1},
                        timeout=self._timeout)
                    resp.raise_for_status()
                    data = resp.json().get("data") or []
@@ -422,7 +490,7 @@ class SuwayomiMover:
                        builder_title,
                        mangabaka_id=series.get("id"),
                        mangabaka_name=series.get("title") or "",
-                        image_url=_pick_cover_url(series.get("cover")),
+                        image_url=_pick_thumbnail_url(series.get("cover")),
                    )
                except Exception as exc:
                    print(f"  [warn] search failed for {builder_title!r}: {exc}")
@@ -432,8 +500,9 @@ class SuwayomiMover:
    # ------------------------------------------------------------------
    # Internal: series
    # ------------------------------------------------------------------
-    def _process_series_dir(self, manga_dir: Path) -> dict:
+    def _process_series_dir(self, manga_dir: Path, run=None) -> dict:
        manga_title = manga_dir.name
        series_rec = (run or self._perf.begin_run()).begin_series(manga_title)
        chapter_dirs = sorted(
            (d for d in manga_dir.iterdir() if d.is_dir()),
@@ -477,12 +546,14 @@ class SuwayomiMover:
            mal_resolver=self._mal,
            al_resolver=self._al,
            matches_cache=self._matches_cache,
            cover_cache=self._cover_cache,
        )
        # Fetch MangaBaka metadata now to get the canonical title and MAL ID.
        md: "dict | None" = None
        mangabaka_title = manga_title
        try:
            with series_rec.measure("fetch_metadata"):
                md = builder.fetch_metadata()
            mangabaka_title = md.get("title") or manga_title
        except Exception as exc:
@@ -493,24 +564,50 @@ class SuwayomiMover:
        dest_series = self._dst / _sanitize_dirname(mangabaka_title)
        dest_series.mkdir(parents=True, exist_ok=True)
        # Skip chapters that have already been moved to Kavita.  The index
        # file in the destination folder is the authoritative source — we
        # never open CBZ archives or stat them individually.
        chapter_index = _load_chapter_index(dest_series)
        already_moved = chapter_index["chapter"]
        skipped: list[tuple[Path, str]] = []
        pending:  list[tuple[Path, dict, str]] = []
        for item in chapter_items:
            chapter_dir, _fields, chapter_num = item
            if chapter_num in already_moved:
                skipped.append((chapter_dir, chapter_num))
            else:
                pending.append(item)
        for chapter_dir, chapter_num in skipped:
            print(f"  Chapter {chapter_num}: skip (already in Kavita)")
            if self._delete_source:
                shutil.rmtree(chapter_dir, ignore_errors=True)
        chapter_results: list[dict] = []
-        for chapter_dir, _fields, chapter_num in chapter_items:
+        for chapter_dir, _fields, chapter_num in pending:
            result = self._process_chapter(
-                builder, chapter_num, chapter_dir, dest_series)
+                builder, chapter_num, chapter_dir, dest_series, series_rec)
            chapter_results.append(result)
            status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
            print(f"  Chapter {chapter_num}: {status}")
            if result["ok"]:
                already_moved[chapter_num] = {
                    "volume":      _normalise_volume_value(result.get("volume")),
                    "archiveName": Path(result["cbz"]).name,
                }
                _save_chapter_index(dest_series, chapter_index)
        # Sync Kavita persons once per series.
        # Both MAL and AniList IDs come from MangaBaka's source map;
        # AniList is used as fallback when MAL returns no characters/staff.
        person_result: "dict | None" = None
        if self._person_updater:
-            mal_id = (_mal_id_from_metadata(md) if md else None
+            mal_id = ((ComicInfoBuilder._mal_id_from_source(md) if md else None)
                      or self._mal.find_mal_id(builder_title))
-            al_id  = _al_id_from_metadata(md) if md else None
+            al_id  = ComicInfoBuilder._al_id_from_source(md) if md else None
            if mal_id or al_id:
                try:
                    with series_rec.measure("person_sync"):
                        person_result = self._person_updater.update_for_manga(
                            mal_id, al_manga_id=al_id)
                    print(f"  Persons: chars={person_result['characters'].get('updated')} "
@@ -519,6 +616,7 @@ class SuwayomiMover:
                    person_result = {"error": str(exc)}
                    print(f"  Persons: ERROR {exc}")
        series_rec.finish()
        return {"chapters": chapter_results, "persons": person_result}
    # ------------------------------------------------------------------
@@ -528,7 +626,8 @@ class SuwayomiMover:
                         builder: ComicInfoBuilder,
                         chapter_num: str,
                         chapter_dir: Path,
-                         dest_series: Path) -> dict:
+                         dest_series: Path,
                         series_rec=None) -> dict:
        """
        Generates ComicInfo.xml for one chapter, packs it to CBZ, and
        optionally removes the source folder.
@@ -538,38 +637,76 @@ class SuwayomiMover:
        <Pages> element correctly points to the front cover).
        """
        cbz_path = dest_series / f"{chapter_dir.name}.cbz"
        chap_rec = (series_rec or self._perf.begin_run().begin_series("")
                    ).begin_chapter(chapter_num)
        # add_pages_from_folder records its own sub-steps on this recorder.
        builder.perf = chap_rec
        ok = False
        try:
            builder.chapter = chapter_num
            builder.add_pages_from_folder(chapter_dir, cover_filename="000")
            # Resolving the volume here piggy-backs on caches already warmed
            # by add_pages_from_folder, so it's effectively free.  Used by
            # the chapter index in the Kavita destination folder.
            try:
                with chap_rec.measure("volume"):
                    volume = builder._determine_volume()
            except Exception:
                volume = None
            with chap_rec.measure("save_xml"):
                builder.save_xml(chapter_dir)
            with chap_rec.measure("pack_cbz"):
                _pack_to_cbz(chapter_dir, cbz_path)
            if self._delete_source:
                with chap_rec.measure("delete_source"):
                    shutil.rmtree(chapter_dir)
-            return {"chapter": chapter_num, "cbz": str(cbz_path), "ok": True}
+            ok = True
            return {"chapter": chapter_num, "cbz": str(cbz_path),
                    "ok": True, "volume": volume}
        except Exception as exc:
            return {"chapter": chapter_num, "cbz": str(cbz_path),
                    "ok": False, "error": str(exc)}
        finally:
            builder.perf = None
            chap_rec.finish(ok=ok)
 # --------------------------------------------------------------------------
 # Usage example
 # --------------------------------------------------------------------------
 if __name__ == "__main__":
-    SUWAYOMI_PATH = r"\\192.168.2.2\root\Temp\managdl\mangas"
+    import os
    # Local (no-Docker) smoke test.  Adjust paths to your environment.
    # Set the KAVITA_API_KEY env var — never commit API keys to the repo.
    SUWAYOMI_PATH = r"M:\config\downloads\mangas"
    KAVITA_PATH   = r"\\192.168.2.2\root\ServerData\Kavita\test"
    KAVITA_URL    = "http://192.168.2.2:5000"
-    KAVITA_KEY    = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA"
+    KAVITA_KEY    = os.environ.get("KAVITA_API_KEY", "")
    # matches.json lives next to this script during local testing.
    MATCHES_PATH  = Path(__file__).resolve().parent.parent / "matches.json"
    matches_cache = MatchesCache(MATCHES_PATH)
    mover = SuwayomiMover(
        SUWAYOMI_PATH,
        KAVITA_PATH,
        kavita_base_url=KAVITA_URL,
        kavita_api_key=KAVITA_KEY,
-        delete_source=False
+        delete_source=False,
        matches_cache=matches_cache,
    )
-    # Process a single series
+    # ---- Option A: build matches.json only (no moves / no Kavita sync) ----
-    result = mover.process_series("Yofukashi no Uta")
+    # data = mover.build_matches_only()
    # matches = data.get("matches", {})
    # print(f"\n[matches] {len(matches)} entries total — file: {MATCHES_PATH}")
    # for title, entry in list(matches.items())[:10]:
    #     print(f"  {title!r:50s}  id={entry.get('mangabakaId')}  "
    #           f"name={entry.get('mangabakaName')!r}")
    # ---- Option B: full pipeline for one series (uses the cache too) ----
    result = mover.process_series("Wistoria - Wand and Sword")
    ok     = sum(1 for c in result["chapters"] if c["ok"])
    failed = sum(1 for c in result["chapters"] if not c["ok"])
    print(f"\nDone: {ok} ok, {failed} failed")
Author	SHA1	Message	Date
johannesbot	b6d7f2d0af	time measurement	2026-06-15 11:23:37 +02:00
johannesbot	b0692a6527	time measurement	2026-06-15 11:23:20 +02:00
johannesbot	216771f709	merged ln metadata into manga mover Build and Deploy / build (push) Successful in 59s Details Build and Deploy / deploy (push) Successful in 24s Details	2026-06-14 10:47:47 +02:00
johannesbot	8a44b85a48	cleanup Build and Deploy / build (push) Successful in 23s Details Build and Deploy / deploy (push) Successful in 41s Details Build Release / build (push) Successful in 16s Details	2026-06-11 21:31:20 +02:00
johannesbot	4996026b91	release CI/CD Build and Deploy / build (push) Successful in 17s Details Build and Deploy / deploy (push) Successful in 23s Details Build Release / build (push) Successful in 15s Details	2026-06-11 20:02:06 +02:00
johannesbot	7fbe5f94a5	release CI/CD Build and Deploy / build (push) Successful in 52s Details Build and Deploy / deploy (push) Successful in 36s Details Build Release / build (push) Failing after 8s Details	2026-06-11 19:57:11 +02:00
johannesbot	4557137ad0	feat(updater): add KavitaVolumeCoverUpdater for back-filling null volumes Build and Deploy / build (push) Successful in 22s Details Build and Deploy / deploy (push) Successful in 36s Details Introduce a new background service that periodically re-checks chapters whose volume could not be resolved at move time. - Add KavitaVolumeCoverUpdater.py to resolve null volumes via MangaDex, update ComicInfo.xml in-archive, and swap in MangaBaka volume covers - Wire updater into main.py entry point with UPDATER_ENABLED env flag - Add UPDATER_ENABLED env var to docker-compose.prod.yml - Update CronSchedule.py to schedule updater runs	2026-06-10 13:09:01 +02:00
johannesbot	59ea1f8c8f	added chapter index json Build and Deploy / deploy (push) Successful in 36s Details Build and Deploy / build (push) Successful in 23s Details	2026-06-10 12:30:24 +02:00
johannesbot	d724e9ffcd	LocalizedSeries from kanji to romanji Build and Deploy / build (push) Successful in 26s Details Build and Deploy / deploy (push) Successful in 42s Details	2026-06-10 10:41:04 +02:00
johannesbot	2f30ac4e05	matches double key fix Build and Deploy / build (push) Successful in 26s Details Build and Deploy / deploy (push) Successful in 41s Details	2026-06-06 20:18:11 +02:00
johannesbot	97e4b10ac8	missing cover fix Build and Deploy / build (push) Successful in 23s Details Build and Deploy / deploy (push) Successful in 39s Details	2026-05-30 09:23:58 +02:00
johannesbot	054f974ddc	update btn for webui Build and Deploy / build (push) Successful in 23s Details Build and Deploy / deploy (push) Successful in 38s Details	2026-05-29 08:22:03 +02:00
johannesbot	3288ab9de7	err response Build and Deploy / build (push) Successful in 22s Details Build and Deploy / deploy (push) Successful in 36s Details	2026-05-26 21:18:57 +02:00
johannesbot	12ef254424	ffs Build and Deploy / build (push) Successful in 20s Details Build and Deploy / deploy (push) Successful in 35s Details	2026-05-26 21:13:44 +02:00
johannesbot	76050eeda9	fix Build and Deploy / build (push) Successful in 22s Details Build and Deploy / deploy (push) Successful in 35s Details	2026-05-26 21:12:02 +02:00
johannesbot	7887892737	matches.json changed image from full res to low res Build and Deploy / build (push) Successful in 22s Details Build and Deploy / deploy (push) Successful in 38s Details	2026-05-26 21:09:31 +02:00
johannesbot	79d64d7ed5	WebApp changes	2026-05-26 21:03:37 +02:00