Merge pull request 'Performance and Person Updater Improvements' (#7) from time-measurement into master

Reviewed-on: #7
Person Updater overhaul
2026-06-16 18:46:55 +02:00 · 2026-06-16 18:46:17 +02:00 · 2026-06-16 11:37:47 +02:00 · 2026-06-15 11:23:37 +02:00 · 2026-06-15 11:23:20 +02:00 · 2026-06-14 10:47:47 +02:00
32 changed files with 4472 additions and 1006 deletions
@@ -0,0 +1,28 @@
+# Shared
+KAVITA_URL=http://192.168.1.100:5000
+KAVITA_API_KEY=your-api-key-here
+LANGUAGE=en
+TZ=Europe/Berlin
+
+# Manga container (manga-mover-and-metadata-collector)
+HOST_SUWAYOMI_PATH=/path/to/suwayomi/downloads
+HOST_KAVITA_PATH=/path/to/kavita/library
+HOST_MANGA_CONFIG_PATH=/path/to/manga-config
+MANGA_WEB_PORT=8080
+SETTLE_SECONDS=600
+DELETE_SOURCE=true
+# Periodic updaters (volume/cover + global person sync) run together on
+# this cron. Sundays 10:00. Person updater also covers LN libraries.
+UPDATER_ENABLED=true
+UPDATER_SCHEDULE=0 10 * * 0
+COVER_CACHE_PATH=/config/covers
+PERF_PATH=/config/perf_stats.json
+VOLUME_PERF_PATH=/config/volume_perf_stats.json
+PERSON_PERF_PATH=/config/person_perf_stats.json
+
+# Light-novel container (kavita-lightnovel-metadata-fetcher)
+HOST_LN_CONFIG_PATH=/path/to/ln-config
+LN_WEB_PORT=8081
+LN_LIBRARY_IDS=3,5
+LN_UPDATER_ENABLED=true
+
@@ -5,6 +5,11 @@ on:
    branches:
      - master

+env:
+  REGISTRY: gitea.johannesbot.de/johannesbot
+  MANGA_IMAGE: manga-mover-and-metadata-collector
+  LN_IMAGE: kavita-lightnovel-metadata-fetcher
+
 jobs:
  build:
    runs-on: ubuntu-latest
@@ -17,11 +22,16 @@ jobs:
          echo "${{ secrets.REGISTRY_PASSWORD }}" | \
          docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin

-      - name: Build Image
-        run: docker build -t gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest .
+      - name: Build Manga Image
+        run: docker build --build-arg APP=manga -t ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:latest .

-      - name: Push Image
-        run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest
+      - name: Build LN Image
+        run: docker build --build-arg APP=ln -t ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:latest .
+
+      - name: Push Images
+        run: |
+          docker push ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:latest
+          docker push ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:latest

  deploy:
    needs: build
@@ -37,7 +47,7 @@ jobs:
          username: ${{ secrets.SSH_USER }}
          password: ${{ secrets.SSH_PASSWORD }}
          port: ${{ secrets.SSH_PORT || 22 }}
-          script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
+          script: mkdir -p /home/${{ secrets.SSH_USER }}/kavita-metadata-collector

      - name: Copy docker-compose via SCP
        uses: appleboy/scp-action@v0.1.7
@@ -47,7 +57,7 @@ jobs:
          password: ${{ secrets.SSH_PASSWORD }}
          port: ${{ secrets.SSH_PORT || 22 }}
          source: "docker-compose.prod.yml"
-          target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector"
+          target: "/home/${{ secrets.SSH_USER }}/kavita-metadata-collector"

      - name: Deploy via SSH
        uses: appleboy/ssh-action@v1.0.3
@@ -57,7 +67,7 @@ jobs:
          password: ${{ secrets.SSH_PASSWORD }}
          port: ${{ secrets.SSH_PORT || 22 }}
          script: |
-            cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
+            cd /home/${{ secrets.SSH_USER }}/kavita-metadata-collector
            mv docker-compose.prod.yml docker-compose.yml
            echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
            sudo docker compose pull
@@ -5,6 +5,11 @@ on:
    tags:
      - 'v*'

+env:
+  REGISTRY: gitea.johannesbot.de/johannesbot
+  MANGA_IMAGE: manga-mover-and-metadata-collector
+  LN_IMAGE: kavita-lightnovel-metadata-fetcher
+
 jobs:
  build:
    runs-on: ubuntu-latest
@@ -21,8 +26,13 @@ jobs:
        id: tag
        run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT"

-      - name: Build Image
-        run: docker build -t gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:${{ steps.tag.outputs.VERSION }} .
+      - name: Build Manga Image
+        run: docker build --build-arg APP=manga -t ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:${{ steps.tag.outputs.VERSION }} .

-      - name: Push Image
-        run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:${{ steps.tag.outputs.VERSION }}
+      - name: Build LN Image
+        run: docker build --build-arg APP=ln -t ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:${{ steps.tag.outputs.VERSION }} .
+
+      - name: Push Images
+        run: |
+          docker push ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:${{ steps.tag.outputs.VERSION }}
+          docker push ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:${{ steps.tag.outputs.VERSION }}
@@ -1,8 +1,18 @@
+# One Dockerfile, two images: the build arg APP selects the entry point.
+#
+#   docker build --build-arg APP=manga -t .../manga-mover-and-metadata-collector .
+#   docker build --build-arg APP=ln    -t .../kavita-lightnovel-metadata-fetcher .
+#
+# Both variants share src/; the variant-specific code lives in
+# src/manga/ and src/ln/ respectively and is selected by the entry point.
+
 FROM python:3.12-slim

+ARG APP=manga
+
 WORKDIR /app

-# System deps for Pillow (image dimensions); kept minimal.
+# System deps for Pillow (image dimensions, manga variant); kept minimal.
 RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        libjpeg62-turbo \
@@ -11,15 +21,17 @@ RUN apt-get update \
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt

-COPY src/     /app/src/
-COPY main.py  /app/main.py
+COPY src/          /app/src/
+COPY main_manga.py main_ln.py /app/

 ENV PYTHONUNBUFFERED=1 \
-    PYTHONDONTWRITEBYTECODE=1
+    PYTHONDONTWRITEBYTECODE=1 \
+    APP_VARIANT=${APP}

-# Mount points used by main.py defaults
-VOLUME ["/mnt/suwayomi", "/mnt/kavita", "/config"]
+# /config is used by both variants; the manga variant additionally mounts
+# /mnt/suwayomi and /mnt/kavita (see docker-compose.prod.yml).
+VOLUME ["/config"]

 EXPOSE 8080

-CMD ["python", "/app/main.py"]
+# Shell form is required so APP_VARIANT is expanded at container start.
+# `exec` replaces the /bin/sh wrapper, so Python is PID 1 (as with the
+# previous exec-form CMD) and signals from `docker stop` are sent to it
+# rather than to an intermediate shell.
+CMD exec python /app/main_${APP_VARIANT}.py
@@ -1,5 +1,8 @@
 services:
-  manga-mover:
+  # ------------------------------------------------------------------
+  # Manga: Suwayomi -> Kavita mover + metadata enrichment
+  # ------------------------------------------------------------------
+  manga-mover-and-metadata-collector:
    image: gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest
    container_name: manga-mover-and-metadata-collector
    restart: unless-stopped
@@ -9,19 +12,46 @@ services:
      LANGUAGE:       "${LANGUAGE:-en}"
      SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
      DELETE_SOURCE:  "${DELETE_SOURCE:-true}"
-      MATCH_PATH:     "${MATCH_PATH:-/config/matches.json}"
-      WEB_PORT:       "${WEB_PORT:-8080}"
-      # Volume/cover back-fill updater
+      MATCH_PATH:     "/config/matches.json"
+      # Periodic updaters (volume/cover back-fill + global person sync) run
+      # together on this cron. "0 10 * * 0" = Sundays 10:00 (local time, see TZ)
      UPDATER_ENABLED:  "${UPDATER_ENABLED:-true}"
-      # Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday
-      # (local time, see TZ)
-      UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
-      UPDATER_LOG:      "${UPDATER_LOG:-/config/volume_updater.log}"
-      # Timezone for the cron schedule — without this 19:00 means 19:00 UTC
+      UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
+      UPDATER_LOG:      "/config/volume_updater.log"
+      # Persistent cover cache (empty = temp dir, deleted on container stop).
+      # Uses `-` instead of `:-` so that an explicitly empty COVER_CACHE_PATH
+      # is passed through to the container; only an unset variable falls
+      # back to /config/covers.
+      COVER_CACHE_PATH: "${COVER_CACHE_PATH-/config/covers}"
+      # Per-step timing stats (viewable at /perf, /perf/volume, /perf/person)
+      PERF_PATH:        "${PERF_PATH:-/config/perf_stats.json}"
+      VOLUME_PERF_PATH: "${VOLUME_PERF_PATH:-/config/volume_perf_stats.json}"
+      PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
+      # Timezone for the cron schedule — without this 10:00 means 10:00 UTC
      TZ:               "${TZ:-Europe/Berlin}"
    ports:
-      - "${WEB_PORT:-8080}:${WEB_PORT:-8080}"
+      - "${MANGA_WEB_PORT:-8080}:8080"
    volumes:
      - "${HOST_SUWAYOMI_PATH}:/mnt/suwayomi"
      - "${HOST_KAVITA_PATH}:/mnt/kavita"
-      - "${HOST_CONFIG_PATH}:/config"
+      - "${HOST_MANGA_CONFIG_PATH}:/config"
+
+  # ------------------------------------------------------------------
+  # Light novels: Kavita metadata fetcher (HTTP only, no file mover)
+  # ------------------------------------------------------------------
+  kavita-lightnovel-metadata-fetcher:
+    image: gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:latest
+    container_name: kavita-lightnovel-metadata-fetcher
+    restart: unless-stopped
+    environment:
+      KAVITA_URL:     "${KAVITA_URL}"
+      KAVITA_API_KEY: "${KAVITA_API_KEY}"
+      LIBRARY_IDS:    "${LN_LIBRARY_IDS}"
+      LANGUAGE:       "${LANGUAGE:-en}"
+      MATCH_PATH:     "/config/matches.json"
+      # Global person sync on cron (same default cadence as the manga side)
+      UPDATER_ENABLED:  "${LN_UPDATER_ENABLED:-true}"
+      UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
+      PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
+      TZ:             "${TZ:-Europe/Berlin}"
+    ports:
+      - "${LN_WEB_PORT:-8081}:8080"
+    volumes:
+      - "${HOST_LN_CONFIG_PATH}:/config"
@@ -1,155 +0,0 @@
-"""
-main.py
-=======
-
-Container entry point.  Watches the mounted Suwayomi download directory
-and, after a quiet period, triggers SuwayomiMover (which also runs the
-Kavita person sync for every processed series).
-
-Mount points (Docker)
---------------------
-  /mnt/suwayomi   -> Suwayomi downloads (read/write, sources deleted)
-  /mnt/kavita     -> Kavita library      (read/write, CBZs written here)
-
-Environment variables
---------------------
-  Required:
-    KAVITA_URL          base URL of the Kavita server, e.g. http://kavita:5000
-    KAVITA_API_KEY      Kavita API key (Settings → User → API key)
-
-  Optional:
-    SUWAYOMI_PATH       default /mnt/suwayomi
-    KAVITA_PATH         default /mnt/kavita
-    LANGUAGE            default en
-    SETTLE_SECONDS      default 600   (10-minute quiet window)
-    REQUEST_TIMEOUT     default 30
-    DELETE_SOURCE       default true  (delete source folders after pack)
-    MATCH_PATH          default /config/matches.json
-    WEB_PORT            default 8080  (Flask web UI for matches.json)
-    WEB_HOST            default 0.0.0.0
-    UPDATER_ENABLED     default true  (volume/cover back-fill cron)
-    UPDATER_SCHEDULE    cron expression for the updater scans,
-                        default "0 19 * * 1,4" = 19:00 every Mon + Thu
-                        (local time — set TZ inside the container!)
-    UPDATER_LOG         default /config/volume_updater.log
-"""
-
-from __future__ import annotations
-
-import os
-import signal
-import sys
-from pathlib import Path
-
-# Make src/ importable when running as `python main.py`.
-sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
-
-from src.SuwayomiMover import SuwayomiMover                       # noqa: E402
-from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher       # noqa: E402
-from src.MatchesCache import MatchesCache                          # noqa: E402
-from src.MatchesWebApp import MatchesWebApp                        # noqa: E402
-from src.KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater  # noqa: E402
-
-
-def _env_str(name: str, default: "str | None" = None,
-             required: bool = False) -> "str | None":
-    value = os.environ.get(name, default)
-    if required and not value:
-        print(f"[main] missing required env var: {name}", flush=True)
-        sys.exit(2)
-    return value
-
-
-def _env_int(name: str, default: int) -> int:
-    raw = os.environ.get(name)
-    if raw is None or raw == "":
-        return default
-    try:
-        return int(raw)
-    except ValueError:
-        print(f"[main] {name}={raw!r} is not a valid integer; "
-              f"falling back to {default}", flush=True)
-        return default
-
-
-def _env_bool(name: str, default: bool) -> bool:
-    raw = os.environ.get(name)
-    if raw is None:
-        return default
-    return raw.strip().lower() in ("1", "true", "yes", "y", "on")
-
-
-def main() -> int:
-    suwayomi_path   = _env_str("SUWAYOMI_PATH", r"/mnt/suwayomi")
-    kavita_path     = _env_str("KAVITA_PATH",   "/mnt/kavita")
-    kavita_url      = _env_str("KAVITA_URL",     "http://kavita:5000")
-    kavita_api_key  = _env_str("KAVITA_API_KEY",  "")
-    language        = _env_str("LANGUAGE", "en") or "en"
-    settle_seconds  = _env_int("SETTLE_SECONDS",  600)
-    request_timeout = _env_int("REQUEST_TIMEOUT",  30)
-    delete_source   = _env_bool("DELETE_SOURCE",  True)
-    match_path      = _env_str("MATCH_PATH", "/config/matches.json")
-    web_host        = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
-    web_port        = _env_int("WEB_PORT", 8080)
-    updater_enabled  = _env_bool("UPDATER_ENABLED", True)
-    updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
-    updater_log      = _env_str("UPDATER_LOG", "/config/volume_updater.log")
-
-    print(f"[main] suwayomi  = {suwayomi_path}",  flush=True)
-    print(f"[main] kavita    = {kavita_path}",    flush=True)
-    print(f"[main] kavita url= {kavita_url}",     flush=True)
-    print(f"[main] settle    = {settle_seconds}s", flush=True)
-    print(f"[main] language  = {language}",       flush=True)
-    print(f"[main] delete src= {delete_source}",  flush=True)
-    print(f"[main] match path= {match_path}",     flush=True)
-    print(f"[main] web       = {web_host}:{web_port}", flush=True)
-
-    matches_cache = MatchesCache(match_path)
-
-    mover = SuwayomiMover(
-        suwayomi_path, kavita_path,
-        kavita_base_url=kavita_url,
-        kavita_api_key=kavita_api_key,
-        language=language,
-        request_timeout=request_timeout,
-        delete_source=delete_source,
-        matches_cache=matches_cache,
-    )
-
-    # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
-
-    web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port)
-    web_app.start()
-
-    if updater_enabled:
-        try:
-            updater = KavitaVolumeCoverUpdater(
-                kavita_path,
-                matches_cache=matches_cache,
-                language=language,
-                request_timeout=request_timeout,
-                log_path=updater_log,
-                schedule=updater_schedule,
-            )
-            updater.start()
-        except ValueError as exc:
-            # Invalid cron expression — keep the service up, just without
-            # the updater, and make the config error obvious in the logs.
-            print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
-                  f"volume/cover updater DISABLED", flush=True)
-
-    # def shutdown(signum, _frame):
-    #     print(f"[main] received signal {signum}", flush=True)
-    #     watcher.stop()
-    #
-    # signal.signal(signal.SIGTERM, shutdown)
-    # signal.signal(signal.SIGINT,  shutdown)
-    #
-    # watcher.start()
-    # watcher.wait()   # blocks until stop() is called via a signal
-    web_app.wait()     # keep process alive while the watcher is disabled
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
@@ -0,0 +1,162 @@
+"""
+main_ln.py
+==========
+
+Container entry point for the **light-novel** variant (Kavita metadata
+fetcher).  The manga variant has its own entry point (main_manga.py);
+both share the modules in src/ and add their variant-specific code from
+src/ln/ and src/manga/ respectively.
+
+Reads configuration from environment variables, starts the orchestrator
+and exposes the Flask WebApp on WEB_HOST:WEB_PORT.  Everything happens
+through HTTP — there is no folder watcher and no file mover (Kavita is
+the source of truth for the library content; this service only writes
+metadata back to it).
+
+Environment variables
+---------------------
+  Required:
+    KAVITA_URL          base URL of the Kavita server, e.g. http://kavita:5000
+    KAVITA_API_KEY      Kavita API key (Settings -> User -> API key)
+
+  Optional:
+    LIBRARY_IDS         comma-separated default library ids (e.g. "3,5").
+                        Empty = user picks in the WebUI each time.
+    LANGUAGE            default "en"
+    REQUEST_TIMEOUT     default 30
+    MATCH_PATH          default /config/matches.json
+    WEB_PORT            default 8080
+    WEB_HOST            default 0.0.0.0
+    UPDATER_ENABLED     default true  (run the person updater on cron)
+    UPDATER_SCHEDULE    cron expression for the person updater,
+                        default "0 10 * * 0" = Sundays 10:00
+                        (local time — set TZ inside the container!)
+    PERSON_PERF_PATH    JSON file for person updater timing.
+                        Default /config/person_perf_stats.json
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+from pathlib import Path
+
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+except ImportError:
+    pass
+
+# Shared code in src/, LN-specific code in src/ln/.  Modules are imported
+# by their plain names so src-internal imports resolve to the same module
+# objects (a `src.X` import would load everything twice).
+_BASE = Path(__file__).resolve().parent
+sys.path.insert(0, str(_BASE / "src"))
+sys.path.insert(0, str(_BASE / "src" / "ln"))
+
+from MatchesCache import MatchesCache                       # noqa: E402
+from LightNovelOrchestrator import LightNovelOrchestrator   # noqa: E402
+from MatchesWebApp import MatchesWebApp                     # noqa: E402
+from PerfStats import PerfStats                             # noqa: E402
+from CronRunner import CronRunner                           # noqa: E402
+
+
+def _env_bool(name: str, default: bool) -> bool:
+    """Read environment variable *name* as a boolean.
+
+    Returns *default* when the variable is not set.  Otherwise the value
+    is stripped and lower-cased; only "1", "true", "yes", "y" and "on"
+    count as True, anything else (including an empty string) is False.
+    """
+    truthy = ("1", "true", "yes", "y", "on")
+    value = os.environ.get(name)
+    return default if value is None else value.strip().lower() in truthy
+
+
+def _env_str(name: str, default: "str | None" = None,
+             required: bool = False) -> "str | None":
+    """Return environment variable *name*, or *default* when it is unset.
+
+    With ``required=True`` a missing or empty result is fatal: the problem
+    is printed and the process exits with status 2.
+    """
+    result = os.environ.get(name, default)
+    if result or not required:
+        return result
+    print(f"[main] missing required env var: {name}", flush=True)
+    sys.exit(2)
+
+
+def _env_int(name: str, default: int) -> int:
+    """Read environment variable *name* as an integer.
+
+    An unset or empty variable yields *default*.  A value that ``int()``
+    rejects is reported on stdout and also yields *default*.
+    """
+    text = os.environ.get(name)
+    if not text:
+        return default
+    try:
+        parsed = int(text)
+    except ValueError:
+        print(f"[main] {name}={text!r} is not a valid integer; "
+              f"falling back to {default}", flush=True)
+        return default
+    return parsed
+
+
+def _env_int_list(name: str) -> list[int]:
+    """Parse environment variable *name* as a comma-separated integer list.
+
+    Entries are stripped of surrounding whitespace; empty entries are
+    skipped.  An entry that ``int()`` rejects is reported on stdout and
+    left out.  An unset or empty variable yields an empty list.
+    """
+    pieces = [p.strip() for p in (os.environ.get(name) or "").split(",")]
+    ids: list[int] = []
+    for piece in filter(None, pieces):
+        try:
+            ids.append(int(piece))
+        except ValueError:
+            print(f"[main] {name}: ignoring non-integer value {piece!r}",
+                  flush=True)
+    return ids
+
+
+def main() -> int:
+    """Start the light-novel service and block until the web app stops.
+
+    Configuration comes from the environment (see the module docstring).
+    KAVITA_URL and KAVITA_API_KEY are mandatory: ``_env_str`` terminates
+    the process with status 2 when either is missing or empty.
+
+    Returns 0 once ``app.wait()`` returns.
+    """
+    kavita_url = _env_str("KAVITA_URL", required=True)
+    kavita_api_key = _env_str("KAVITA_API_KEY", required=True)
+    language = _env_str("LANGUAGE", "en") or "en"
+    request_timeout = _env_int("REQUEST_TIMEOUT", 30)
+    match_path = _env_str("MATCH_PATH", "/config/matches.json")
+    web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
+    web_port = _env_int("WEB_PORT", 8080)
+    library_ids = _env_int_list("LIBRARY_IDS")
+    updater_enabled = _env_bool("UPDATER_ENABLED", True)
+    updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
+    # An empty PERSON_PERF_PATH is handed to PerfStats as None.
+    person_perf_path = (
+        _env_str("PERSON_PERF_PATH", "/config/person_perf_stats.json") or None
+    )
+
+    startup_report = (
+        f"[main] kavita url    = {kavita_url}",
+        f"[main] language      = {language}",
+        f"[main] match path    = {match_path}",
+        f"[main] libraries     = {library_ids or '(picked in WebUI)'}",
+        f"[main] web           = {web_host}:{web_port}",
+    )
+    for line in startup_report:
+        print(line, flush=True)
+
+    cache = MatchesCache(match_path)
+    person_perf = PerfStats(person_perf_path)
+    orchestrator = LightNovelOrchestrator(
+        kavita_url=kavita_url,
+        kavita_api_key=kavita_api_key,
+        matches_cache=cache,
+        language=language,
+        request_timeout=request_timeout,
+    )
+
+    app = MatchesWebApp(
+        cache,
+        orchestrator=orchestrator,
+        default_library_ids=library_ids,
+        person_perf=person_perf,
+        host=web_host,
+        port=web_port,
+    )
+    app.start()
+
+    def _sync_persons():
+        # Job executed by the cron runner.
+        return orchestrator.sync_persons(trigger="cron", perf=person_perf)
+
+    if updater_enabled:
+        try:
+            runner = CronRunner(updater_schedule, _sync_persons,
+                                name="person-updater")
+            runner.start()
+        except ValueError as exc:
+            # Keep the web app running; only the scheduled sync is dropped.
+            print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
+                  f"scheduled person sync DISABLED", flush=True)
+
+    app.wait()
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,199 @@
+"""
+main_manga.py
+=============
+
+Container entry point for the **manga** variant (Suwayomi -> Kavita mover
+plus metadata enrichment).  The light-novel variant has its own entry
+point (main_ln.py); both share the modules in src/ and add their
+variant-specific code from src/manga/ and src/ln/ respectively.
+
+Mount points (Docker)
+---------------------
+  /mnt/suwayomi   -> Suwayomi downloads (read/write, sources deleted)
+  /mnt/kavita     -> Kavita library      (read/write, CBZs written here)
+
+Environment variables
+---------------------
+  Kavita connection (used by the person updater):
+    KAVITA_URL          base URL of the Kavita server,
+                        default http://kavita:5000
+    KAVITA_API_KEY      Kavita API key (Settings -> User -> API key);
+                        empty (default) = no person updater is created
+
+  Optional:
+    SUWAYOMI_PATH       default /mnt/suwayomi
+    KAVITA_PATH         default /mnt/kavita
+    LANGUAGE            default en
+    SETTLE_SECONDS      default 600   (10-minute quiet window)
+    REQUEST_TIMEOUT     default 30
+    DELETE_SOURCE       default true  (delete source folders after pack)
+    MATCH_PATH          default /config/matches.json
+    WEB_PORT            default 8080  (Flask web UI for matches.json)
+    WEB_HOST            default 0.0.0.0
+    UPDATER_ENABLED     default true  (run volume/cover + person updaters on cron)
+    UPDATER_SCHEDULE    cron expression for the periodic updaters,
+                        default "0 10 * * 0" = Sundays 10:00
+                        (local time — set TZ inside the container!)
+    UPDATER_LOG         default /config/volume_updater.log
+    COVER_CACHE_PATH    directory for the persistent cover cache;
+                        empty (default) = temporary cache, deleted on exit
+    PERF_PATH           JSON file for per-step move timing stats.
+                        Default /config/perf_stats.json (empty disables it)
+    VOLUME_PERF_PATH    JSON file for volume/cover updater timing.
+                        Default /config/volume_perf_stats.json
+    PERSON_PERF_PATH    JSON file for person updater timing.
+                        Default /config/person_perf_stats.json
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+from pathlib import Path
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+except ImportError:
+    pass
+
+# Shared code in src/, manga-specific code in src/manga/.  Modules are
+# imported by their plain names so src-internal imports resolve to the
+# same module objects (a `src.X` import would load everything twice).
+_BASE = Path(__file__).resolve().parent
+sys.path.insert(0, str(_BASE / "src"))
+sys.path.insert(0, str(_BASE / "src" / "manga"))
+
+from SuwayomiMover import SuwayomiMover                        # noqa: E402
+from SuwayomiFolderWatcher import SuwayomiFolderWatcher        # noqa: E402,F401
+from MatchesCache import MatchesCache                          # noqa: E402
+from MatchesWebApp import MatchesWebApp                        # noqa: E402
+from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater  # noqa: E402
+from KavitaClient import KavitaClient                          # noqa: E402
+from KavitaPersonUpdater import KavitaPersonUpdater            # noqa: E402
+from PerfStats import PerfStats                                # noqa: E402
+from CronRunner import CronRunner                              # noqa: E402
+
+
+def _env_str(name: str, default: "str | None" = None,
+             required: bool = False) -> "str | None":
+    """Look up environment variable *name*, falling back to *default*.
+
+    When ``required`` is true and the result is missing or empty, the
+    error is printed and the process exits with status 2.
+    """
+    found = os.environ.get(name, default)
+    if found or not required:
+        return found
+    print(f"[main] missing required env var: {name}", flush=True)
+    sys.exit(2)
+
+
+def _env_int(name: str, default: int) -> int:
+    """Look up environment variable *name* and convert it to ``int``.
+
+    Unset or empty -> *default*.  A value that ``int()`` cannot parse is
+    reported on stdout and also falls back to *default*.
+    """
+    text = os.environ.get(name)
+    if not text:
+        return default
+    try:
+        number = int(text)
+    except ValueError:
+        print(f"[main] {name}={text!r} is not a valid integer; "
+              f"falling back to {default}", flush=True)
+        return default
+    return number
+
+
+def _env_bool(name: str, default: bool) -> bool:
+    """Look up environment variable *name* and interpret it as a flag.
+
+    Unset -> *default*.  Otherwise the stripped, lower-cased value must be
+    one of "1", "true", "yes", "y" or "on" to count as True; every other
+    value (an empty string included) is False.
+    """
+    accepted = ("1", "true", "yes", "y", "on")
+    value = os.environ.get(name)
+    return default if value is None else value.strip().lower() in accepted
+
+
+def main() -> int:
+    """Start the manga service and block until the web app stops.
+
+    Reads the configuration from the environment (see the module
+    docstring), builds the mover, the optional person updater and the web
+    app, and — when UPDATER_ENABLED is true — schedules the volume/cover
+    and person updaters on UPDATER_SCHEDULE.
+
+    The person updater is only created when KAVITA_API_KEY is non-empty;
+    otherwise a notice is printed and the service runs without it.
+
+    Returns 0 once ``web_app.wait()`` returns.
+    """
+    suwayomi_path   = _env_str("SUWAYOMI_PATH", "/mnt/suwayomi")
+    kavita_path     = _env_str("KAVITA_PATH",   "/mnt/kavita")
+    kavita_url      = _env_str("KAVITA_URL",     "http://kavita:5000")
+    kavita_api_key  = _env_str("KAVITA_API_KEY",  "")
+    language        = _env_str("LANGUAGE", "en") or "en"
+    settle_seconds  = _env_int("SETTLE_SECONDS",  600)
+    request_timeout = _env_int("REQUEST_TIMEOUT",  30)
+    delete_source   = _env_bool("DELETE_SOURCE",  True)
+    match_path      = _env_str("MATCH_PATH", "/config/matches.json")
+    web_host        = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
+    web_port        = _env_int("WEB_PORT", 8080)
+    updater_enabled  = _env_bool("UPDATER_ENABLED", True)
+    updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
+    updater_log      = _env_str("UPDATER_LOG", "/config/volume_updater.log")
+    # Empty path values are normalised to None before being passed on.
+    cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
+    perf_path        = _env_str("PERF_PATH", "/config/perf_stats.json") or None
+    volume_perf_path = _env_str("VOLUME_PERF_PATH",
+                                "/config/volume_perf_stats.json") or None
+    person_perf_path = _env_str("PERSON_PERF_PATH",
+                                "/config/person_perf_stats.json") or None
+
+    print(f"[main] suwayomi  = {suwayomi_path}",  flush=True)
+    print(f"[main] kavita    = {kavita_path}",    flush=True)
+    print(f"[main] kavita url= {kavita_url}",     flush=True)
+    print(f"[main] settle    = {settle_seconds}s", flush=True)
+    print(f"[main] language  = {language}",       flush=True)
+    print(f"[main] delete src= {delete_source}",  flush=True)
+    print(f"[main] match path= {match_path}",     flush=True)
+    print(f"[main] web       = {web_host}:{web_port}", flush=True)
+
+    matches_cache = MatchesCache(match_path)
+    perf_move   = PerfStats(perf_path)
+    perf_volume = PerfStats(volume_perf_path)
+    perf_person = PerfStats(person_perf_path)
+
+    mover = SuwayomiMover(
+        suwayomi_path, kavita_path,
+        language=language,
+        request_timeout=request_timeout,
+        delete_source=delete_source,
+        matches_cache=matches_cache,
+        cover_cache_dir=cover_cache_path,
+        perf_stats=perf_move,
+    )
+
+    # Standalone, global, id-based person updater (manga + LN libraries).
+    person_updater = None
+    if kavita_api_key:
+        kavita_client = KavitaClient(kavita_url, kavita_api_key,
+                                     request_timeout=request_timeout)
+        person_updater = KavitaPersonUpdater(kavita_client)
+    else:
+        # Make the degraded mode visible instead of skipping it silently.
+        print("[main] KAVITA_API_KEY not set; person updater DISABLED",
+              flush=True)
+
+    # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
+
+    web_app = MatchesWebApp(
+        matches_cache, mover=mover,
+        person_updater=person_updater, person_trigger="web",
+        perf_stats={"move": perf_move, "volume": perf_volume,
+                    "person": perf_person},
+        host=web_host, port=web_port)
+    web_app.start()
+
+    if updater_enabled:
+        updater = KavitaVolumeCoverUpdater(
+            kavita_path,
+            matches_cache=matches_cache,
+            language=language,
+            request_timeout=request_timeout,
+            log_path=updater_log,
+            cover_cache_dir=cover_cache_path,
+            perf_stats=perf_volume,
+        )
+
+        def _scheduled_job():
+            # The two updaters are independent: run the person sync even
+            # when the volume/cover pass raises.  The original exception
+            # still propagates to the cron runner afterwards.
+            try:
+                updater.update_all()
+            finally:
+                if person_updater is not None:
+                    person_updater.update_all_persons(trigger="cron",
+                                                      perf=perf_person)
+
+        try:
+            CronRunner(updater_schedule, _scheduled_job,
+                       name="updaters").start()
+        except ValueError as exc:
+            # Invalid cron expression — keep the service up, just without
+            # the scheduled updaters, and surface the config error.
+            print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
+                  f"scheduled updaters DISABLED", flush=True)
+
+    # watcher.start()
+    # watcher.wait()   # blocks until stop() is called via a signal
+    web_app.wait()     # keep process alive while the watcher is disabled
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -2,3 +2,4 @@ requests>=2.31
 Pillow>=10.0
 watchdog>=4.0
 Flask>=3.0
+python-dotenv>=1.0
@@ -32,27 +32,35 @@ Dependencies
 from __future__ import annotations

 import datetime
-import difflib
 import time

 import requests

 from MediaResolver import MediaResolver
+from TextUtils import best_similarity


 # --------------------------------------------------------------------------
 # GraphQL query strings
 # --------------------------------------------------------------------------
-_SEARCH_MANGA = """
+# AniList models both manga and light novels as type MANGA; the format
+# clause decides which of the two a search returns.  The placeholder is
+# substituted at construction time (see `media_format`).
+_SEARCH_MANGA_TEMPLATE = """
 query ($search: String) {
  Page(page: 1, perPage: 5) {
-    media(search: $search, type: MANGA, format_not_in: [NOVEL]) {
+    media(search: $search, type: MANGA, __FORMAT_CLAUSE__) {
      id title { romaji english native } siteUrl
    }
  }
 }
 """

+_FORMAT_CLAUSES = {
+    "manga": "format_not_in: [NOVEL]",
+    "novel": "format_in: [NOVEL]",
+}
+
 _MANGA_STATS = """
 query ($id: Int) {
  Media(id: $id, type: MANGA) {
@@ -131,10 +139,24 @@ class AniListResolver(MediaResolver):
            cls._instance._initialized = False
        return cls._instance

-    def __init__(self, *, request_timeout: int = 30):
+    def __init__(self, *, request_timeout: int = 30,
+                 media_format: str = "manga"):
+        """
+        media_format : "manga" (excludes novels) or "novel" (novels only).
+                       Only the FIRST construction in the process sets it
+                       (singleton); construct the resolver with the correct
+                       format in the entry point / orchestrator.
+        """
        if self._initialized:
            return

+        if media_format not in _FORMAT_CLAUSES:
+            raise ValueError(f"media_format must be one of "
+                             f"{sorted(_FORMAT_CLAUSES)}, got {media_format!r}")
+        self.media_format = media_format
+        self._search_query = _SEARCH_MANGA_TEMPLATE.replace(
+            "__FORMAT_CLAUSE__", _FORMAT_CLAUSES[media_format])
+
        self.request_timeout = request_timeout

        self._session = requests.Session()
@@ -178,7 +200,7 @@ class AniListResolver(MediaResolver):
            return self._id_cache[key]

        try:
-            data = self._gql(_SEARCH_MANGA, {"search": title})
+            data = self._gql(self._search_query, {"search": title})
            results = ((data.get("data") or {})
                       .get("Page", {})
                       .get("media") or [])
@@ -469,18 +491,11 @@ class AniListResolver(MediaResolver):
 def _score_title(query: str, entry: dict) -> float:
    """Returns the best title-similarity score for an AniList media entry."""
    title_obj = entry.get("title") or {}
-    candidates = [
-        title_obj.get("romaji") or "",
-        title_obj.get("english") or "",
-        title_obj.get("native") or "",
-    ]
-    best = 0.0
-    q = query.lower()
-    for t in candidates:
-        if t:
-            ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
-            best = max(best, ratio)
-    return best
+    return best_similarity(query, (
+        title_obj.get("romaji"),
+        title_obj.get("english"),
+        title_obj.get("native"),
+    ))


 # --------------------------------------------------------------------------
@@ -0,0 +1,148 @@
+"""
+cover_cache.py
+==============
+
+Disk-backed cache for downloaded cover images, keyed by URL.
+
+Why
+---
+The mover packs every chapter of a series individually, and each chapter
+needs a cover image.  Without caching, the same multi-megabyte cover is
+downloaded once per chapter (20-chapter volume = 20 identical downloads).
+This cache turns that into a single download per unique URL.
+
+Persistence
+-----------
+* ``cache_dir`` given     -> covers persist across runs in that directory.
+* ``cache_dir`` omitted   -> a temporary directory is used and removed
+                             automatically when the process exits.
+
+Files are stored as ``<sha256(url)[:32]><ext>``; the extension is derived
+from the URL / Content-Type at download time so it can be reused when
+writing the cover into a chapter folder.
+
+Thread safety: downloads are serialised per cache instance, so concurrent
+mover / updater threads never fetch the same URL twice.
+
+Dependencies
+------------
+    requests    ->  pip install requests
+"""
+
+from __future__ import annotations
+
+import atexit
+import hashlib
+import shutil
+import tempfile
+import threading
+from pathlib import Path
+
+import requests
+
+
+_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
+
+# Content-Type fragments checked (in this order) when the URL path carries
+# no recognised image suffix.  Anything not listed falls back to ".jpg".
+_CONTENT_TYPE_EXTS = (
+    ("png",  ".png"),
+    ("webp", ".webp"),
+    ("gif",  ".gif"),
+    ("avif", ".avif"),
+    ("bmp",  ".bmp"),
+)
+
+
+def _guess_extension(url: str, content_type: str) -> str:
+    """
+    Derives an image file extension from a URL or HTTP Content-Type.
+
+    Parameters
+    ----------
+    url          : Source URL.  Only its *path* component is inspected, so
+                   query strings, fragments and the host name never
+                   influence the result.
+    content_type : Value of the Content-Type response header (may be empty
+                   or None).
+
+    Returns the lower-cased suffix from the URL path when it is a known
+    image extension, otherwise an extension inferred from the Content-Type,
+    otherwise ``".jpg"``.
+    """
+    from urllib.parse import urlsplit
+
+    try:
+        path = urlsplit(url).path
+    except ValueError:
+        # Malformed URL (e.g. broken IPv6 literal) -> plain string handling.
+        path = url.split("?")[0]
+
+    url_ext = Path(path).suffix.lower()
+    if url_ext in _IMAGE_EXTS:
+        return url_ext
+
+    ct = (content_type or "").lower()
+    for marker, ext in _CONTENT_TYPE_EXTS:
+        if marker in ct:
+            return ext
+    return ".jpg"
+
+
+class CoverCache:
+    """
+    URL-keyed image cache on disk.
+
+    Parameters
+    ----------
+    cache_dir       : Directory for cached covers.  None -> temporary
+                      directory, deleted automatically at process exit.
+    session         : Optional shared requests.Session for downloads.  A
+                      session created here is given the CoverCache
+                      User-Agent; a shared session keeps the User-Agent it
+                      already carries.
+    request_timeout : HTTP timeout in seconds.
+    """
+
+    # Suffix of in-flight download files.  Files carrying it are never
+    # served as cache hits (they may be partial after a crash).
+    _TMP_SUFFIX = ".tmp"
+
+    def __init__(self, cache_dir=None, *,
+                 session: "requests.Session | None" = None,
+                 request_timeout: int = 30):
+        self._persistent = cache_dir is not None
+        if self._persistent:
+            self._dir = Path(cache_dir)
+            self._dir.mkdir(parents=True, exist_ok=True)
+        else:
+            self._dir = Path(tempfile.mkdtemp(prefix="cover_cache_"))
+            atexit.register(self.close)
+
+        if session is None:
+            # A fresh requests.Session already carries a default
+            # "python-requests/x.y" User-Agent, so setdefault() would be a
+            # no-op here — assign explicitly.
+            self._session = requests.Session()
+            self._session.headers["User-Agent"] = "CoverCache/1.0"
+        else:
+            # Shared session: respect whatever the owner configured.
+            self._session = session
+            self._session.headers.setdefault("User-Agent", "CoverCache/1.0")
+        self._timeout = request_timeout
+        self._lock = threading.Lock()
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    def get(self, url: str) -> "tuple[bytes, str] | None":
+        """
+        Returns ``(image_bytes, extension)`` for the URL — from cache when
+        present, downloading (and caching) otherwise.  Returns None when
+        the URL is empty or the download fails.
+        """
+        if not url:
+            return None
+
+        # The lock spans lookup + download so two threads asking for the
+        # same URL result in a single HTTP request.
+        with self._lock:
+            cached = self._find_cached(url)
+            if cached is not None:
+                try:
+                    return cached.read_bytes(), cached.suffix
+                except OSError:
+                    pass  # unreadable cache file -> re-download
+
+            return self._download(url)
+
+    def clear(self) -> None:
+        """Removes all cached covers (the directory itself is kept)."""
+        with self._lock:
+            for f in self._dir.glob("*"):
+                if f.is_file():
+                    f.unlink(missing_ok=True)
+
+    def close(self) -> None:
+        """Deletes the cache directory when it is non-persistent."""
+        if not self._persistent:
+            shutil.rmtree(self._dir, ignore_errors=True)
+
+    # ------------------------------------------------------------------
+    # Internal
+    # ------------------------------------------------------------------
+    @staticmethod
+    def _key(url: str) -> str:
+        """Returns the cache file stem: first 32 hex chars of sha256(url)."""
+        return hashlib.sha256(url.encode("utf-8")).hexdigest()[:32]
+
+    def _find_cached(self, url: str) -> "Path | None":
+        """
+        Returns the cached file for the URL, or None when there is none.
+
+        Leftover ``*.tmp`` files from an interrupted download match the
+        same glob pattern but are skipped — they may hold partial data.
+        """
+        for candidate in sorted(self._dir.glob(self._key(url) + ".*")):
+            if candidate.suffix == self._TMP_SUFFIX:
+                continue
+            if candidate.is_file():
+                return candidate
+        return None
+
+    def _download(self, url: str) -> "tuple[bytes, str] | None":
+        """
+        Fetches the URL and stores it in the cache directory.
+
+        Returns ``(image_bytes, extension)``, or None on any HTTP / network
+        error.  A failed cache write is non-fatal: the bytes are still
+        returned, only the caching is skipped.
+        """
+        try:
+            resp = self._session.get(url, timeout=self._timeout)
+            resp.raise_for_status()
+        except requests.RequestException:
+            return None
+
+        ext = _guess_extension(url, resp.headers.get("Content-Type", ""))
+
+        target = self._dir / f"{self._key(url)}{ext}"
+        tmp = target.with_suffix(target.suffix + self._TMP_SUFFIX)
+        try:
+            # Write-then-rename so readers never see a half-written cover.
+            tmp.write_bytes(resp.content)
+            tmp.replace(target)
+        except OSError:
+            # Cache write failure is non-fatal — still return the bytes,
+            # but do not leave a partial temp file behind.
+            try:
+                tmp.unlink(missing_ok=True)
+            except OSError:
+                pass
+        return resp.content, ext
@@ -0,0 +1,87 @@
+"""
+cron_runner.py
+==============
+
+Runs a single callable on a cron schedule on a background thread.
+
+Decouples *what* runs from *when*: both the manga container (volume/cover
+updater + person updater) and the LN container (person updater) schedule
+their work through this one helper, using a shared ``CronSchedule`` for the
+``next_after`` arithmetic.
+
+Usage::
+
+    runner = CronRunner("0 10 * * 0", job=my_callable)   # Sundays 10:00
+    runner.start()
+    ...
+    runner.stop()
+
+When the schedule string is invalid, the CronSchedule constructor raises
+ValueError — the caller decides whether to disable the runner or fall back.
+The schedule is evaluated in local time (set TZ inside the container).
+"""
+
+from __future__ import annotations
+
+import threading
+from datetime import datetime
+
+from CronSchedule import CronSchedule
+
+
+def _now() -> str:
+    """Current local time as ISO-8601 text, cut to whole seconds (for logs)."""
+    stamp = datetime.now().replace(microsecond=0)
+    return stamp.isoformat()
+
+
+class CronRunner:
+    """
+    Fires ``job()`` whenever the cron ``schedule`` elapses.
+
+    Parameters
+    ----------
+    schedule : 5-field cron expression (see CronSchedule).
+    job      : Zero-arg callable invoked on each scheduled tick.  Exceptions
+               are caught and logged so a failing run does not kill the loop.
+    name     : Thread name (for logs).
+
+    Raises
+    ------
+    ValueError : propagated from CronSchedule when ``schedule`` is invalid.
+    """
+
+    # Longest single sleep before the wall clock is re-checked.  The
+    # schedule is expressed in naive local time while Event.wait() measures
+    # elapsed time, so one long sleep would fire at the wrong wall-clock
+    # time after a DST switch or clock adjustment.
+    _MAX_SLEEP_SECONDS = 60.0
+
+    def __init__(self, schedule: str, job, *, name: str = "CronRunner"):
+        self._cron = CronSchedule(schedule)
+        self._job = job
+        self._name = name
+        self._stop = threading.Event()
+        self._thread: "threading.Thread | None" = None
+
+    def start(self) -> None:
+        """Starts the scheduling thread.  Non-blocking; no-op when running."""
+        if self._thread is not None and self._thread.is_alive():
+            return
+        self._stop.clear()
+        self._thread = threading.Thread(
+            target=self._loop, name=self._name, daemon=True)
+        self._thread.start()
+        print(f"[{_now()}] [{self._name}] scheduled on "
+              f"cron '{self._cron.expression}'", flush=True)
+
+    def stop(self) -> None:
+        """Signals the loop to stop (a job already running finishes first)."""
+        self._stop.set()
+        thread = self._thread
+        # Joining the current thread raises RuntimeError — skip the join
+        # when stop() is called from inside the job itself.
+        if thread is not None and thread is not threading.current_thread():
+            thread.join(timeout=10)
+
+    def wait(self) -> None:
+        """Blocks the calling thread until stop() is invoked."""
+        self._stop.wait()
+
+    def _loop(self) -> None:
+        """Thread body: sleep until the next tick, run the job, repeat."""
+        while not self._stop.is_set():
+            next_run = self._cron.next_after(datetime.now())
+            print(f"[{_now()}] [{self._name}] next run: "
+                  f"{next_run.isoformat(timespec='minutes')}", flush=True)
+            if not self._sleep_until(next_run):
+                break
+            try:
+                self._job()
+            except Exception as exc:
+                print(f"[{_now()}] [{self._name}] job ERROR: {exc}", flush=True)
+
+    def _sleep_until(self, deadline: datetime) -> bool:
+        """
+        Sleeps until the local wall clock reaches ``deadline``.
+
+        Sleeps in chunks of at most ``_MAX_SLEEP_SECONDS`` and re-reads the
+        clock after each one, so the job never fires before the deadline.
+
+        Returns True when the deadline was reached, False when stop() was
+        requested first.
+        """
+        while True:
+            remaining = (deadline - datetime.now()).total_seconds()
+            if remaining <= 0:
+                return not self._stop.is_set()
+            if self._stop.wait(min(remaining, self._MAX_SLEEP_SECONDS)):
+                return False
@@ -0,0 +1,297 @@
+"""
+kavita_client.py
+================
+
+Thin HTTP client for the Kavita server REST API (v0.9.x).
+
+Authenticates via the ``x-api-key`` header.  All series / library /
+collection / metadata reads and writes used by the light-novel updater
+go through this single client so request shaping (paging, content types,
+timeouts) is consistent.  No automatic retries are performed; HTTP errors
+surface to the caller as ``requests`` exceptions.
+
+The class is intentionally state-light: no caching layer, just one
+``requests.Session``.  Higher-level diff / update logic lives in
+KavitaSeriesUpdater, KavitaPersonUpdater and RelationshipSync.
+"""
+
+from __future__ import annotations
+
+import base64
+from typing import Iterable
+
+import requests
+
+
+class KavitaClient:
+    """
+    Thin wrapper around the Kavita REST API.
+
+    Parameters
+    ----------
+    base_url        : Base URL of the Kavita server (trailing "/" ignored).
+    api_key         : Kavita API key, sent as the ``x-api-key`` header.
+    request_timeout : HTTP timeout in seconds for every request.
+
+    Unless stated otherwise every method raises ``requests.HTTPError`` on a
+    4xx / 5xx response (via ``raise_for_status``).
+    """
+
+    def __init__(self, base_url: str, api_key: str, *,
+                 request_timeout: int = 30):
+        self._base = base_url.rstrip("/")
+        self._timeout = request_timeout
+
+        # API session: sends + receives JSON.
+        self._session = requests.Session()
+        self._session.headers.update({
+            "x-api-key":    api_key,
+            "Accept":       "application/json",
+            "Content-Type": "application/json",
+        })
+
+        # Plain session for downloading external images (covers).  Must NOT
+        # carry the API headers — some CDNs refuse to return image bytes
+        # when the client sends Accept: application/json.
+        self._image_session = requests.Session()
+        self._image_session.headers.update({
+            "User-Agent": "KavitaLightNovelUpdater/1.0",
+        })
+
+    # ------------------------------------------------------------------
+    # Libraries
+    # ------------------------------------------------------------------
+    def list_libraries(self) -> list[dict]:
+        """Returns all libraries the authenticated user can access."""
+        r = self._session.get(f"{self._base}/api/Library/libraries",
+                              timeout=self._timeout)
+        r.raise_for_status()
+        return r.json() or []
+
+    # ------------------------------------------------------------------
+    # Series
+    # ------------------------------------------------------------------
+    def list_series_in_library(self, library_id: int, *,
+                               page_size: int = 200) -> list[dict]:
+        """
+        Returns all SeriesDto entries in the given library.
+
+        Uses POST /api/Series/all-v2 with a FilterV2 that scopes by
+        library id.  Pages through until an empty or short page is
+        returned.
+        """
+        # The filter is identical for every page, so it is built once.
+        body = {
+            "statements": [
+                {
+                    "comparison": 0,     # Equal
+                    "field": 19,         # Libraries field id (Kavita v0.9.x)
+                    "value": str(library_id),
+                }
+            ],
+            "combination": 1,           # And
+            "sortOptions": {"isAscending": True, "sortField": 1},
+            "limitTo": 0,
+        }
+        return self._post_all_pages("/api/Series/all-v2", body, page_size)
+
+    def get_series(self, series_id: int) -> dict:
+        """Returns the SeriesDto for the given series id."""
+        r = self._session.get(f"{self._base}/api/Series/{series_id}",
+                              timeout=self._timeout)
+        r.raise_for_status()
+        return r.json() or {}
+
+    def update_series(self, series: dict) -> None:
+        """Updates the Series-level data (name, sortName, malId, …)."""
+        r = self._session.post(f"{self._base}/api/Series/update",
+                               json=series, timeout=self._timeout)
+        r.raise_for_status()
+
+    # ------------------------------------------------------------------
+    # Series metadata
+    # ------------------------------------------------------------------
+    def get_series_metadata(self, series_id: int) -> dict:
+        """Returns the SeriesMetadataDto for a series."""
+        r = self._session.get(
+            f"{self._base}/api/Series/metadata",
+            params={"seriesId": series_id}, timeout=self._timeout)
+        r.raise_for_status()
+        return r.json() or {}
+
+    def update_series_metadata(self, metadata: dict) -> None:
+        """
+        Writes a SeriesMetadataDto back to Kavita.
+
+        Kavita expects the payload wrapped: {seriesMetadata: {...}}.
+        """
+        r = self._session.post(
+            f"{self._base}/api/Series/metadata",
+            json={"seriesMetadata": metadata},
+            timeout=self._timeout)
+        r.raise_for_status()
+
+    # ------------------------------------------------------------------
+    # Related series
+    # ------------------------------------------------------------------
+    def get_related(self, series_id: int) -> dict:
+        """Returns all related series grouped by relation type."""
+        r = self._session.get(
+            f"{self._base}/api/Series/all-related",
+            params={"seriesId": series_id}, timeout=self._timeout)
+        r.raise_for_status()
+        return r.json() or {}
+
+    def update_related(self, payload: dict) -> None:
+        """
+        Sets the related-series relationships for a series.
+
+        Payload shape (UpdateRelatedSeriesDto):
+            {seriesId, prequels, sequels, sideStories, spinOffs,
+             adaptations, characters, contains, others,
+             alternativeSettings, alternativeVersions, doujinshis,
+             editions, annuals}
+        Each relation key (everything except seriesId) holds a list of
+        target series ids (ints).
+        """
+        r = self._session.post(
+            f"{self._base}/api/Series/update-related",
+            json=payload, timeout=self._timeout)
+        r.raise_for_status()
+
+    # ------------------------------------------------------------------
+    # Collections
+    # ------------------------------------------------------------------
+    def list_collections(self) -> list[dict]:
+        """Returns all collection tags visible to the authenticated user."""
+        r = self._session.get(
+            f"{self._base}/api/Collection",
+            params={"ownedOnly": "false", "sortByLastModified": "false"},
+            timeout=self._timeout)
+        r.raise_for_status()
+        return r.json() or []
+
+    def add_series_to_collection(self, *, collection_id: int,
+                                 title: str,
+                                 series_ids: Iterable[int]) -> dict:
+        """
+        Adds (or creates) a collection and attaches series to it.
+
+        Pass collection_id=0 to create a new collection named `title`.
+        For an existing collection set collection_id to its id (title is
+        still required by the API but acts as no-op when the id matches).
+
+        Returns the decoded JSON response, or {} when the response body is
+        empty or not JSON.
+        """
+        body = {
+            "collectionTagId":    int(collection_id),
+            "collectionTagTitle": title,
+            "seriesIds":          [int(s) for s in series_ids],
+        }
+        r = self._session.post(
+            f"{self._base}/api/Collection/update-for-series",
+            json=body, timeout=self._timeout)
+        r.raise_for_status()
+        try:
+            return r.json() or {}
+        except ValueError:
+            return {}
+
+    # ------------------------------------------------------------------
+    # Persons
+    # ------------------------------------------------------------------
+    def search_persons(self, name: str) -> list[dict]:
+        """Returns PersonDto entries matching `name` (Kavita's own search)."""
+        r = self._session.get(
+            f"{self._base}/api/Person/search",
+            params={"queryString": name}, timeout=self._timeout)
+        r.raise_for_status()
+        return r.json() or []
+
+    def list_all_persons(self, *, page_size: int = 200) -> list[dict]:
+        """
+        Returns every PersonDto in the instance.
+
+        Pages through POST /api/Person/all (the browse endpoint) with an
+        empty filter until an empty or short page is returned — same
+        paging pattern as list_series_in_library.
+        """
+        return self._post_all_pages("/api/Person/all", {}, page_size)
+
+    def update_person(self, payload: dict) -> None:
+        """Writes a person record (malId, aniListId, description, …)."""
+        r = self._session.post(f"{self._base}/api/Person/update",
+                               json=payload, timeout=self._timeout)
+        r.raise_for_status()
+
+    # ------------------------------------------------------------------
+    # Cover uploads
+    # ------------------------------------------------------------------
+    def upload_series_cover(self, series_id: int, image_url: str, *,
+                            lock: bool = False) -> None:
+        """Downloads an external image and uploads it as the series cover."""
+        self._upload_cover("/api/Upload/series", series_id, image_url, lock)
+
+    def upload_person_cover(self, person_id: int, image_url: str, *,
+                            lock: bool = False) -> None:
+        """Downloads an external image and uploads it as a person cover."""
+        self._upload_cover("/api/Upload/person", person_id, image_url, lock)
+
+    def _upload_cover(self, endpoint: str, entity_id: int,
+                      image_url: str, lock: bool) -> None:
+        """
+        Shared cover-upload path.  Kavita's upload endpoints accept a raw
+        base64 blob (no ``data:`` prefix) in the ``url`` field — a data
+        URI or the two-step upload-by-url flow are rejected with HTTP 400
+        (verified against Kavita 0.9.0.2).
+
+        Raises ``requests.HTTPError`` when the image download fails or when
+        Kavita answers the upload with a 4xx / 5xx status.
+        """
+        # Image download uses the header-free session (see __init__).
+        img = self._image_session.get(image_url, timeout=self._timeout)
+        img.raise_for_status()
+        b64 = base64.b64encode(img.content).decode()
+        r = self._session.post(
+            f"{self._base}{endpoint}",
+            json={"id": entity_id, "url": b64, "lockCover": lock},
+            timeout=self._timeout)
+        if r.status_code >= 400:
+            # Include the body excerpt — Kavita's upload errors carry the
+            # actual reason there, not in the status line.
+            raise requests.HTTPError(
+                f"{endpoint} HTTP {r.status_code}: {_short_body(r)}",
+                response=r)
+
+    # ------------------------------------------------------------------
+    # Generic GET / POST helpers (used by callers that need a response
+    # object; no status check is performed here)
+    # ------------------------------------------------------------------
+    def get(self, path: str, params: "dict | None" = None) -> requests.Response:
+        """Sends GET ``base_url + path`` and returns the raw response."""
+        return self._session.get(f"{self._base}{path}",
+                                 params=params, timeout=self._timeout)
+
+    def post(self, path: str, *,
+             json: "dict | list | None" = None,
+             params: "dict | None" = None) -> requests.Response:
+        """Sends POST ``base_url + path`` and returns the raw response."""
+        return self._session.post(f"{self._base}{path}",
+                                  json=json, params=params,
+                                  timeout=self._timeout)
+
+    # ------------------------------------------------------------------
+    # Internal
+    # ------------------------------------------------------------------
+    def _post_all_pages(self, path: str, body: dict,
+                        page_size: int) -> list[dict]:
+        """
+        Collects every page of a paged POST endpoint.
+
+        Sends ``body`` to ``path`` with PageNumber / PageSize query
+        parameters, starting at page 1, and stops at the first empty page
+        or the first page shorter than ``page_size``.
+        """
+        results: list[dict] = []
+        page = 1
+        while True:
+            r = self._session.post(
+                f"{self._base}{path}",
+                params={"PageNumber": page, "PageSize": page_size},
+                json=body, timeout=self._timeout)
+            r.raise_for_status()
+            chunk = r.json() or []
+            if not chunk:
+                break
+            results.extend(chunk)
+            if len(chunk) < page_size:
+                break   # short page -> this was the last one
+            page += 1
+        return results
+
+
+def _short_body(resp: requests.Response, limit: int = 400) -> str:
+    """
+    Produces a one-line excerpt of a response body for error messages.
+
+    Line breaks become spaces, surrounding whitespace is removed and the
+    text is cut to `limit` characters (an ellipsis marks the cut).  Fixed
+    placeholders are returned for an empty or unreadable body.
+    """
+    try:
+        raw = resp.text or ""
+    except Exception:
+        return "<unreadable response body>"
+
+    # Map LF and CR to a space each in a single pass.
+    flat = raw.strip().translate({ord("\n"): " ", ord("\r"): " "})
+    excerpt = flat[:limit] + "…" if len(flat) > limit else flat
+    return excerpt if excerpt else "<empty body>"
@@ -2,544 +2,236 @@
 kavita_person_updater.py
 ========================

-Synchronises Kavita person / character records with MyAnimeList data.
+Synchronises Kavita character person-records with MyAnimeList / AniList data.

-For every character and staff member that MAL knows about for a given manga
-the updater:
-  1. Searches Kavita for a matching Person record (by name similarity /
-     alias match, configurable threshold).
-  2. Sets the MAL ID on the Kavita person if it is not yet linked.
-  3. Uploads the MAL profile image when the cover is not locked and has
-     not been set in a previous sync run.
-  4. Populates the description field when Kavita has none and MAL provides
-     an 'about' text (requires an extra Jikan request per character; only
-     performed when update_descriptions=True).
+Global, id-based mode
+---------------------
+Kavita person-records are created with a disambiguated name carrying the
+tracker *character* id, e.g. ``Rem (MAL 118737)`` (manga: written into
+ComicInfo <Characters>; light novels: written by the metadata builder).
+``update_all_persons`` walks **every** person in the Kavita instance, reads
+that id from the name, looks the character up on MAL / AniList by id, and
+writes back:
+
+  * the tracker id into the ``malId`` / ``aniListId`` field (when still empty),
+  * a description (when the record has none),
+  * the profile image (when not locked and not already set).
+
+Persons whose name carries no id (authors / staff, which are not
+disambiguated) are skipped.  A record already linked to a *different*
+tracker id than its name says is reported as a conflict and left untouched.
+
+This mode is format-independent (it only does id lookups, never title
+searches) so a single pass covers both the manga and light-novel libraries.
+
+All HTTP traffic to Kavita goes through the shared :class:`KavitaClient`
+(`/api/Person/all`, `/api/Person/update`, `/api/Upload/person`).

-Kavita API version
------------------
 Tested against Kavita 0.9.0.2.
-
-Authentication
--------------
-Uses the `x-api-key` header (API key from Kavita user settings).
-No JWT login is required.
-
-Relevant endpoints (Kavita 0.9.0.2)
-------------------------------------
-  GET  /api/Person/search       find persons by name / alias
-  POST /api/Person/update       write metadata (malId, description, …)
-  POST /api/Upload/person       set cover image (base64 data URI)
-  POST /api/Upload/upload-by-url  download an external URL to temp storage
-                                  (used as an alternative upload path)
-
-Cover upload flow
-----------------
-The image is downloaded locally, base64-encoded, and sent as a data URI
-to POST /api/Upload/person.  This is more reliable than the
-upload-by-url → upload/person two-step because it avoids Kavita's temp
-file handling (which had known issues in 0.8.x – 0.9.x, GitHub #3900).
-
-Dependencies
------------
-    requests    ->  pip install requests
 """

 from __future__ import annotations

-import base64
-import datetime
-import difflib
-import re
-
 import requests

+from KavitaClient import KavitaClient
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
+from PerfStats import PerfStats
+from TextUtils import paragraphs_to_html, parse_person_tracker_id


 class KavitaPersonUpdater:
    """
-    Syncs Kavita Person records with MyAnimeList data.
+    Syncs Kavita character person-records with MAL / AniList data, keyed by
+    the tracker id embedded in each person's name.

    Parameters
    ----------
-    kavita_base_url : Base URL of the Kavita server, e.g. "http://192.168.2.2:5000"
-    api_key         : Kavita API key (Settings → User → API key)
-    mal_resolver    : Shared MALResolver singleton (created automatically if omitted)
-    request_timeout : HTTP timeout in seconds for both Kavita and image requests
-    min_name_score  : Minimum difflib similarity ratio (0–1) required to accept a
-                      Kavita person as a match for a MAL name.  Default 0.80.
+    client       : Shared KavitaClient (session, auth, cover uploads).
+    mal_resolver : Shared MALResolver singleton (created if omitted).
+    al_resolver  : Shared AniListResolver singleton (created if omitted).
    """

-    def __init__(self, kavita_base_url: str, api_key: str, *,
+    def __init__(self, client: KavitaClient, *,
                 mal_resolver: "MALResolver | None" = None,
-                 al_resolver: "AniListResolver | None" = None,
-                 request_timeout: int = 30,
-                 min_name_score: float = 0.80):
-        self._base = kavita_base_url.rstrip("/")
-        self._timeout = request_timeout
-        self._min_score = min_name_score
+                 al_resolver: "AniListResolver | None" = None):
+        self._client = client
        self._mal = mal_resolver or MALResolver()
        self._al  = al_resolver  or AniListResolver()

-        # Session used for Kavita API calls.
-        self._session = requests.Session()
-        self._session.headers.update({
-            "x-api-key": api_key,
-            "Content-Type": "application/json",
-            "Accept": "application/json",
-        })
-
-        # Plain session used to download external images (MAL CDN etc.).
-        # Must NOT carry the Kavita API headers — Accept: application/json
-        # would prevent MAL CDN from returning the image bytes.
-        self._image_session = requests.Session()
-        self._image_session.headers.update({
-            "User-Agent": "KavitaPersonUpdater/1.0",
-        })
-
-        # Cache: normalised name -> list of PersonDto dicts (best matches first)
-        self._person_search_cache: dict[str, list[dict]] = {}
-
    # ------------------------------------------------------------------
-    # Public: combined update
+    # Public: global person sync
    # ------------------------------------------------------------------
-    def update_for_manga(self, mal_manga_id: "int | None", *,
-                         al_manga_id: "int | None" = None,
-                         update_covers: bool = True,
-                         update_descriptions: bool = True) -> dict:
+    def update_all_persons(self, *,
+                           trigger: str = "cron",
+                           perf: "PerfStats | None" = None,
+                           update_covers: bool = True,
+                           update_descriptions: bool = True) -> dict:
        """
-        Runs a full update pass for both characters and staff of the manga.
-        MAL is tried first; AniList is used as fallback when MAL returns nothing.
+        Walks every Kavita person, syncing the ones whose name carries a
+        tracker character id.

-        Returns
-        -------
-        {
-          "characters": {"updated": n, "skipped": n, "not_found": n},
-          "staff":      {"updated": n, "skipped": n, "not_found": n},
-        }
+        Parameters
+        ----------
+        trigger : Source that started this run ("cron" | "web" | "ln") —
+                  recorded in the perf-stats run meta.
+        perf    : Optional PerfStats for per-person step timing.
+
+        Returns {"trigger", "updated", "skipped", "not_found",
+                 "conflicts", "errors"}.
        """
-        return {
-            "characters": self.update_characters(
-                mal_manga_id, al_manga_id=al_manga_id,
-                update_covers=update_covers,
-                update_descriptions=update_descriptions),
-            "staff": self.update_staff(
-                mal_manga_id, al_manga_id=al_manga_id,
-                update_covers=update_covers,
-                update_descriptions=update_descriptions),
-        }
+        perf = perf or PerfStats(None)
+        run = perf.begin_run(meta={"trigger": trigger})
+        result: dict = {"trigger": trigger, "updated": 0, "skipped": 0,
+                        "not_found": 0, "conflicts": 0, "errors": []}

-    # ------------------------------------------------------------------
-    # Public: character update
-    # ------------------------------------------------------------------
-    def update_characters(self, mal_manga_id: "int | None", *,
-                          al_manga_id: "int | None" = None,
-                          update_covers: bool = True,
-                          update_descriptions: bool = True) -> dict:
-        """
-        Updates Kavita persons that match MAL/AniList characters for the manga.
-        MAL is tried first; AniList is the fallback when MAL returns nothing.
+        try:
+            persons = self._client.list_all_persons()
+        except requests.RequestException as exc:
+            result["errors"].append(f"list persons failed: {exc}")
+            run.finish()
+            return result

-        Returns {"updated": n, "skipped": n, "not_found": n}.
-        """
-        entries = self._mal.get_characters_detailed(mal_manga_id) if mal_manga_id else []
-        resolver = self._mal
-        if not entries and al_manga_id:
-            entries = self._al.get_characters_detailed(al_manga_id)
-            resolver = self._al
-        return self._sync_entries(entries, "character", resolver,
-                                  update_covers=update_covers,
-                                  update_descriptions=update_descriptions)
-
-    # ------------------------------------------------------------------
-    # Public: staff update
-    # ------------------------------------------------------------------
-    def update_staff(self, mal_manga_id: "int | None", *,
-                     al_manga_id: "int | None" = None,
-                     update_covers: bool = True,
-                     update_descriptions: bool = True) -> dict:
-        """
-        Updates Kavita persons that match MAL/AniList staff for the manga.
-        MAL is tried first; AniList is the fallback when MAL returns nothing.
-
-        Returns {"updated": n, "skipped": n, "not_found": n}.
-        """
-        entries = self._mal.get_staff_detailed(mal_manga_id) if mal_manga_id else []
-        resolver = self._mal
-        if not entries and al_manga_id:
-            entries = self._al.get_staff_detailed(al_manga_id)
-            resolver = self._al
-        return self._sync_entries(entries, "staff", resolver,
-                                  update_covers=update_covers,
-                                  update_descriptions=update_descriptions)
-
-    # ------------------------------------------------------------------
-    # Public: cache management
-    # ------------------------------------------------------------------
-    def clear_cache(self) -> None:
-        """Clears the Kavita person search cache."""
-        self._person_search_cache.clear()
-
-    # ------------------------------------------------------------------
-    # Internal: main sync loop
-    # ------------------------------------------------------------------
-    def _sync_entries(self, entries: list[dict], kind: str, resolver, *,
-                      update_covers: bool,
-                      update_descriptions: bool) -> dict:
-        result: dict = {"updated": 0, "skipped": 0, "not_found": 0,
-                        "errors": []}
-        for entry in entries:
-            name = (entry.get("name") or "").strip()
-            raw_name = (entry.get("raw_name") or "").strip()
-            if not name and not raw_name:
+        for person in persons:
+            name = (person.get("name") or "").strip()
+            parsed = parse_person_tracker_id(name)
+            if not parsed:
+                result["skipped"] += 1          # author/staff or un-tagged
                continue

-            # Search by the cleaned (XML-safe) name first; if Kavita stores
-            # the legacy comma form, retry with the raw MAL name.
-            matches = self._find_kavita_person(name) if name else []
-            if not matches and raw_name and raw_name != name:
-                matches = self._find_kavita_person(raw_name)
-
-            if not matches:
-                result["not_found"] += 1
-                continue
-
-            changed = self._apply_mal_data(
-                matches[0], entry, kind, resolver,
-                update_cover=update_covers,
-                update_desc=update_descriptions,
-                errors=result["errors"])
-            result["updated" if changed else "skipped"] += 1
+            source, tracker_id = parsed
+            item = run.begin_item(name)
+            ok = True
+            try:
+                category = self._apply_to_person(
+                    person, source, tracker_id, item,
+                    update_cover=update_covers,
+                    update_desc=update_descriptions,
+                    errors=result["errors"])
+                result[category] += 1
+                ok = category != "conflicts"
+            except Exception as exc:
+                result["errors"].append(f"{name}: {exc}")
+                ok = False
+            finally:
+                item.finish(ok=ok)

+        run.finish()
+        print(f"[persons] trigger={trigger} updated={result['updated']} "
+              f"skipped={result['skipped']} not_found={result['not_found']} "
+              f"conflicts={result['conflicts']} errors={len(result['errors'])}",
+              flush=True)
        return result

    # ------------------------------------------------------------------
-    # Internal: Kavita person search
+    # Internal: apply tracker data to one person
    # ------------------------------------------------------------------
-    def _find_kavita_person(self, name: str) -> list[dict]:
+    def _apply_to_person(self, person: dict, source: str, tracker_id: int,
+                         item, *, update_cover: bool, update_desc: bool,
+                         errors: list) -> str:
        """
-        Searches Kavita for persons matching `name`.
-
-        Checks both the main name and any stored aliases.
-        Returns persons sorted by similarity, filtered by min_name_score.
-        Results are cached per (normalised) query name.
+        Applies MAL/AniList character data to one Kavita person.
+        Returns the result category: "updated" | "skipped" | "not_found"
+        | "conflicts".
        """
-        key = name.lower().strip()
-        if key in self._person_search_cache:
-            return self._person_search_cache[key]
-
-        try:
-            resp = self._session.get(
-                f"{self._base}/api/Person/search",
-                params={"queryString": name},
-                timeout=self._timeout,
-            )
-            resp.raise_for_status()
-            persons: list[dict] = resp.json() or []
-        except requests.RequestException:
-            self._person_search_cache[key] = []
-            return []
-
-        def score(p: dict) -> float:
-            candidates = [p.get("name") or ""]
-            candidates += [a for a in (p.get("aliases") or []) if a]
-            best = 0.0
-            q = key
-            for c in candidates:
-                r = difflib.SequenceMatcher(None, q, c.lower()).ratio()
-                best = max(best, r)
-            return best
-
-        ranked = sorted(persons, key=score, reverse=True)
-        filtered = [p for p in ranked if score(p) >= self._min_score]
-        self._person_search_cache[key] = filtered
-        return filtered
-
-    # ------------------------------------------------------------------
-    # Internal: apply MAL data to a single Kavita person
-    # ------------------------------------------------------------------
-    def _apply_mal_data(self, person: dict, mal_entry: dict, kind: str,
-                        resolver, *,
-                        update_cover: bool, update_desc: bool,
-                        errors: "list | None" = None) -> bool:
-        """
-        Applies tracker data (MAL or AniList) to one Kavita person record.
-
-        Fields updated
-        --------------
-        - malId      : set when the entry carries a MAL ID and it differs
-        - aniListId  : set when the entry carries an AniList ID and it differs
-        - description: set when empty and the tracker provides a description
-        - cover image: uploaded when not locked and no prior sync cover exists
-
-        Returns True if any change was made.  Failures are appended to the
-        `errors` list (if provided) instead of being silently swallowed.
-        """
-        person_id: "int | None" = person.get("id")
+        person_id = person.get("id")
        if not person_id:
-            return False
+            return "skipped"

-        person_name = person.get("name") or ""
+        resolver = self._mal if source == "mal" else self._al
+        id_field = "malId" if source == "mal" else "aniListId"
+        current = person.get(id_field) or 0

-        # Tracker IDs — a MAL entry has mal_id set; an AniList entry has al_id.
-        mal_id: "int | None" = mal_entry.get("mal_id")
-        al_id:  "int | None" = mal_entry.get("al_id")
-        entity_id = mal_id or al_id   # used for resolver detail calls
+        # The name is authoritative; a record linked to a different id is a
+        # data conflict — never overwrite it.
+        if current and current != tracker_id:
+            errors.append(
+                f"conflict: '{person.get('name')}' (#{person_id}) has "
+                f"{id_field}={current} but name says {tracker_id} — skipped")
+            return "conflicts"

-        current_mal_id: int = person.get("malId") or 0
-        current_al_id:  int = person.get("aniListId") or 0
-        needs_mal_id = bool(mal_id and current_mal_id != mal_id)
-        needs_al_id  = bool(al_id  and current_al_id  != al_id)
+        with item.measure("detail_fetch"):
+            details = resolver.get_character_details(tracker_id)
+        if not details:
+            return "not_found"

-        # ------ Lazy description fetch -----------------------------------
-        description: "str | None" = None
+        need_id = not current                    # write id when still missing
+        description = None
        if update_desc and not (person.get("description") or "").strip():
-            if entity_id:
-                if kind == "character":
-                    details = resolver.get_character_details(entity_id)
-                    if details:
-                        description = _build_character_description(details) or None
-                else:
-                    details = resolver.get_person_details(entity_id)
-                    if details:
-                        description = _build_person_description(details) or None
+            description = _build_character_description(details) or None
+        need_desc = bool(description)

-        needs_desc = bool(description)
-
-        # ------ Metadata update ------------------------------------------
        changed = False
-        if needs_mal_id or needs_al_id or needs_desc:
-            payload: dict = {
+        if need_id or need_desc:
+            payload = {
                "id":               person_id,
-                "name":             person_name,
-                # MUST stay a boolean — the cover image itself is uploaded
-                # separately via POST /api/Upload/person (below).  Putting a
-                # URL here makes Kavita reject the whole payload with HTTP 400.
+                "name":             person.get("name") or "",
+                # MUST stay a boolean — the cover is uploaded separately.
                "coverImageLocked": bool(person.get("coverImageLocked", False)),
                "aliases":          person.get("aliases") or [],
                "description":      description or person.get("description"),
-                "malId":    mal_id if needs_mal_id else (current_mal_id or None),
-                "aniListId": al_id if needs_al_id  else (current_al_id  or None),
+                "malId":     tracker_id if source == "mal"
+                             else (person.get("malId") or None),
+                "aniListId": tracker_id if source == "al"
+                             else (person.get("aniListId") or None),
            }
            try:
-                resp = self._session.post(
-                    f"{self._base}/api/Person/update",
-                    json=payload,
-                    timeout=self._timeout,
-                )
-                resp.raise_for_status()
+                with item.measure("person_update"):
+                    self._client.update_person(payload)
                changed = True
-            except requests.RequestException as e:
-                if errors is not None:
-                    errors.append(
-                        f"Person/update failed for #{person_id} "
-                        f"'{person_name}': {e}")
+            except requests.RequestException as exc:
+                errors.append(f"update failed #{person_id} "
+                              f"'{person.get('name')}': {exc}")

-        # ------ Cover image upload ----------------------------------------
-        # Upload whenever:
-        #   - caller requested cover updates
-        #   - cover is NOT locked (user did not manually pin it)
-        #   - we have not already uploaded this exact tracker entity's image
-        #     (i.e. the tracked ID differs OR there is no cover yet).
+        # Cover: upload when not locked and not already set for this id.
        if update_cover and not person.get("coverImageLocked"):
-            image_url = mal_entry.get("image_url")
-            already_uploaded = (
-                entity_id is not None
-                and (current_mal_id == mal_id or current_al_id == al_id)
-                and bool(person.get("coverImage"))
-            )
-            if image_url and not already_uploaded:
-                if self._upload_cover(person_id, image_url,
-                                      person_name=person_name,
-                                      errors=errors):
+            image_url = details.get("image_url")
+            already = bool(current) and bool(person.get("coverImage"))
+            if image_url and not already:
+                try:
+                    with item.measure("cover_upload"):
+                        self._client.upload_person_cover(person_id, image_url)
                    changed = True
+                except requests.RequestException as exc:
+                    errors.append(f"cover upload failed #{person_id} "
+                                  f"'{person.get('name')}': {exc}")

-        return changed
-
-    # ------------------------------------------------------------------
-    # Internal: cover upload
-    # ------------------------------------------------------------------
-    def _upload_cover(self, person_id: int, image_url: str,
-                      lock: bool = False, *,
-                      person_name: str = "",
-                      errors: "list | None" = None) -> bool:
-        """
-        Uploads a cover image to a Kavita person.
-
-        The image is downloaded with the plain (header-less) image session
-        and posted to `POST /api/Upload/person` as a raw base64 string in
-        the `url` field.
-
-        Notes on protocol quirks discovered against Kavita 0.9.0.2:
-          - The two-step `upload-by-url` -> `Upload/person` flow returns
-            "Unable to save cover image to Person" (HTTP 400).
-          - A `data:image/jpeg;base64,...` data URI is rejected with the
-            same error.
-          - Only the raw base64 blob (no prefix) is accepted.
-        """
-        label = (f"#{person_id} '{person_name}'"
-                 if person_name else f"#{person_id}")
-
-        # 1) Download the image with a clean session — the Kavita session's
-        #    `Accept: application/json` header makes some CDNs refuse to
-        #    return image bytes.
-        try:
-            img_resp = self._image_session.get(image_url,
-                                               timeout=self._timeout)
-            img_resp.raise_for_status()
-        except requests.RequestException as e:
-            if errors is not None:
-                errors.append(
-                    f"image download failed for {label} ({image_url}): {e}")
-            return False
-
-        b64 = base64.b64encode(img_resp.content).decode()
-
-        # 2) POST the raw base64 blob.
-        try:
-            resp = self._session.post(
-                f"{self._base}/api/Upload/person",
-                json={"id": person_id, "url": b64, "lockCover": lock},
-                timeout=self._timeout,
-            )
-            if resp.status_code >= 400:
-                if errors is not None:
-                    errors.append(
-                        f"Upload/person HTTP {resp.status_code} for {label}: "
-                        f"{_short_body(resp)}")
-                return False
-            return True
-        except requests.RequestException as e:
-            if errors is not None:
-                errors.append(
-                    f"Upload/person failed for {label}: {e}")
-            return False
+        return "updated" if changed else "skipped"


 # --------------------------------------------------------------------------
-# Module helpers: description builders
+# Module helper: character description builder
 # --------------------------------------------------------------------------
-def _plain_to_html(text: str) -> str:
-    """Converts plain text with paragraph breaks to compact HTML (no raw \\n)."""
-    if not text:
-        return ""
-    parts: list[str] = []
-    for para in re.split(r"\n{2,}", text.strip()):
-        para = para.strip()
-        if para:
-            parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
-    return "".join(parts)
-
-
-def _format_birthday(birthday: str) -> str:
-    """Converts an ISO 8601 birthday string to "D Month YYYY"."""
-    if not birthday:
-        return ""
-    try:
-        dt = datetime.date.fromisoformat(birthday.split("T")[0])
-        return f"{dt.day} {dt.strftime('%B %Y')}"
-    except (ValueError, AttributeError):
-        return ""
-
-
 def _build_character_description(details: dict) -> str:
    """
-    Builds a Kavita-safe HTML description for a MAL character.
+    Builds a Kavita-safe HTML description for a MAL / AniList character.

-    Top line: "Favorites: N" as a link to the character's MAL page.
+    Top line: "Favorites: N" linked to the character page (when available).
    Remainder: the character's `about` text converted to HTML paragraphs.
    """
    parts: list[str] = []
    url = details.get("url") or ""
    favorites = details.get("favorites")
    if url and favorites is not None:
-        parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>')
+        parts.append(f'<p><a href="{url}" target="_blank">'
+                     f'Favorites: {favorites:,}</a></p>')
    about = (details.get("about") or "").strip()
    if about:
-        parts.append(_plain_to_html(about))
+        parts.append(paragraphs_to_html(about))
    return "<br>".join(parts)


-def _build_person_description(details: dict) -> str:
-    """
-    Builds a Kavita-safe HTML description for a MAL person (mangaka / staff).
-
-    Renders a summary table (given name, family name, birthday, website,
-    member favorites) followed by the `about` biography as HTML paragraphs.
-    """
-    _TD = 'style="padding-right:1.5em"'
-    rows: list[str] = []
-
-    given = (details.get("given_name") or "").strip()
-    family = (details.get("family_name") or "").strip()
-    birthday = details.get("birthday") or ""
-    favorites = details.get("favorites")
-    website = (details.get("website_url") or "").strip()
-    url = (details.get("url") or "").strip()
-
-    if given:
-        rows.append(f"<tr><td {_TD}>Given name</td><td>{given}</td></tr>")
-    if family:
-        rows.append(f"<tr><td {_TD}>Family name</td><td>{family}</td></tr>")
-    bday_str = _format_birthday(birthday)
-    if bday_str:
-        rows.append(f"<tr><td {_TD}>Birthday</td><td>{bday_str}</td></tr>")
-    if website:
-        rows.append(
-            f'<tr><td {_TD}>Website</td>'
-            f'<td><a href="{website}">{website}</a></td></tr>'
-        )
-    if favorites is not None:
-        fav_cell = (f'<a href="{url}" target="_blank">{favorites:,}</a>' if url
-                    else f"{favorites:,}")
-        rows.append(
-            f"<tr><td {_TD}>Member Favorites</td><td>{fav_cell}</td></tr>")
-
-    parts: list[str] = []
-    if rows:
-        parts.append(f'<table>{"".join(rows)}</table>')
-    about = (details.get("about") or "").strip()
-    if about:
-        parts.append(_plain_to_html(about))
-    return "<br>".join(parts)
-
-
-# --------------------------------------------------------------------------
-# Module helper
-# --------------------------------------------------------------------------
-def _short_body(resp: requests.Response, limit: int = 400) -> str:
-    """Returns the response body trimmed to `limit` chars for error logging."""
-    try:
-        text = resp.text or ""
-    except Exception:
-        return "<unreadable response body>"
-    text = text.strip().replace("\n", " ").replace("\r", " ")
-    if len(text) > limit:
-        text = text[:limit] + "…"
-    return text or "<empty body>"
-
-
 # --------------------------------------------------------------------------
 # Usage example
 # --------------------------------------------------------------------------
 if __name__ == "__main__":
-    KAVITA_URL = "http://192.168.2.2:5000"
-    KAVITA_KEY = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA"
+    import os

-    updater = KavitaPersonUpdater(KAVITA_URL, KAVITA_KEY)
-
-    mal = MALResolver()
-    mal_id = mal.find_mal_id("よふかしのうた")
-    print("MAL ID:", mal_id)
-
-    if mal_id:
-        result = updater.update_for_manga(mal_id)
-        print("Characters:", {k: v for k, v in result["characters"].items()
-                              if k != "errors"})
-        print("Staff     :", {k: v for k, v in result["staff"].items()
-                              if k != "errors"})
-        # Surface any non-fatal upload / API errors for debugging
-        for section in ("characters", "staff"):
-            for err in result[section].get("errors", []):
-                print(f"[{section}] {err}")
+    client = KavitaClient(os.environ["KAVITA_URL"],
+                          os.environ["KAVITA_API_KEY"])
+    updater = KavitaPersonUpdater(client)
+    report = updater.update_all_persons(trigger="cron")
+    print(report)
+    for err in report["errors"]:
+        print("  ", err)
@@ -30,12 +30,12 @@ Dependencies
 from __future__ import annotations

 import datetime
-import difflib
 import time

 import requests

 from MediaResolver import MediaResolver
+from TextUtils import best_similarity


 class MALResolver(MediaResolver):
@@ -57,12 +57,21 @@ class MALResolver(MediaResolver):
            cls._instance._initialized = False
        return cls._instance

-    def __init__(self, *, request_timeout: int = 30):
+    def __init__(self, *, request_timeout: int = 30,
+                 search_type: str = "manga"):
+        """
+        search_type : Jikan `type` filter for title searches — "manga" for
+                      the manga container, "lightnovel" for the LN container.
+                      Only the FIRST construction in the process sets it
+                      (singleton); construct the resolver with the correct
+                      type in the entry point / orchestrator.
+        """
        if self._initialized:
            return

        self.JIKAN_BASE = "https://api.jikan.moe/v4"
        self.request_timeout = request_timeout
+        self.search_type = search_type

        self._session = requests.Session()
        self._session.headers.setdefault("User-Agent", "MALResolver/1.0")
@@ -106,7 +115,7 @@ class MALResolver(MediaResolver):

        try:
            data = self._get(f"{self.JIKAN_BASE}/manga",
-                             {"q": title, "limit": 5, "type": "manga"})
+                             {"q": title, "limit": 5, "type": self.search_type})
            results = data.get("data") or []
        except requests.RequestException:
            return None
@@ -404,19 +413,12 @@ def _clean_mal_name(name: str) -> str:
 def _score_title(query: str, entry: dict) -> float:
    """Returns the best title-similarity score for a Jikan manga entry."""
    candidates = [
-        entry.get("title") or "",
-        entry.get("title_english") or "",
-        entry.get("title_japanese") or "",
+        entry.get("title"),
+        entry.get("title_english"),
+        entry.get("title_japanese"),
    ]
-    for alt in (entry.get("titles") or []):
-        candidates.append(alt.get("title") or "")
-    best = 0.0
-    q = query.lower()
-    for t in candidates:
-        if t:
-            ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
-            best = max(best, ratio)
-    return best
+    candidates += [alt.get("title") for alt in (entry.get("titles") or [])]
+    return best_similarity(query, candidates)


 # --------------------------------------------------------------------------
@@ -119,26 +119,18 @@ class MangaBakaWorksResolver:
    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
-    def get_works(self, series_id: str) -> list[dict]:
+    def _fetch_all_pages(self, endpoint: str) -> list[dict]:
        """
-        Returns volume-level works for a series, filtered to those that have
-        a usable cover image.  Results are cached per series.
-
-        Pages through the API (limit=50) until the response returns an empty
-        page, collecting all works before applying the cover filter.
+        Pages through `{api_base_url}/series/{endpoint}` (limit=50 per page)
+        and returns all collected `data` items.  A requests.RequestException
+        ends the pagination early; items fetched so far are returned.
        """
-        if not series_id:
-            return []
-
-        if series_id in self._cache:
-            return self._cache[series_id]
-
-        all_works: list[dict] = []
+        items: list[dict] = []
        page = 1
        try:
            while True:
                resp = self._session.get(
-                    f"{self.api_base_url}/series/{series_id}/works",
+                    f"{self.api_base_url}/series/{endpoint}",
                    params={"limit": 50, "page": page},
                    timeout=self.request_timeout,
                )
@@ -146,13 +138,31 @@ class MangaBakaWorksResolver:
                page_data = resp.json().get("data") or []
                if not page_data:
                    break
-                all_works.extend(page_data)
+                items.extend(page_data)
                if len(page_data) < 50:
                    break
                page += 1
        except requests.RequestException:
-            if not all_works:
-                return []
+            pass
+        return items
+
+    def get_works(self, series_id: str) -> list[dict]:
+        """
+        Returns volume-level works for a series, filtered to those that have
+        a usable cover image.
+
+        Results are cached per series — including empty results, so a series
+        without works is not re-paginated for every chapter of a move run.
+        The periodic cover updater calls clear_cache() before each scan, so
+        works added on MangaBaka later are still picked up there.
+        """
+        if not series_id:
+            return []
+
+        if series_id in self._cache:
+            return self._cache[series_id]
+
+        all_works = self._fetch_all_pages(f"{series_id}/works")

        # Discard works that carry no usable cover
        works_with_cover = [w for w in all_works if w.get("images")]
@@ -190,25 +200,7 @@ class MangaBakaWorksResolver:
        if series_id in self._images_cache:
            return self._images_cache[series_id]

-        raw_items: list[dict] = []
-        page = 1
-        try:
-            while True:
-                resp = self._session.get(
-                    f"{self.api_base_url}/series/{series_id}/images",
-                    params={"limit": 50, "page": page},
-                    timeout=self.request_timeout,
-                )
-                resp.raise_for_status()
-                page_data = resp.json().get("data") or []
-                if not page_data:
-                    break
-                raw_items.extend(page_data)
-                if len(page_data) < 50:
-                    break
-                page += 1
-        except requests.RequestException:
-            pass
+        raw_items = self._fetch_all_pages(f"{series_id}/images")

        # Group by normalised volume index; collect all languages per volume.
        by_volume: dict[str, dict[str, str]] = {}  # norm_vol -> {lang: url}
@@ -236,6 +228,9 @@ class MangaBakaWorksResolver:
            if url:
                result[norm] = url

+        # Cache even an empty result so a series without volume images is not
+        # re-paginated for every chapter.  The periodic cover updater clears
+        # this cache before each scan, so newly added images are still found.
        self._images_cache[series_id] = result
        return result

@@ -0,0 +1,254 @@
+"""
+perf_stats.py
+=============
+
+Generic run/step performance profiler with JSON persistence, shared by the
+move pipeline and the periodic updaters (volume/cover, persons).
+
+Each run is a tree of *items* (e.g. series -> chapter, or one person) and
+every item carries named *step* timings.  A run also carries free-form
+``meta`` (e.g. the trigger source ``"cron" | "web" | "ln"`` for the person
+updater).
+
+Data model (one entry per run, newest first)::
+
+    {
+      "runs": [
+        {
+          "runId":        "…",
+          "startedAt":    1700000000,
+          "finishedAt":   1700000123,
+          "totalSeconds": 123.4,
+          "meta":         {"trigger": "cron"},
+          "itemCount":    2,              # top-level items
+          "leafCount":    31,            # items without children
+          "stepTotals":   {"cover": 41.2, "image_dimensions": 55.8, ...},
+          "items": [
+            {"label": "Call of the Night", "totalSeconds": 60.2, "ok": true,
+             "steps": {"fetch_metadata": 1.2},
+             "items": [
+               {"label": "1", "totalSeconds": 11.5, "ok": true,
+                "steps": {"cover": 1.8, "pack_cbz": 2.9}, "items": []}
+             ]}
+          ]
+        }
+      ]
+    }
+
+Usage::
+
+    perf = PerfStats(path)                       # path=None -> disabled
+    run = perf.begin_run(meta={"trigger": "cron"})
+    item = run.begin_item("Call of the Night")
+    with item.measure("fetch_metadata"):
+        ...
+    chap = item.begin_item("1")
+    with chap.measure("pack_cbz"):
+        ...
+    chap.finish()
+    item.finish()                                # flushes the run to disk
+    run.finish()
+
+When ``path`` is None every recorder is a no-op and nothing is written, so
+the profiler can be left permanently wired in at negligible cost.  The run
+is flushed after every top-level item finishes, so a long run is observable
+live and survives a crash mid-run.
+"""
+
+from __future__ import annotations
+
+import json
+import threading
+import time
+import uuid
+from contextlib import contextmanager
+from pathlib import Path
+
+
+# Keep the JSON small: only the most recent runs are retained on disk.
+_MAX_RUNS = 30
+
+
+class _StepTimer:
+    """
+    Base recorder: accumulates ``{step_name: seconds}`` and tracks its own
+    wall-clock lifetime.  ``enabled=False`` turns every method into a no-op.
+    """
+
+    def __init__(self, enabled: bool = True):
+        self.steps: dict[str, float] = {}
+        self._enabled = enabled
+        self._t0 = time.monotonic()
+
+    @contextmanager
+    def measure(self, name: str):
+        """Context manager timing a named step (accumulates on repeat use)."""
+        if not self._enabled:
+            yield
+            return
+        start = time.monotonic()
+        try:
+            yield
+        finally:
+            self.steps[name] = round(
+                self.steps.get(name, 0.0) + (time.monotonic() - start), 4)
+
+    def elapsed(self) -> float:
+        return round(time.monotonic() - self._t0, 4)
+
+
+class ItemRecorder(_StepTimer):
+    """
+    One node in a run's item tree.  Has its own step timings and may contain
+    nested child items (e.g. a series item containing chapter items).
+    """
+
+    def __init__(self, run: "RunRecorder", label: str, *,
+                 parent: "ItemRecorder | None" = None,
+                 enabled: bool = True):
+        super().__init__(enabled)
+        self._run = run
+        self._label = label
+        self._parent = parent
+        self._children: list[dict] = []
+
+    def begin_item(self, label: str) -> "ItemRecorder":
+        return ItemRecorder(self._run, label, parent=self,
+                            enabled=self._enabled)
+
+    def finish(self, *, ok: bool = True) -> None:
+        if not self._enabled:
+            return
+        node = {
+            "label":        self._label,
+            "totalSeconds": self.elapsed(),
+            "ok":           ok,
+            "steps":        self.steps,
+            "items":        self._children,
+        }
+        if self._parent is not None:
+            self._parent._children.append(node)
+        else:
+            # Top-level item: attach to the run and persist progress.
+            self._run._items.append(node)
+            self._run.flush()
+
+
+class RunRecorder:
+    """Top-level recorder for one full run."""
+
+    def __init__(self, stats: "PerfStats", meta: "dict | None" = None,
+                 enabled: bool = True):
+        self._stats = stats
+        self._enabled = enabled
+        self._meta = meta or {}
+        self._items: list[dict] = []
+        self._started = time.time()
+        self._t0 = time.monotonic()
+        self._run_id = uuid.uuid4().hex
+
+    def begin_item(self, label: str) -> ItemRecorder:
+        return ItemRecorder(self, label, parent=None, enabled=self._enabled)
+
+    def _snapshot(self) -> dict:
+        step_totals: dict[str, float] = {}
+        leaf_count = 0
+
+        def walk(node: dict) -> None:
+            nonlocal leaf_count
+            for step, secs in node["steps"].items():
+                step_totals[step] = round(step_totals.get(step, 0.0) + secs, 4)
+            if node["items"]:
+                for child in node["items"]:
+                    walk(child)
+            else:
+                leaf_count += 1
+
+        for item in self._items:
+            walk(item)
+
+        return {
+            "runId":        self._run_id,
+            "startedAt":    round(self._started),
+            "finishedAt":   round(time.time()),
+            "totalSeconds": round(time.monotonic() - self._t0, 4),
+            "meta":         self._meta,
+            "itemCount":    len(self._items),
+            "leafCount":    leaf_count,
+            "stepTotals":   step_totals,
+            "items":        self._items,
+        }
+
+    def flush(self) -> "dict | None":
+        """Writes the run's current state to disk (upsert by runId)."""
+        if not self._enabled:
+            return None
+        run = self._snapshot()
+        self._stats._upsert_run(run)
+        return run
+
+    def finish(self) -> "dict | None":
+        """Persists the final run state.  Returns the run dict."""
+        return self.flush()
+
+
+class PerfStats:
+    """
+    Profiler facade + JSON persistence.
+
+    Parameters
+    ----------
+    path : Destination JSON file.  None disables the profiler entirely
+           (every recorder becomes a no-op and nothing is written).
+    """
+
+    def __init__(self, path=None):
+        self._path = Path(path) if path else None
+        self._lock = threading.Lock()
+
+    @property
+    def enabled(self) -> bool:
+        return self._path is not None
+
+    def begin_run(self, meta: "dict | None" = None) -> RunRecorder:
+        return RunRecorder(self, meta=meta, enabled=self.enabled)
+
+    # ------------------------------------------------------------------
+    # Read / write
+    # ------------------------------------------------------------------
+    def all(self) -> dict:
+        """Returns the persisted runs ({"runs": [...]}); newest first."""
+        if not self._path or not self._path.is_file():
+            return {"runs": []}
+        try:
+            with self._path.open("r", encoding="utf-8") as f:
+                data = json.load(f)
+        except (OSError, json.JSONDecodeError):
+            return {"runs": []}
+        if not isinstance(data, dict) or not isinstance(data.get("runs"), list):
+            return {"runs": []}
+        return data
+
+    def _upsert_run(self, run: dict) -> None:
+        """
+        Inserts a new run (newest first) or replaces the existing entry with
+        the same runId — so incremental flushes during a run update one entry
+        rather than appending a duplicate after every item.
+        """
+        if not self._path:
+            return
+        with self._lock:
+            runs = self.all()["runs"]
+            run_id = run.get("runId")
+            for i, existing in enumerate(runs):
+                if existing.get("runId") == run_id:
+                    runs[i] = run
+                    break
+            else:
+                runs.insert(0, run)         # newest first
+                del runs[_MAX_RUNS:]        # cap history
+            self._path.parent.mkdir(parents=True, exist_ok=True)
+            tmp = self._path.with_suffix(self._path.suffix + ".tmp")
+            with tmp.open("w", encoding="utf-8") as f:
+                json.dump({"runs": runs}, f, ensure_ascii=False, indent=2)
+            tmp.replace(self._path)
@@ -0,0 +1,160 @@
+"""
+perf_web_page.py
+================
+
+Shared HTML page for browsing PerfStats output, used by both container web
+UIs.  ``render_perf_page(name, tabs)`` returns a standalone page that loads
+``/api/perf/<name>`` and renders each run's step totals plus the nested item
+tree (series -> chapter, or one person, …) and the run trigger from meta.
+
+``tabs`` is a list of ``(label, name)`` pairs for cross-links between the
+available perf datasets in that container.
+"""
+
+from __future__ import annotations
+
+
+_PERF_PAGE = """<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <title>__PERF_NAME__ performance</title>
+  <style>
+    body  { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
+    h1    { margin: 0 0 1rem; font-size: 1.4rem; }
+    h2    { font-size: 1.05rem; margin: 1.4rem 0 .5rem; color:#cbd5e1; }
+    a     { color:#60a5fa; text-decoration:none; }
+    a:hover { text-decoration:underline; }
+    .tabs { margin-bottom:1rem; }
+    .tabs a { margin-right:1rem; }
+    .tabs a.active { font-weight:bold; text-decoration:underline; }
+    .bar  { display:flex; gap:.6rem; align-items:center; margin-bottom:1rem; flex-wrap:wrap; }
+    select, button { padding:.35rem .6rem; background:#222; color:#eee; border:1px solid #555; }
+    .summary { color:#9ca3af; margin:.3rem 0 1rem; }
+    table { border-collapse: collapse; width: 100%; margin-bottom:.5rem; }
+    th, td { border: 1px solid #333; padding: .35rem .6rem; text-align: left; }
+    th { background:#1d1d1d; }
+    td.num { text-align:right; font-variant-numeric: tabular-nums; white-space:nowrap; }
+    .barcell { position:relative; }
+    .barfill { position:absolute; left:0; top:0; bottom:0; background:#2563eb33; z-index:0; }
+    .barcell span { position:relative; z-index:1; }
+    details { margin:.2rem 0 .2rem 1rem; }
+    summary { cursor:pointer; padding:.2rem 0; }
+    .chip { color:#9ca3af; font-size:.85rem; }
+    .err  { color:#f87171; }
+  </style>
+</head>
+<body>
+<h1>Performance: __PERF_NAME__ <a href="/" style="font-size:.9rem;">&#9666; back</a></h1>
+<div class="tabs">__PERF_TABS__</div>
+<div class="bar">
+  <label>Run: <select id="runSelect"></select></label>
+  <button id="reload">Reload</button>
+  <span class="summary" id="summary"></span>
+</div>
+
+<div id="content"></div>
+
+<script>
+const PERF_NAME = "__PERF_NAME__";
+let runs = [];
+
+for (const a of document.querySelectorAll(".tabs a")) {
+  if (a.getAttribute("href") === "/perf/" + PERF_NAME) a.classList.add("active");
+}
+
+function fmtSecs(s) { return (s || 0).toFixed(2) + "s"; }
+function fmtTime(unix) { return unix ? new Date(unix * 1000).toLocaleString() : ""; }
+function esc(s) {
+  return String(s).replace(/[&<>]/g, c => ({"&":"&amp;","<":"&lt;",">":"&gt;"}[c]));
+}
+
+function stepTable(totals, grandTotal) {
+  const entries = Object.entries(totals || {}).sort((a, b) => b[1] - a[1]);
+  if (!entries.length) return "<p class=chip>(no steps recorded)</p>";
+  const max = entries[0][1] || 1;
+  let rows = "";
+  for (const [name, secs] of entries) {
+    const pct = grandTotal ? (secs / grandTotal * 100) : 0;
+    const w = (secs / max * 100);
+    rows += "<tr><td>" + esc(name) + "</td>"
+         + "<td class='num'>" + fmtSecs(secs) + "</td>"
+         + "<td class='num'>" + pct.toFixed(1) + "%</td>"
+         + "<td class='barcell'><div class='barfill' style='width:" + w + "%'></div>"
+         + "<span>&nbsp;</span></td></tr>";
+  }
+  return "<table><thead><tr><th>Step</th><th class=num>Total</th>"
+       + "<th class=num>% of run</th><th>&nbsp;</th></tr></thead><tbody>"
+       + rows + "</tbody></table>";
+}
+
+// Renders one item node (and its children) as a nested <details> block.
+function itemNode(it) {
+  const steps = Object.entries(it.steps || {}).sort((a, b) => b[1] - a[1])
+    .map(([n, v]) => esc(n) + " " + fmtSecs(v)).join(", ") || "—";
+  const head = "<summary><b>" + esc(it.label) + "</b>"
+    + (it.ok === false ? " <span class=err>(failed)</span>" : "")
+    + " <span class=chip>" + fmtSecs(it.totalSeconds) + " · " + steps + "</span></summary>";
+  const kids = (it.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
+  const body = kids.map(itemNode).join("");
+  return "<details>" + head + body + "</details>";
+}
+
+function renderRun(run) {
+  const c = document.getElementById("content");
+  if (!run) { c.innerHTML = "<p class=chip>No runs recorded yet.</p>"; return; }
+  const trigger = (run.meta && run.meta.trigger) ? " · trigger: " + run.meta.trigger : "";
+  document.getElementById("summary").textContent =
+    fmtTime(run.startedAt) + " · " + fmtSecs(run.totalSeconds) + " · "
+    + run.itemCount + " items · " + run.leafCount + " leaves" + trigger;
+
+  let html = "<h2>Steps (summed over all items)</h2>"
+           + stepTable(run.stepTotals, run.totalSeconds)
+           + "<h2>Detail</h2>";
+  const items = (run.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
+  html += items.map(itemNode).join("") || "<p class=chip>(no items)</p>";
+  c.innerHTML = html;
+}
+
+function renderSelect() {
+  const sel = document.getElementById("runSelect");
+  sel.innerHTML = "";
+  runs.forEach((r, i) => {
+    const o = document.createElement("option");
+    o.value = i;
+    const trig = (r.meta && r.meta.trigger) ? " " + r.meta.trigger : "";
+    o.textContent = fmtTime(r.startedAt) + "  (" + fmtSecs(r.totalSeconds) + ")" + trig;
+    sel.appendChild(o);
+  });
+}
+
+async function load() {
+  const r = await fetch("/api/perf/" + PERF_NAME);
+  const data = await r.json();
+  runs = data.runs || [];
+  renderSelect();
+  renderRun(runs[0]);
+}
+
+document.getElementById("runSelect").addEventListener("change", e => {
+  renderRun(runs[e.target.value]);
+});
+document.getElementById("reload").addEventListener("click", load);
+load();
+</script>
+</body>
+</html>
+"""
+
+
+def render_perf_page(name: str, tabs: "list[tuple[str, str]]") -> str:
+    """
+    Returns the perf page HTML for dataset ``name``.
+
+    tabs : list of (label, dataset_name) for the cross-link bar.
+    """
+    tab_html = " ".join(
+        f'<a href="/perf/{n}">{label}</a>' for label, n in tabs)
+    return (_PERF_PAGE
+            .replace("__PERF_TABS__", tab_html)
+            .replace("__PERF_NAME__", name))
@@ -0,0 +1,99 @@
+"""
+text_utils.py
+=============
+
+Small text helpers shared across modules:
+
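+* ``paragraphs_to_html`` — converts plain text with blank-line paragraph
+  breaks into compact HTML (used for Kavita summary / description fields,
+  which must not contain raw newlines).
+* ``best_similarity`` — best difflib ratio between a query string and a
+  list of candidate strings (used for title / person-name matching).
+* ``person_name_with_id`` — appends a tracker id suffix to a character
+  name, e.g. "Rem (MAL 118737)", to keep same-named characters apart.
+* ``parse_person_tracker_id`` — inverse of ``person_name_with_id``;
+  extracts the tracker source and id from such a suffixed name.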
+"""
+
+from __future__ import annotations
+
+import difflib
+import re
+from typing import Iterable
+
+
+def paragraphs_to_html(text: str) -> str:
+    """Converts plain text with paragraph breaks to compact HTML (no raw \\n)."""
+    if not text:
+        return ""
+    parts: list[str] = []
+    for para in re.split(r"\n{2,}", text.strip()):
+        para = para.strip()
+        if para:
+            parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
+    return "".join(parts)
+
+
+def best_similarity(query: str, candidates: Iterable[str]) -> float:
+    """
+    Returns the best case-insensitive difflib similarity ratio (0..1)
+    between `query` and any non-empty candidate.
+    """
+    q = (query or "").lower()
+    best = 0.0
+    for candidate in candidates:
+        if candidate:
+            ratio = difflib.SequenceMatcher(
+                None, q, str(candidate).lower()).ratio()
+            best = max(best, ratio)
+    return best
+
+
+def person_name_with_id(name: str, *,
+                        mal_id: "int | None" = None,
+                        al_id:  "int | None" = None) -> str:
+    """
+    Disambiguates a character name with its tracker id: "Rem (MAL 118737)".
+
+    Kavita Person records are global and keyed by name only, so two
+    different characters who share a name would collapse into one record.
+    Suffixing the tracker *character* id keeps them apart while still
+    sharing the record across the manga and light-novel version of the
+    same series (MAL/AniList character ids are per character, not per
+    medium).  MAL is preferred; AniList ids get an "AL" marker so the two
+    id spaces cannot collide.  Without any id the name is returned as-is.
+
+    The format must stay in sync with the manga project so both tools
+    address the same Kavita person records.
+    """
+    name = (name or "").strip()
+    if not name:
+        return name
+    if mal_id:
+        return f"{name} (MAL {mal_id})"
+    if al_id:
+        return f"{name} (AL {al_id})"
+    return name
+
+
+# Matches the suffix produced by person_name_with_id at the end of a name.
+_TRACKER_ID_RE = re.compile(r"\s*\((MAL|AL)\s+(\d+)\)\s*$", re.IGNORECASE)
+
+
+def parse_person_tracker_id(name: str) -> "tuple[str, int] | None":
+    """
+    Inverse of person_name_with_id: extracts the tracker id from a
+    disambiguated Kavita person name.
+
+    "Rem (MAL 118737)" -> ("mal", 118737)
+    "Subaru (AL 88311)" -> ("al", 88311)
+    "Kotoyama"          -> None   (no id suffix — e.g. an author/staff record)
+
+    Returns ("mal" | "al", id) or None.
+    """
+    if not name:
+        return None
+    m = _TRACKER_ID_RE.search(name)
+    if not m:
+        return None
+    source = "mal" if m.group(1).upper() == "MAL" else "al"
+    try:
+        return source, int(m.group(2))
+    except ValueError:
+        return None
@@ -0,0 +1,313 @@
+"""
+kavita_series_updater.py
+========================
+
+Diff-based update of a single Kavita series record from a
+LightNovelMetadataBuilder output dict.
+
+Behaviour
+---------
+* Locked fields in Kavita (``*Locked`` flags) are never touched, no matter
+  what MangaBaka returns.
+* Scalar fields (summary, releaseYear, ageRating, publicationStatus,
+  language, score, sortName, localizedName) are overwritten when the
+  newly-built value differs from the value currently stored in Kavita.
+* List fields (genres, tags, characters, writers, coverArtists,
+  publishers, imprints) are diff-merged: a name appearing in the new
+  set but not in the current one is added (id=0 so Kavita creates the
+  record); a name that is in Kavita but no longer in the new set is
+  dropped.  Comparison is case-insensitive on the ``name`` field.
+* Web links are stored as a comma-separated string in Kavita; this
+  updater treats them as a set and re-joins on write.
+* Series-level cover image (URL different from last time) is re-uploaded
+  whenever ``coverImageLocked`` is False.  The MangaBaka cover URL is
+  stamped onto matches.json as ``imageUrl`` so a subsequent run can skip
+  the upload when nothing changed.
+
+Returns a small diff report per series
+({field: 'changed' / 'unchanged' / 'skipped' / 'locked'}; the cover entry
+may also be 'error: <message>') so the WebApp can surface what happened.
+"""
+
+from __future__ import annotations
+
+from typing import Iterable
+
+from KavitaClient import KavitaClient
+
+
+# Maps Kavita "list" fields on SeriesMetadataDto to (lock_flag, item_key).
+# `item_key` is the dict key Kavita uses for the display name on each item:
+# GenreTagDto / TagDto use "title", PersonDto uses "name".
+_LIST_FIELDS: list[tuple[str, str, str]] = [
+    ("genres",       "genresLocked",      "title"),
+    ("tags",         "tagsLocked",        "title"),
+    ("characters",   "characterLocked",   "name"),
+    ("writers",      "writerLocked",      "name"),
+    ("coverArtists", "coverArtistLocked", "name"),
+    ("publishers",   "publisherLocked",   "name"),
+    ("imprints",     "imprintLocked",     "name"),
+]
+
+
+def _norm(name: str) -> str:
+    return (name or "").strip().lower()
+
+
+def _merge_list(
+    current: list[dict],
+    new_names: Iterable[str],
+    item_key: str,
+) -> "tuple[list[dict], bool]":
+    """
+    Diff-merges a Kavita list field with the canonical name list from
+    MangaBaka.  Returns (merged_list, changed_flag).
+
+    `item_key` is the dict key Kavita uses for the display name on each
+    item ("title" for GenreTagDto/TagDto, "name" for PersonDto).
+
+    * Items in `current` whose display value appears in `new_names` are
+      kept verbatim so existing ids and ancillary fields survive.
+    * New names (no matching entry in `current`) are appended with
+      ``{"id": 0, <item_key>: <name>}`` — Kavita creates the record on save.
+    * Items in `current` whose display value is *not* in `new_names` are
+      dropped.
+    """
+    new_set = [n for n in new_names if n and n.strip()]
+    new_index = {_norm(n): n.strip() for n in new_set}
+
+    merged: list[dict] = []
+    kept_keys: set[str] = set()
+    for item in (current or []):
+        key = _norm(item.get(item_key))
+        if key in new_index:
+            merged.append(item)
+            kept_keys.add(key)
+
+    added = False
+    for key, display in new_index.items():
+        if key not in kept_keys:
+            merged.append({"id": 0, item_key: display})
+            added = True
+
+    removed = len(current or []) != len(kept_keys)
+    return merged, added or removed
+
+
+def _parse_web_links(value) -> list[str]:
+    if not value:
+        return []
+    if isinstance(value, list):
+        return [str(v).strip() for v in value if v]
+    return [p.strip() for p in str(value).split(",") if p.strip()]
+
+
+def _merge_web_links(current_str, new_links: list[str]) -> "tuple[str, bool]":
+    current = _parse_web_links(current_str)
+    new_norm = [l for l in new_links if l]
+    if not new_norm:
+        return ",".join(current), False
+
+    # Mirror MangaBaka's set: keep order from new_norm, then anything from
+    # current that's still in new_norm (already covered above).  Anything
+    # in current that's not in new_norm is dropped.
+    new_set = set(new_norm)
+    merged = list(new_norm)
+    changed = sorted(new_set) != sorted(set(current))
+    return ",".join(merged), changed
+
+
+class KavitaSeriesUpdater:
+    """
+    Applies a freshly built metadata dict to one Kavita series.
+
+    The current series and series-metadata records are fetched through the
+    client, diffed against the built dict, mutated in place and written back
+    only when a diff step reports a change.
+    """
+
+    def __init__(self, client: KavitaClient):
+        # All Kavita reads and writes go through this client.
+        self._client = client
+
+    # ------------------------------------------------------------------
+    # Public
+    # ------------------------------------------------------------------
+    def update_series(self, series_id: int, built: dict, *,
+                      previous_cover_url: "str | None" = None) -> dict:
+        """
+        Applies the diff between Kavita's current state for `series_id`
+        and the freshly-built MangaBaka dict.  Returns a per-field diff
+        report.
+
+        Parameters
+        ----------
+        series_id          : Kavita series to update.
+        built              : Metadata dict to apply (read via ``built.get``).
+        previous_cover_url : Cover URL to compare ``built["coverUrl"]``
+                             against; the cover is uploaded only when the
+                             two differ.
+
+        The report maps field names to "changed", "unchanged", "skipped" or
+        "locked"; ``coverImage`` may also be ``"error: <message>"`` when the
+        upload raised.
+        """
+        series   = self._client.get_series(series_id)
+        metadata = self._client.get_series_metadata(series_id)
+        report: dict = {}
+
+        # Each diff step mutates its dict in place and reports whether a
+        # write-back is needed.
+        meta_changed = self._diff_metadata(metadata, built, report)
+        if meta_changed:
+            self._client.update_series_metadata(metadata)
+
+        series_changed = self._diff_series(series, built, report)
+        if series_changed:
+            self._client.update_series(series)
+
+        # Cover: only re-upload when not locked AND URL actually changed.
+        new_cover = built.get("coverUrl")
+        if (new_cover
+                and not series.get("coverImageLocked")
+                and new_cover != previous_cover_url):
+            try:
+                self._client.upload_series_cover(series_id, new_cover)
+                report["coverImage"] = "changed"
+            except Exception as exc:
+                # A failed upload is reported, not raised, so the metadata
+                # changes already written above are still returned.
+                report["coverImage"] = f"error: {exc}"
+        elif series.get("coverImageLocked"):
+            report["coverImage"] = "locked"
+        else:
+            report["coverImage"] = "skipped"
+
+        return report
+
+    # ------------------------------------------------------------------
+    # Internal: SeriesMetadataDto
+    # ------------------------------------------------------------------
+    def _diff_metadata(self, metadata: dict, built: dict,
+                       report: dict) -> bool:
+        """
+        Diffs scalar fields, web links and list fields of `built` against
+        `metadata`.  Differing values are written into `metadata` in place,
+        every handled field gets an entry in `report`, and the return value
+        is True when `metadata` was modified.
+        """
+        changed = False
+
+        # ----- Scalars ------------------------------------------------
+        # (built_key, metadata_key, locked_key, transform, skip_when_zero)
+        # `skip_when_zero` covers fields where 0 means "no data" rather
+        # than a real value (releaseYear, ageRating).  publicationStatus 0
+        # is a valid "Ongoing" status — never skip it.
+        scalar_map = [
+            ("summary",            "summary",            "summaryLocked",            None, False),
+            ("releaseYear",        "releaseYear",        "releaseYearLocked",        int,  True),
+            ("ageRating",          "ageRating",          "ageRatingLocked",          int,  True),
+            ("publicationStatus",  "publicationStatus",  "publicationStatusLocked",  int,  False),
+            ("language",           "language",           "languageLocked",           None, False),
+        ]
+        for built_key, meta_key, locked_key, transform, skip_zero in scalar_map:
+            new_val = built.get(built_key)
+            # Missing / empty new values never overwrite what Kavita has.
+            if new_val is None or new_val == "":
+                report[meta_key] = "skipped"
+                continue
+            if transform is not None:
+                try:
+                    new_val = transform(new_val)
+                except (TypeError, ValueError):
+                    report[meta_key] = "skipped"
+                    continue
+            if skip_zero and new_val == 0:
+                report[meta_key] = "skipped"
+                continue
+            if metadata.get(locked_key):
+                report[meta_key] = "locked"
+                continue
+            if metadata.get(meta_key) != new_val:
+                metadata[meta_key] = new_val
+                changed = True
+                report[meta_key] = "changed"
+            else:
+                report[meta_key] = "unchanged"
+
+        # ----- Web links (single comma-separated string) ---------------
+        # SeriesMetadataDto has no dedicated lock for webLinks — always update.
+        web_str, web_changed = _merge_web_links(
+            metadata.get("webLinks"), built.get("webLinks") or [])
+        if web_changed:
+            metadata["webLinks"] = web_str
+            changed = True
+            report["webLinks"] = "changed"
+        else:
+            report["webLinks"] = "unchanged"
+
+        # ----- List fields --------------------------------------------
+        # The single `imprint` value of the built dict is wrapped into a
+        # one-element list so it can be merged like the other list fields.
+        list_map = {
+            "genres":       built.get("genres"),
+            "tags":         built.get("tags"),
+            "characters":   built.get("characters"),
+            "writers":      built.get("writers"),
+            "coverArtists": built.get("coverArtists"),
+            "publishers":   built.get("publishers"),
+            "imprints":     [built["imprint"]] if built.get("imprint") else [],
+        }
+        for meta_key, locked_key, item_key in _LIST_FIELDS:
+            new_names = list_map.get(meta_key) or []
+            if metadata.get(locked_key):
+                report[meta_key] = "locked"
+                continue
+            if not new_names and not (metadata.get(meta_key) or []):
+                report[meta_key] = "unchanged"
+                continue
+            # NOTE(review): unlike scalars and web links, an empty new list
+            # with a non-empty current list is still merged, which drops
+            # every current item — confirm this is intended when the source
+            # simply returned no data.
+            merged, list_changed = _merge_list(
+                metadata.get(meta_key) or [], new_names, item_key)
+            if list_changed:
+                metadata[meta_key] = merged
+                changed = True
+                report[meta_key] = "changed"
+            else:
+                report[meta_key] = "unchanged"
+
+        return changed
+
+    # ------------------------------------------------------------------
+    # Internal: SeriesDto (sortName, userRating, tracker ids)
+    # ------------------------------------------------------------------
+    def _diff_series(self, series: dict, built: dict, report: dict) -> bool:
+        """
+        Diffs sortName, localizedName, the tracker ids and the rating of
+        `built` against `series`.  Differing values are written into
+        `series` in place and recorded in `report`; returns True when
+        `series` was modified.
+        """
+        changed = False
+
+        # sortName / localizedName
+        # An empty built value is reported as "unchanged" and never clears
+        # the name stored in Kavita.
+        if not series.get("sortNameLocked"):
+            new_sort = built.get("sortName") or ""
+            if new_sort and series.get("sortName") != new_sort:
+                series["sortName"] = new_sort
+                changed = True
+                report["sortName"] = "changed"
+            else:
+                report["sortName"] = "unchanged"
+        else:
+            report["sortName"] = "locked"
+
+        if not series.get("localizedNameLocked"):
+            new_loc = built.get("localizedName") or ""
+            if new_loc and series.get("localizedName") != new_loc:
+                series["localizedName"] = new_loc
+                changed = True
+                report["localizedName"] = "changed"
+            else:
+                report["localizedName"] = "unchanged"
+        else:
+            report["localizedName"] = "locked"
+
+        # Tracker ids — Kavita exposes malId, aniListId, mangaBakaId
+        # Only changed ids are added to the report; missing, zero or
+        # non-numeric built values are silently ignored.
+        for built_key, series_key in (
+            ("malId",       "malId"),
+            ("anilistId",   "aniListId"),
+            ("mangabakaId", "mangaBakaId"),
+        ):
+            new_val = built.get(built_key)
+            if new_val in (None, "", 0):
+                continue
+            try:
+                new_int = int(new_val)
+            except (TypeError, ValueError):
+                continue
+            if int(series.get(series_key) or 0) != new_int:
+                series[series_key] = new_int
+                changed = True
+                report[series_key] = "changed"
+
+        # userRating from MangaBaka (0..5)
+        new_score = built.get("score")
+        if new_score is not None:
+            try:
+                new_score = float(new_score)
+            except (TypeError, ValueError):
+                new_score = None
+        if new_score is not None:
+            current_score = series.get("userRating")
+            try:
+                current_score = float(current_score) if current_score is not None else None
+            except (TypeError, ValueError):
+                current_score = None
+            if current_score != new_score:
+                series["userRating"] = new_score
+                # Set together with the rating whenever a new score is written.
+                series["hasUserRated"] = True
+                changed = True
+                report["userRating"] = "changed"
+            else:
+                report["userRating"] = "unchanged"
+
+        return changed
@@ -0,0 +1,571 @@
+"""
+light_novel_metadata_builder.py
+===============================
+
+Fetches series-level metadata for a light novel from MangaBaka, enriches
+it with MyAnimeList / AniList tracker statistics and character data, and
+returns a structured dict ready to be diffed against Kavita's
+SeriesMetadataDto.
+
+Differences vs. the manga project's ComicInfoBuilder:
+  - No chapter / page handling — Kavita reads volumes from the files.
+  - No XML output — produces a plain dict.
+  - No MangaDex resolver — light novels don't have a chapter→volume
+    mapping problem.
+  - MangaBaka search type is fixed to ``novel`` so only light/web novels
+    are returned.
+"""
+
+from __future__ import annotations
+
+import re
+
+import requests
+
+from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
+from MALResolver import MALResolver
+from AniListResolver import AniListResolver
+from MatchesCache import MatchesCache
+from TextUtils import paragraphs_to_html, person_name_with_id
+
+
+# MangaBaka series type for the search endpoint.
+_SEARCH_TYPES = ["novel"]
+
+# MangaBaka content_rating  ->  Kavita AgeRating enum
+# Kavita AgeRating values (from openapi.json):
+#   0=Unknown, 3=Everyone, 8=Teen, 10=Mature17Plus, 13=AdultsOnly
+_AGE_RATING_MAP = {
+    "safe":          3,    # Everyone
+    "suggestive":    8,    # Teen
+    "erotica":       10,   # Mature17Plus
+    "pornographic":  13,   # AdultsOnly
+}
+
+# MangaBaka status  ->  Kavita PublicationStatus enum
+# Kavita PublicationStatus (from openapi.json):
+#   0=OnGoing, 1=Hiatus, 2=Completed, 3=Cancelled, 4=Ended
+_PUB_STATUS_MAP = {
+    "ongoing":   0,
+    "hiatus":    1,
+    "completed": 2,
+    "cancelled": 3,
+    "ended":     4,
+}
+
+# External-tracker URL templates used to enrich the web-links list.
+_TRACKER_URL_TEMPLATES = {
+    "anilist":          "https://anilist.co/manga/{id}",
+    "myanimelist":      "https://myanimelist.net/manga/{id}",
+    "mal":              "https://myanimelist.net/manga/{id}",
+    "mangaupdates":     "https://www.mangaupdates.com/series.html?id={id}",
+    "kitsu":            "https://kitsu.app/manga/{id}",
+    "animenewsnetwork": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
+    "ann":              "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
+    "animeplanet":      "https://www.anime-planet.com/manga/{id}",
+    "shikimori":        "https://shikimori.one/mangas/{id}",
+    "bookwalker":       "https://bookwalker.jp/{id}",
+}
+
+_MD_ESCAPE_RE = re.compile(r'\\([\\`*_{}\[\]()\#+\-.!|~])')
+
+
+# --------------------------------------------------------------------------
+# Helpers
+# --------------------------------------------------------------------------
+def _normalise_key(key) -> str:
+    return re.sub(r"[^a-z0-9]", "", str(key).lower())
+
+
+def _format_term(value: str) -> str:
+    return str(value).replace("_", " ").strip().title() if value else ""
+
+
+def _md_to_html(text: str) -> str:
+    """Converts the subset of Markdown produced by MangaBaka to compact HTML."""
+    if not text:
+        return ""
+    text = _MD_ESCAPE_RE.sub(r'\1', text)
+    text = re.sub(
+        r'\[([^\]]+)\]\(([^)]+)\)',
+        lambda m: f'<a href="{m.group(2)}">{m.group(1)}</a>',
+        text,
+    )
+    text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text, flags=re.DOTALL)
+    text = re.sub(r'\*(.+?)\*',     r'<em>\1</em>',         text, flags=re.DOTALL)
+    return paragraphs_to_html(text)
+
+
+def pick_cover_url(cover) -> "str | None":
+    """Selects the best cover URL from a MangaBaka cover object."""
+    if not cover:
+        return None
+    if isinstance(cover, str):
+        return cover
+    if not isinstance(cover, dict):
+        return None
+
+    raw = cover.get("raw")
+    if isinstance(raw, dict):
+        url = raw.get("url")
+        if isinstance(url, str) and url:
+            return url
+    elif isinstance(raw, str) and raw:
+        return raw
+
+    for size_key in ("x350", "x250", "x150"):
+        variant = cover.get(size_key)
+        if isinstance(variant, dict):
+            for density in ("x3", "x2", "x1"):
+                url = variant.get(density)
+                if isinstance(url, str) and url:
+                    return url
+        elif isinstance(variant, str) and variant:
+            return variant
+
+    for val in cover.values():
+        if isinstance(val, str) and val.startswith("http"):
+            return val
+        if isinstance(val, dict):
+            for sub in val.values():
+                if isinstance(sub, str) and sub.startswith("http"):
+                    return sub
+    return None
+
+
+def pick_thumbnail_url(cover) -> "str | None":
+    """Picks a small cover variant suitable for a UI thumbnail."""
+    if not cover:
+        return None
+    if isinstance(cover, str):
+        return cover
+    if not isinstance(cover, dict):
+        return None
+    for size_key in ("x150", "x250", "x350"):
+        variant = cover.get(size_key)
+        if isinstance(variant, dict):
+            for density in ("x2", "x1", "x3"):
+                url = variant.get(density)
+                if isinstance(url, str) and url:
+                    return url
+        elif isinstance(variant, str) and variant:
+            return variant
+    return pick_cover_url(cover)
+
+
+def _id_from_source(md: dict, *names: str) -> "int | None":
+    target = {_normalise_key(n) for n in names}
+    for raw_key, info in (md.get("source") or {}).items():
+        if _normalise_key(raw_key) in target and isinstance(info, dict):
+            mid = info.get("id")
+            if mid is not None:
+                try:
+                    return int(mid)
+                except (TypeError, ValueError):
+                    pass
+    return None
+
+
+# --------------------------------------------------------------------------
+# Builder
+# --------------------------------------------------------------------------
+class LightNovelMetadataBuilder:
+    """
+    Resolves a light-novel series on MangaBaka and produces a structured
+    metadata dict ready to be merged into Kavita.
+    """
+
+    def __init__(self, *,
+                 api_base_url: str = "https://api.mangabaka.dev/v1",
+                 language: str = "en",
+                 request_timeout: int = 30,
+                 session: "requests.Session | None" = None,
+                 mal_resolver: "MALResolver | None" = None,
+                 al_resolver: "AniListResolver | None" = None,
+                 matches_cache: "MatchesCache | None" = None):
+        self.api_base_url = api_base_url.rstrip("/")
+        self.language = language
+        self.request_timeout = request_timeout
+
+        self._session = session or requests.Session()
+        self._session.headers.setdefault("User-Agent",
+                                         "LightNovelMetadataBuilder/1.0")
+        _apply_mangabaka_rate_limit(self._session)
+
+        self._mal = mal_resolver or MALResolver(
+            request_timeout=request_timeout, search_type="lightnovel")
+        self._al  = al_resolver  or AniListResolver(
+            request_timeout=request_timeout, media_format="novel")
+        self._matches_cache = matches_cache
+
+    # ------------------------------------------------------------------
+    # MangaBaka search / fetch
+    # ------------------------------------------------------------------
+    def search_series(self, title: str) -> "dict | None":
+        """Returns the top MangaBaka novel hit for `title`, or None."""
+        if not title or not title.strip():
+            return None
+        url = f"{self.api_base_url}/series/search"
+        try:
+            resp = self._session.get(
+                url, params={"q": title, "type": _SEARCH_TYPES,
+                             "page": 1, "limit": 1},
+                timeout=self.request_timeout)
+            resp.raise_for_status()
+        except requests.RequestException:
+            return None
+        data = resp.json().get("data") or []
+        return data[0] if data else None
+
+    def fetch_series(self, series_id) -> "dict | None":
+        """
+        Returns the full MangaBaka series dict for the given id, following
+        ``merged_with`` redirects.  A seen-set guards against merge cycles.
+        """
+        if series_id is None or str(series_id).strip() == "":
+            return None
+        seen: set[str] = set()
+        current = series_id
+        while str(current) not in seen:
+            seen.add(str(current))
+            url = f"{self.api_base_url}/series/{current}"
+            resp = self._session.get(url, timeout=self.request_timeout)
+            resp.raise_for_status()
+            data = resp.json().get("data")
+            if data and data.get("state") == "merged" and data.get("merged_with"):
+                current = data["merged_with"]
+                continue
+            return data
+        return None
+
+    # ------------------------------------------------------------------
+    # Resolve title -> MangaBaka series (caches the match)
+    # ------------------------------------------------------------------
+    def resolve(self, title: str) -> "dict | None":
+        """
+        Returns the MangaBaka series for `title`.
+
+        Lookup order:
+          1. MatchesCache (uses stored mangabakaId, skips the search).
+          2. Fresh MangaBaka search — top hit. Result is persisted to the
+             cache so it survives a crash.
+        """
+        if self._matches_cache is not None:
+            cached = self._matches_cache.get(title)
+            if cached and cached.get("mangabakaId"):
+                try:
+                    series = self.fetch_series(cached["mangabakaId"])
+                    if series:
+                        return series
+                except Exception:
+                    pass
+
+        series = self.search_series(title)
+        if series and self._matches_cache is not None:
+            self._matches_cache.upsert(
+                title,
+                mangabaka_id=series.get("id"),
+                mangabaka_name=series.get("title") or "",
+                image_url=pick_thumbnail_url(series.get("cover")),
+            )
+        return series
+
+    # ------------------------------------------------------------------
+    # Main entry point
+    # ------------------------------------------------------------------
+    def build(self, *, title: str = "",
+              mangabaka_id=None) -> "dict | None":
+        """
+        Fetches and enriches metadata for one series, returning the
+        normalised dict described in the module docstring.
+
+        Pass either `title` (will resolve via cache/search) or
+        `mangabaka_id` (direct fetch).
+        """
+        if mangabaka_id is not None and str(mangabaka_id).strip():
+            md = self.fetch_series(mangabaka_id)
+        else:
+            md = self.resolve(title)
+        if not md:
+            return None
+        return self._assemble(md)
+
+    # ------------------------------------------------------------------
+    # Internal: assemble the result dict
+    # ------------------------------------------------------------------
+    def _assemble(self, md: dict) -> dict:
+        """
+        Turns the MangaBaka series record `md` into the builder's result
+        dict, enriching it with MAL / AniList data.
+
+        Tracker ids are read from ``md["source"]``; a missing MAL id is
+        looked up by title.  MAL stats, characters and staff are fetched
+        through the MAL resolver; characters / staff fall back to the
+        AniList resolver when MAL returned nothing and an AniList id is
+        known.  The unmodified record is included under ``"raw"``.
+        """
+        mal_id = _id_from_source(md, "myanimelist", "mal")
+        al_id  = _id_from_source(md, "anilist")
+
+        # Fall back to a title-based MAL lookup when the source map does
+        # not carry an id — Jikan is the only tracker that ships staff
+        # data we can use to enrich author / artist person records.
+        if mal_id is None:
+            mal_id = self._mal.find_mal_id(md.get("title") or "")
+
+        mal_stats = self._mal.get_stats(mal_id) if mal_id else None
+
+        # MAL first; AniList only when MAL produced an empty result.
+        characters_detailed = self._mal.get_characters_detailed(mal_id) if mal_id else []
+        if not characters_detailed and al_id:
+            characters_detailed = self._al.get_characters_detailed(al_id)
+
+        staff_detailed = self._mal.get_staff_detailed(mal_id) if mal_id else []
+        if not staff_detailed and al_id:
+            staff_detailed = self._al.get_staff_detailed(al_id)
+
+        # Character names for SeriesMetadata, disambiguated with the
+        # tracker character id ("Rem (MAL 118737)") because Kavita person
+        # records are global and keyed by name only.
+        character_names = [
+            person_name_with_id(c["name"],
+                                mal_id=c.get("mal_id"),
+                                al_id=c.get("al_id"))
+            for c in characters_detailed if c.get("name")
+        ]
+        # Writers come from MangaBaka first (authoritative for novels)
+        writers = list(md.get("authors") or [])
+        # Illustrators / artists -> CoverArtists (Kavita has no dedicated
+        # illustrator field, and Pencillers is the wrong semantic for
+        # text-only novels).
+        cover_artists = list(md.get("artists") or [])
+
+        # Publisher: prefer English licence, else original.  When both
+        # exist, the original publisher becomes the imprint.
+        english_pubs  = self._publishers_by_type(md, "English")
+        original_pubs = self._publishers_by_type(md, "Original")
+        publishers = english_pubs or original_pubs
+        imprint = original_pubs[0] if english_pubs and original_pubs else None
+
+        # Release year
+        release_year = None
+        try:
+            if md.get("year") is not None:
+                release_year = int(md["year"])
+        except (TypeError, ValueError):
+            pass
+
+        # Score: MangaBaka rating is 0..100  ->  Kavita userRating is 0..5
+        score = None
+        if md.get("rating") is not None:
+            try:
+                score = round(float(md["rating"]) / 20.0, 1)
+            except (TypeError, ValueError):
+                pass
+
+        # Tags / genres come back as snake_case slugs.
+        genres = [_format_term(g) for g in (md.get("genres") or []) if g]
+        tags   = [_format_term(t) for t in (md.get("tags")   or []) if t]
+
+        # Web links
+        web_links = self._collect_web_links(md)
+
+        # Summary HTML
+        summary = self._build_summary(md, mal_stats)
+
+        # Cover URL
+        cover_url = pick_cover_url(md.get("cover"))
+
+        # Title variants
+        all_alt = self._collect_all_alt_titles(md)
+
+        return {
+            "mangabakaId":     str(md.get("id") or ""),
+            "mangabakaTitle":  md.get("title") or "",
+            "originalName":    md.get("native_title") or "",
+            "localizedName":   md.get("romanized_title") or "",
+            "sortName":        self._sort_title(md),
+            "altTitles":       all_alt,
+            "summary":         summary,
+            "genres":          genres,
+            "tags":            tags,
+            "characters":      character_names,
+            "writers":         writers,
+            "coverArtists":    cover_artists,
+            "publishers":      publishers,
+            "imprint":         imprint,
+            "releaseYear":     release_year,
+            # Unknown ratings / statuses map to 0 (Unknown / OnGoing).
+            "ageRating":       _AGE_RATING_MAP.get(md.get("content_rating"), 0),
+            "publicationStatus": _PUB_STATUS_MAP.get(
+                (md.get("status") or "").lower(), 0),
+            "language":        self.language,
+            "webLinks":        web_links,
+            "score":           score,
+            "coverUrl":        cover_url,
+            "malId":           mal_id,
+            "anilistId":       al_id,
+            "relationships":   list(md.get("relationships_v2") or []),
+            "charactersDetailed": characters_detailed,
+            "staffDetailed":   staff_detailed,
+            "raw":             md,
+        }
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+    @staticmethod
+    def _publishers_by_type(md: dict, ptype: str) -> list[str]:
+        return [p.get("name") for p in (md.get("publishers") or [])
+                if p.get("type") == ptype and p.get("name")]
+
+    def _sort_title(self, md: dict) -> str:
+        lang = self.language.lower()
+        alts = self._collect_alt_titles(md)
+        return alts.get(lang) or md.get("title") or ""
+
+    def _collect_alt_titles(self, md: dict) -> "dict[str, str]":
+        """Returns one best title per language code (en/de/jp/romaji)."""
+        titles = md.get("titles") or md.get("alt_titles") or []
+
+        def pick(language_codes: tuple, prefer_trait: "str | None" = None
+                 ) -> "str | None":
+            best_score = -1
+            best_title: "str | None" = None
+            for entry in titles:
+                if not isinstance(entry, dict):
+                    continue
+                lang = (entry.get("language") or entry.get("lang") or "").lower()
+                if lang not in language_codes:
+                    continue
+                title = entry.get("title")
+                if not title:
+                    continue
+                traits = entry.get("traits") or []
+                score = 0
+                if prefer_trait and prefer_trait in traits:
+                    score += 4
+                if "official" in traits:
+                    score += 2
+                if entry.get("is_primary"):
+                    score += 1
+                if score > best_score:
+                    best_score, best_title = score, title
+            return best_title
+
+        result: dict[str, str] = {}
+        kanji = pick(("ja",), prefer_trait="native") or md.get("native_title")
+        if kanji:
+            result["jp"] = kanji
+        romaji = pick(("ja-latn", "ja-romaji"))
+        if not romaji:
+            rt = md.get("romanized_title") or ""
+            if rt and all(ord(c) < 128 for c in rt):
+                romaji = rt
+        if romaji:
+            result["romaji"] = romaji
+        en = pick(("en",)) or md.get("title")
+        if en:
+            result["en"] = en
+        de = pick(("de",))
+        if de:
+            result["de"] = de
+        return result
+
+    @staticmethod
+    def _collect_all_alt_titles(md: dict) -> "dict[str, list[str]]":
+        _GROUPS = {
+            "en":        ("en",),
+            "de":        ("de",),
+            "ja":        ("ja",),
+            "ja-romaji": ("ja-latn", "ja-romaji"),
+            "ko":        ("ko",),
+            "ko-romaji": ("ko-latn", "ko-romaji"),
+            "zh":        ("zh", "zh-hk", "zh-tw", "zh-hans", "zh-hant"),
+            "zh-romaji": ("zh-latn",),
+        }
+        lang_to_group = {l: g for g, ls in _GROUPS.items() for l in ls}
+        result: dict[str, list[str]] = {}
+        seen:   dict[str, set]       = {}
+        for entry in (md.get("titles") or md.get("alt_titles") or []):
+            if not isinstance(entry, dict):
+                continue
+            lang  = (entry.get("language") or entry.get("lang") or "").lower()
+            group = lang_to_group.get(lang)
+            if not group:
+                continue
+            title = (entry.get("title") or "").strip()
+            if not title:
+                continue
+            result.setdefault(group, [])
+            seen.setdefault(group, set())
+            if title not in seen[group]:
+                result[group].append(title)
+                seen[group].add(title)
+        return result
+
+    def _collect_web_links(self, md: dict) -> list[str]:
+        links: list[str] = [l for l in (md.get("links") or []) if l]
+        for raw_key, info in (md.get("source") or {}).items():
+            template = _TRACKER_URL_TEMPLATES.get(_normalise_key(raw_key))
+            if not template or not isinstance(info, dict):
+                continue
+            source_id = info.get("id")
+            if source_id is not None:
+                links.append(template.format(id=source_id))
+        seen: set[str] = set()
+        unique: list[str] = []
+        for link in links:
+            if link not in seen:
+                seen.add(link)
+                unique.append(link)
+        return unique
+
+    def _build_summary(self, md: dict,
+                       mal_stats: "dict | None") -> str:
+        """Builds the HTML summary with stats table + description + alt titles."""
+        _TD = 'style="padding-right:1.5em"'
+        parts: list[str] = []
+
+        if mal_stats:
+            url     = mal_stats.get("url", "")
+            as_of   = mal_stats.get("as_of", "")
+            rows: list[str] = []
+            for label, key, fmt in (
+                ("Score",     "score",      "{}"),
+                ("Ranked",    "rank",       "#{}"),
+                ("Scored by", "scored_by",  "{:,} users"),
+                ("Popularity","popularity", "#{}"),
+                ("Members",   "members",    "{:,}"),
+                ("Favorites", "favorites",  "{:,}"),
+            ):
+                v = mal_stats.get(key)
+                if v is None:
+                    continue
+                try:
+                    formatted = fmt.format(v)
+                except (TypeError, ValueError):
+                    formatted = str(v)
+                rows.append(f"<tr><td {_TD}>{label}</td><td>{formatted}</td></tr>")
+            if rows:
+                link = f'<a href="{url}" target="_blank">MyAnimeList</a>' if url else "MyAnimeList"
+                parts.append(f"<p>{link} stats as of {as_of}:</p>"
+                             f"<table>{''.join(rows)}</table>")
+
+        desc_raw = (md.get("description") or "").strip()
+        if desc_raw:
+            parts.append(_md_to_html(desc_raw))
+
+        all_alt = self._collect_all_alt_titles(md)
+        if all_alt:
+            label_map = {
+                "en":        "EN",
+                "de":        "DE",
+                "ja":        "JA",
+                "ja-romaji": "JA Romaji",
+                "ko":        "KO",
+                "ko-romaji": "KO Romaji",
+                "zh":        "ZH",
+                "zh-romaji": "ZH Romaji",
+            }
+            alt_rows: list[str] = []
+            for group in ("en", "de", "ja", "ja-romaji",
+                          "ko", "ko-romaji", "zh", "zh-romaji"):
+                titles = all_alt.get(group)
+                if not titles:
+                    continue
+                cell = "<br>".join(titles)
+                alt_rows.append(
+                    f"<tr><td {_TD}>{label_map[group]}</td><td>{cell}</td></tr>")
+            if alt_rows:
+                parts.append(f"<table>{''.join(alt_rows)}</table>")
+
+        return "<br>".join(parts)
@@ -0,0 +1,265 @@
+"""
+light_novel_orchestrator.py
+===========================
+
+High-level workflow on top of the resolvers, the Kavita client and the
+diff-based updaters.  Exposes three operations to the WebApp:
+
+  - build_matches(library_ids):
+        Scan one or more Kavita libraries, resolve every series against
+        MangaBaka and persist the match in matches.json.
+  - update_series(kavita_series_id):
+        Re-fetch MangaBaka, MAL and AniList data for a single Kavita
+        series and apply the diff (metadata + relationships).  Persons
+        are synced separately by the global sync_persons() pass.
+  - update_all(library_ids):
+        Run update_series for every series that has a match in the
+        cache and lives in the given libraries.
+
+A single shared HTTP session (rate-limited for MangaBaka) and shared
+resolver singletons are used across the whole run to maximise cache
+hits.
+"""
+
+from __future__ import annotations
+
+import requests
+
+from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
+from MALResolver import MALResolver
+from AniListResolver import AniListResolver
+from MatchesCache import MatchesCache
+from KavitaClient import KavitaClient
+from KavitaPersonUpdater import KavitaPersonUpdater
+from KavitaSeriesUpdater import KavitaSeriesUpdater
+from LightNovelMetadataBuilder import (
+    LightNovelMetadataBuilder,
+    pick_thumbnail_url,
+)
+from RelationshipSync import RelationshipSync
+
+
+class LightNovelOrchestrator:
+    def __init__(self, *,
+                 kavita_url: str,
+                 kavita_api_key: str,
+                 matches_cache: MatchesCache,
+                 language: str = "en",
+                 request_timeout: int = 30,
+                 api_base_url: str = "https://api.mangabaka.dev/v1"):
+        """
+        Creates the shared HTTP session, the resolvers, the Kavita client
+        and the builder / updater objects that the workflow methods use.
+
+        `matches_cache` is shared with the builder and the relationship
+        sync; `request_timeout` is passed to the resolvers, the Kavita
+        client and the builder.
+        """
+        self._cache = matches_cache
+        self._timeout = request_timeout
+
+        # One session for the whole orchestrator; the builder reuses it.
+        session = requests.Session()
+        session.headers.setdefault("User-Agent",
+                                   "KavitaLightNovelOrchestrator/1.0")
+        _apply_mangabaka_rate_limit(session)
+        self._session = session
+
+        # First construction in the LN container — pins the singletons to
+        # light-novel search mode (manga container uses the defaults).
+        self._mal = MALResolver(request_timeout=request_timeout,
+                                search_type="lightnovel")
+        self._al  = AniListResolver(request_timeout=request_timeout,
+                                    media_format="novel")
+
+        self._client = KavitaClient(kavita_url, kavita_api_key,
+                                    request_timeout=request_timeout)
+        # The builder gets the same session, resolvers and cache so that
+        # all components work on shared state.
+        self._builder = LightNovelMetadataBuilder(
+            api_base_url=api_base_url,
+            language=language,
+            request_timeout=request_timeout,
+            session=session,
+            mal_resolver=self._mal,
+            al_resolver=self._al,
+            matches_cache=matches_cache,
+        )
+        self._series_updater = KavitaSeriesUpdater(self._client)
+        self._person_updater = KavitaPersonUpdater(
+            self._client,
+            mal_resolver=self._mal,
+            al_resolver=self._al,
+        )
+        self._relation_sync = RelationshipSync(
+            self._client, matches_cache, builder=self._builder)
+
+    # ------------------------------------------------------------------
+    # Library listings
+    # ------------------------------------------------------------------
+    def list_libraries(self) -> list[dict]:
+        return self._client.list_libraries()
+
+    def list_series_in_libraries(self, library_ids: list[int]) -> list[dict]:
+        result: list[dict] = []
+        for lib_id in library_ids:
+            try:
+                result.extend(self._client.list_series_in_library(int(lib_id)))
+            except Exception as exc:
+                print(f"[orchestrator] library {lib_id} list failed: {exc}",
+                      flush=True)
+        return result
+
+    # ------------------------------------------------------------------
+    # Matching
+    # ------------------------------------------------------------------
+    def build_matches(self, library_ids: list[int]) -> dict:
+        """
+        Resolves every series in the given libraries against MangaBaka.
+
+        Series already present in matches.json keep their stored
+        mangabakaId; the kavitaSeriesId + libraryId fields are refreshed
+        in case the user moved a series between libraries.
+        """
+        stats = {"checked": 0, "matched": 0, "skipped": 0, "missing": 0}
+        for series in self.list_series_in_libraries(library_ids):
+            title = (series.get("name") or "").strip()
+            if not title:
+                continue
+            stats["checked"] += 1
+            kavita_id = int(series.get("id") or 0)
+            library_id = int(series.get("libraryId") or 0)
+
+            cached = self._cache.get(title)
+            if cached and cached.get("mangabakaId"):
+                self._cache.upsert(
+                    title,
+                    kavita_series_id=kavita_id,
+                    library_id=library_id,
+                )
+                stats["skipped"] += 1
+                continue
+
+            mb_series = self._builder.search_series(title)
+            if not mb_series:
+                self._cache.upsert(
+                    title,
+                    kavita_series_id=kavita_id,
+                    library_id=library_id,
+                )
+                stats["missing"] += 1
+                print(f"[match] {title!r}: no MangaBaka hit", flush=True)
+                continue
+
+            self._cache.upsert(
+                title,
+                mangabaka_id=mb_series.get("id"),
+                mangabaka_name=mb_series.get("title") or "",
+                image_url=pick_thumbnail_url(mb_series.get("cover")),
+                kavita_series_id=kavita_id,
+                library_id=library_id,
+            )
+            stats["matched"] += 1
+            print(f"[match] {title!r} -> {mb_series.get('title')!r} "
+                  f"(id={mb_series.get('id')})", flush=True)
+        return stats
+
+    # ------------------------------------------------------------------
+    # Updating
+    # ------------------------------------------------------------------
+    def update_series(self, kavita_series_id: int) -> dict:
+        """Runs a full metadata update for a single Kavita series."""
+        hit = self._cache.get_by_kavita_id(int(kavita_series_id))
+        if not hit:
+            # Try to resolve via the Kavita series name on the fly.
+            series = self._client.get_series(int(kavita_series_id))
+            title = (series.get("name") or "").strip()
+            if not title:
+                return {"ok": False, "error": "series not in matches.json"}
+            built = self._builder.build(title=title)
+            if not built:
+                return {"ok": False, "error": "no MangaBaka match"}
+            self._cache.upsert(
+                title,
+                mangabaka_id=built.get("mangabakaId"),
+                mangabaka_name=built.get("mangabakaTitle"),
+                image_url=built.get("coverUrl"),
+                kavita_series_id=int(kavita_series_id),
+                library_id=int(series.get("libraryId") or 0),
+            )
+            cached_title = title
+            cached_entry = self._cache.get(title) or {}
+        else:
+            cached_title, cached_entry = hit
+            built = self._builder.build(mangabaka_id=cached_entry.get("mangabakaId"))
+            if not built:
+                return {"ok": False, "error": "mangabaka id no longer resolvable"}
+
+        prev_cover = cached_entry.get("imageUrl") or ""
+        try:
+            series_report = self._series_updater.update_series(
+                int(kavita_series_id), built,
+                previous_cover_url=prev_cover,
+            )
+        except Exception as exc:
+            return {"ok": False, "error": f"series update failed: {exc}"}
+
+        # Person sync no longer runs per series — it has its own global,
+        # id-based updater (sync_persons / KavitaPersonUpdater.update_all_persons)
+        # on a separate cron schedule.
+
+        # Relationships + collection
+        try:
+            relation_report = self._relation_sync.sync(
+                int(kavita_series_id), built)
+        except Exception as exc:
+            relation_report = {"error": str(exc)}
+
+        # Stamp the new cover URL on the cache so the next run knows when
+        # to re-upload.
+        self._cache.upsert(
+            cached_title,
+            image_url=built.get("coverUrl") or prev_cover,
+        )
+        self._cache.mark_updated(cached_title)
+
+        return {
+            "ok":            True,
+            "title":         cached_title,
+            "mangabakaId":   built.get("mangabakaId"),
+            "series":        series_report,
+            "relationships": relation_report,
+        }
+
+    # ------------------------------------------------------------------
+    # Person sync (global, id-based — independent of series updates)
+    # ------------------------------------------------------------------
+    def sync_persons(self, *, trigger: str = "ln", perf=None) -> dict:
+        """
+        Runs the global, id-based person updater over every Kavita person.
+        Covers both manga and light-novel libraries in one pass.
+        """
+        return self._person_updater.update_all_persons(
+            trigger=trigger, perf=perf)
+
+    def update_all(self, library_ids: "list[int] | None") -> dict:
+        """Updates every cached series in the given libraries."""
+        if library_ids is None:
+            entries = self._cache.all()["matches"]
+        else:
+            entries = self._cache.all_in_libraries(library_ids)["matches"]
+
+        results: list[dict] = []
+        ok = fail = 0
+        for title, entry in entries.items():
+            ksid = int(entry.get("kavitaSeriesId") or 0)
+            if not ksid or not entry.get("mangabakaId"):
+                continue
+            try:
+                res = self.update_series(ksid)
+            except Exception as exc:
+                res = {"ok": False, "error": str(exc)}
+            res["title"] = title
+            results.append(res)
+            if res.get("ok"):
+                ok += 1
+            else:
+                fail += 1
+            print(f"[update] {title!r}: "
+                  f"{'ok' if res.get('ok') else 'FAIL ' + str(res.get('error'))}",
+                  flush=True)
+        return {"ok": ok, "failed": fail, "results": results}
+
+    # ------------------------------------------------------------------
+    # Direct helpers exposed to the WebApp
+    # ------------------------------------------------------------------
+    def fetch_series(self, mangabaka_id) -> "dict | None":
+        return self._builder.fetch_series(mangabaka_id)
@@ -0,0 +1,187 @@
+"""
+matches_cache.py
+================
+
+Persistent JSON cache that maps a Kavita series title to the MangaBaka
+series it was matched against, plus enough context to update the right
+Kavita record later.
+
+Structure on disk::
+
+    {
+      "matches": {
+        "<kavita series name>": {
+          "mangabakaId":      "12345",
+          "mangabakaName":    "Re:Zero",
+          "imageUrl":         "https://.../cover.jpg",
+          "kavitaSeriesId":   42,
+          "libraryId":        3,
+          "firstMatchTime":   1700000000,
+          "lastUpdateTime":   1700100000
+        },
+        ...
+      }
+    }
+
+The cache is the source of truth for the WebUI's matches table and is
+written back on every mutation so a crash mid-batch does not lose
+matches that were resolved in the current run.
+"""
+
+from __future__ import annotations
+
+import json
+import threading
+import time
+from pathlib import Path
+
+
+def _set_int(entry: dict, key: str, value) -> None:
+    """Sets entry[key] = int(value); ignores values that don't coerce."""
+    try:
+        entry[key] = int(value)
+    except (TypeError, ValueError):
+        pass
+
+
+class MatchesCache:
+    def __init__(self, path):
+        self._path = Path(path)
+        self._lock = threading.RLock()
+        self._data: dict = {"matches": {}}
+        self._load()
+
+    # ------------------------------------------------------------------
+    # Public lookup / mutation API
+    # ------------------------------------------------------------------
+    def get(self, title: str) -> "dict | None":
+        with self._lock:
+            entry = self._data["matches"].get(title)
+            return dict(entry) if entry else None
+
+    def get_by_kavita_id(self, kavita_series_id: int) -> "tuple[str, dict] | None":
+        with self._lock:
+            for title, entry in self._data["matches"].items():
+                if entry.get("kavitaSeriesId") == kavita_series_id:
+                    return title, dict(entry)
+        return None
+
+    def get_by_mangabaka_id(self, mangabaka_id) -> "tuple[str, dict] | None":
+        target = str(mangabaka_id) if mangabaka_id is not None else ""
+        if not target:
+            return None
+        with self._lock:
+            for title, entry in self._data["matches"].items():
+                if str(entry.get("mangabakaId") or "") == target:
+                    return title, dict(entry)
+        return None
+
+    def upsert(self, title: str, *,
+               mangabaka_id=None,
+               mangabaka_name=None,
+               image_url=None,
+               kavita_series_id=None,
+               library_id=None,
+               first_match_time=None,
+               last_update_time=None) -> dict:
+        """
+        Inserts or updates an entry.  Only fields passed explicitly are
+        modified; the rest are preserved.
+        """
+        with self._lock:
+            entry = self._data["matches"].get(title)
+            if entry is None:
+                entry = {
+                    "mangabakaId":    "",
+                    "mangabakaName":  "",
+                    "imageUrl":       "",
+                    "kavitaSeriesId": 0,
+                    "libraryId":      0,
+                    "firstMatchTime": int(time.time()),
+                    "lastUpdateTime": 0,
+                }
+                self._data["matches"][title] = entry
+            if mangabaka_id is not None:
+                entry["mangabakaId"] = str(mangabaka_id)
+            if mangabaka_name is not None:
+                entry["mangabakaName"] = mangabaka_name
+            if image_url is not None:
+                entry["imageUrl"] = image_url
+            if kavita_series_id is not None:
+                _set_int(entry, "kavitaSeriesId", kavita_series_id)
+            if library_id is not None:
+                _set_int(entry, "libraryId", library_id)
+            if first_match_time is not None:
+                _set_int(entry, "firstMatchTime", first_match_time)
+            if last_update_time is not None:
+                _set_int(entry, "lastUpdateTime", last_update_time)
+            self._save_unlocked()
+            return dict(entry)
+
+    def mark_updated(self, title: str) -> None:
+        with self._lock:
+            entry = self._data["matches"].get(title)
+            if entry is not None:
+                entry["lastUpdateTime"] = int(time.time())
+                self._save_unlocked()
+
+    def rename(self, old_title: str, new_title: str) -> bool:
+        if not new_title or old_title == new_title:
+            return False
+        with self._lock:
+            entry = self._data["matches"].pop(old_title, None)
+            if entry is None:
+                return False
+            self._data["matches"][new_title] = entry
+            self._save_unlocked()
+            return True
+
+    def remove(self, title: str) -> bool:
+        with self._lock:
+            existed = title in self._data["matches"]
+            if existed:
+                del self._data["matches"][title]
+                self._save_unlocked()
+            return existed
+
+    def all(self) -> dict:
+        with self._lock:
+            return {"matches": {k: dict(v)
+                                for k, v in self._data["matches"].items()}}
+
+    def all_in_libraries(self, library_ids: "list[int] | None") -> dict:
+        """
+        Returns the cache filtered to entries whose libraryId is in
+        `library_ids`.  Pass None to return everything.
+        """
+        if library_ids is None:
+            return self.all()
+        ids = {int(i) for i in library_ids}
+        with self._lock:
+            return {"matches": {
+                k: dict(v) for k, v in self._data["matches"].items()
+                if int(v.get("libraryId") or 0) in ids
+            }}
+
+    # ------------------------------------------------------------------
+    # Internal IO
+    # ------------------------------------------------------------------
+    def _load(self) -> None:
+        if not self._path.is_file():
+            return
+        try:
+            with self._path.open("r", encoding="utf-8") as f:
+                loaded = json.load(f)
+        except (OSError, json.JSONDecodeError) as exc:
+            print(f"[MatchesCache] failed to load {self._path}: {exc}",
+                  flush=True)
+            return
+        if isinstance(loaded, dict) and isinstance(loaded.get("matches"), dict):
+            self._data = loaded
+
+    def _save_unlocked(self) -> None:
+        self._path.parent.mkdir(parents=True, exist_ok=True)
+        tmp = self._path.with_suffix(self._path.suffix + ".tmp")
+        with tmp.open("w", encoding="utf-8") as f:
+            json.dump(self._data, f, ensure_ascii=False, indent=2)
+        tmp.replace(self._path)
@@ -0,0 +1,815 @@
+"""
+matches_web_app.py
+==================
+
+Flask web UI for the Kavita light-novel metadata fetcher.
+
+Pages
+-----
+GET  /                          HTML UI (matches table + actions)
+
+Match cache (JSON)
+------------------
+GET  /api/libraries             Lists Kavita libraries
+GET  /api/matches               Full cache, optionally filtered by libraryIds=
+POST /api/matches               Upsert a single match
+                                  body: {title, mangabakaId}
+POST /api/matches/delete        Remove a match
+                                  body: {title}
+
+Background jobs
+---------------
+POST /api/build                 Build matches for libraries
+                                  body: {libraryIds: [int, ...]}
+POST /api/update                Update a single series
+                                  body: {kavitaSeriesId}
+POST /api/update-all            Update every cached series in libraries
+                                  body: {libraryIds: [int, ...] | null}
+POST /api/persons/sync          Sync every Kavita person (no request body)
+GET  /api/status                Current background job status
+                                  (running, label, log, lastFinished)
+"""
+
+from __future__ import annotations
+
+import threading
+import time
+
+from flask import Flask, jsonify, request, Response
+
+from MatchesCache import MatchesCache
+from LightNovelMetadataBuilder import pick_thumbnail_url
+from PerfWebPage import render_perf_page
+
+# Only the person dataset exists in the LN container.
+# NOTE(review): each entry looks like a (label, key) pair for the perf
+# page; "person" matches the /perf/person link in the index page —
+# confirm the exact meaning against PerfWebPage.
+_PERF_TABS = [("persons", "person")]
+
+
+def _int_list(values) -> list[int]:
+    """Coerces an iterable of mixed values to a list of positive ints."""
+    out: list[int] = []
+    for v in (values or []):
+        try:
+            n = int(v)
+        except (TypeError, ValueError):
+            continue
+        if n > 0:
+            out.append(n)
+    return out
+
+
+_INDEX_HTML = r"""<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <title>Kavita light-novel metadata fetcher</title>
+  <style>
+    body  { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
+    h1    { margin: 0 0 1rem; font-size: 1.4rem; }
+    .bar  { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
+    .bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
+    .bar select[multiple] { background:#222; color:#eee; border:1px solid #444; min-width: 14rem; min-height: 4.2rem; }
+    button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
+    button.primary { background:#2563eb; border-color:#2563eb; color:white; }
+    button.danger  { background:#7f1d1d; border-color:#7f1d1d; color:white; }
+    button.success { background:#15803d; border-color:#15803d; color:white; }
+    button:disabled { opacity:.5; cursor:default; }
+    table { border-collapse: collapse; width: 100%; }
+    th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
+    th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
+    th.sortable { cursor: pointer; user-select: none; }
+    th.sortable:hover { background:#252525; }
+    th .arrow { display:inline-block; width:.8em; color:#9ca3af; }
+    tr:nth-child(even) td { background: #161616; }
+    td.image img { max-width: 90px; max-height: 130px; display:block; }
+    td.id input { width: 12rem; padding: .25rem; background:#222; color:#eee; border:1px solid #444; font-family: monospace; }
+    td.title a { color: #60a5fa; text-decoration: none; }
+    td.title a:hover { text-decoration: underline; }
+    td.actions { white-space: nowrap; }
+    .status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
+    .dirty td { background: #1f2937 !important; }
+    .count { color:#9ca3af; font-size:.9rem; margin-left:.5rem; }
+    pre.log { background:#0a0a0a; color:#9ca3af; padding:.5rem .75rem; max-height:18rem; overflow:auto; border:1px solid #333; font-size:.8rem; white-space:pre-wrap; }
+    label { font-size:.9rem; color:#9ca3af; }
+  </style>
+</head>
+<body>
+<h1>Kavita light-novel metadata fetcher <span id="count" class="count"></span></h1>
+
+<div class="bar">
+  <label>Libraries
+    <select id="libraries" multiple size="3"></select>
+  </label>
+  <button id="reload">Reload</button>
+  <button id="build">Match all in libraries</button>
+  <button id="updateAll" class="success">Update all in libraries</button>
+  <button id="syncPersons">Sync persons</button>
+  <button id="batchSave" class="primary">Save dirty (0)</button>
+  <a href="/perf/person" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
+  <span class="status" id="status"></span>
+</div>
+
+<div class="bar">
+  <input id="filter" type="search" placeholder="Filter by title…">
+  <span class="count" id="jobStatus"></span>
+</div>
+
+<pre id="jobLog" class="log" hidden></pre>
+
+<table>
+  <thead>
+    <tr>
+      <th class="sortable" data-col="title">Title <span class="arrow" id="arrow-title"></span></th>
+      <th>mangabakaId</th>
+      <th>mangabakaName</th>
+      <th>library</th>
+      <th class="sortable" data-col="lastUpdateTime">Last update <span class="arrow" id="arrow-lastUpdateTime"></span></th>
+      <th>Image</th>
+      <th></th>
+    </tr>
+  </thead>
+  <tbody id="rows"></tbody>
+</table>
+
+<script>
+const MB_SEARCH = "https://mangabaka.org/search?q=";
+let matchesData = {};
+let librariesById = {};
+let currentSort = { col: "title", asc: true };
+let jobPollHandle = null;
+
+function fmtTime(unix) {
+  if (!unix) return "";
+  const d = new Date(unix * 1000);
+  return d.toLocaleString();
+}
+
+function setStatus(msg) { document.getElementById("status").textContent = msg; }
+
+function selectedLibraryIds() {
+  const sel = document.getElementById("libraries");
+  return Array.from(sel.selectedOptions).map(o => parseInt(o.value, 10));
+}
+
+function updateDirtyCount() {
+  const n = document.querySelectorAll("#rows tr.dirty").length;
+  const btn = document.getElementById("batchSave");
+  btn.textContent = "Save dirty (" + n + ")";
+  btn.disabled = n === 0;
+}
+
+function makeRow(title, e) {
+  const tr = document.createElement("tr");
+  tr.dataset.title = title;
+
+  // Title — links to MangaBaka search
+  const titleTd = document.createElement("td");
+  titleTd.className = "title";
+  const a = document.createElement("a");
+  a.href = MB_SEARCH + encodeURIComponent(title) + "&type=novel";
+  a.target = "_blank";
+  a.rel = "noopener";
+  a.textContent = title;
+  titleTd.appendChild(a);
+  tr.appendChild(titleTd);
+
+  // mangabakaId (editable)
+  const idTd = document.createElement("td");
+  idTd.className = "id";
+  const idInp = document.createElement("input");
+  idInp.value = e.mangabakaId || "";
+  idInp.dataset.original = e.mangabakaId || "";
+  idInp.addEventListener("input", () => {
+    if (idInp.value !== idInp.dataset.original) tr.classList.add("dirty");
+    else tr.classList.remove("dirty");
+    updateDirtyCount();
+  });
+  idTd.appendChild(idInp);
+  tr.appendChild(idTd);
+
+  // mangabakaName
+  const nameTd = document.createElement("td");
+  nameTd.textContent = e.mangabakaName || "";
+  tr.appendChild(nameTd);
+
+  // library
+  const libTd = document.createElement("td");
+  const libId = e.libraryId || 0;
+  libTd.textContent = librariesById[libId] || (libId ? "#" + libId : "");
+  tr.appendChild(libTd);
+
+  // lastUpdateTime
+  const timeTd = document.createElement("td");
+  timeTd.textContent = e.lastUpdateTime ? fmtTime(e.lastUpdateTime) : "";
+  tr.appendChild(timeTd);
+
+  // Image
+  const imgTd = document.createElement("td");
+  imgTd.className = "image";
+  const img = document.createElement("img");
+  img.src = e.imageUrl || "";
+  img.alt = "";
+  img.loading = "lazy";
+  imgTd.appendChild(img);
+  tr.appendChild(imgTd);
+
+  // Actions
+  const actTd = document.createElement("td");
+  actTd.className = "actions";
+
+  const save = document.createElement("button");
+  save.textContent = "Save";
+  save.className = "primary";
+  save.addEventListener("click", () => saveRow(tr));
+  actTd.appendChild(save);
+
+  const update = document.createElement("button");
+  update.textContent = "Update";
+  update.className = "success";
+  update.style.marginLeft = ".25rem";
+  update.disabled = !e.kavitaSeriesId;
+  update.title = e.kavitaSeriesId
+    ? "Push metadata to Kavita series #" + e.kavitaSeriesId
+    : "Run a Match cycle first so we know the Kavita series id";
+  update.addEventListener("click", () => updateRow(tr));
+  actTd.appendChild(update);
+
+  const del = document.createElement("button");
+  del.textContent = "Delete";
+  del.className = "danger";
+  del.style.marginLeft = ".25rem";
+  del.addEventListener("click", () => deleteRow(tr));
+  actTd.appendChild(del);
+
+  tr.appendChild(actTd);
+
+  tr._idInp = idInp;
+  tr._nameTd = nameTd;
+  tr._img = img;
+  tr._timeTd = timeTd;
+  tr._update = update;
+  return tr;
+}
+
+async function saveRow(tr) {
+  const title = tr.dataset.title;
+  const newId = tr._idInp.value.trim();
+  setStatus("Saving " + title + "…");
+  try {
+    const r = await fetch("/api/matches", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ title: title, mangabakaId: newId }),
+    });
+    if (!r.ok) throw new Error(await r.text());
+    const data = await r.json();
+    const entry = data.entry || {};
+    matchesData[title] = entry;
+    tr._idInp.value = entry.mangabakaId || "";
+    tr._idInp.dataset.original = entry.mangabakaId || "";
+    tr._nameTd.textContent = entry.mangabakaName || "";
+    tr._img.src = entry.imageUrl || "";
+    tr.classList.remove("dirty");
+    updateDirtyCount();
+    setStatus("Saved " + title);
+    return true;
+  } catch (err) {
+    setStatus("Save failed (" + title + "): " + err.message);
+    return false;
+  }
+}
+
+async function deleteRow(tr) {
+  const title = tr.dataset.title;
+  if (!confirm("Delete " + title + "?")) return;
+  setStatus("Deleting " + title + "…");
+  try {
+    const r = await fetch("/api/matches/delete", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ title: title }),
+    });
+    if (!r.ok) throw new Error(await r.text());
+    delete matchesData[title];
+    tr.remove();
+    document.getElementById("count").textContent =
+        "(" + Object.keys(matchesData).length + " entries)";
+    setStatus("Deleted");
+  } catch (err) {
+    setStatus("Delete failed: " + err.message);
+  }
+}
+
+async function updateRow(tr) {
+  const title = tr.dataset.title;
+  const entry = matchesData[title] || {};
+  if (!entry.kavitaSeriesId) {
+    setStatus("No kavitaSeriesId for " + title + " — run match first");
+    return;
+  }
+  setStatus("Updating " + title + "…");
+  tr._update.disabled = true;
+  try {
+    const r = await fetch("/api/update", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ kavitaSeriesId: entry.kavitaSeriesId }),
+    });
+    if (!r.ok) throw new Error(await r.text());
+    const res = await r.json();
+    setStatus(res.ok ? "Updated " + title : "Update failed: " + res.error);
+    if (res.ok) {
+      entry.lastUpdateTime = Math.floor(Date.now() / 1000);
+      tr._timeTd.textContent = fmtTime(entry.lastUpdateTime);
+    }
+  } catch (err) {
+    setStatus("Update failed: " + err.message);
+  } finally {
+    tr._update.disabled = false;
+  }
+}
+
+async function batchSave() {
+  const dirty = Array.from(document.querySelectorAll("#rows tr.dirty"));
+  if (dirty.length === 0) return;
+  if (!confirm("Save " + dirty.length + " changed row(s)?")) return;
+  setStatus("Batch saving " + dirty.length + " rows…");
+  let ok = 0, fail = 0;
+  for (const tr of dirty) {
+    const success = await saveRow(tr);
+    if (success) ok++; else fail++;
+  }
+  setStatus("Batch: " + ok + " ok, " + fail + " failed");
+}
+
+function sortedTitles() {
+  const titles = Object.keys(matchesData);
+  const dir = currentSort.asc ? 1 : -1;
+  if (currentSort.col === "title") {
+    return titles.sort((a, b) => a.localeCompare(b) * dir);
+  }
+  if (currentSort.col === "lastUpdateTime") {
+    return titles.sort((a, b) => {
+      const av = matchesData[a].lastUpdateTime || 0;
+      const bv = matchesData[b].lastUpdateTime || 0;
+      return (av - bv) * dir;
+    });
+  }
+  return titles;
+}
+
+function updateSortArrows() {
+  for (const a of document.querySelectorAll("th .arrow")) a.textContent = "";
+  const id = "arrow-" + currentSort.col;
+  const el = document.getElementById(id);
+  if (el) el.textContent = currentSort.asc ? "▲" : "▼";
+}
+
+function applyFilter() {
+  const q = document.getElementById("filter").value.toLowerCase();
+  const libs = new Set(selectedLibraryIds());
+  for (const tr of document.querySelectorAll("#rows tr")) {
+    const title = tr.dataset.title;
+    const entry = matchesData[title] || {};
+    const titleMatch = title.toLowerCase().includes(q);
+    const libMatch = libs.size === 0 || libs.has(entry.libraryId || 0);
+    tr.style.display = (titleMatch && libMatch) ? "" : "none";
+  }
+}
+
+function render() {
+  const tbody = document.getElementById("rows");
+  tbody.innerHTML = "";
+  for (const t of sortedTitles()) {
+    tbody.appendChild(makeRow(t, matchesData[t]));
+  }
+  updateSortArrows();
+  applyFilter();
+  updateDirtyCount();
+  document.getElementById("count").textContent =
+      "(" + Object.keys(matchesData).length + " entries)";
+}
+
+async function loadLibraries() {
+  try {
+    const r = await fetch("/api/libraries");
+    const data = await r.json();
+    const libs = data.libraries || [];
+    const defaults = new Set(data.defaults || []);
+    librariesById = {};
+    const sel = document.getElementById("libraries");
+    sel.innerHTML = "";
+    for (const lib of libs) {
+      librariesById[lib.id] = lib.name;
+      const opt = document.createElement("option");
+      opt.value = lib.id;
+      opt.textContent = lib.name + " (#" + lib.id + ")";
+      if (defaults.has(lib.id)) opt.selected = true;
+      sel.appendChild(opt);
+    }
+  } catch (err) {
+    setStatus("Failed to load libraries: " + err.message);
+  }
+}
+
+async function load() {
+  setStatus("Loading…");
+  try {
+    const r = await fetch("/api/matches");
+    const data = await r.json();
+    matchesData = data.matches || {};
+    render();
+    setStatus(Object.keys(matchesData).length + " entries");
+  } catch (err) {
+    setStatus("Load failed: " + err.message);
+  }
+}
+
+async function pollJob() {
+  try {
+    const r = await fetch("/api/status");
+    const s = await r.json();
+    const jobStatus = document.getElementById("jobStatus");
+    const jobLog = document.getElementById("jobLog");
+    if (!s.running && !s.lastFinished) {
+      jobStatus.textContent = "";
+      jobLog.hidden = true;
+      stopPolling();
+      return;
+    }
+    jobLog.hidden = false;
+    jobLog.textContent = (s.log || []).join("\n");
+    jobLog.scrollTop = jobLog.scrollHeight;
+    if (s.running) {
+      jobStatus.textContent = "Running: " + (s.label || "");
+    } else {
+      jobStatus.textContent = "Done: " + (s.label || "");
+      stopPolling();
+      load();
+    }
+  } catch (err) {
+    /* keep polling silently */
+  }
+}
+
+function startPolling() {
+  if (jobPollHandle) return;
+  jobPollHandle = setInterval(pollJob, 1000);
+  pollJob();
+}
+
+function stopPolling() {
+  if (jobPollHandle) clearInterval(jobPollHandle);
+  jobPollHandle = null;
+}
+
+async function startBuild() {
+  const libs = selectedLibraryIds();
+  if (libs.length === 0) {
+    setStatus("Pick at least one library");
+    return;
+  }
+  if (!confirm("Match every series in " + libs.length + " library(ies)?")) return;
+  setStatus("Build started");
+  try {
+    const r = await fetch("/api/build", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ libraryIds: libs }),
+    });
+    if (!r.ok) throw new Error(await r.text());
+    startPolling();
+  } catch (err) {
+    setStatus("Build failed: " + err.message);
+  }
+}
+
+async function startUpdateAll() {
+  const libs = selectedLibraryIds();
+  if (libs.length === 0) {
+    if (!confirm("No libraries selected — update every cached series?")) return;
+  } else if (!confirm("Update every cached series in " + libs.length + " library(ies)?")) {
+    return;
+  }
+  setStatus("Update-all started");
+  try {
+    const r = await fetch("/api/update-all", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ libraryIds: libs.length ? libs : null }),
+    });
+    if (!r.ok) throw new Error(await r.text());
+    startPolling();
+  } catch (err) {
+    setStatus("Update-all failed: " + err.message);
+  }
+}
+
+async function startSyncPersons() {
+  if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return;
+  setStatus("Person sync started");
+  try {
+    const r = await fetch("/api/persons/sync", { method: "POST" });
+    if (!r.ok) throw new Error(await r.text());
+    startPolling();
+  } catch (err) {
+    setStatus("Person sync failed: " + err.message);
+  }
+}
+
+document.getElementById("filter").addEventListener("input", applyFilter);
+document.getElementById("libraries").addEventListener("change", applyFilter);
+document.getElementById("reload").addEventListener("click", load);
+document.getElementById("batchSave").addEventListener("click", batchSave);
+document.getElementById("build").addEventListener("click", startBuild);
+document.getElementById("updateAll").addEventListener("click", startUpdateAll);
+document.getElementById("syncPersons").addEventListener("click", startSyncPersons);
+for (const th of document.querySelectorAll("th.sortable")) {
+  th.addEventListener("click", () => {
+    const col = th.dataset.col;
+    if (currentSort.col === col) currentSort.asc = !currentSort.asc;
+    else { currentSort.col = col; currentSort.asc = true; }
+    render();
+  });
+}
+
+(async () => {
+  await loadLibraries();
+  await load();
+  // Resume polling if there's a job running from a previous session
+  pollJob();
+})();
+</script>
+</body>
+</html>
+"""
+
+
+class _JobState:
+    """Thread-safe container for the current background job's progress."""
+
+    def __init__(self):
+        self._lock = threading.Lock()
+        self._running = False
+        self._label = ""
+        self._log: list[str] = []
+        self._last_finished_at = 0
+        self._thread: "threading.Thread | None" = None
+
+    def start(self, label: str, target, *args, **kwargs) -> bool:
+        with self._lock:
+            if self._running:
+                return False
+            self._running = True
+            self._label = label
+            self._log = [f"[{time.strftime('%H:%M:%S')}] {label} started"]
+
+        def runner():
+            try:
+                target(self, *args, **kwargs)
+            except Exception as exc:
+                self.append(f"FATAL: {exc}")
+            finally:
+                with self._lock:
+                    self._running = False
+                    self._last_finished_at = int(time.time())
+                self.append(f"[{time.strftime('%H:%M:%S')}] finished")
+
+        self._thread = threading.Thread(target=runner,
+                                        name=f"job:{label}",
+                                        daemon=True)
+        self._thread.start()
+        return True
+
+    def append(self, line: str) -> None:
+        with self._lock:
+            self._log.append(line)
+            # Cap log length so the response stays bounded.
+            if len(self._log) > 1000:
+                self._log = self._log[-800:]
+
+    def snapshot(self) -> dict:
+        with self._lock:
+            return {
+                "running":      self._running,
+                "label":        self._label,
+                "log":          list(self._log),
+                "lastFinished": self._last_finished_at,
+            }
+
+
+class MatchesWebApp:
+    def __init__(self, cache: MatchesCache, *,
+                 orchestrator=None,
+                 default_library_ids: "list[int] | None" = None,
+                 person_perf=None,
+                 host: str = "0.0.0.0",
+                 port: int = 8080):
+        self._cache = cache
+        self._orchestrator = orchestrator
+        self._defaults = list(default_library_ids or [])
+        self._person_perf = person_perf
+        self._host = host
+        self._port = port
+        self._job = _JobState()
+        self._app = Flask(__name__)
+        self._thread: "threading.Thread | None" = None
+        self._register_routes()
+
+    @property
+    def app(self) -> Flask:
+        return self._app
+
+    def start(self) -> threading.Thread:
+        if self._thread is not None and self._thread.is_alive():
+            return self._thread
+        self._thread = threading.Thread(
+            target=self._app.run,
+            kwargs={"host": self._host, "port": self._port,
+                    "debug": False, "use_reloader": False,
+                    "threaded": True},
+            name="MatchesWebApp",
+            daemon=False,
+        )
+        self._thread.start()
+        print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
+              flush=True)
+        return self._thread
+
+    def wait(self) -> None:
+        if self._thread is not None:
+            self._thread.join()
+
+    # ------------------------------------------------------------------
+    # Routes
+    # ------------------------------------------------------------------
+    def _register_routes(self) -> None:
+        app = self._app
+        cache = self._cache
+
+        @app.get("/")
+        def index() -> Response:
+            return Response(_INDEX_HTML, mimetype="text/html; charset=utf-8")
+
+        @app.get("/api/libraries")
+        def api_libraries():
+            if self._orchestrator is None:
+                return jsonify([])
+            try:
+                libs = self._orchestrator.list_libraries()
+            except Exception as exc:
+                return Response(f"libraries failed: {exc}", status=502)
+            return jsonify({"libraries": libs, "defaults": self._defaults})
+
+        @app.get("/api/matches")
+        def api_list():
+            raw = request.args.get("libraryIds") or ""
+            lib_ids = _int_list(raw.split(","))
+            if lib_ids:
+                return jsonify(cache.all_in_libraries(lib_ids))
+            return jsonify(cache.all())
+
+        @app.post("/api/matches")
+        def api_upsert():
+            body = request.get_json(silent=True) or {}
+            title = (body.get("title") or "").strip()
+            if not title:
+                return Response("title is required", status=400)
+            new_id_raw = body.get("mangabakaId")
+            new_id = str(new_id_raw).strip() if new_id_raw is not None else ""
+            if not new_id:
+                return Response("mangabakaId is required", status=400)
+
+            new_name: "str | None" = None
+            new_image: "str | None" = None
+            if self._orchestrator is not None:
+                try:
+                    series = self._orchestrator.fetch_series(new_id)
+                except Exception as exc:
+                    return Response(f"resolve failed: {exc}", status=502)
+                if not series:
+                    return Response(
+                        f"MangaBaka has no series with id {new_id}",
+                        status=404)
+                new_name  = series.get("title") or ""
+                new_image = pick_thumbnail_url(series.get("cover")) or ""
+
+            entry = cache.upsert(
+                title,
+                mangabaka_id=new_id,
+                mangabaka_name=new_name,
+                image_url=new_image,
+            )
+            return jsonify({"title": title, "entry": entry})
+
+        @app.post("/api/matches/delete")
+        def api_delete():
+            body = request.get_json(silent=True) or {}
+            title = (body.get("title") or "").strip()
+            if not title:
+                return Response("title is required", status=400)
+            removed = cache.remove(title)
+            return jsonify({"removed": removed, "title": title})
+
+        @app.post("/api/build")
+        def api_build():
+            """
+            Starts a background match job for the libraries listed in the
+            JSON body's `libraryIds`.  Answers 503 without an orchestrator,
+            400 without usable ids and 409 while another job is running.
+            """
+            if self._orchestrator is None:
+                return Response("no orchestrator configured", status=503)
+            payload = request.get_json(silent=True) or {}
+            library_ids = _int_list(payload.get("libraryIds"))
+            if not library_ids:
+                return Response("libraryIds required", status=400)
+
+            label = f"match libraries {library_ids}"
+
+            def task(job: _JobState, lib_ids):
+                stats = self._orchestrator.build_matches(lib_ids)
+                # One summary line, counters in a fixed order.
+                counters = ("matched", "skipped", "missing", "checked")
+                job.append(" ".join(
+                    f"{key}={stats.get(key)}" for key in counters))
+
+            started = self._job.start(label, task, library_ids)
+            if started:
+                return jsonify({"started": label})
+            return Response("a job is already running", status=409)
+
+        @app.post("/api/update")
+        def api_update():
+            """
+            Updates a single series synchronously.  The JSON body must carry
+            an integer-convertible `kavitaSeriesId`; the orchestrator's
+            result is returned as JSON.
+            """
+            if self._orchestrator is None:
+                return Response("no orchestrator configured", status=503)
+            payload = request.get_json(silent=True) or {}
+            try:
+                series_id = int(payload.get("kavitaSeriesId"))
+            except (TypeError, ValueError):
+                return Response("kavitaSeriesId required", status=400)
+            try:
+                outcome = self._orchestrator.update_series(series_id)
+            except Exception as exc:
+                return Response(f"update failed: {exc}", status=500)
+            return jsonify(outcome)
+
+        @app.post("/api/update-all")
+        def api_update_all():
+            """
+            Starts a background job that updates every series, optionally
+            restricted to the libraries in the JSON body's `libraryIds`
+            (absent key -> None is handed to the orchestrator).
+            """
+            if self._orchestrator is None:
+                return Response("no orchestrator configured", status=503)
+            payload = request.get_json(silent=True) or {}
+            raw = payload.get("libraryIds")
+            library_ids = _int_list(raw) if raw is not None else None
+
+            if library_ids is None:
+                label = "update all (every library)"
+            else:
+                label = f"update all in libraries {library_ids}"
+
+            def task(job: _JobState, lib_ids):
+                summary = self._orchestrator.update_all(lib_ids)
+                job.append(f"ok={summary.get('ok')} failed={summary.get('failed')}")
+                for res in summary.get("results", []):
+                    title = res.get("title", "?")
+                    if not res.get("ok"):
+                        job.append(f"  {title}: FAIL {res.get('error')}")
+                        continue
+                    # List only the fields the update reported as changed.
+                    changed = [field
+                               for field, state in (res.get("series") or {}).items()
+                               if state == "changed"]
+                    job.append(
+                        f"  {title}: changed=[{', '.join(changed) or '-'}]")
+
+            started = self._job.start(label, task, library_ids)
+            if started:
+                return jsonify({"started": label})
+            return Response("a job is already running", status=409)
+
+        @app.post("/api/persons/sync")
+        def api_persons_sync():
+            """
+            Starts the person sync as a background job.  Answers 503 without
+            an orchestrator and 409 while another job is running.
+            """
+            if self._orchestrator is None:
+                return Response("no orchestrator configured", status=503)
+
+            def task(job: _JobState):
+                report = self._orchestrator.sync_persons(
+                    trigger="ln", perf=self._person_perf)
+                # Read counters with .get(), like the other job summaries:
+                # a missing counter is logged as None instead of raising
+                # KeyError after the sync itself has already completed.
+                job.append(f"updated={report.get('updated')} "
+                           f"skipped={report.get('skipped')} "
+                           f"not_found={report.get('not_found')} "
+                           f"conflicts={report.get('conflicts')}")
+                # `or []` also tolerates an explicit null error list.
+                for err in report.get("errors") or []:
+                    job.append(f"  {err}")
+
+            if not self._job.start("person sync", task):
+                return Response("a job is already running", status=409)
+            return jsonify({"started": "person sync"})
+
+        @app.get("/api/status")
+        def api_status():
+            """Returns the job snapshot extended with the configured defaults."""
+            snapshot = self._job.snapshot()
+            snapshot["defaults"] = self._defaults
+            return jsonify(snapshot)
+
+        @app.get("/perf")
+        @app.get("/perf/<name>")
+        def perf_page(name: str = "person") -> Response:
+            """Serves the HTML performance page for tab `name`."""
+            # content_type sets the header verbatim.  Passing the charset via
+            # `mimetype` makes Werkzeug append its own "; charset=utf-8" to
+            # text/* types, which duplicates the charset parameter.
+            return Response(render_perf_page(name, _PERF_TABS),
+                            content_type="text/html; charset=utf-8")
+
+        @app.get("/api/perf/<name>")
+        def api_perf(name: str):
+            """Returns the recorded runs for `name`; only "person" has data."""
+            if name == "person" and self._person_perf is not None:
+                return jsonify(self._person_perf.all())
+            return jsonify({"runs": []})
@@ -0,0 +1,174 @@
+"""
+relationship_sync.py
+====================
+
+Mirrors MangaBaka's ``relationships_v2`` graph into Kavita:
+
+  1. Every related MangaBaka series that is *also* present in Kavita
+     (resolved via MatchesCache) is added to a shared Kavita collection
+     so the whole franchise can be browsed in one place.
+  2. Series-level relationships (prequel / sequel / spin-off / …) are
+     written via ``POST /api/Series/update-related`` so navigating
+     between entries surfaces the right neighbours.
+
+Only relationships where both endpoints exist in Kavita are written.
+Relationships pointing to series that have not been imported yet are
+silently skipped (the next match run picks them up).
+"""
+
+from __future__ import annotations
+
+from KavitaClient import KavitaClient
+from MatchesCache import MatchesCache
+
+
+# MangaBaka relation_type  ->  Kavita UpdateRelatedSeriesDto bucket
+# Lookups use the lower-cased relation_type; any type not listed here is
+# filed under the "others" bucket by RelationshipSync.sync().
+_RELATION_MAP = {
+    "prequel":             "prequels",
+    "sequel":              "sequels",
+    "side_story":          "sideStories",
+    "spin_off":            "spinOffs",
+    "spinoff":             "spinOffs",
+    "alternative_version": "alternativeVersions",
+    "alternative_story":   "alternativeVersions",
+    "alternative_setting": "alternativeSettings",
+    "adapted_from":        "adaptations",
+    "adaptation":          "adaptations",
+    "doujinshi":           "doujinshis",
+    "parent":              "contains",   # the parent "contains" the child
+}
+
+# Every bucket key placed in the update-related payload.  sync() sends all
+# of them on each write, with an empty list for buckets that have no
+# resolved series.
+_ALL_BUCKETS = (
+    "adaptations", "characters", "contains", "others",
+    "prequels", "sequels", "sideStories", "spinOffs",
+    "alternativeSettings", "alternativeVersions", "doujinshis",
+    "editions", "annuals",
+)
+
+
+class RelationshipSync:
+    """
+    Writes the MangaBaka relationships of one series into Kavita.
+
+    Related MangaBaka ids are resolved to Kavita series ids through the
+    MatchesCache.  Resolved ids are sent as series relations and, together
+    with the current series, added to a shared collection.
+    """
+
+    def __init__(self, client: KavitaClient, cache: MatchesCache, *,
+                 builder=None):
+        """
+        Parameters
+        ----------
+        client  : KavitaClient for collection / relation writes.
+        cache   : MatchesCache to resolve mangabakaId -> kavitaSeriesId.
+        builder : optional LightNovelMetadataBuilder used to fetch parent
+                  series titles when picking the collection name.
+        """
+        self._client = client
+        self._cache = cache
+        self._builder = builder
+
+    # ------------------------------------------------------------------
+    # Public
+    # ------------------------------------------------------------------
+    def sync(self, kavita_series_id: int, built: dict) -> dict:
+        """
+        Applies the relationship and collection links described by
+        `built["relationships"]` (raw MangaBaka relationships_v2 list)
+        for the given Kavita series.
+
+        Returns a report dict with three keys:
+
+        relations      : {bucket: [kavitaSeriesId, ...]} for the buckets
+                         that were written, {"error": message} when the
+                         write failed, {} when nothing was written.
+        collection     : the collection title on success, "error: ..." on
+                         failure, None when no collection was attempted.
+        missing_series : MangaBaka ids that could not be resolved to a
+                         Kavita series.
+        """
+        report: dict = {"relations": {}, "collection": None,
+                        "missing_series": []}
+
+        relationships = built.get("relationships") or []
+        if not relationships:
+            return report
+
+        own_id = int(kavita_series_id)
+
+        # Resolve mangabakaId -> kavitaSeriesId for every related entry.
+        related: dict[str, list[int]] = {b: [] for b in _ALL_BUCKETS}
+        all_kavita_ids: set[int] = set()
+        for rel in relationships:
+            mb_id = rel.get("to_series_id")
+            if mb_id is None:
+                continue
+            ksid = 0
+            hit = self._cache.get_by_mangabaka_id(mb_id)
+            if hit:
+                _title, entry = hit
+                ksid = int(entry.get("kavitaSeriesId") or 0)
+            if not ksid:
+                report["missing_series"].append(int(mb_id))
+                continue
+            if ksid == own_id:
+                # The related entry resolves to the series being synced;
+                # a relation from a series to itself carries no information.
+                continue
+            bucket = _RELATION_MAP.get((rel.get("relation_type") or "").lower(),
+                                       "others")
+            if ksid not in related[bucket]:
+                related[bucket].append(ksid)
+            all_kavita_ids.add(ksid)
+
+        # ----- Relationships ------------------------------------------
+        if any(related.values()):
+            payload = {"seriesId": own_id}
+            for bucket in _ALL_BUCKETS:
+                payload[bucket] = related[bucket]
+            try:
+                self._client.update_related(payload)
+                report["relations"] = {k: v for k, v in related.items() if v}
+            except Exception as exc:
+                report["relations"] = {"error": str(exc)}
+
+        # ----- Collection ---------------------------------------------
+        # Include the current series in the collection so it shows up too.
+        all_kavita_ids.add(own_id)
+        if len(all_kavita_ids) >= 2:
+            collection_name = self._collection_name(built, relationships)
+            if not (collection_name or "").strip():
+                # Without a title there is nothing to look up or create;
+                # sending id 0 with an empty title would ask Kavita for an
+                # unnamed collection.
+                report["collection"] = "error: no collection name available"
+                return report
+            collection_id = self._find_collection_id(collection_name)
+            try:
+                self._client.add_series_to_collection(
+                    collection_id=collection_id,
+                    title=collection_name,
+                    series_ids=sorted(all_kavita_ids),
+                )
+                report["collection"] = collection_name
+            except Exception as exc:
+                report["collection"] = f"error: {exc}"
+
+        return report
+
+    # ------------------------------------------------------------------
+    # Internal
+    # ------------------------------------------------------------------
+    def _find_collection_id(self, name: str) -> int:
+        """
+        Returns the id of an existing collection by title, or 0 to create.
+
+        Titles are compared stripped and case-insensitively.  The lookup is
+        best-effort: if listing the collections fails, 0 is returned.
+        """
+        if not name:
+            return 0
+        target = name.strip().lower()
+        try:
+            for col in self._client.list_collections():
+                if (col.get("title") or "").strip().lower() == target:
+                    try:
+                        return int(col.get("id") or 0)
+                    except (TypeError, ValueError):
+                        return 0
+        except Exception:
+            # Deliberate best-effort: fall through to "create" (0).
+            pass
+        return 0
+
+    def _collection_name(self, built: dict,
+                         relationships: list[dict]) -> str:
+        """
+        Picks the collection name.  Uses the parent series title from
+        MangaBaka if the current series has one; otherwise falls back to
+        the current series' own title (`built["mangabakaTitle"]`, or "").
+        """
+        for rel in relationships:
+            if (rel.get("relation_type") or "").lower() != "parent":
+                continue
+            parent_id = rel.get("to_series_id")
+            if parent_id is None:
+                # Nothing to fetch or look up for a parent without an id.
+                continue
+            if self._builder is not None:
+                try:
+                    parent_md = self._builder.fetch_series(parent_id)
+                    if parent_md and parent_md.get("title"):
+                        return parent_md["title"]
+                except Exception:
+                    # Fetch failed; the cache lookup below is the fallback.
+                    pass
+            # Even without a builder, the cache may know the parent.
+            hit = self._cache.get_by_mangabaka_id(parent_id)
+            if hit:
+                _title, entry = hit
+                name = entry.get("mangabakaName")
+                if name:
+                    return name
+        return built.get("mangabakaTitle") or ""
@@ -37,19 +37,27 @@ Data source notes

 from __future__ import annotations

-import difflib
 import re
+import sys
 import xml.etree.ElementTree as ET
+from contextlib import contextmanager
 from pathlib import Path

 import requests

+# Shared modules live one level up (src/); needed when a module in this
+# folder is run directly as a script (the entry points set the path).
+if __name__ == "__main__":
+    sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+
 from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver, _pick_image_url
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from MatchesCache import MatchesCache
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
+from CoverCache import CoverCache, _IMAGE_EXTS
+from TextUtils import person_name_with_id

 try:
    from PIL import Image
@@ -58,11 +66,19 @@ except ImportError:
    _HAS_PIL = False


+@contextmanager
+def _no_measure():
+    """Does-nothing context manager, shaped like a recorder's measure()."""
+    yield None
+
+
+# Sentinel marking a per-chapter memo slot as "not computed yet".
+_UNSET = object()
+
+
 # --------------------------------------------------------------------------
 # Constants
 # --------------------------------------------------------------------------
-_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
-
 # Series types accepted by the MangaBaka search endpoint.  Light/web novels
 # are filtered out because this pipeline only handles image-based manga.
 # Passed to `requests` as a list so each value becomes its own `&type=...`
@@ -179,7 +195,8 @@ class ComicInfoBuilder:
                 works_resolver: "MangaBakaWorksResolver | None" = None,
                 mal_resolver: "MALResolver | None" = None,
                 al_resolver: "AniListResolver | None" = None,
-                 matches_cache: "MatchesCache | None" = None):
+                 matches_cache: "MatchesCache | None" = None,
+                 cover_cache: "CoverCache | None" = None):
        if not manga_title or not str(manga_title).strip():
            raise ValueError("manga_title must not be empty.")

@@ -210,11 +227,24 @@ class ComicInfoBuilder:
        self._al_resolver = al_resolver or AniListResolver(
            request_timeout=request_timeout)
        self._matches_cache = matches_cache
+        self._cover_cache = cover_cache or _default_cover_cache()
+
+        # Optional performance recorder (duck-typed: any object with a
+        # .measure(name) context manager).  The mover sets this per chapter;
+        # when None, _measure() is a no-op so the builder stays decoupled
+        # from PerfStats and works standalone (e.g. the cover updater).
+        self.perf = None

        self._metadata: "dict | None" = None
        self._pages: list[dict] = []
        self._cover_path: "Path | None" = None
        self._suwayomi_data: dict = {}
+        # Per-chapter memo for _determine_volume (resolved up to 3x/chapter
+        # otherwise: cover download, explicit volume step, XML build).
+        self._volume_memo = _UNSET
+        # Per-series cache for full series fetches by id (parent series for
+        # SeriesGroup, merged-series redirects) — reused across all chapters.
+        self._series_by_id_cache: dict[str, dict] = {}

    # ----- Repr -----------------------------------------------------------
    def __repr__(self) -> str:
@@ -254,6 +284,13 @@ class ComicInfoBuilder:
        self._pages = []
        self._cover_path = None
        self._suwayomi_data = {}
+        self._volume_memo = _UNSET
+
+    def _measure(self, name: str):
+        """Returns the recorder's timer for `name`, or a no-op without one."""
+        recorder = self.perf
+        if recorder is None:
+            return _no_measure()
+        return recorder.measure(name)

    # ======================================================================
    # Public XML functions
@@ -298,11 +335,13 @@ class ComicInfoBuilder:
        if not folder.is_dir():
            raise NotADirectoryError(f"Folder not found: {folder}")

-        self._suwayomi_data = self._read_existing_comicinfo(folder)
+        with self._measure("read_comicinfo"):
+            self._suwayomi_data = self._read_existing_comicinfo(folder)

        self._cover_path = None
        if download_cover:
-            self._cover_path = self._download_cover(folder, cover_filename)
+            with self._measure("cover"):
+                self._cover_path = self._download_cover(folder, cover_filename)

        cover_resolved = self._cover_path.resolve() if self._cover_path else None
        story_images: list[Path] = []
@@ -322,20 +361,23 @@ class ComicInfoBuilder:
        ordered.extend((img, "Story") for img in story_images)

        self._pages = []
-        for index, (img_path, page_type) in enumerate(ordered):
-            width, height = self._image_dimensions(img_path)
-            try:
-                size = img_path.stat().st_size
-            except OSError:
-                size = None
-            self._pages.append({
-                "image": index,
-                "type": page_type,
-                "width": width,
-                "height": height,
-                "size": size,
-                "double": bool(width and height and width > height),
-            })
+        # Probing every page for its pixel dimensions reads each file — on a
+        # network share this is often the dominant per-chapter cost.
+        with self._measure("image_dimensions"):
+            for index, (img_path, page_type) in enumerate(ordered):
+                width, height = self._image_dimensions(img_path)
+                try:
+                    size = img_path.stat().st_size
+                except OSError:
+                    size = None
+                self._pages.append({
+                    "image": index,
+                    "type": page_type,
+                    "width": width,
+                    "height": height,
+                    "size": size,
+                    "double": bool(width and height and width > height),
+                })

        return {
            "page_count": len(self._pages),
@@ -406,12 +448,20 @@ class ComicInfoBuilder:
        return series

    def _fetch_series_by_id(self, series_id) -> dict:
+        """
+        Fetches the full series record for `series_id` from
+        `{api_base_url}/series/{series_id}`.
+
+        A successful result is stored in `_series_by_id_cache` under
+        `str(series_id)` and returned from there on later calls; failures
+        are not cached.  Errors from `raise_for_status()` propagate, and a
+        response without a "data" payload raises RuntimeError.
+        """
+        # Cached per builder (i.e. per series): SeriesGroup resolution calls
+        # this for the parent on every chapter — without the cache that is
+        # one MangaBaka request per chapter for the same parent id.
+        key = str(series_id)
+        cached = self._series_by_id_cache.get(key)
+        if cached is not None:
+            return cached
        url = f"{self.api_base_url}/series/{series_id}"
        resp = self._session.get(url, timeout=self.request_timeout)
        resp.raise_for_status()
        data = resp.json().get("data")
        if not data:
            raise RuntimeError(f"Series with ID {series_id} not found.")
+        self._series_by_id_cache[key] = data
        return data

    # ======================================================================
@@ -483,9 +533,19 @@ class ComicInfoBuilder:
        add("Tags",  ", ".join(_format_term(t) for t in (md.get("tags") or [])))

        # ----- Characters — MAL first, AniList fallback ---------------------
-        characters = self._mal_resolver.get_characters(mal_id)
-        if not characters and al_id:
-            characters = self._al_resolver.get_characters(al_id)
+        # Names are disambiguated with the tracker *character* id
+        # ("Rem (MAL 118737)") so same-named characters from different
+        # series stay separate Kavita person records.  The format is shared
+        # with the light-novel updater — see TextUtils.person_name_with_id.
+        char_entries = self._mal_resolver.get_characters_detailed(mal_id)
+        if not char_entries and al_id:
+            char_entries = self._al_resolver.get_characters_detailed(al_id)
+        characters = [
+            person_name_with_id(e.get("name"),
+                                mal_id=e.get("mal_id"),
+                                al_id=e.get("al_id"))
+            for e in char_entries if (e.get("name") or "").strip()
+        ]
        add("Characters", ", ".join(characters) if characters else None)

        # ----- Web links ----------------------------------------------------
@@ -537,6 +597,18 @@ class ComicInfoBuilder:
    # Volume determination
    # ======================================================================
    def _determine_volume(self) -> "str | None":
+        """
+        Returns the volume of the current chapter, resolving it at most
+        once per chapter.
+
+        The first call stores the result of _resolve_volume() in the memo
+        slot; later calls return the stored value until the slot is reset
+        to _UNSET (see _clear_results).
+        """
+        if self._volume_memo is not _UNSET:
+            return self._volume_memo
+        self._volume_memo = self._resolve_volume()
+        return self._volume_memo
+
+    def _resolve_volume(self) -> "str | None":
        """
        Resolves the volume for the current chapter via MangaDex.
        Falls back to estimation when the chapter is absent from MangaDex.
@@ -580,11 +652,13 @@ class ComicInfoBuilder:
    # ======================================================================
    def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None":
        """
-        Downloads the cover for the current chapter/volume.
+        Fetches the cover for the current chapter/volume and writes it into
+        `folder`.

-        If a volume is known and a volume-specific cover exists in MangaBaka
-        works, that cover is used.  Otherwise the series default cover is
-        downloaded (raw variant preferred).
+        If a volume is known and a volume-specific cover exists in MangaBaka,
+        that cover is used; otherwise the series default cover.  The image
+        itself comes from the CoverCache, so a cover shared by many chapters
+        is downloaded only once.
        """
        md = self._get_metadata()
        volume = self._determine_volume()
@@ -602,18 +676,13 @@ class ComicInfoBuilder:
        if not cover_url:
            cover_url = _pick_cover_url(md.get("cover"))

-        if not cover_url:
+        fetched = self._cover_cache.get(cover_url) if cover_url else None
+        if not fetched:
            return None

-        try:
-            resp = self._session.get(cover_url, timeout=self.request_timeout)
-            resp.raise_for_status()
-        except requests.RequestException:
-            return None
-
-        ext = _guess_extension(cover_url, resp.headers.get("Content-Type", ""))
+        data, ext = fetched
        target = folder / f"{cover_filename}{ext}"
-        target.write_bytes(resp.content)
+        target.write_bytes(data)
        return target

    # ======================================================================
@@ -656,6 +725,41 @@ class ComicInfoBuilder:
        "manhua": ("zh-latn",),
    }

+    @staticmethod
+    def _pick_best_title(titles, language_codes: tuple,
+                         prefer_trait: "str | None" = None) -> "str | None":
+        """
+        Returns the best title from a MangaBaka `titles` list among the
+        entries whose language is one of `language_codes`.
+
+        An entry earns +4 for carrying `prefer_trait`, +2 for the
+        "official" trait and +1 for being primary.  The highest total
+        wins; on equal totals the earliest entry is kept.  Returns None
+        when `titles` is not a list or no entry qualifies.
+        """
+        if not isinstance(titles, list):
+            return None
+
+        def rank(candidate: dict) -> int:
+            traits = candidate.get("traits") or []
+            points = 0
+            if prefer_trait and prefer_trait in traits:
+                points += 4
+            if "official" in traits:
+                points += 2
+            if candidate.get("is_primary"):
+                points += 1
+            return points
+
+        # Only dict entries in a requested language with a non-empty title.
+        eligible = (
+            item for item in titles
+            if isinstance(item, dict)
+            and (item.get("language") or item.get("lang") or "").lower()
+            in language_codes
+            and item.get("title")
+        )
+        # max() keeps the first of several equally ranked candidates.
+        winner = max(eligible, key=rank, default=None)
+        return winner["title"] if winner is not None else None
+
    @classmethod
    def _romanized_for_native(cls, md: dict) -> "str | None":
        """
@@ -686,30 +790,7 @@ class ComicInfoBuilder:
            return None

        titles = md.get("titles") or md.get("alt_titles") or []
-        if not isinstance(titles, list):
-            return None
-
-        best_score = -1
-        best_title: "str | None" = None
-        for entry in titles:
-            if not isinstance(entry, dict):
-                continue
-            lang = (entry.get("language") or entry.get("lang") or "").lower()
-            if lang not in langs:
-                continue
-            title = entry.get("title")
-            if not title:
-                continue
-            traits = entry.get("traits") or []
-            score = 0
-            if "official" in traits:
-                score += 2
-            if entry.get("is_primary"):
-                score += 1
-            if score > best_score:
-                best_score = score
-                best_title = title
-        return best_title
+        return cls._pick_best_title(titles, langs)

    def _get_sort_title(self, md: dict) -> "str | None":
        """
@@ -745,31 +826,7 @@ class ComicInfoBuilder:

        def pick(language_codes: tuple, prefer_trait: "str | None" = None
                 ) -> "str | None":
-            """Picks the best title entry for any of the given language codes."""
-            if not isinstance(titles, list):
-                return None
-            best_score = -1
-            best_title: "str | None" = None
-            for entry in titles:
-                if not isinstance(entry, dict):
-                    continue
-                lang = (entry.get("language") or entry.get("lang") or "").lower()
-                if lang not in language_codes:
-                    continue
-                title = entry.get("title")
-                if not title:
-                    continue
-                traits = entry.get("traits") or []
-                score = 0
-                if prefer_trait and prefer_trait in traits:
-                    score += 4
-                if "official" in traits:
-                    score += 2
-                if entry.get("is_primary"):
-                    score += 1
-                if score > best_score:
-                    best_score, best_title = score, title
-            return best_title
+            return self._pick_best_title(titles, language_codes, prefer_trait)

        result: dict[str, str] = {}

@@ -1080,6 +1137,18 @@ class ComicInfoBuilder:
 # generic image-block picker; _pick_cover_url is kept for backward compat.
 _pick_cover_url = _pick_image_url

+# Shared fallback CoverCache for builders constructed without an explicit
+# one (temporary directory, removed at process exit).  Created lazily so
+# importing this module never touches the filesystem.
+_shared_cover_cache: "CoverCache | None" = None
+
+
+def _default_cover_cache() -> CoverCache:
+    """Returns the shared fallback CoverCache, creating it on first use."""
+    global _shared_cover_cache
+    cache = _shared_cover_cache
+    if cache is None:
+        cache = _shared_cover_cache = CoverCache()
+    return cache
+

 def _pick_thumbnail_url(cover) -> "str | None":
    """
@@ -1113,17 +1182,6 @@ def _pick_thumbnail_url(cover) -> "str | None":
    return _pick_cover_url(cover)


-def _guess_extension(url: str, content_type: str) -> str:
-    url_ext = Path(url.split("?")[0]).suffix.lower()
-    if url_ext in _IMAGE_EXTS:
-        return url_ext
-    ct = (content_type or "").lower()
-    if "png"  in ct: return ".png"
-    if "webp" in ct: return ".webp"
-    if "gif"  in ct: return ".gif"
-    return ".jpg"
-
-
 # --------------------------------------------------------------------------
 # Usage example
 # --------------------------------------------------------------------------
@@ -44,7 +44,7 @@ Dependencies
 from __future__ import annotations

 import io
-import threading
+import sys
 import xml.etree.ElementTree as ET
 import zipfile
 from datetime import datetime
@@ -52,7 +52,12 @@ from pathlib import Path

 import requests

-from ComicInfoBuilder import (ComicInfoBuilder, _guess_extension, _IMAGE_EXTS)
+# Shared modules live one level up (src/); needed when a module in this
+# folder is run directly as a script (the entry points set the path).
+if __name__ == "__main__":
+    sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+
+from ComicInfoBuilder import ComicInfoBuilder
 from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver
 from MALResolver import MALResolver
@@ -61,7 +66,8 @@ from MatchesCache import MatchesCache
 from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
                           _sanitize_dirname, _normalise_volume_value)
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
-from CronSchedule import CronSchedule
+from PerfStats import PerfStats
+from CoverCache import CoverCache, _IMAGE_EXTS

 try:
    from PIL import Image
@@ -129,10 +135,12 @@ class KavitaVolumeCoverUpdater:
    request_timeout  : HTTP timeout in seconds.
    log_path         : File that receives one line per updated chapter.
                       Default: <kavita_path>/volume_updater.log
-    schedule         : Cron expression (5 fields) defining when scans run,
-                       e.g. "0 19 * * 1,4" = 19:00 every Monday and
-                       Thursday.  Evaluated in local time — set the TZ env
-                       var inside Docker.  Default: "0 19 * * 1,4".
+    cover_cache_dir  : Directory for the persistent cover cache.  None ->
+                       temporary cache, deleted at process exit.
+    perf_stats       : Optional PerfStats instance for per-step timing.
+
+    Scheduling lives outside this class (see CronRunner); call update_all()
+    on whatever cadence you like.
    """

    def __init__(self,
@@ -143,7 +151,8 @@ class KavitaVolumeCoverUpdater:
                 request_timeout: int = 30,
                 api_base_url: str = "https://api.mangabaka.dev/v1",
                 log_path=None,
-                 schedule: str = "0 19 * * 1,4"):
+                 cover_cache_dir=None,
+                 perf_stats: "PerfStats | None" = None):
        self._dst = Path(kavita_path)
        self._matches_cache = matches_cache
        self._language = language
@@ -151,7 +160,7 @@ class KavitaVolumeCoverUpdater:
        self._api_base_url = api_base_url.rstrip("/")
        self._log_path = (Path(log_path) if log_path
                          else self._dst / "volume_updater.log")
-        self._cron = CronSchedule(schedule)
+        self._perf = perf_stats or PerfStats(None)

        session = requests.Session()
        session.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0")
@@ -165,51 +174,8 @@ class KavitaVolumeCoverUpdater:
        self._works_resolver = MangaBakaWorksResolver(
            api_base_url=api_base_url,
            request_timeout=request_timeout, session=session)
-
-        self._stop = threading.Event()
-        self._thread: "threading.Thread | None" = None
-
-    # ------------------------------------------------------------------
-    # Cron API (mirrors SuwayomiFolderWatcher)
-    # ------------------------------------------------------------------
-    def start(self) -> None:
-        """Starts the periodic scan thread.  Non-blocking."""
-        if self._thread is not None and self._thread.is_alive():
-            return
-        self._stop.clear()
-        self._thread = threading.Thread(
-            target=self._loop, name="KavitaVolumeCoverUpdater", daemon=True)
-        self._thread.start()
-        print(f"[{_now()}] [updater] scanning {self._dst} "
-              f"on cron '{self._cron.expression}'", flush=True)
-
-    def stop(self) -> None:
-        """Stops the scan thread (current scan finishes its series first)."""
-        self._stop.set()
-        if self._thread is not None:
-            self._thread.join(timeout=10)
-
-    def wait(self) -> None:
-        """Blocks the calling thread until stop() is invoked."""
-        self._stop.wait()
-
-    def _loop(self) -> None:
-        while not self._stop.is_set():
-            next_run = self._cron.next_after(datetime.now())
-            wait = max(0.0, (next_run - datetime.now()).total_seconds())
-            print(f"[{_now()}] [updater] next scheduled scan: "
-                  f"{next_run.isoformat(timespec='minutes')}", flush=True)
-            if self._stop.wait(wait):
-                break
-
-            try:
-                summary = self.update_all()
-                print(f"[{_now()}] [updater] scan done: "
-                      f"{summary['series_updated']} series / "
-                      f"{summary['chapters_updated']} chapters updated",
-                      flush=True)
-            except Exception as exc:
-                print(f"[{_now()}] [updater] scan ERROR: {exc}", flush=True)
+        self._cover_cache = CoverCache(
+            cover_cache_dir, session=session, request_timeout=request_timeout)

    # ------------------------------------------------------------------
    # Public scan API
@@ -225,23 +191,31 @@ class KavitaVolumeCoverUpdater:
            print(f"[updater] kavita path missing: {self._dst}", flush=True)
            return summary

-        for series_dir in sorted(self._dst.iterdir()):
-            if self._stop.is_set():
-                break
-            if not series_dir.is_dir():
-                continue
-            summary["series_scanned"] += 1
-            try:
-                updated = self.update_series(series_dir)
-            except Exception as exc:
-                print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
-                continue
-            if updated:
-                summary["series_updated"] += 1
-                summary["chapters_updated"] += updated
+        # The whole point of a scan is detecting volume assignments added
+        # since the previous run — start from fresh API data, not the
+        # process-lifetime resolver caches.
+        self._vol_resolver.clear_cache()
+        self._works_resolver.clear_cache()
+
+        run = self._perf.begin_run()
+        try:
+            for series_dir in sorted(self._dst.iterdir()):
+                if not series_dir.is_dir():
+                    continue
+                summary["series_scanned"] += 1
+                try:
+                    updated = self.update_series(series_dir, run)
+                except Exception as exc:
+                    print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
+                    continue
+                if updated:
+                    summary["series_updated"] += 1
+                    summary["chapters_updated"] += updated
+        finally:
+            run.finish()
        return summary

-    def update_series(self, series_dir: Path) -> int:
+    def update_series(self, series_dir: Path, run=None) -> int:
        """
        Updates one series folder.  Returns the number of updated chapters.

@@ -277,24 +251,29 @@ class KavitaVolumeCoverUpdater:
            mal_resolver=self._mal,
            al_resolver=self._al,
            matches_cache=self._matches_cache,
+            cover_cache=self._cover_cache,
        )
        md = builder.fetch_metadata()
        series_id = str(md.get("id") or "")

+        series_rec = (run or self._perf.begin_run()).begin_item(series_dir.name)
+
        # Resolve volumes for all null-volume chapters first (API only).
        updates: dict[str, dict] = {}   # num -> {"volume": str, "cover": tuple|None}
-        for num in sorted(missing, key=_chapter_sort_value):
-            builder.chapter = num
-            try:
-                volume = builder._determine_volume()
-            except Exception:
-                volume = None
-            if not volume:
-                continue
-            updates[num] = {"volume": volume,
-                            "cover": self._fetch_cover(series_id, volume)}
+        with series_rec.measure("resolve_volumes"):
+            for num in sorted(missing, key=_chapter_sort_value):
+                builder.chapter = num
+                try:
+                    volume = builder._determine_volume()
+                except Exception:
+                    volume = None
+                if not volume:
+                    continue
+                updates[num] = {"volume": volume,
+                                "cover": self._fetch_cover(series_id, volume)}

        if not updates:
+            series_rec.finish(ok=True)
            return 0

        first = min(chapters, key=_chapter_sort_value)
@@ -309,10 +288,13 @@ class KavitaVolumeCoverUpdater:
                continue
            # The first chapter gets a full metadata rebuild (Kavita reads
            # series metadata from it); other chapters only a volume edit.
-            ok, cover_swapped = self._apply_update(
-                cbz, builder, num,
-                volume=up["volume"], cover=up["cover"],
-                full_rebuild=(num == first))
+            chap_rec = series_rec.begin_item(num)
+            with chap_rec.measure("archive_rewrite"):
+                ok, cover_swapped = self._apply_update(
+                    cbz, builder, num,
+                    volume=up["volume"], cover=up["cover"],
+                    full_rebuild=(num == first))
+            chap_rec.finish(ok=ok)
            if not ok:
                continue
            entry["volume"] = _normalise_volume_value(up["volume"])
@@ -327,15 +309,19 @@ class KavitaVolumeCoverUpdater:
            first_entry = chapters.get(first) or {}
            cbz = series_dir / (first_entry.get("archiveName") or "")
            if first_entry.get("archiveName") and cbz.is_file():
-                ok, _ = self._apply_update(
-                    cbz, builder, first,
-                    volume=None, cover=None, full_rebuild=True)
+                chap_rec = series_rec.begin_item(f"{first} (refresh)")
+                with chap_rec.measure("archive_rewrite"):
+                    ok, _ = self._apply_update(
+                        cbz, builder, first,
+                        volume=None, cover=None, full_rebuild=True)
+                chap_rec.finish(ok=ok)
                if ok:
                    self._log(f"{series_dir.name} | chapter {first} | "
                              f"first-chapter metadata refreshed | {cbz.name}")

        if updated:
            _save_chapter_index(series_dir, index)
+        series_rec.finish(ok=True)
        return updated

    # ------------------------------------------------------------------
@@ -367,7 +353,8 @@ class KavitaVolumeCoverUpdater:
    # ------------------------------------------------------------------
    def _fetch_cover(self, series_id: str, volume) -> "tuple[str, bytes] | None":
        """
-        Downloads the MangaBaka volume cover.
+        Fetches the MangaBaka volume cover via the CoverCache (one download
+        per unique URL, even across chapters sharing a volume).
        Returns ("000<ext>", bytes) or None when no cover is available.
        """
        try:
@@ -376,13 +363,11 @@ class KavitaVolumeCoverUpdater:
            url = None
        if not url:
            return None
-        try:
-            resp = self._session.get(url, timeout=self._timeout)
-            resp.raise_for_status()
-        except requests.RequestException:
+        fetched = self._cover_cache.get(url)
+        if not fetched:
            return None
-        ext = _guess_extension(url, resp.headers.get("Content-Type", ""))
-        return (f"000{ext}", resp.content)
+        data, ext = fetched
+        return (f"000{ext}", data)

    # ------------------------------------------------------------------
    # Archive update (single read + single write per archive)
@@ -527,10 +512,7 @@ if __name__ == "__main__":
        matches_cache=MatchesCache(MATCHES_PATH),
    )

-    # One-shot scan (no cron thread):
+    # One-shot scan.  Scheduling is handled externally via CronRunner
+    # (see main_manga.py).
    summary = updater.update_all()
    print(f"\n[updater] {summary}")
-
-    # Or run on the cron schedule (default: 19:00 every Mon + Thu):
-    # updater.start()
-    # updater.wait()
@@ -43,7 +43,6 @@ Dependencies
 from __future__ import annotations

 import difflib
-import re

 import requests

@@ -94,6 +93,9 @@ class MangaDexVolumeResolver:
        self._cache: dict[str, dict] = {}
        # Cache: manga_id -> {relation_type: [title, ...]}
        self._relations_cache: dict[str, dict] = {}
+        # Cache: title_lower -> manga_id (or None) — avoids repeating the
+        # MangaDex search for every chapter of the same series.
+        self._id_cache: dict[str, "str | None"] = {}

    # ----------------------------------------------------------------------
    # Locate the manga ID
@@ -106,15 +108,25 @@ class MangaDexVolumeResolver:
        if not title or not title.strip():
            return None

-        resp = self._session.get(
-            f"{self.base_url}/manga",
-            params={"title": title, "limit": 5,
-                    "contentRating[]": ["safe", "suggestive",
-                                        "erotica", "pornographic"]},
-            timeout=self.request_timeout)
-        resp.raise_for_status()
-        results = resp.json().get("data") or []
+        key = title.strip().lower()
+        if key in self._id_cache:
+            return self._id_cache[key]
+
+        try:
+            resp = self._session.get(
+                f"{self.base_url}/manga",
+                params={"title": title, "limit": 5,
+                        "contentRating[]": ["safe", "suggestive",
+                                            "erotica", "pornographic"]},
+                timeout=self.request_timeout)
+            resp.raise_for_status()
+            results = resp.json().get("data") or []
+        except requests.RequestException:
+            # Don't cache transient failures — allow a retry next time.
+            return None
+
        if not results:
+            self._id_cache[key] = None
            return None

        def score(entry) -> float:
@@ -131,7 +143,9 @@ class MangaDexVolumeResolver:
            return best

        results.sort(key=score, reverse=True)
-        return results[0].get("id")
+        manga_id = results[0].get("id")
+        self._id_cache[key] = manga_id
+        return manga_id

    # ----------------------------------------------------------------------
    # Main function: retrieve and return volume / chapter data
@@ -30,6 +30,10 @@ from flask import Flask, jsonify, request, Response

 from MatchesCache import MatchesCache
 from ComicInfoBuilder import _pick_thumbnail_url
+from PerfWebPage import render_perf_page
+
+# Cross-link tabs shown on every perf page in the manga container.
+_PERF_TABS = [("move", "move"), ("volume/cover", "volume"), ("persons", "person")]


 _INDEX_HTML = """<!doctype html>
@@ -71,6 +75,8 @@ _INDEX_HTML = """<!doctype html>
  <button id="batchSave" class="primary">Save dirty (0)</button>
  <button id="build">Build all (rescan)</button>
  <button id="move">Start move</button>
+  <button id="syncPersons">Sync persons</button>
+  <a href="/perf/move" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
  <span class="status" id="status"></span>
 </div>

@@ -341,6 +347,23 @@ document.getElementById("move").addEventListener("click", async () => {
    btn.disabled = false;
  }
 });
+document.getElementById("syncPersons").addEventListener("click", async () => {
+  if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return;
+  const btn = document.getElementById("syncPersons");
+  btn.disabled = true;
+  setStatus("Syncing persons… (running on the server)");
+  try {
+    const r = await fetch("/api/persons/sync", { method: "POST" });
+    if (!r.ok) throw new Error(await r.text());
+    const d = await r.json();
+    setStatus("Persons: " + d.updated + " updated, " + d.skipped + " skipped, "
+      + d.not_found + " not found, " + d.conflicts + " conflicts");
+  } catch (err) {
+    setStatus("Person sync failed: " + err.message);
+  } finally {
+    btn.disabled = false;
+  }
+});
 for (const th of document.querySelectorAll("th.sortable")) {
  th.addEventListener("click", () => {
    const col = th.dataset.col;
@@ -357,6 +380,8 @@ load();
 """


+
+
 class MatchesWebApp:
    """
    Flask app exposing the MatchesCache. `mover` is required when you want
@@ -367,14 +392,22 @@ class MatchesWebApp:

    def __init__(self, cache: MatchesCache, *,
                 mover=None,
+                 person_updater=None,
+                 person_trigger: str = "web",
+                 perf_stats=None,
                 host: str = "0.0.0.0",
                 port: int = 8080):
        self._cache = cache
        self._mover = mover
+        self._person_updater = person_updater
+        self._person_trigger = person_trigger
+        # perf_stats: dict {name -> PerfStats}, keyed by e.g. "move",
+        # "volume" and "person".
+        self._perf = perf_stats or {}
        self._host = host
        self._port = port
        self._build_lock = threading.Lock()
        self._move_lock  = threading.Lock()
+        self._person_lock = threading.Lock()
        self._app = Flask(__name__)
        self._thread: "threading.Thread | None" = None
        self._register_routes()
@@ -498,3 +531,31 @@ class MatchesWebApp:
            finally:
                self._move_lock.release()
            return jsonify({"results": results})
+
+        @app.post("/api/persons/sync")
+        def api_persons_sync():
+            """
+            Runs a person sync through the configured updater and returns
+            its report as JSON.
+
+            Responds 503 when no person updater was passed to the app, 409
+            when another sync currently holds the lock, and 500 (carrying
+            the exception text) when the updater raises.
+            """
+            if self._person_updater is None:
+                return Response("no person updater configured", status=503)
+            # Non-blocking acquire: a concurrent request is rejected right
+            # away instead of queueing behind the running sync.
+            if not self._person_lock.acquire(blocking=False):
+                return Response("person sync already running", status=409)
+            try:
+                # The sync runs inside the request; the response is only
+                # sent once update_all_persons() has returned.
+                report = self._person_updater.update_all_persons(
+                    trigger=self._person_trigger,
+                    perf=self._perf.get("person"))
+            except Exception as exc:
+                return Response(f"person sync failed: {exc}", status=500)
+            finally:
+                # Released on success and on failure alike.
+                self._person_lock.release()
+            return jsonify(report)
+
+        # Perf pages: /perf (move) + /perf/<name> for the updaters.
+        @app.get("/perf")
+        @app.get("/perf/<name>")
+        def perf_page(name: str = "move") -> Response:
+            """
+            Serves the HTML perf page for `name`.  The bare /perf route
+            falls back to the "move" page.
+            """
+            # content_type (not mimetype): Werkzeug appends "; charset=utf-8"
+            # to every text/* mimetype on its own, so spelling the charset
+            # out in mimetype= would emit the parameter twice.
+            return Response(render_perf_page(name, _PERF_TABS),
+                            content_type="text/html; charset=utf-8")
+
+        @app.get("/api/perf/<name>")
+        def api_perf(name: str):
+            """
+            Returns the result of stats.all() for the PerfStats registered
+            under `name`, as JSON.  A name without an entry yields
+            {"runs": []} instead of an error.
+            """
+            stats = self._perf.get(name)
+            return jsonify(stats.all() if stats is not None else {"runs": []})
@@ -29,7 +29,6 @@ from __future__ import annotations

 import queue
 import threading
-import time
 from datetime import datetime
 from pathlib import Path

@@ -46,23 +46,30 @@ from __future__ import annotations
 import json
 import re
 import shutil
+import sys
 import xml.etree.ElementTree as ET
 import zipfile
 from pathlib import Path

 import requests

-from ComicInfoBuilder import (ComicInfoBuilder, _pick_cover_url, _pick_thumbnail_url, _SEARCH_TYPES)
+# Shared modules live one level up (src/).  The path tweak below is only
+# needed when a module in this folder is run directly as a script; the
+# entry points set the path themselves.
+if __name__ == "__main__":
+    sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+
+from ComicInfoBuilder import (ComicInfoBuilder, _pick_thumbnail_url,
+                              _SEARCH_TYPES, _natural_key)
 from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
-from KavitaPersonUpdater import KavitaPersonUpdater
 from MatchesCache import MatchesCache
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
+from CoverCache import CoverCache, _IMAGE_EXTS
+from PerfStats import PerfStats


-_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
 _CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')

 # JSON file written into each Kavita series folder, listing every chapter
@@ -133,11 +140,6 @@ _SOURCE_LABEL_RE = re.compile(
 _WIN_ILLEGAL_RE = re.compile(r'[\\/*?"<>|]')


-def _natural_key(name: str) -> list:
-    return [int(p) if p.isdigit() else p.lower()
-            for p in re.split(r"(\d+)", name)]
-
-
 def _sanitize_dirname(name: str) -> str:
    """
    Makes a string safe to use as a Windows (or SMB) directory name.
@@ -192,34 +194,6 @@ def _clean_suwayomi_title(title: str) -> str:
    return _SOURCE_LABEL_RE.sub("", title).strip()


-def _mal_id_from_metadata(md: dict) -> "int | None":
-    """Extracts the MAL ID from a MangaBaka series dict's source map."""
-    for raw_key, info in (md.get("source") or {}).items():
-        if re.sub(r"[^a-z0-9]", "", raw_key.lower()) in ("myanimelist", "mal"):
-            if isinstance(info, dict):
-                mal_id = info.get("id")
-                if mal_id is not None:
-                    try:
-                        return int(mal_id)
-                    except (TypeError, ValueError):
-                        pass
-    return None
-
-
-def _al_id_from_metadata(md: dict) -> "int | None":
-    """Extracts the AniList ID from a MangaBaka series dict's source map."""
-    for raw_key, info in (md.get("source") or {}).items():
-        if re.sub(r"[^a-z0-9]", "", raw_key.lower()) == "anilist":
-            if isinstance(info, dict):
-                al_id = info.get("id")
-                if al_id is not None:
-                    try:
-                        return int(al_id)
-                    except (TypeError, ValueError):
-                        pass
-    return None
-
-
 def _chapter_image_size(chapter_dir: Path) -> int:
    """Returns the total file size of all images in a chapter folder."""
    return sum(
@@ -330,25 +304,30 @@ class SuwayomiMover:
                      Expected layout: <root>/<Source>/<Title>/<Chapter N>/
    kavita_path     : Root of the Kavita library.
                      Series sub-directories are created automatically.
-    kavita_base_url : Kavita server URL — required only for person sync,
-                      e.g. "http://192.168.2.2:5000".
-    kavita_api_key  : Kavita API key   — required only for person sync.
    language        : ComicInfo LanguageISO and SeriesSort language ("en").
    request_timeout : HTTP timeout in seconds for all API / image requests.
    delete_source   : Remove the source chapter folder after successful pack.
+    cover_cache_dir : Directory for the persistent cover cache.  None ->
+                      temporary cache, deleted at process exit.
+    perf_stats      : Optional PerfStats instance for per-step timing.  None
+                      (default) disables profiling.
+
+    Note: Kavita person sync is no longer done here — it runs as a separate,
+    global, id-based updater on its own cron schedule (KavitaPersonUpdater).
+    The mover only touches MangaBaka / MangaDex / MAL / AniList.
    """

    def __init__(self,
                 suwayomi_path,
                 kavita_path,
                 *,
-                 kavita_base_url: "str | None" = None,
-                 kavita_api_key: "str | None" = None,
                 language: str = "en",
                 request_timeout: int = 30,
                 delete_source: bool = True,
                 matches_cache: "MatchesCache | None" = None,
-                 api_base_url: str = "https://api.mangabaka.dev/v1"):
+                 api_base_url: str = "https://api.mangabaka.dev/v1",
+                 cover_cache_dir=None,
+                 perf_stats: "PerfStats | None" = None):
        self._src = Path(suwayomi_path)
        self._dst = Path(kavita_path)
        self._language = language
@@ -356,6 +335,7 @@ class SuwayomiMover:
        self._delete_source = delete_source
        self._matches_cache = matches_cache
        self._api_base_url = api_base_url.rstrip("/")
+        self._perf = perf_stats or PerfStats(None)

        # Shared HTTP session and resolvers — reused across all series/chapters
        # to maximise cache hits and minimise API round-trips.
@@ -371,14 +351,8 @@ class SuwayomiMover:
            request_timeout=request_timeout, session=session)
        self._works_resolver = MangaBakaWorksResolver(
            request_timeout=request_timeout, session=session)
-
-        self._person_updater: "KavitaPersonUpdater | None" = None
-        if kavita_base_url and kavita_api_key:
-            self._person_updater = KavitaPersonUpdater(
-                kavita_base_url, kavita_api_key,
-                mal_resolver=self._mal,
-                al_resolver=self._al,
-                request_timeout=request_timeout)
+        self._cover_cache = CoverCache(
+            cover_cache_dir, session=session, request_timeout=request_timeout)

    # ------------------------------------------------------------------
    # Public API
@@ -394,15 +368,19 @@ class SuwayomiMover:
        dict from _process_series_dir.
        """
        results: dict = {}
-        for source_dir in sorted(self._src.iterdir()):
-            if not source_dir.is_dir():
-                continue
-            for manga_dir in sorted(source_dir.iterdir()):
-                if not manga_dir.is_dir():
+        run = self._perf.begin_run()
+        try:
+            for source_dir in sorted(self._src.iterdir()):
+                if not source_dir.is_dir():
                    continue
-                title = manga_dir.name
-                print(f"[SuwayomiMover] {title}")
-                results[title] = self._process_series_dir(manga_dir)
+                for manga_dir in sorted(source_dir.iterdir()):
+                    if not manga_dir.is_dir():
+                        continue
+                    title = manga_dir.name
+                    print(f"[SuwayomiMover] {title}")
+                    results[title] = self._process_series_dir(manga_dir, run)
+        finally:
+            run.finish()
        return results

    def process_series(self, manga_title: str) -> dict:
@@ -418,7 +396,11 @@ class SuwayomiMover:
                continue
            candidate = source_dir / manga_title
            if candidate.is_dir():
-                return self._process_series_dir(candidate)
+                run = self._perf.begin_run()
+                try:
+                    return self._process_series_dir(candidate, run)
+                finally:
+                    run.finish()
        raise FileNotFoundError(
            f"No Suwayomi directory found for '{manga_title}' under {self._src}")

@@ -505,8 +487,9 @@ class SuwayomiMover:
    # ------------------------------------------------------------------
    # Internal: series
    # ------------------------------------------------------------------
-    def _process_series_dir(self, manga_dir: Path) -> dict:
+    def _process_series_dir(self, manga_dir: Path, run=None) -> dict:
        manga_title = manga_dir.name
+        series_rec = (run or self._perf.begin_run()).begin_item(manga_title)

        chapter_dirs = sorted(
            (d for d in manga_dir.iterdir() if d.is_dir()),
@@ -550,13 +533,15 @@ class SuwayomiMover:
            mal_resolver=self._mal,
            al_resolver=self._al,
            matches_cache=self._matches_cache,
+            cover_cache=self._cover_cache,
        )

        # Fetch MangaBaka metadata now to get the canonical title and MAL ID.
        md: "dict | None" = None
        mangabaka_title = manga_title
        try:
-            md = builder.fetch_metadata()
+            with series_rec.measure("fetch_metadata"):
+                md = builder.fetch_metadata()
            mangabaka_title = md.get("title") or manga_title
        except Exception as exc:
            print(f"  [warn] metadata fetch failed: {exc}")
@@ -588,7 +573,7 @@ class SuwayomiMover:
        chapter_results: list[dict] = []
        for chapter_dir, _fields, chapter_num in pending:
            result = self._process_chapter(
-                builder, chapter_num, chapter_dir, dest_series)
+                builder, chapter_num, chapter_dir, dest_series, series_rec)
            chapter_results.append(result)
            status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
            print(f"  Chapter {chapter_num}: {status}")
@@ -599,25 +584,11 @@ class SuwayomiMover:
                }
                _save_chapter_index(dest_series, chapter_index)

-        # Sync Kavita persons once per series.
-        # Both MAL and AniList IDs come from MangaBaka's source map;
-        # AniList is used as fallback when MAL returns no characters/staff.
-        person_result: "dict | None" = None
-        if self._person_updater:
-            mal_id = (_mal_id_from_metadata(md) if md else None
-                      or self._mal.find_mal_id(builder_title))
-            al_id  = _al_id_from_metadata(md) if md else None
-            if mal_id or al_id:
-                try:
-                    person_result = self._person_updater.update_for_manga(
-                        mal_id, al_manga_id=al_id)
-                    print(f"  Persons: chars={person_result['characters'].get('updated')} "
-                          f"staff={person_result['staff'].get('updated')}")
-                except Exception as exc:
-                    person_result = {"error": str(exc)}
-                    print(f"  Persons: ERROR {exc}")
-
-        return {"chapters": chapter_results, "persons": person_result}
+        # Person sync no longer runs here — it has its own global,
+        # id-based updater on a separate cron schedule (see
+        # KavitaPersonUpdater.update_all_persons).
+        series_rec.finish()
+        return {"chapters": chapter_results}

    # ------------------------------------------------------------------
    # Internal: chapter
@@ -626,7 +597,8 @@ class SuwayomiMover:
                         builder: ComicInfoBuilder,
                         chapter_num: str,
                         chapter_dir: Path,
-                         dest_series: Path) -> dict:
+                         dest_series: Path,
+                         series_rec=None) -> dict:
        """
        Generates ComicInfo.xml for one chapter, packs it to CBZ, and
        optionally removes the source folder.
@@ -636,6 +608,11 @@ class SuwayomiMover:
        <Pages> element correctly points to the front cover).
        """
        cbz_path = dest_series / f"{chapter_dir.name}.cbz"
+        chap_rec = (series_rec or self._perf.begin_run().begin_item("")
+                    ).begin_item(chapter_num)
+        # add_pages_from_folder records its own sub-steps on this recorder.
+        builder.perf = chap_rec
+        ok = False
        try:
            builder.chapter = chapter_num
            builder.add_pages_from_folder(chapter_dir, cover_filename="000")
@@ -643,18 +620,26 @@ class SuwayomiMover:
            # by add_pages_from_folder, so it's effectively free.  Used by
            # the chapter index in the Kavita destination folder.
            try:
-                volume = builder._determine_volume()
+                with chap_rec.measure("volume"):
+                    volume = builder._determine_volume()
            except Exception:
                volume = None
-            builder.save_xml(chapter_dir)
-            _pack_to_cbz(chapter_dir, cbz_path)
+            with chap_rec.measure("save_xml"):
+                builder.save_xml(chapter_dir)
+            with chap_rec.measure("pack_cbz"):
+                _pack_to_cbz(chapter_dir, cbz_path)
            if self._delete_source:
-                shutil.rmtree(chapter_dir)
+                with chap_rec.measure("delete_source"):
+                    shutil.rmtree(chapter_dir)
+            ok = True
            return {"chapter": chapter_num, "cbz": str(cbz_path),
                    "ok": True, "volume": volume}
        except Exception as exc:
            return {"chapter": chapter_num, "cbz": str(cbz_path),
                    "ok": False, "error": str(exc)}
+        finally:
+            builder.perf = None
+            chap_rec.finish(ok=ok)


 # --------------------------------------------------------------------------
@@ -664,8 +649,6 @@ if __name__ == "__main__":
    # Local (no-Docker) smoke test.  Adjust paths to your environment.
    SUWAYOMI_PATH = r"M:\config\downloads\mangas"
    KAVITA_PATH   = r"\\192.168.2.2\root\ServerData\Kavita\test"
-    KAVITA_URL    = "http://192.168.2.2:5000"
-    KAVITA_KEY    = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA"

    # matches.json lives next to this script during local testing.
    MATCHES_PATH  = Path(__file__).resolve().parent.parent / "matches.json"
@@ -674,8 +657,6 @@ if __name__ == "__main__":
    mover = SuwayomiMover(
        SUWAYOMI_PATH,
        KAVITA_PATH,
-        kavita_base_url=KAVITA_URL,
-        kavita_api_key=KAVITA_KEY,
        delete_source=False,
        matches_cache=matches_cache,
    )
Author	SHA1	Message	Date
johannesbot	b7bec295f2	Merge pull request 'Performance and Person Updater Improvements' (#7 ) from time-measurement into master Build and Deploy / build (push) Successful in 36s Details Build and Deploy / deploy (push) Successful in 37s Details Build Release / build (push) Successful in 24s Details Reviewed-on: #7	2026-06-16 18:46:55 +02:00
johannesbot	6ca1a245a3	Person Updater overhaul	2026-06-16 18:46:17 +02:00
johannesbot	a59cff3951	Performance Improvements	2026-06-16 11:37:47 +02:00
johannesbot	b6d7f2d0af	time measurement	2026-06-15 11:23:37 +02:00
johannesbot	b0692a6527	time measurement	2026-06-15 11:23:20 +02:00
johannesbot	216771f709	merged ln metadata into manga mover Build and Deploy / build (push) Successful in 59s Details Build and Deploy / deploy (push) Successful in 24s Details	2026-06-14 10:47:47 +02:00
johannesbot	8a44b85a48	cleanup Build and Deploy / build (push) Successful in 23s Details Build and Deploy / deploy (push) Successful in 41s Details Build Release / build (push) Successful in 16s Details	2026-06-11 21:31:20 +02:00