Merge pull request 'Performance and Person Updater Improvements' (#7) from time-measurement into master

Reviewed-on: #7
2026-06-16 18:46:55 +02:00
parent 216771f709 6ca1a245a3
commit b7bec295f2
18 changed files with 1142 additions and 559 deletions
@@ -11,12 +11,18 @@ HOST_MANGA_CONFIG_PATH=/path/to/manga-config
 MANGA_WEB_PORT=8080
 SETTLE_SECONDS=600
 DELETE_SOURCE=true
 # Periodic updaters (volume/cover + global person sync) run together on
 # this cron. Sundays 10:00. Person updater also covers LN libraries.
 UPDATER_ENABLED=true
-UPDATER_SCHEDULE=0 19 * * 1,4
+UPDATER_SCHEDULE=0 10 * * 0
 COVER_CACHE_PATH=/config/covers
 PERF_PATH=/config/perf_stats.json
 VOLUME_PERF_PATH=/config/volume_perf_stats.json
 PERSON_PERF_PATH=/config/person_perf_stats.json
 # Light-novel container (kavita-lightnovel-metadata-fetcher)
 HOST_LN_CONFIG_PATH=/path/to/ln-config
 LN_WEB_PORT=8081
 LN_LIBRARY_IDS=3,5
 LN_UPDATER_ENABLED=true
@@ -13,15 +13,18 @@ services:
      SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
      DELETE_SOURCE:  "${DELETE_SOURCE:-true}"
      MATCH_PATH:     "/config/matches.json"
-      # Volume/cover back-fill updater
+      # Periodic updaters (volume/cover back-fill + global person sync) run
      # together on this cron. "0 10 * * 0" = Sundays 10:00 (local time, see TZ)
      UPDATER_ENABLED:  "${UPDATER_ENABLED:-true}"
-      # Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday
+      UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
      # (local time, see TZ)
      UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
      UPDATER_LOG:      "/config/volume_updater.log"
      # Persistent cover cache (empty = temp dir, deleted on container stop)
      COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}"
-      # Timezone for the cron schedule — without this 19:00 means 19:00 UTC
+      # Per-step timing stats (viewable at /perf, /perf/volume, /perf/person)
      PERF_PATH:        "${PERF_PATH:-/config/perf_stats.json}"
      VOLUME_PERF_PATH: "${VOLUME_PERF_PATH:-/config/volume_perf_stats.json}"
      PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
      # Timezone for the cron schedule — without this 10:00 means 10:00 UTC
      TZ:               "${TZ:-Europe/Berlin}"
    ports:
      - "${MANGA_WEB_PORT:-8080}:8080"
@@ -43,6 +46,10 @@ services:
      LIBRARY_IDS:    "${LN_LIBRARY_IDS}"
      LANGUAGE:       "${LANGUAGE:-en}"
      MATCH_PATH:     "/config/matches.json"
      # Global person sync on cron (same default cadence as the manga side)
      UPDATER_ENABLED:  "${LN_UPDATER_ENABLED:-true}"
      UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
      PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
      TZ:             "${TZ:-Europe/Berlin}"
    ports:
      - "${LN_WEB_PORT:-8081}:8080"
@@ -27,6 +27,12 @@ Environment variables
    MATCH_PATH          default /config/matches.json
    WEB_PORT            default 8080
    WEB_HOST            default 0.0.0.0
    UPDATER_ENABLED     default true  (run the person updater on cron)
    UPDATER_SCHEDULE    cron expression for the person updater,
                        default "0 10 * * 0" = Sundays 10:00
                        (local time — set TZ inside the container!)
    PERSON_PERF_PATH    JSON file for person updater timing.
                        Default /config/person_perf_stats.json
 """
 from __future__ import annotations
@@ -51,6 +57,15 @@ sys.path.insert(0, str(_BASE / "src" / "ln"))
 from MatchesCache import MatchesCache                       # noqa: E402
 from LightNovelOrchestrator import LightNovelOrchestrator   # noqa: E402
 from MatchesWebApp import MatchesWebApp                     # noqa: E402
 from PerfStats import PerfStats                             # noqa: E402
 from CronRunner import CronRunner                           # noqa: E402
 def _env_bool(name: str, default: bool) -> bool:
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("1", "true", "yes", "y", "on")
 def _env_str(name: str, default: "str | None" = None,
@@ -98,6 +113,10 @@ def main() -> int:
    web_host        = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
    web_port        = _env_int("WEB_PORT", 8080)
    library_ids     = _env_int_list("LIBRARY_IDS")
    updater_enabled  = _env_bool("UPDATER_ENABLED", True)
    updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
    person_perf_path = _env_str("PERSON_PERF_PATH",
                                "/config/person_perf_stats.json") or None
    print(f"[main] kavita url    = {kavita_url}",     flush=True)
    print(f"[main] language      = {language}",       flush=True)
@@ -107,6 +126,7 @@ def main() -> int:
    print(f"[main] web           = {web_host}:{web_port}", flush=True)
    cache = MatchesCache(match_path)
    person_perf = PerfStats(person_perf_path)
    orchestrator = LightNovelOrchestrator(
        kavita_url=kavita_url,
        kavita_api_key=kavita_api_key,
@@ -118,9 +138,22 @@ def main() -> int:
    app = MatchesWebApp(
        cache, orchestrator=orchestrator,
        default_library_ids=library_ids,
        person_perf=person_perf,
        host=web_host, port=web_port,
    )
    app.start()
    if updater_enabled:
        try:
            CronRunner(
                updater_schedule,
                lambda: orchestrator.sync_persons(trigger="cron",
                                                  perf=person_perf),
                name="person-updater").start()
        except ValueError as exc:
            print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
                  f"scheduled person sync DISABLED", flush=True)
    app.wait()
    return 0
@@ -28,13 +28,19 @@ Environment variables
    MATCH_PATH          default /config/matches.json
    WEB_PORT            default 8080  (Flask web UI for matches.json)
    WEB_HOST            default 0.0.0.0
-    UPDATER_ENABLED     default true  (volume/cover back-fill cron)
+    UPDATER_ENABLED     default true  (run volume/cover + person updaters on cron)
-    UPDATER_SCHEDULE    cron expression for the updater scans,
+    UPDATER_SCHEDULE    cron expression for the periodic updaters,
-                        default "0 19 * * 1,4" = 19:00 every Mon + Thu
+                        default "0 10 * * 0" = Sundays 10:00
                        (local time — set TZ inside the container!)
    UPDATER_LOG         default /config/volume_updater.log
    COVER_CACHE_PATH    directory for the persistent cover cache;
                        empty (default) = temporary cache, deleted on exit
    PERF_PATH           JSON file for per-step move timing stats.
                        Default /config/perf_stats.json (empty disables it)
    VOLUME_PERF_PATH    JSON file for volume/cover updater timing.
                        Default /config/volume_perf_stats.json
    PERSON_PERF_PATH    JSON file for person updater timing.
                        Default /config/person_perf_stats.json
 """
 from __future__ import annotations
@@ -42,7 +48,6 @@ from __future__ import annotations
 import os
 import sys
 from pathlib import Path
 try:
    from dotenv import load_dotenv
    load_dotenv()
@@ -61,6 +66,10 @@ from SuwayomiFolderWatcher import SuwayomiFolderWatcher        # noqa: E402,F401
 from MatchesCache import MatchesCache                          # noqa: E402
 from MatchesWebApp import MatchesWebApp                        # noqa: E402
 from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater  # noqa: E402
 from KavitaClient import KavitaClient                          # noqa: E402
 from KavitaPersonUpdater import KavitaPersonUpdater            # noqa: E402
 from PerfStats import PerfStats                                # noqa: E402
 from CronRunner import CronRunner                              # noqa: E402
 def _env_str(name: str, default: "str | None" = None,
@@ -104,9 +113,14 @@ def main() -> int:
    web_host        = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
    web_port        = _env_int("WEB_PORT", 8080)
    updater_enabled  = _env_bool("UPDATER_ENABLED", True)
-    updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
+    updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
    updater_log      = _env_str("UPDATER_LOG", "/config/volume_updater.log")
    cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
    perf_path        = _env_str("PERF_PATH", "/config/perf_stats.json") or None
    volume_perf_path = _env_str("VOLUME_PERF_PATH",
                                "/config/volume_perf_stats.json") or None
    person_perf_path = _env_str("PERSON_PERF_PATH",
                                "/config/person_perf_stats.json") or None
    print(f"[main] suwayomi  = {suwayomi_path}",  flush=True)
    print(f"[main] kavita    = {kavita_path}",    flush=True)
@@ -118,40 +132,62 @@ def main() -> int:
    print(f"[main] web       = {web_host}:{web_port}", flush=True)
    matches_cache = MatchesCache(match_path)
    perf_move   = PerfStats(perf_path)
    perf_volume = PerfStats(volume_perf_path)
    perf_person = PerfStats(person_perf_path)
    mover = SuwayomiMover(
        suwayomi_path, kavita_path,
        kavita_base_url=kavita_url,
        kavita_api_key=kavita_api_key,
        language=language,
        request_timeout=request_timeout,
        delete_source=delete_source,
        matches_cache=matches_cache,
        cover_cache_dir=cover_cache_path,
        perf_stats=perf_move,
    )
    # Standalone, global, id-based person updater (manga + LN libraries).
    person_updater = None
    if kavita_api_key:
        kavita_client = KavitaClient(kavita_url, kavita_api_key,
                                     request_timeout=request_timeout)
        person_updater = KavitaPersonUpdater(kavita_client)
    # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
-    web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port)
+    web_app = MatchesWebApp(
        matches_cache, mover=mover,
        person_updater=person_updater, person_trigger="web",
        perf_stats={"move": perf_move, "volume": perf_volume,
                    "person": perf_person},
        host=web_host, port=web_port)
    web_app.start()
    if updater_enabled:
        updater = KavitaVolumeCoverUpdater(
            kavita_path,
            matches_cache=matches_cache,
            language=language,
            request_timeout=request_timeout,
            log_path=updater_log,
            cover_cache_dir=cover_cache_path,
            perf_stats=perf_volume,
        )
        def _scheduled_job():
            updater.update_all()
            if person_updater is not None:
                person_updater.update_all_persons(trigger="cron",
                                                  perf=perf_person)
        try:
-            updater = KavitaVolumeCoverUpdater(
+            CronRunner(updater_schedule, _scheduled_job,
-                kavita_path,
+                       name="updaters").start()
                matches_cache=matches_cache,
                language=language,
                request_timeout=request_timeout,
                log_path=updater_log,
                schedule=updater_schedule,
                cover_cache_dir=cover_cache_path,
            )
            updater.start()
        except ValueError as exc:
            # Invalid cron expression — keep the service up, just without
-            # the updater, and make the config error obvious in the logs.
+            # the scheduled updaters, and surface the config error.
            print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
-                  f"volume/cover updater DISABLED", flush=True)
+                  f"scheduled updaters DISABLED", flush=True)
    # watcher.start()
    # watcher.wait()   # blocks until stop() is called via a signal
@@ -0,0 +1,87 @@
 """
 cron_runner.py
 ==============
 Runs a single callable on a cron schedule on a background thread.
 Decouples *what* runs from *when*: both the manga container (volume/cover
 updater + person updater) and the LN container (person updater) schedule
 their work through this one helper, using a shared ``CronSchedule`` for the
 ``next_after`` arithmetic.
 Usage::
    runner = CronRunner("0 10 * * 0", job=my_callable)   # Sundays 10:00
    runner.start()
    ...
    runner.stop()
 When the schedule string is invalid, the CronSchedule constructor raises
 ValueError — the caller decides whether to disable the runner or fall back.
 The schedule is evaluated in local time (set TZ inside the container).
 """
 from __future__ import annotations
 import threading
 from datetime import datetime
 from CronSchedule import CronSchedule
 def _now() -> str:
    return datetime.now().isoformat(timespec="seconds")
 class CronRunner:
    """
    Fires ``job()`` whenever the cron ``schedule`` elapses.
    Parameters
    ----------
    schedule : 5-field cron expression (see CronSchedule).
    job      : Zero-arg callable invoked on each scheduled tick.  Exceptions
               are caught and logged so a failing run does not kill the loop.
    name     : Thread name (for logs).
    """
    def __init__(self, schedule: str, job, *, name: str = "CronRunner"):
        self._cron = CronSchedule(schedule)
        self._job = job
        self._name = name
        self._stop = threading.Event()
        self._thread: "threading.Thread | None" = None
    def start(self) -> None:
        """Starts the scheduling thread.  Non-blocking."""
        if self._thread is not None and self._thread.is_alive():
            return
        self._stop.clear()
        self._thread = threading.Thread(
            target=self._loop, name=self._name, daemon=True)
        self._thread.start()
        print(f"[{_now()}] [{self._name}] scheduled on "
              f"cron '{self._cron.expression}'", flush=True)
    def stop(self) -> None:
        """Signals the loop to stop (a job already running finishes first)."""
        self._stop.set()
        if self._thread is not None:
            self._thread.join(timeout=10)
    def wait(self) -> None:
        """Blocks the calling thread until stop() is invoked."""
        self._stop.wait()
    def _loop(self) -> None:
        while not self._stop.is_set():
            next_run = self._cron.next_after(datetime.now())
            wait = max(0.0, (next_run - datetime.now()).total_seconds())
            print(f"[{_now()}] [{self._name}] next run: "
                  f"{next_run.isoformat(timespec='minutes')}", flush=True)
            if self._stop.wait(wait):
                break
            try:
                self._job()
            except Exception as exc:
                print(f"[{_now()}] [{self._name}] job ERROR: {exc}", flush=True)
@@ -204,6 +204,31 @@ class KavitaClient:
        r.raise_for_status()
        return r.json() or []
    def list_all_persons(self, *, page_size: int = 200) -> list[dict]:
        """
        Returns every PersonDto in the instance.
        Pages through POST /api/Person/all (the browse endpoint) with an
        empty filter until an empty page is returned — same paging pattern
        as list_series_in_library.
        """
        results: list[dict] = []
        page = 1
        while True:
            r = self._session.post(
                f"{self._base}/api/Person/all",
                params={"PageNumber": page, "PageSize": page_size},
                json={}, timeout=self._timeout)
            r.raise_for_status()
            chunk = r.json() or []
            if not chunk:
                break
            results.extend(chunk)
            if len(chunk) < page_size:
                break
            page += 1
        return results
    def update_person(self, payload: dict) -> None:
        """Writes a person record (malId, aniListId, description, …)."""
        r = self._session.post(f"{self._base}/api/Person/update",
@@ -2,407 +2,220 @@
 kavita_person_updater.py
 ========================
-Synchronises Kavita person / character records with MyAnimeList data.
+Synchronises Kavita character person-records with MyAnimeList / AniList data.
-For every character and staff member that MAL knows about for a given manga
+Global, id-based mode
-the updater:
+---------------------
-  1. Searches Kavita for a matching Person record (by name similarity /
+Kavita person-records are created with a disambiguated name carrying the
-     alias match, configurable threshold).
+tracker *character* id, e.g. ``Rem (MAL 118737)`` (manga: written into
-  2. Sets the MAL ID on the Kavita person if it is not yet linked.
+ComicInfo <Characters>; light novels: written by the metadata builder).
-  3. Uploads the MAL profile image when the cover is not locked and has
+``update_all_persons`` walks **every** person in the Kavita instance, reads
-     not been set in a previous sync run.
+that id from the name, looks the character up on MAL / AniList by id, and
-  4. Populates the description field when Kavita has none and MAL provides
+writes back:
-     an 'about' text (requires an extra Jikan request per character; only
+
-     performed when update_descriptions=True).
+  * the tracker id into the ``malId`` / ``aniListId`` field (when still empty),
  * a description (when the record has none),
  * the profile image (when not locked and not already set).
 Persons whose name carries no id (authors / staff, which are not
 disambiguated) are skipped.  A record already linked to a *different*
 tracker id than its name says is reported as a conflict and left untouched.
 This mode is format-independent (it only does id lookups, never title
 searches) so a single pass covers both the manga and light-novel libraries.
 All HTTP traffic to Kavita goes through the shared :class:`KavitaClient`
-(`/api/Person/search`, `/api/Person/update`, `/api/Upload/person`).
+(`/api/Person/all`, `/api/Person/update`, `/api/Upload/person`).
 Tested against Kavita 0.9.0.2.
 """
 from __future__ import annotations
 import datetime
 import requests
 from KavitaClient import KavitaClient
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
-from TextUtils import best_similarity, paragraphs_to_html, person_name_with_id
+from PerfStats import PerfStats
 from TextUtils import paragraphs_to_html, parse_person_tracker_id
 class KavitaPersonUpdater:
    """
-    Syncs Kavita Person records with MyAnimeList data.
+    Syncs Kavita character person-records with MAL / AniList data, keyed by
    the tracker id embedded in each person's name.
    Parameters
    ----------
-    client          : Shared KavitaClient (session, auth, cover uploads)
+    client       : Shared KavitaClient (session, auth, cover uploads).
-    mal_resolver    : Shared MALResolver singleton (created automatically if omitted)
+    mal_resolver : Shared MALResolver singleton (created if omitted).
-    al_resolver     : Shared AniListResolver singleton (created automatically if omitted)
+    al_resolver  : Shared AniListResolver singleton (created if omitted).
    min_name_score  : Minimum difflib similarity ratio (0–1) required to accept a
                      Kavita person as a match for a MAL name.  Default 0.80.
    """
    def __init__(self, client: KavitaClient, *,
                 mal_resolver: "MALResolver | None" = None,
-                 al_resolver: "AniListResolver | None" = None,
+                 al_resolver: "AniListResolver | None" = None):
                 min_name_score: float = 0.80):
        self._client = client
        self._min_score = min_name_score
        self._mal = mal_resolver or MALResolver()
        self._al  = al_resolver  or AniListResolver()
        # Cache: normalised name -> list of PersonDto dicts (best matches first)
        self._person_search_cache: dict[str, list[dict]] = {}
    # ------------------------------------------------------------------
-    # Public: combined update
+    # Public: global person sync
    # ------------------------------------------------------------------
-    def update_for_manga(self, mal_manga_id: "int | None", *,
+    def update_all_persons(self, *,
-                         al_manga_id: "int | None" = None,
+                           trigger: str = "cron",
-                         update_covers: bool = True,
+                           perf: "PerfStats | None" = None,
-                         update_descriptions: bool = True) -> dict:
+                           update_covers: bool = True,
                           update_descriptions: bool = True) -> dict:
        """
-        Runs a full update pass for both characters and staff of the manga.
+        Walks every Kavita person, syncing the ones whose name carries a
-        MAL is tried first; AniList is used as fallback when MAL returns nothing.
+        tracker character id.
-        Returns
+        Parameters
-        -------
+        ----------
-        {
+        trigger : Source that started this run ("cron" | "web" | "ln") —
-          "characters": {"updated": n, "skipped": n, "not_found": n},
+                  recorded in the perf-stats run meta.
-          "staff":      {"updated": n, "skipped": n, "not_found": n},
+        perf    : Optional PerfStats for per-person step timing.
-        }
+
        Returns {"trigger", "updated", "skipped", "not_found",
                 "conflicts", "errors"}.
        """
-        return {
+        perf = perf or PerfStats(None)
-            "characters": self.update_characters(
+        run = perf.begin_run(meta={"trigger": trigger})
-                mal_manga_id, al_manga_id=al_manga_id,
+        result: dict = {"trigger": trigger, "updated": 0, "skipped": 0,
-                update_covers=update_covers,
+                        "not_found": 0, "conflicts": 0, "errors": []}
                update_descriptions=update_descriptions),
            "staff": self.update_staff(
                mal_manga_id, al_manga_id=al_manga_id,
                update_covers=update_covers,
                update_descriptions=update_descriptions),
        }
-    # ------------------------------------------------------------------
+        try:
-    # Public: character update
+            persons = self._client.list_all_persons()
-    # ------------------------------------------------------------------
+        except requests.RequestException as exc:
-    def update_characters(self, mal_manga_id: "int | None", *,
+            result["errors"].append(f"list persons failed: {exc}")
-                          al_manga_id: "int | None" = None,
+            run.finish()
-                          update_covers: bool = True,
+            return result
                          update_descriptions: bool = True) -> dict:
        """
        Updates Kavita persons that match MAL/AniList characters for the manga.
        MAL is tried first; AniList is the fallback when MAL returns nothing.
-        Returns {"updated": n, "skipped": n, "not_found": n}.
+        for person in persons:
-        """
+            name = (person.get("name") or "").strip()
-        entries = self._mal.get_characters_detailed(mal_manga_id) if mal_manga_id else []
+            parsed = parse_person_tracker_id(name)
-        resolver = self._mal
+            if not parsed:
-        if not entries and al_manga_id:
+                result["skipped"] += 1          # author/staff or un-tagged
            entries = self._al.get_characters_detailed(al_manga_id)
            resolver = self._al
        return self._sync_entries(entries, "character", resolver,
                                  update_covers=update_covers,
                                  update_descriptions=update_descriptions)
    # ------------------------------------------------------------------
    # Public: staff update
    # ------------------------------------------------------------------
    def update_staff(self, mal_manga_id: "int | None", *,
                     al_manga_id: "int | None" = None,
                     update_covers: bool = True,
                     update_descriptions: bool = True) -> dict:
        """
        Updates Kavita persons that match MAL/AniList staff for the manga.
        MAL is tried first; AniList is the fallback when MAL returns nothing.
        Returns {"updated": n, "skipped": n, "not_found": n}.
        """
        entries = self._mal.get_staff_detailed(mal_manga_id) if mal_manga_id else []
        resolver = self._mal
        if not entries and al_manga_id:
            entries = self._al.get_staff_detailed(al_manga_id)
            resolver = self._al
        return self._sync_entries(entries, "staff", resolver,
                                  update_covers=update_covers,
                                  update_descriptions=update_descriptions)
    # ------------------------------------------------------------------
    # Public: cache management
    # ------------------------------------------------------------------
    def clear_cache(self) -> None:
        """Clears the Kavita person search cache."""
        self._person_search_cache.clear()
    # ------------------------------------------------------------------
    # Internal: main sync loop
    # ------------------------------------------------------------------
    def _sync_entries(self, entries: list[dict], kind: str, resolver, *,
                      update_covers: bool,
                      update_descriptions: bool) -> dict:
        result: dict = {"updated": 0, "skipped": 0, "not_found": 0,
                        "errors": []}
        for entry in entries:
            name = (entry.get("name") or "").strip()
            raw_name = (entry.get("raw_name") or "").strip()
            if not name and not raw_name:
                continue
-            if kind == "character":
+            source, tracker_id = parsed
-                # Characters are stored under their disambiguated name
+            item = run.begin_item(name)
-                # ("Rem (MAL 118737)") — see person_name_with_id.  The
+            ok = True
-                # series metadata write creates the person under exactly
+            try:
-                # this name, so only that form is searched.
+                category = self._apply_to_person(
-                search_names = [person_name_with_id(
+                    person, source, tracker_id, item,
-                    name, mal_id=entry.get("mal_id"),
+                    update_cover=update_covers,
-                    al_id=entry.get("al_id"))]
+                    update_desc=update_descriptions,
-            else:
+                    errors=result["errors"])
-                # Staff: cleaned (XML-safe) name first; if Kavita stores
+                result[category] += 1
-                # the legacy comma form, retry with the raw MAL name.
+                ok = category != "conflicts"
-                search_names = [name]
+            except Exception as exc:
-                if raw_name and raw_name != name:
+                result["errors"].append(f"{name}: {exc}")
-                    search_names.append(raw_name)
+                ok = False
-
+            finally:
-            matches: list[dict] = []
+                item.finish(ok=ok)
            for search_name in search_names:
                if not search_name:
                    continue
                matches = self._find_kavita_person(search_name)
                if matches:
                    break
            if not matches:
                result["not_found"] += 1
                continue
            changed = self._apply_mal_data(
                matches[0], entry, kind, resolver,
                update_cover=update_covers,
                update_desc=update_descriptions,
                errors=result["errors"])
            result["updated" if changed else "skipped"] += 1
        run.finish()
        print(f"[persons] trigger={trigger} updated={result['updated']} "
              f"skipped={result['skipped']} not_found={result['not_found']} "
              f"conflicts={result['conflicts']} errors={len(result['errors'])}",
              flush=True)
        return result
    # ------------------------------------------------------------------
-    # Internal: Kavita person search
+    # Internal: apply tracker data to one person
    # ------------------------------------------------------------------
-    def _find_kavita_person(self, name: str) -> list[dict]:
+    def _apply_to_person(self, person: dict, source: str, tracker_id: int,
                         item, *, update_cover: bool, update_desc: bool,
                         errors: list) -> str:
        """
-        Searches Kavita for persons matching `name`.
+        Applies MAL/AniList character data to one Kavita person.
-
+        Returns the result category: "updated" | "skipped" | "not_found"
-        Checks both the main name and any stored aliases.
+        | "conflicts".
        Returns persons sorted by similarity, filtered by min_name_score.
        Results are cached per (normalised) query name.
        """
-        key = name.lower().strip()
+        person_id = person.get("id")
        if key in self._person_search_cache:
            return self._person_search_cache[key]
        try:
            persons = self._client.search_persons(name)
        except requests.RequestException:
            self._person_search_cache[key] = []
            return []
        scored = []
        for p in persons:
            candidates = [p.get("name")] + list(p.get("aliases") or [])
            scored.append((best_similarity(key, candidates), p))
        scored.sort(key=lambda pair: pair[0], reverse=True)
        filtered = [p for score, p in scored if score >= self._min_score]
        self._person_search_cache[key] = filtered
        return filtered
    # ------------------------------------------------------------------
    # Internal: apply MAL data to a single Kavita person
    # ------------------------------------------------------------------
    def _apply_mal_data(self, person: dict, mal_entry: dict, kind: str,
                        resolver, *,
                        update_cover: bool, update_desc: bool,
                        errors: "list | None" = None) -> bool:
        """
        Applies tracker data (MAL or AniList) to one Kavita person record.
        Fields updated
        --------------
        - malId      : set when the entry carries a MAL ID and it differs
        - aniListId  : set when the entry carries an AniList ID and it differs
        - description: set when empty and the tracker provides a description
        - cover image: uploaded when not locked and no prior sync cover exists
        Returns True if any change was made.  Failures are appended to the
        `errors` list (if provided) instead of being silently swallowed.
        """
        person_id: "int | None" = person.get("id")
        if not person_id:
-            return False
+            return "skipped"
-        person_name = person.get("name") or ""
+        resolver = self._mal if source == "mal" else self._al
        id_field = "malId" if source == "mal" else "aniListId"
        current = person.get(id_field) or 0
-        # Tracker IDs — a MAL entry has mal_id set; an AniList entry has al_id.
+        # The name is authoritative; a record linked to a different id is a
-        mal_id: "int | None" = mal_entry.get("mal_id")
+        # data conflict — never overwrite it.
-        al_id:  "int | None" = mal_entry.get("al_id")
+        if current and current != tracker_id:
-        entity_id = mal_id or al_id   # used for resolver detail calls
+            errors.append(
                f"conflict: '{person.get('name')}' (#{person_id}) has "
                f"{id_field}={current} but name says {tracker_id} — skipped")
            return "conflicts"
-        current_mal_id: int = person.get("malId") or 0
+        with item.measure("detail_fetch"):
-        current_al_id:  int = person.get("aniListId") or 0
+            details = resolver.get_character_details(tracker_id)
        if not details:
            return "not_found"
-        # Collision guard: the Kavita person is already linked to a
+        need_id = not current                    # write id when still missing
-        # *different* tracker entity — same display name, different
+        description = None
        # character/person.  Never overwrite; first writer wins.
        if ((mal_id and current_mal_id and current_mal_id != mal_id)
                or (al_id and current_al_id and current_al_id != al_id)):
            if errors is not None:
                errors.append(
                    f"conflict: '{person_name}' (#{person_id}) is linked to "
                    f"malId={current_mal_id or '-'}/aniListId={current_al_id or '-'} "
                    f"but this entry has malId={mal_id or '-'}/aniListId={al_id or '-'} "
                    f"— skipped")
            return False
        needs_mal_id = bool(mal_id and current_mal_id != mal_id)
        needs_al_id  = bool(al_id  and current_al_id  != al_id)
        # ------ Lazy description fetch -----------------------------------
        description: "str | None" = None
        if update_desc and not (person.get("description") or "").strip():
-            if entity_id:
+            description = _build_character_description(details) or None
-                if kind == "character":
+        need_desc = bool(description)
                    details = resolver.get_character_details(entity_id)
                    if details:
                        description = _build_character_description(details) or None
                else:
                    details = resolver.get_person_details(entity_id)
                    if details:
                        description = _build_person_description(details) or None
        needs_desc = bool(description)
        # ------ Metadata update ------------------------------------------
        changed = False
-        if needs_mal_id or needs_al_id or needs_desc:
+        if need_id or need_desc:
-            payload: dict = {
+            payload = {
                "id":               person_id,
-                "name":             person_name,
+                "name":             person.get("name") or "",
-                # MUST stay a boolean — the cover image itself is uploaded
+                # MUST stay a boolean — the cover is uploaded separately.
                # separately via POST /api/Upload/person (below).  Putting a
                # URL here makes Kavita reject the whole payload with HTTP 400.
                "coverImageLocked": bool(person.get("coverImageLocked", False)),
                "aliases":          person.get("aliases") or [],
                "description":      description or person.get("description"),
-                "malId":    mal_id if needs_mal_id else (current_mal_id or None),
+                "malId":     tracker_id if source == "mal"
-                "aniListId": al_id if needs_al_id  else (current_al_id  or None),
+                             else (person.get("malId") or None),
                "aniListId": tracker_id if source == "al"
                             else (person.get("aniListId") or None),
            }
            try:
-                self._client.update_person(payload)
+                with item.measure("person_update"):
                    self._client.update_person(payload)
                changed = True
-            except requests.RequestException as e:
+            except requests.RequestException as exc:
-                if errors is not None:
+                errors.append(f"update failed #{person_id} "
-                    errors.append(
+                              f"'{person.get('name')}': {exc}")
                        f"Person/update failed for #{person_id} "
                        f"'{person_name}': {e}")
-        # ------ Cover image upload ----------------------------------------
+        # Cover: upload when not locked and not already set for this id.
        # Upload whenever:
        #   - caller requested cover updates
        #   - cover is NOT locked (user did not manually pin it)
        #   - we have not already uploaded this exact tracker entity's image
        #     (i.e. the tracked ID differs OR there is no cover yet).
        if update_cover and not person.get("coverImageLocked"):
-            image_url = mal_entry.get("image_url")
+            image_url = details.get("image_url")
-            already_uploaded = (
+            already = bool(current) and bool(person.get("coverImage"))
-                entity_id is not None
+            if image_url and not already:
                and (current_mal_id == mal_id or current_al_id == al_id)
                and bool(person.get("coverImage"))
            )
            if image_url and not already_uploaded:
                try:
-                    self._client.upload_person_cover(person_id, image_url)
+                    with item.measure("cover_upload"):
                        self._client.upload_person_cover(person_id, image_url)
                    changed = True
-                except requests.RequestException as e:
+                except requests.RequestException as exc:
-                    if errors is not None:
+                    errors.append(f"cover upload failed #{person_id} "
-                        errors.append(
+                                  f"'{person.get('name')}': {exc}")
                            f"cover upload failed for #{person_id} "
                            f"'{person_name}' ({image_url}): {e}")
-        return changed
+        return "updated" if changed else "skipped"
 # --------------------------------------------------------------------------
-# Module helpers: description builders
+# Module helper: character description builder
 # --------------------------------------------------------------------------
 def _format_birthday(birthday: str) -> str:
    """Converts an ISO 8601 birthday string to "D Month YYYY"."""
    if not birthday:
        return ""
    try:
        dt = datetime.date.fromisoformat(birthday.split("T")[0])
        return f"{dt.day} {dt.strftime('%B %Y')}"
    except (ValueError, AttributeError):
        return ""
 def _build_character_description(details: dict) -> str:
    """
-    Builds a Kavita-safe HTML description for a MAL character.
+    Builds a Kavita-safe HTML description for a MAL / AniList character.
-    Top line: "Favorites: N" as a link to the character's MAL page.
+    Top line: "Favorites: N" linked to the character page (when available).
    Remainder: the character's `about` text converted to HTML paragraphs.
    """
    parts: list[str] = []
    url = details.get("url") or ""
    favorites = details.get("favorites")
    if url and favorites is not None:
-        parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>')
+        parts.append(f'<p><a href="{url}" target="_blank">'
-    about = (details.get("about") or "").strip()
+                     f'Favorites: {favorites:,}</a></p>')
    if about:
        parts.append(paragraphs_to_html(about))
    return "<br>".join(parts)
 def _build_person_description(details: dict) -> str:
    """
    Builds a Kavita-safe HTML description for a MAL person (mangaka / staff).
    Renders a summary table (given name, family name, birthday, website,
    member favorites) followed by the `about` biography as HTML paragraphs.
    """
    _TD = 'style="padding-right:1.5em"'
    rows: list[str] = []
    given = (details.get("given_name") or "").strip()
    family = (details.get("family_name") or "").strip()
    birthday = details.get("birthday") or ""
    favorites = details.get("favorites")
    website = (details.get("website_url") or "").strip()
    url = (details.get("url") or "").strip()
    if given:
        rows.append(f"<tr><td {_TD}>Given name</td><td>{given}</td></tr>")
    if family:
        rows.append(f"<tr><td {_TD}>Family name</td><td>{family}</td></tr>")
    bday_str = _format_birthday(birthday)
    if bday_str:
        rows.append(f"<tr><td {_TD}>Birthday</td><td>{bday_str}</td></tr>")
    if website:
        rows.append(
            f'<tr><td {_TD}>Website</td>'
            f'<td><a href="{website}">{website}</a></td></tr>'
        )
    if favorites is not None:
        fav_cell = (f'<a href="{url}" target="_blank">{favorites:,}</a>' if url
                    else f"{favorites:,}")
        rows.append(
            f"<tr><td {_TD}>Member Favorites</td><td>{fav_cell}</td></tr>")
    parts: list[str] = []
    if rows:
        parts.append(f'<table>{"".join(rows)}</table>')
    about = (details.get("about") or "").strip()
    if about:
        parts.append(paragraphs_to_html(about))
@@ -418,18 +231,7 @@ if __name__ == "__main__":
    client = KavitaClient(os.environ["KAVITA_URL"],
                          os.environ["KAVITA_API_KEY"])
    updater = KavitaPersonUpdater(client)
-
+    report = updater.update_all_persons(trigger="cron")
-    mal = MALResolver()
+    print(report)
-    mal_id = mal.find_mal_id("よふかしのうた")
+    for err in report["errors"]:
-    print("MAL ID:", mal_id)
+        print("  ", err)
    if mal_id:
        result = updater.update_for_manga(mal_id)
        print("Characters:", {k: v for k, v in result["characters"].items()
                              if k != "errors"})
        print("Staff     :", {k: v for k, v in result["staff"].items()
                              if k != "errors"})
        # Surface any non-fatal upload / API errors for debugging
        for section in ("characters", "staff"):
            for err in result[section].get("errors", []):
                print(f"[{section}] {err}")
@@ -151,9 +151,10 @@ class MangaBakaWorksResolver:
        Returns volume-level works for a series, filtered to those that have
        a usable cover image.
-        Non-empty results are cached per series; empty results are not, so
+        Results are cached per series — including empty results, so a series
-        works added on MangaBaka later become visible without restarting
+        without works is not re-paginated for every chapter of a move run.
-        the (long-running) process.
+        The periodic cover updater calls clear_cache() before each scan, so
        works added on MangaBaka later are still picked up there.
        """
        if not series_id:
            return []
@@ -165,8 +166,7 @@ class MangaBakaWorksResolver:
        # Discard works that carry no usable cover
        works_with_cover = [w for w in all_works if w.get("images")]
-        if works_with_cover:
+        self._cache[series_id] = works_with_cover
            self._cache[series_id] = works_with_cover
        return works_with_cover
    def get_work_for_volume(self, series_id: str, volume) -> "dict | None":
@@ -228,10 +228,10 @@ class MangaBakaWorksResolver:
            if url:
                result[norm] = url
-        # Empty results are not cached — covers added on MangaBaka later
+        # Cache even an empty result so a series without volume images is not
-        # become visible without restarting the long-running process.
+        # re-paginated for every chapter.  The periodic cover updater clears
-        if result:
+        # this cache before each scan, so newly added images are still found.
-            self._images_cache[series_id] = result
+        self._images_cache[series_id] = result
        return result
    def get_cover_for_volume_from_images(self, series_id: str,
@@ -0,0 +1,254 @@
 """
 perf_stats.py
 =============
 Generic run/step performance profiler with JSON persistence, shared by the
 move pipeline and the periodic updaters (volume/cover, persons).
 Each run is a tree of *items* (e.g. series -> chapter, or one person) and
 every item carries named *step* timings.  A run also carries free-form
 ``meta`` (e.g. the trigger source ``"cron" | "web" | "ln"`` for the person
 updater).
 Data model (one entry per run, newest first)::
    {
      "runs": [
        {
          "runId":        "…",
          "startedAt":    1700000000,
          "finishedAt":   1700000123,
          "totalSeconds": 123.4,
          "meta":         {"trigger": "cron"},
          "itemCount":    2,              # top-level items
          "leafCount":    31,            # items without children
          "stepTotals":   {"cover": 41.2, "image_dimensions": 55.8, ...},
          "items": [
            {"label": "Call of the Night", "totalSeconds": 60.2, "ok": true,
             "steps": {"fetch_metadata": 1.2},
             "items": [
               {"label": "1", "totalSeconds": 11.5, "ok": true,
                "steps": {"cover": 1.8, "pack_cbz": 2.9}, "items": []}
             ]}
          ]
        }
      ]
    }
 Usage::
    perf = PerfStats(path)                       # path=None -> disabled
    run = perf.begin_run(meta={"trigger": "cron"})
    item = run.begin_item("Call of the Night")
    with item.measure("fetch_metadata"):
        ...
    chap = item.begin_item("1")
    with chap.measure("pack_cbz"):
        ...
    chap.finish()
    item.finish()                                # flushes the run to disk
    run.finish()
 When ``path`` is None every recorder is a no-op and nothing is written, so
 the profiler can be left permanently wired in at negligible cost.  The run
 is flushed after every top-level item finishes, so a long run is observable
 live and survives a crash mid-run.
 """
 from __future__ import annotations
 import json
 import threading
 import time
 import uuid
 from contextlib import contextmanager
 from pathlib import Path
 # Keep the JSON small: only the most recent runs are retained on disk.
 _MAX_RUNS = 30
 class _StepTimer:
    """
    Base recorder: accumulates ``{step_name: seconds}`` and tracks its own
    wall-clock lifetime.  ``enabled=False`` turns every method into a no-op.
    """
    def __init__(self, enabled: bool = True):
        self.steps: dict[str, float] = {}
        self._enabled = enabled
        self._t0 = time.monotonic()
    @contextmanager
    def measure(self, name: str):
        """Context manager timing a named step (accumulates on repeat use)."""
        if not self._enabled:
            yield
            return
        start = time.monotonic()
        try:
            yield
        finally:
            self.steps[name] = round(
                self.steps.get(name, 0.0) + (time.monotonic() - start), 4)
    def elapsed(self) -> float:
        return round(time.monotonic() - self._t0, 4)
 class ItemRecorder(_StepTimer):
    """
    One node in a run's item tree.  Has its own step timings and may contain
    nested child items (e.g. a series item containing chapter items).
    """
    def __init__(self, run: "RunRecorder", label: str, *,
                 parent: "ItemRecorder | None" = None,
                 enabled: bool = True):
        super().__init__(enabled)
        self._run = run
        self._label = label
        self._parent = parent
        self._children: list[dict] = []
    def begin_item(self, label: str) -> "ItemRecorder":
        return ItemRecorder(self._run, label, parent=self,
                            enabled=self._enabled)
    def finish(self, *, ok: bool = True) -> None:
        if not self._enabled:
            return
        node = {
            "label":        self._label,
            "totalSeconds": self.elapsed(),
            "ok":           ok,
            "steps":        self.steps,
            "items":        self._children,
        }
        if self._parent is not None:
            self._parent._children.append(node)
        else:
            # Top-level item: attach to the run and persist progress.
            self._run._items.append(node)
            self._run.flush()
 class RunRecorder:
    """Top-level recorder for one full run."""
    def __init__(self, stats: "PerfStats", meta: "dict | None" = None,
                 enabled: bool = True):
        self._stats = stats
        self._enabled = enabled
        self._meta = meta or {}
        self._items: list[dict] = []
        self._started = time.time()
        self._t0 = time.monotonic()
        self._run_id = uuid.uuid4().hex
    def begin_item(self, label: str) -> ItemRecorder:
        return ItemRecorder(self, label, parent=None, enabled=self._enabled)
    def _snapshot(self) -> dict:
        step_totals: dict[str, float] = {}
        leaf_count = 0
        def walk(node: dict) -> None:
            nonlocal leaf_count
            for step, secs in node["steps"].items():
                step_totals[step] = round(step_totals.get(step, 0.0) + secs, 4)
            if node["items"]:
                for child in node["items"]:
                    walk(child)
            else:
                leaf_count += 1
        for item in self._items:
            walk(item)
        return {
            "runId":        self._run_id,
            "startedAt":    round(self._started),
            "finishedAt":   round(time.time()),
            "totalSeconds": round(time.monotonic() - self._t0, 4),
            "meta":         self._meta,
            "itemCount":    len(self._items),
            "leafCount":    leaf_count,
            "stepTotals":   step_totals,
            "items":        self._items,
        }
    def flush(self) -> "dict | None":
        """Writes the run's current state to disk (upsert by runId)."""
        if not self._enabled:
            return None
        run = self._snapshot()
        self._stats._upsert_run(run)
        return run
    def finish(self) -> "dict | None":
        """Persists the final run state.  Returns the run dict."""
        return self.flush()
 class PerfStats:
    """
    Profiler facade + JSON persistence.
    Parameters
    ----------
    path : Destination JSON file.  None disables the profiler entirely
           (every recorder becomes a no-op and nothing is written).
    """
    def __init__(self, path=None):
        self._path = Path(path) if path else None
        self._lock = threading.Lock()
    @property
    def enabled(self) -> bool:
        return self._path is not None
    def begin_run(self, meta: "dict | None" = None) -> RunRecorder:
        return RunRecorder(self, meta=meta, enabled=self.enabled)
    # ------------------------------------------------------------------
    # Read / write
    # ------------------------------------------------------------------
    def all(self) -> dict:
        """Returns the persisted runs ({"runs": [...]}); newest first."""
        if not self._path or not self._path.is_file():
            return {"runs": []}
        try:
            with self._path.open("r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, json.JSONDecodeError):
            return {"runs": []}
        if not isinstance(data, dict) or not isinstance(data.get("runs"), list):
            return {"runs": []}
        return data
    def _upsert_run(self, run: dict) -> None:
        """
        Inserts a new run (newest first) or replaces the existing entry with
        the same runId — so incremental flushes during a run update one entry
        rather than appending a duplicate after every item.
        """
        if not self._path:
            return
        with self._lock:
            runs = self.all()["runs"]
            run_id = run.get("runId")
            for i, existing in enumerate(runs):
                if existing.get("runId") == run_id:
                    runs[i] = run
                    break
            else:
                runs.insert(0, run)         # newest first
                del runs[_MAX_RUNS:]        # cap history
            self._path.parent.mkdir(parents=True, exist_ok=True)
            tmp = self._path.with_suffix(self._path.suffix + ".tmp")
            with tmp.open("w", encoding="utf-8") as f:
                json.dump({"runs": runs}, f, ensure_ascii=False, indent=2)
            tmp.replace(self._path)
@@ -0,0 +1,160 @@
 """
 perf_web_page.py
 ================
 Shared HTML page for browsing PerfStats output, used by both container web
 UIs.  ``render_perf_page(name, tabs)`` returns a standalone page that loads
 ``/api/perf/<name>`` and renders each run's step totals plus the nested item
 tree (series -> chapter, or one person, …) and the run trigger from meta.
 ``tabs`` is a list of ``(label, name)`` pairs for cross-links between the
 available perf datasets in that container.
 """
 from __future__ import annotations
 _PERF_PAGE = """<!doctype html>
 <html lang="en">
 <head>
  <meta charset="utf-8">
  <title>__PERF_NAME__ performance</title>
  <style>
    body  { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
    h1    { margin: 0 0 1rem; font-size: 1.4rem; }
    h2    { font-size: 1.05rem; margin: 1.4rem 0 .5rem; color:#cbd5e1; }
    a     { color:#60a5fa; text-decoration:none; }
    a:hover { text-decoration:underline; }
    .tabs { margin-bottom:1rem; }
    .tabs a { margin-right:1rem; }
    .tabs a.active { font-weight:bold; text-decoration:underline; }
    .bar  { display:flex; gap:.6rem; align-items:center; margin-bottom:1rem; flex-wrap:wrap; }
    select, button { padding:.35rem .6rem; background:#222; color:#eee; border:1px solid #555; }
    .summary { color:#9ca3af; margin:.3rem 0 1rem; }
    table { border-collapse: collapse; width: 100%; margin-bottom:.5rem; }
    th, td { border: 1px solid #333; padding: .35rem .6rem; text-align: left; }
    th { background:#1d1d1d; }
    td.num { text-align:right; font-variant-numeric: tabular-nums; white-space:nowrap; }
    .barcell { position:relative; }
    .barfill { position:absolute; left:0; top:0; bottom:0; background:#2563eb33; z-index:0; }
    .barcell span { position:relative; z-index:1; }
    details { margin:.2rem 0 .2rem 1rem; }
    summary { cursor:pointer; padding:.2rem 0; }
    .chip { color:#9ca3af; font-size:.85rem; }
    .err  { color:#f87171; }
  </style>
 </head>
 <body>
 <h1>Performance: __PERF_NAME__ <a href="/" style="font-size:.9rem;">&#9666; back</a></h1>
 <div class="tabs">__PERF_TABS__</div>
 <div class="bar">
  <label>Run: <select id="runSelect"></select></label>
  <button id="reload">Reload</button>
  <span class="summary" id="summary"></span>
 </div>
 <div id="content"></div>
 <script>
 const PERF_NAME = "__PERF_NAME__";
 let runs = [];
 for (const a of document.querySelectorAll(".tabs a")) {
  if (a.getAttribute("href") === "/perf/" + PERF_NAME) a.classList.add("active");
 }
 function fmtSecs(s) { return (s || 0).toFixed(2) + "s"; }
 function fmtTime(unix) { return unix ? new Date(unix * 1000).toLocaleString() : ""; }
 function esc(s) {
  return String(s).replace(/[&<>]/g, c => ({"&":"&amp;","<":"&lt;",">":"&gt;"}[c]));
 }
 function stepTable(totals, grandTotal) {
  const entries = Object.entries(totals || {}).sort((a, b) => b[1] - a[1]);
  if (!entries.length) return "<p class=chip>(no steps recorded)</p>";
  const max = entries[0][1] || 1;
  let rows = "";
  for (const [name, secs] of entries) {
    const pct = grandTotal ? (secs / grandTotal * 100) : 0;
    const w = (secs / max * 100);
    rows += "<tr><td>" + esc(name) + "</td>"
         + "<td class='num'>" + fmtSecs(secs) + "</td>"
         + "<td class='num'>" + pct.toFixed(1) + "%</td>"
         + "<td class='barcell'><div class='barfill' style='width:" + w + "%'></div>"
         + "<span>&nbsp;</span></td></tr>";
  }
  return "<table><thead><tr><th>Step</th><th class=num>Total</th>"
       + "<th class=num>% of run</th><th>&nbsp;</th></tr></thead><tbody>"
       + rows + "</tbody></table>";
 }
 // Renders one item node (and its children) as a nested <details> block.
 function itemNode(it) {
  const steps = Object.entries(it.steps || {}).sort((a, b) => b[1] - a[1])
    .map(([n, v]) => esc(n) + " " + fmtSecs(v)).join(", ") || "—";
  const head = "<summary><b>" + esc(it.label) + "</b>"
    + (it.ok === false ? " <span class=err>(failed)</span>" : "")
    + " <span class=chip>" + fmtSecs(it.totalSeconds) + " · " + steps + "</span></summary>";
  const kids = (it.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
  const body = kids.map(itemNode).join("");
  return "<details>" + head + body + "</details>";
 }
 function renderRun(run) {
  const c = document.getElementById("content");
  if (!run) { c.innerHTML = "<p class=chip>No runs recorded yet.</p>"; return; }
  const trigger = (run.meta && run.meta.trigger) ? " · trigger: " + run.meta.trigger : "";
  document.getElementById("summary").textContent =
    fmtTime(run.startedAt) + " · " + fmtSecs(run.totalSeconds) + " · "
    + run.itemCount + " items · " + run.leafCount + " leaves" + trigger;
  let html = "<h2>Steps (summed over all items)</h2>"
           + stepTable(run.stepTotals, run.totalSeconds)
           + "<h2>Detail</h2>";
  const items = (run.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
  html += items.map(itemNode).join("") || "<p class=chip>(no items)</p>";
  c.innerHTML = html;
 }
 function renderSelect() {
  const sel = document.getElementById("runSelect");
  sel.innerHTML = "";
  runs.forEach((r, i) => {
    const o = document.createElement("option");
    o.value = i;
    const trig = (r.meta && r.meta.trigger) ? " " + r.meta.trigger : "";
    o.textContent = fmtTime(r.startedAt) + "  (" + fmtSecs(r.totalSeconds) + ")" + trig;
    sel.appendChild(o);
  });
 }
 async function load() {
  const r = await fetch("/api/perf/" + PERF_NAME);
  const data = await r.json();
  runs = data.runs || [];
  renderSelect();
  renderRun(runs[0]);
 }
 document.getElementById("runSelect").addEventListener("change", e => {
  renderRun(runs[e.target.value]);
 });
 document.getElementById("reload").addEventListener("click", load);
 load();
 </script>
 </body>
 </html>
 """
 def render_perf_page(name: str, tabs: "list[tuple[str, str]]") -> str:
    """
    Returns the perf page HTML for dataset ``name``.
    tabs : list of (label, dataset_name) for the cross-link bar.
    """
    tab_html = " ".join(
        f'<a href="/perf/{n}">{label}</a>' for label, n in tabs)
    return (_PERF_PAGE
            .replace("__PERF_TABS__", tab_html)
            .replace("__PERF_NAME__", name))
@@ -70,3 +70,30 @@ def person_name_with_id(name: str, *,
    if al_id:
        return f"{name} (AL {al_id})"
    return name
 # Matches the suffix produced by person_name_with_id at the end of a name.
 _TRACKER_ID_RE = re.compile(r"\s*\((MAL|AL)\s+(\d+)\)\s*$", re.IGNORECASE)
 def parse_person_tracker_id(name: str) -> "tuple[str, int] | None":
    """
    Inverse of person_name_with_id: extracts the tracker id from a
    disambiguated Kavita person name.
    "Rem (MAL 118737)" -> ("mal", 118737)
    "Subaru (AL 88311)" -> ("al", 88311)
    "Kotoyama"          -> None   (no id suffix — e.g. an author/staff record)
    Returns ("mal" | "al", id) or None.
    """
    if not name:
        return None
    m = _TRACKER_ID_RE.search(name)
    if not m:
        return None
    source = "mal" if m.group(1).upper() == "MAL" else "al"
    try:
        return source, int(m.group(2))
    except ValueError:
        return None
@@ -192,14 +192,9 @@ class LightNovelOrchestrator:
        except Exception as exc:
            return {"ok": False, "error": f"series update failed: {exc}"}
-        # Persons
+        # Person sync no longer runs per series — it has its own global,
-        try:
+        # id-based updater (sync_persons / KavitaPersonUpdater.update_all_persons)
-            person_report = self._person_updater.update_for_manga(
+        # on a separate cron schedule.
                built.get("malId"),
                al_manga_id=built.get("anilistId"),
            )
        except Exception as exc:
            person_report = {"error": str(exc)}
        # Relationships + collection
        try:
@@ -221,10 +216,20 @@ class LightNovelOrchestrator:
            "title":         cached_title,
            "mangabakaId":   built.get("mangabakaId"),
            "series":        series_report,
            "persons":       person_report,
            "relationships": relation_report,
        }
    # ------------------------------------------------------------------
    # Person sync (global, id-based — independent of series updates)
    # ------------------------------------------------------------------
    def sync_persons(self, *, trigger: str = "ln", perf=None) -> dict:
        """
        Runs the global, id-based person updater over every Kavita person.
        Covers both manga and light-novel libraries in one pass.
        """
        return self._person_updater.update_all_persons(
            trigger=trigger, perf=perf)
    def update_all(self, library_ids: "list[int] | None") -> dict:
        """Updates every cached series in the given libraries."""
        if library_ids is None:
@@ -37,6 +37,10 @@ from flask import Flask, jsonify, request, Response
 from MatchesCache import MatchesCache
 from LightNovelMetadataBuilder import pick_thumbnail_url
 from PerfWebPage import render_perf_page
 # Only the person dataset exists in the LN container.
 _PERF_TABS = [("persons", "person")]
 def _int_list(values) -> list[int]:
@@ -97,7 +101,9 @@ _INDEX_HTML = r"""<!doctype html>
  <button id="reload">Reload</button>
  <button id="build">Match all in libraries</button>
  <button id="updateAll" class="success">Update all in libraries</button>
  <button id="syncPersons">Sync persons</button>
  <button id="batchSave" class="primary">Save dirty (0)</button>
  <a href="/perf/person" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
  <span class="status" id="status"></span>
 </div>
@@ -497,12 +503,25 @@ async function startUpdateAll() {
  }
 }
 async function startSyncPersons() {
  if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return;
  setStatus("Person sync started");
  try {
    const r = await fetch("/api/persons/sync", { method: "POST" });
    if (!r.ok) throw new Error(await r.text());
    startPolling();
  } catch (err) {
    setStatus("Person sync failed: " + err.message);
  }
 }
 document.getElementById("filter").addEventListener("input", applyFilter);
 document.getElementById("libraries").addEventListener("change", applyFilter);
 document.getElementById("reload").addEventListener("click", load);
 document.getElementById("batchSave").addEventListener("click", batchSave);
 document.getElementById("build").addEventListener("click", startBuild);
 document.getElementById("updateAll").addEventListener("click", startUpdateAll);
 document.getElementById("syncPersons").addEventListener("click", startSyncPersons);
 for (const th of document.querySelectorAll("th.sortable")) {
  th.addEventListener("click", () => {
    const col = th.dataset.col;
@@ -581,11 +600,13 @@ class MatchesWebApp:
    def __init__(self, cache: MatchesCache, *,
                 orchestrator=None,
                 default_library_ids: "list[int] | None" = None,
                 person_perf=None,
                 host: str = "0.0.0.0",
                 port: int = 8080):
        self._cache = cache
        self._orchestrator = orchestrator
        self._defaults = list(default_library_ids or [])
        self._person_perf = person_perf
        self._host = host
        self._port = port
        self._job = _JobState()
@@ -757,8 +778,38 @@ class MatchesWebApp:
                return Response("a job is already running", status=409)
            return jsonify({"started": label})
        @app.post("/api/persons/sync")
        def api_persons_sync():
            if self._orchestrator is None:
                return Response("no orchestrator configured", status=503)
            def task(job: _JobState):
                report = self._orchestrator.sync_persons(
                    trigger="ln", perf=self._person_perf)
                job.append(f"updated={report['updated']} "
                           f"skipped={report['skipped']} "
                           f"not_found={report['not_found']} "
                           f"conflicts={report['conflicts']}")
                for err in report.get("errors", []):
                    job.append(f"  {err}")
            if not self._job.start("person sync", task):
                return Response("a job is already running", status=409)
            return jsonify({"started": "person sync"})
        @app.get("/api/status")
        def api_status():
            snap = self._job.snapshot()
            snap["defaults"] = self._defaults
            return jsonify(snap)
        @app.get("/perf")
        @app.get("/perf/<name>")
        def perf_page(name: str = "person") -> Response:
            return Response(render_perf_page(name, _PERF_TABS),
                            mimetype="text/html; charset=utf-8")
        @app.get("/api/perf/<name>")
        def api_perf(name: str):
            stats = self._person_perf if name == "person" else None
            return jsonify(stats.all() if stats is not None else {"runs": []})
@@ -40,6 +40,7 @@ from __future__ import annotations
 import re
 import sys
 import xml.etree.ElementTree as ET
 from contextlib import contextmanager
 from pathlib import Path
 import requests
@@ -65,6 +66,16 @@ except ImportError:
    _HAS_PIL = False
@contextmanager
 def _no_measure():
    """No-op stand-in for a perf recorder's measure() context manager."""
    yield
 # Sentinel marking a per-chapter memo slot as "not computed yet".
 _UNSET = object()
 # --------------------------------------------------------------------------
 # Constants
 # --------------------------------------------------------------------------
@@ -218,10 +229,22 @@ class ComicInfoBuilder:
        self._matches_cache = matches_cache
        self._cover_cache = cover_cache or _default_cover_cache()
        # Optional performance recorder (duck-typed: any object with a
        # .measure(name) context manager).  The mover sets this per chapter;
        # when None, _measure() is a no-op so the builder stays decoupled
        # from PerfStats and works standalone (e.g. the cover updater).
        self.perf = None
        self._metadata: "dict | None" = None
        self._pages: list[dict] = []
        self._cover_path: "Path | None" = None
        self._suwayomi_data: dict = {}
        # Per-chapter memo for _determine_volume (resolved up to 3x/chapter
        # otherwise: cover download, explicit volume step, XML build).
        self._volume_memo = _UNSET
        # Per-series cache for full series fetches by id (parent series for
        # SeriesGroup, merged-series redirects) — reused across all chapters.
        self._series_by_id_cache: dict[str, dict] = {}
    # ----- Repr -----------------------------------------------------------
    def __repr__(self) -> str:
@@ -261,6 +284,13 @@ class ComicInfoBuilder:
        self._pages = []
        self._cover_path = None
        self._suwayomi_data = {}
        self._volume_memo = _UNSET
    def _measure(self, name: str):
        """Times a named step on the attached recorder; no-op when unset."""
        if self.perf is not None:
            return self.perf.measure(name)
        return _no_measure()
    # ======================================================================
    # Public XML functions
@@ -305,11 +335,13 @@ class ComicInfoBuilder:
        if not folder.is_dir():
            raise NotADirectoryError(f"Folder not found: {folder}")
-        self._suwayomi_data = self._read_existing_comicinfo(folder)
+        with self._measure("read_comicinfo"):
            self._suwayomi_data = self._read_existing_comicinfo(folder)
        self._cover_path = None
        if download_cover:
-            self._cover_path = self._download_cover(folder, cover_filename)
+            with self._measure("cover"):
                self._cover_path = self._download_cover(folder, cover_filename)
        cover_resolved = self._cover_path.resolve() if self._cover_path else None
        story_images: list[Path] = []
@@ -329,20 +361,23 @@ class ComicInfoBuilder:
        ordered.extend((img, "Story") for img in story_images)
        self._pages = []
-        for index, (img_path, page_type) in enumerate(ordered):
+        # Probing every page for its pixel dimensions reads each file — on a
-            width, height = self._image_dimensions(img_path)
+        # network share this is often the dominant per-chapter cost.
-            try:
+        with self._measure("image_dimensions"):
-                size = img_path.stat().st_size
+            for index, (img_path, page_type) in enumerate(ordered):
-            except OSError:
+                width, height = self._image_dimensions(img_path)
-                size = None
+                try:
-            self._pages.append({
+                    size = img_path.stat().st_size
-                "image": index,
+                except OSError:
-                "type": page_type,
+                    size = None
-                "width": width,
+                self._pages.append({
-                "height": height,
+                    "image": index,
-                "size": size,
+                    "type": page_type,
-                "double": bool(width and height and width > height),
+                    "width": width,
-            })
+                    "height": height,
                    "size": size,
                    "double": bool(width and height and width > height),
                })
        return {
            "page_count": len(self._pages),
@@ -413,12 +448,20 @@ class ComicInfoBuilder:
        return series
    def _fetch_series_by_id(self, series_id) -> dict:
        # Cached per builder (i.e. per series): SeriesGroup resolution calls
        # this for the parent on every chapter — without the cache that is
        # one MangaBaka request per chapter for the same parent id.
        key = str(series_id)
        cached = self._series_by_id_cache.get(key)
        if cached is not None:
            return cached
        url = f"{self.api_base_url}/series/{series_id}"
        resp = self._session.get(url, timeout=self.request_timeout)
        resp.raise_for_status()
        data = resp.json().get("data")
        if not data:
            raise RuntimeError(f"Series with ID {series_id} not found.")
        self._series_by_id_cache[key] = data
        return data
    # ======================================================================
@@ -554,6 +597,18 @@ class ComicInfoBuilder:
    # Volume determination
    # ======================================================================
    def _determine_volume(self) -> "str | None":
        """
        Resolves the volume for the current chapter, memoized per chapter.
        The result is reused across the three call sites per chapter (cover
        download, explicit volume step, XML build); the memo is cleared
        whenever the chapter or manga title changes (see _clear_results).
        """
        if self._volume_memo is _UNSET:
            self._volume_memo = self._resolve_volume()
        return self._volume_memo
    def _resolve_volume(self) -> "str | None":
        """
        Resolves the volume for the current chapter via MangaDex.
        Falls back to estimation when the chapter is absent from MangaDex.
@@ -45,7 +45,6 @@ from __future__ import annotations
 import io
 import sys
 import threading
 import xml.etree.ElementTree as ET
 import zipfile
 from datetime import datetime
@@ -67,7 +66,7 @@ from MatchesCache import MatchesCache
 from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
                           _sanitize_dirname, _normalise_volume_value)
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
-from CronSchedule import CronSchedule
+from PerfStats import PerfStats
 from CoverCache import CoverCache, _IMAGE_EXTS
 try:
@@ -136,12 +135,12 @@ class KavitaVolumeCoverUpdater:
    request_timeout  : HTTP timeout in seconds.
    log_path         : File that receives one line per updated chapter.
                       Default: <kavita_path>/volume_updater.log
    schedule         : Cron expression (5 fields) defining when scans run,
                       e.g. "0 19 * * 1,4" = 19:00 every Monday and
                       Thursday.  Evaluated in local time — set the TZ env
                       var inside Docker.  Default: "0 19 * * 1,4".
    cover_cache_dir  : Directory for the persistent cover cache.  None ->
                       temporary cache, deleted at process exit.
    perf_stats       : Optional PerfStats instance for per-step timing.
    Scheduling lives outside this class (see CronRunner); call update_all()
    on whatever cadence you like.
    """
    def __init__(self,
@@ -152,8 +151,8 @@ class KavitaVolumeCoverUpdater:
                 request_timeout: int = 30,
                 api_base_url: str = "https://api.mangabaka.dev/v1",
                 log_path=None,
-                 schedule: str = "0 19 * * 1,4",
+                 cover_cache_dir=None,
-                 cover_cache_dir=None):
+                 perf_stats: "PerfStats | None" = None):
        self._dst = Path(kavita_path)
        self._matches_cache = matches_cache
        self._language = language
@@ -161,7 +160,7 @@ class KavitaVolumeCoverUpdater:
        self._api_base_url = api_base_url.rstrip("/")
        self._log_path = (Path(log_path) if log_path
                          else self._dst / "volume_updater.log")
-        self._cron = CronSchedule(schedule)
+        self._perf = perf_stats or PerfStats(None)
        session = requests.Session()
        session.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0")
@@ -178,51 +177,6 @@ class KavitaVolumeCoverUpdater:
        self._cover_cache = CoverCache(
            cover_cache_dir, session=session, request_timeout=request_timeout)
        self._stop = threading.Event()
        self._thread: "threading.Thread | None" = None
    # ------------------------------------------------------------------
    # Cron API (mirrors SuwayomiFolderWatcher)
    # ------------------------------------------------------------------
    def start(self) -> None:
        """Starts the periodic scan thread.  Non-blocking."""
        if self._thread is not None and self._thread.is_alive():
            return
        self._stop.clear()
        self._thread = threading.Thread(
            target=self._loop, name="KavitaVolumeCoverUpdater", daemon=True)
        self._thread.start()
        print(f"[{_now()}] [updater] scanning {self._dst} "
              f"on cron '{self._cron.expression}'", flush=True)
    def stop(self) -> None:
        """Stops the scan thread (current scan finishes its series first)."""
        self._stop.set()
        if self._thread is not None:
            self._thread.join(timeout=10)
    def wait(self) -> None:
        """Blocks the calling thread until stop() is invoked."""
        self._stop.wait()
    def _loop(self) -> None:
        while not self._stop.is_set():
            next_run = self._cron.next_after(datetime.now())
            wait = max(0.0, (next_run - datetime.now()).total_seconds())
            print(f"[{_now()}] [updater] next scheduled scan: "
                  f"{next_run.isoformat(timespec='minutes')}", flush=True)
            if self._stop.wait(wait):
                break
            try:
                summary = self.update_all()
                print(f"[{_now()}] [updater] scan done: "
                      f"{summary['series_updated']} series / "
                      f"{summary['chapters_updated']} chapters updated",
                      flush=True)
            except Exception as exc:
                print(f"[{_now()}] [updater] scan ERROR: {exc}", flush=True)
    # ------------------------------------------------------------------
    # Public scan API
    # ------------------------------------------------------------------
@@ -243,23 +197,25 @@ class KavitaVolumeCoverUpdater:
        self._vol_resolver.clear_cache()
        self._works_resolver.clear_cache()
-        for series_dir in sorted(self._dst.iterdir()):
+        run = self._perf.begin_run()
-            if self._stop.is_set():
+        try:
-                break
+            for series_dir in sorted(self._dst.iterdir()):
-            if not series_dir.is_dir():
+                if not series_dir.is_dir():
-                continue
+                    continue
-            summary["series_scanned"] += 1
+                summary["series_scanned"] += 1
-            try:
+                try:
-                updated = self.update_series(series_dir)
+                    updated = self.update_series(series_dir, run)
-            except Exception as exc:
+                except Exception as exc:
-                print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
+                    print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
-                continue
+                    continue
-            if updated:
+                if updated:
-                summary["series_updated"] += 1
+                    summary["series_updated"] += 1
-                summary["chapters_updated"] += updated
+                    summary["chapters_updated"] += updated
        finally:
            run.finish()
        return summary
-    def update_series(self, series_dir: Path) -> int:
+    def update_series(self, series_dir: Path, run=None) -> int:
        """
        Updates one series folder.  Returns the number of updated chapters.
@@ -300,20 +256,24 @@ class KavitaVolumeCoverUpdater:
        md = builder.fetch_metadata()
        series_id = str(md.get("id") or "")
        series_rec = (run or self._perf.begin_run()).begin_item(series_dir.name)
        # Resolve volumes for all null-volume chapters first (API only).
        updates: dict[str, dict] = {}   # num -> {"volume": str, "cover": tuple|None}
-        for num in sorted(missing, key=_chapter_sort_value):
+        with series_rec.measure("resolve_volumes"):
-            builder.chapter = num
+            for num in sorted(missing, key=_chapter_sort_value):
-            try:
+                builder.chapter = num
-                volume = builder._determine_volume()
+                try:
-            except Exception:
+                    volume = builder._determine_volume()
-                volume = None
+                except Exception:
-            if not volume:
+                    volume = None
-                continue
+                if not volume:
-            updates[num] = {"volume": volume,
+                    continue
-                            "cover": self._fetch_cover(series_id, volume)}
+                updates[num] = {"volume": volume,
                                "cover": self._fetch_cover(series_id, volume)}
        if not updates:
            series_rec.finish(ok=True)
            return 0
        first = min(chapters, key=_chapter_sort_value)
@@ -328,10 +288,13 @@ class KavitaVolumeCoverUpdater:
                continue
            # The first chapter gets a full metadata rebuild (Kavita reads
            # series metadata from it); other chapters only a volume edit.
-            ok, cover_swapped = self._apply_update(
+            chap_rec = series_rec.begin_item(num)
-                cbz, builder, num,
+            with chap_rec.measure("archive_rewrite"):
-                volume=up["volume"], cover=up["cover"],
+                ok, cover_swapped = self._apply_update(
-                full_rebuild=(num == first))
+                    cbz, builder, num,
                    volume=up["volume"], cover=up["cover"],
                    full_rebuild=(num == first))
            chap_rec.finish(ok=ok)
            if not ok:
                continue
            entry["volume"] = _normalise_volume_value(up["volume"])
@@ -346,15 +309,19 @@ class KavitaVolumeCoverUpdater:
            first_entry = chapters.get(first) or {}
            cbz = series_dir / (first_entry.get("archiveName") or "")
            if first_entry.get("archiveName") and cbz.is_file():
-                ok, _ = self._apply_update(
+                chap_rec = series_rec.begin_item(f"{first} (refresh)")
-                    cbz, builder, first,
+                with chap_rec.measure("archive_rewrite"):
-                    volume=None, cover=None, full_rebuild=True)
+                    ok, _ = self._apply_update(
                        cbz, builder, first,
                        volume=None, cover=None, full_rebuild=True)
                chap_rec.finish(ok=ok)
                if ok:
                    self._log(f"{series_dir.name} | chapter {first} | "
                              f"first-chapter metadata refreshed | {cbz.name}")
        if updated:
            _save_chapter_index(series_dir, index)
        series_rec.finish(ok=True)
        return updated
    # ------------------------------------------------------------------
@@ -545,10 +512,7 @@ if __name__ == "__main__":
        matches_cache=MatchesCache(MATCHES_PATH),
    )
-    # One-shot scan (no cron thread):
+    # One-shot scan.  Scheduling is handled externally via CronRunner
    # (see main_manga.py).
    summary = updater.update_all()
    print(f"\n[updater] {summary}")
    # Or run on the cron schedule (default: 19:00 every Mon + Thu):
    # updater.start()
    # updater.wait()
@@ -93,6 +93,9 @@ class MangaDexVolumeResolver:
        self._cache: dict[str, dict] = {}
        # Cache: manga_id -> {relation_type: [title, ...]}
        self._relations_cache: dict[str, dict] = {}
        # Cache: title_lower -> manga_id (or None) — avoids repeating the
        # MangaDex search for every chapter of the same series.
        self._id_cache: dict[str, "str | None"] = {}
    # ----------------------------------------------------------------------
    # Locate the manga ID
@@ -105,15 +108,25 @@ class MangaDexVolumeResolver:
        if not title or not title.strip():
            return None
-        resp = self._session.get(
+        key = title.strip().lower()
-            f"{self.base_url}/manga",
+        if key in self._id_cache:
-            params={"title": title, "limit": 5,
+            return self._id_cache[key]
-                    "contentRating[]": ["safe", "suggestive",
+
-                                        "erotica", "pornographic"]},
+        try:
-            timeout=self.request_timeout)
+            resp = self._session.get(
-        resp.raise_for_status()
+                f"{self.base_url}/manga",
-        results = resp.json().get("data") or []
+                params={"title": title, "limit": 5,
                        "contentRating[]": ["safe", "suggestive",
                                            "erotica", "pornographic"]},
                timeout=self.request_timeout)
            resp.raise_for_status()
            results = resp.json().get("data") or []
        except requests.RequestException:
            # Don't cache transient failures — allow a retry next time.
            return None
        if not results:
            self._id_cache[key] = None
            return None
        def score(entry) -> float:
@@ -130,7 +143,9 @@ class MangaDexVolumeResolver:
            return best
        results.sort(key=score, reverse=True)
-        return results[0].get("id")
+        manga_id = results[0].get("id")
        self._id_cache[key] = manga_id
        return manga_id
    # ----------------------------------------------------------------------
    # Main function: retrieve and return volume / chapter data
@@ -30,6 +30,10 @@ from flask import Flask, jsonify, request, Response
 from MatchesCache import MatchesCache
 from ComicInfoBuilder import _pick_thumbnail_url
 from PerfWebPage import render_perf_page
 # Cross-link tabs shown on every perf page in the manga container.
 _PERF_TABS = [("move", "move"), ("volume/cover", "volume"), ("persons", "person")]
 _INDEX_HTML = """<!doctype html>
@@ -71,6 +75,8 @@ _INDEX_HTML = """<!doctype html>
  <button id="batchSave" class="primary">Save dirty (0)</button>
  <button id="build">Build all (rescan)</button>
  <button id="move">Start move</button>
  <button id="syncPersons">Sync persons</button>
  <a href="/perf/move" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
  <span class="status" id="status"></span>
 </div>
@@ -341,6 +347,23 @@ document.getElementById("move").addEventListener("click", async () => {
    btn.disabled = false;
  }
 });
 document.getElementById("syncPersons").addEventListener("click", async () => {
  if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return;
  const btn = document.getElementById("syncPersons");
  btn.disabled = true;
  setStatus("Syncing persons… (running on the server)");
  try {
    const r = await fetch("/api/persons/sync", { method: "POST" });
    if (!r.ok) throw new Error(await r.text());
    const d = await r.json();
    setStatus("Persons: " + d.updated + " updated, " + d.skipped + " skipped, "
      + d.not_found + " not found, " + d.conflicts + " conflicts");
  } catch (err) {
    setStatus("Person sync failed: " + err.message);
  } finally {
    btn.disabled = false;
  }
 });
 for (const th of document.querySelectorAll("th.sortable")) {
  th.addEventListener("click", () => {
    const col = th.dataset.col;
@@ -357,6 +380,8 @@ load();
 """
 class MatchesWebApp:
    """
    Flask app exposing the MatchesCache. `mover` is required when you want
@@ -367,14 +392,22 @@ class MatchesWebApp:
    def __init__(self, cache: MatchesCache, *,
                 mover=None,
                 person_updater=None,
                 person_trigger: str = "web",
                 perf_stats=None,
                 host: str = "0.0.0.0",
                 port: int = 8080):
        self._cache = cache
        self._mover = mover
        self._person_updater = person_updater
        self._person_trigger = person_trigger
        # perf_stats: dict {name -> PerfStats}, e.g. {"move", "volume", "person"}.
        self._perf = perf_stats or {}
        self._host = host
        self._port = port
        self._build_lock = threading.Lock()
        self._move_lock  = threading.Lock()
        self._person_lock = threading.Lock()
        self._app = Flask(__name__)
        self._thread: "threading.Thread | None" = None
        self._register_routes()
@@ -498,3 +531,31 @@ class MatchesWebApp:
            finally:
                self._move_lock.release()
            return jsonify({"results": results})
        @app.post("/api/persons/sync")
        def api_persons_sync():
            if self._person_updater is None:
                return Response("no person updater configured", status=503)
            if not self._person_lock.acquire(blocking=False):
                return Response("person sync already running", status=409)
            try:
                report = self._person_updater.update_all_persons(
                    trigger=self._person_trigger,
                    perf=self._perf.get("person"))
            except Exception as exc:
                return Response(f"person sync failed: {exc}", status=500)
            finally:
                self._person_lock.release()
            return jsonify(report)
        # Perf pages: /perf (move) + /perf/<name> for the updaters.
        @app.get("/perf")
        @app.get("/perf/<name>")
        def perf_page(name: str = "move") -> Response:
            return Response(render_perf_page(name, _PERF_TABS),
                            mimetype="text/html; charset=utf-8")
        @app.get("/api/perf/<name>")
        def api_perf(name: str):
            stats = self._perf.get(name)
            return jsonify(stats.all() if stats is not None else {"runs": []})
@@ -64,11 +64,10 @@ from MangadexVolumeResolver import MangaDexVolumeResolver
 from MangaBakaWorksResolver import MangaBakaWorksResolver
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from KavitaClient import KavitaClient
 from KavitaPersonUpdater import KavitaPersonUpdater
 from MatchesCache import MatchesCache
 from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
 from CoverCache import CoverCache, _IMAGE_EXTS
 from PerfStats import PerfStats
 _CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
@@ -305,28 +304,30 @@ class SuwayomiMover:
                      Expected layout: <root>/<Source>/<Title>/<Chapter N>/
    kavita_path     : Root of the Kavita library.
                      Series sub-directories are created automatically.
    kavita_base_url : Kavita server URL — required only for person sync,
                      e.g. "http://192.168.2.2:5000".
    kavita_api_key  : Kavita API key   — required only for person sync.
    language        : ComicInfo LanguageISO and SeriesSort language ("en").
    request_timeout : HTTP timeout in seconds for all API / image requests.
    delete_source   : Remove the source chapter folder after successful pack.
    cover_cache_dir : Directory for the persistent cover cache.  None ->
                      temporary cache, deleted at process exit.
    perf_stats      : Optional PerfStats instance for per-step timing.  None
                      (default) disables profiling.
    Note: Kavita person sync is no longer done here — it runs as a separate,
    global, id-based updater on its own cron schedule (KavitaPersonUpdater).
    The mover only touches MangaBaka / MangaDex / MAL / AniList.
    """
    def __init__(self,
                 suwayomi_path,
                 kavita_path,
                 *,
                 kavita_base_url: "str | None" = None,
                 kavita_api_key: "str | None" = None,
                 language: str = "en",
                 request_timeout: int = 30,
                 delete_source: bool = True,
                 matches_cache: "MatchesCache | None" = None,
                 api_base_url: str = "https://api.mangabaka.dev/v1",
-                 cover_cache_dir=None):
+                 cover_cache_dir=None,
                 perf_stats: "PerfStats | None" = None):
        self._src = Path(suwayomi_path)
        self._dst = Path(kavita_path)
        self._language = language
@@ -334,6 +335,7 @@ class SuwayomiMover:
        self._delete_source = delete_source
        self._matches_cache = matches_cache
        self._api_base_url = api_base_url.rstrip("/")
        self._perf = perf_stats or PerfStats(None)
        # Shared HTTP session and resolvers — reused across all series/chapters
        # to maximise cache hits and minimise API round-trips.
@@ -352,16 +354,6 @@ class SuwayomiMover:
        self._cover_cache = CoverCache(
            cover_cache_dir, session=session, request_timeout=request_timeout)
        self._person_updater: "KavitaPersonUpdater | None" = None
        if kavita_base_url and kavita_api_key:
            kavita_client = KavitaClient(
                kavita_base_url, kavita_api_key,
                request_timeout=request_timeout)
            self._person_updater = KavitaPersonUpdater(
                kavita_client,
                mal_resolver=self._mal,
                al_resolver=self._al)
    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
@@ -376,15 +368,19 @@ class SuwayomiMover:
        dict from _process_series_dir.
        """
        results: dict = {}
-        for source_dir in sorted(self._src.iterdir()):
+        run = self._perf.begin_run()
-            if not source_dir.is_dir():
+        try:
-                continue
+            for source_dir in sorted(self._src.iterdir()):
-            for manga_dir in sorted(source_dir.iterdir()):
+                if not source_dir.is_dir():
                if not manga_dir.is_dir():
                    continue
-                title = manga_dir.name
+                for manga_dir in sorted(source_dir.iterdir()):
-                print(f"[SuwayomiMover] {title}")
+                    if not manga_dir.is_dir():
-                results[title] = self._process_series_dir(manga_dir)
+                        continue
                    title = manga_dir.name
                    print(f"[SuwayomiMover] {title}")
                    results[title] = self._process_series_dir(manga_dir, run)
        finally:
            run.finish()
        return results
    def process_series(self, manga_title: str) -> dict:
@@ -400,7 +396,11 @@ class SuwayomiMover:
                continue
            candidate = source_dir / manga_title
            if candidate.is_dir():
-                return self._process_series_dir(candidate)
+                run = self._perf.begin_run()
                try:
                    return self._process_series_dir(candidate, run)
                finally:
                    run.finish()
        raise FileNotFoundError(
            f"No Suwayomi directory found for '{manga_title}' under {self._src}")
@@ -487,8 +487,9 @@ class SuwayomiMover:
    # ------------------------------------------------------------------
    # Internal: series
    # ------------------------------------------------------------------
-    def _process_series_dir(self, manga_dir: Path) -> dict:
+    def _process_series_dir(self, manga_dir: Path, run=None) -> dict:
        manga_title = manga_dir.name
        series_rec = (run or self._perf.begin_run()).begin_item(manga_title)
        chapter_dirs = sorted(
            (d for d in manga_dir.iterdir() if d.is_dir()),
@@ -539,7 +540,8 @@ class SuwayomiMover:
        md: "dict | None" = None
        mangabaka_title = manga_title
        try:
-            md = builder.fetch_metadata()
+            with series_rec.measure("fetch_metadata"):
                md = builder.fetch_metadata()
            mangabaka_title = md.get("title") or manga_title
        except Exception as exc:
            print(f"  [warn] metadata fetch failed: {exc}")
@@ -571,7 +573,7 @@ class SuwayomiMover:
        chapter_results: list[dict] = []
        for chapter_dir, _fields, chapter_num in pending:
            result = self._process_chapter(
-                builder, chapter_num, chapter_dir, dest_series)
+                builder, chapter_num, chapter_dir, dest_series, series_rec)
            chapter_results.append(result)
            status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
            print(f"  Chapter {chapter_num}: {status}")
@@ -582,25 +584,11 @@ class SuwayomiMover:
                }
                _save_chapter_index(dest_series, chapter_index)
-        # Sync Kavita persons once per series.
+        # Person sync no longer runs here — it has its own global,
-        # Both MAL and AniList IDs come from MangaBaka's source map;
+        # id-based updater on a separate cron schedule (see
-        # AniList is used as fallback when MAL returns no characters/staff.
+        # KavitaPersonUpdater.update_all_persons).
-        person_result: "dict | None" = None
+        series_rec.finish()
-        if self._person_updater:
+        return {"chapters": chapter_results}
            mal_id = ((ComicInfoBuilder._mal_id_from_source(md) if md else None)
                      or self._mal.find_mal_id(builder_title))
            al_id  = ComicInfoBuilder._al_id_from_source(md) if md else None
            if mal_id or al_id:
                try:
                    person_result = self._person_updater.update_for_manga(
                        mal_id, al_manga_id=al_id)
                    print(f"  Persons: chars={person_result['characters'].get('updated')} "
                          f"staff={person_result['staff'].get('updated')}")
                except Exception as exc:
                    person_result = {"error": str(exc)}
                    print(f"  Persons: ERROR {exc}")
        return {"chapters": chapter_results, "persons": person_result}
    # ------------------------------------------------------------------
    # Internal: chapter
@@ -609,7 +597,8 @@ class SuwayomiMover:
                         builder: ComicInfoBuilder,
                         chapter_num: str,
                         chapter_dir: Path,
-                         dest_series: Path) -> dict:
+                         dest_series: Path,
                         series_rec=None) -> dict:
        """
        Generates ComicInfo.xml for one chapter, packs it to CBZ, and
        optionally removes the source folder.
@@ -619,6 +608,11 @@ class SuwayomiMover:
        <Pages> element correctly points to the front cover).
        """
        cbz_path = dest_series / f"{chapter_dir.name}.cbz"
        chap_rec = (series_rec or self._perf.begin_run().begin_item("")
                    ).begin_item(chapter_num)
        # add_pages_from_folder records its own sub-steps on this recorder.
        builder.perf = chap_rec
        ok = False
        try:
            builder.chapter = chapter_num
            builder.add_pages_from_folder(chapter_dir, cover_filename="000")
@@ -626,32 +620,35 @@ class SuwayomiMover:
            # by add_pages_from_folder, so it's effectively free.  Used by
            # the chapter index in the Kavita destination folder.
            try:
-                volume = builder._determine_volume()
+                with chap_rec.measure("volume"):
                    volume = builder._determine_volume()
            except Exception:
                volume = None
-            builder.save_xml(chapter_dir)
+            with chap_rec.measure("save_xml"):
-            _pack_to_cbz(chapter_dir, cbz_path)
+                builder.save_xml(chapter_dir)
            with chap_rec.measure("pack_cbz"):
                _pack_to_cbz(chapter_dir, cbz_path)
            if self._delete_source:
-                shutil.rmtree(chapter_dir)
+                with chap_rec.measure("delete_source"):
                    shutil.rmtree(chapter_dir)
            ok = True
            return {"chapter": chapter_num, "cbz": str(cbz_path),
                    "ok": True, "volume": volume}
        except Exception as exc:
            return {"chapter": chapter_num, "cbz": str(cbz_path),
                    "ok": False, "error": str(exc)}
        finally:
            builder.perf = None
            chap_rec.finish(ok=ok)
 # --------------------------------------------------------------------------
 # Usage example
 # --------------------------------------------------------------------------
 if __name__ == "__main__":
    import os
    # Local (no-Docker) smoke test.  Adjust paths to your environment.
    # Set the KAVITA_API_KEY env var — never commit API keys to the repo.
    SUWAYOMI_PATH = r"M:\config\downloads\mangas"
    KAVITA_PATH   = r"\\192.168.2.2\root\ServerData\Kavita\test"
    KAVITA_URL    = "http://192.168.2.2:5000"
    KAVITA_KEY    = os.environ.get("KAVITA_API_KEY", "")
    # matches.json lives next to this script during local testing.
    MATCHES_PATH  = Path(__file__).resolve().parent.parent / "matches.json"
@@ -660,8 +657,6 @@ if __name__ == "__main__":
    mover = SuwayomiMover(
        SUWAYOMI_PATH,
        KAVITA_PATH,
        kavita_base_url=KAVITA_URL,
        kavita_api_key=KAVITA_KEY,
        delete_source=False,
        matches_cache=matches_cache,
    )