diff --git a/.env.example b/.env.example index e3b464f..a33a046 100644 --- a/.env.example +++ b/.env.example @@ -11,12 +11,18 @@ HOST_MANGA_CONFIG_PATH=/path/to/manga-config MANGA_WEB_PORT=8080 SETTLE_SECONDS=600 DELETE_SOURCE=true +# Periodic updaters (volume/cover + global person sync) run together on +# this cron. Sundays 10:00. Person updater also covers LN libraries. UPDATER_ENABLED=true -UPDATER_SCHEDULE=0 19 * * 1,4 +UPDATER_SCHEDULE=0 10 * * 0 COVER_CACHE_PATH=/config/covers +PERF_PATH=/config/perf_stats.json +VOLUME_PERF_PATH=/config/volume_perf_stats.json +PERSON_PERF_PATH=/config/person_perf_stats.json # Light-novel container (kavita-lightnovel-metadata-fetcher) HOST_LN_CONFIG_PATH=/path/to/ln-config LN_WEB_PORT=8081 LN_LIBRARY_IDS=3,5 +LN_UPDATER_ENABLED=true diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index bde59bb..1e8d272 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -13,15 +13,18 @@ services: SETTLE_SECONDS: "${SETTLE_SECONDS:-600}" DELETE_SOURCE: "${DELETE_SOURCE:-true}" MATCH_PATH: "/config/matches.json" - # Volume/cover back-fill updater + # Periodic updaters (volume/cover back-fill + global person sync) run + # together on this cron. "0 10 * * 0" = Sundays 10:00 (local time, see TZ) UPDATER_ENABLED: "${UPDATER_ENABLED:-true}" - # Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday - # (local time, see TZ) - UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}" + UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}" UPDATER_LOG: "/config/volume_updater.log" # Persistent cover cache (empty = temp dir, deleted on container stop) COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}" - # Timezone for the cron schedule — without this 19:00 means 19:00 UTC + # Per-step timing stats (viewable at /perf, /perf/volume, /perf/person) + PERF_PATH: "${PERF_PATH:-/config/perf_stats.json}" + VOLUME_PERF_PATH: "${VOLUME_PERF_PATH:-/config/volume_perf_stats.json}" + PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}" + # Timezone for the cron schedule — without this 10:00 means 10:00 UTC TZ: "${TZ:-Europe/Berlin}" ports: - "${MANGA_WEB_PORT:-8080}:8080" @@ -43,6 +46,10 @@ services: LIBRARY_IDS: "${LN_LIBRARY_IDS}" LANGUAGE: "${LANGUAGE:-en}" MATCH_PATH: "/config/matches.json" + # Global person sync on cron (same default cadence as the manga side) + UPDATER_ENABLED: "${LN_UPDATER_ENABLED:-true}" + UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}" + PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}" TZ: "${TZ:-Europe/Berlin}" ports: - "${LN_WEB_PORT:-8081}:8080" diff --git a/main_ln.py b/main_ln.py index 792a6af..f86fa45 100644 --- a/main_ln.py +++ b/main_ln.py @@ -27,6 +27,12 @@ Environment variables MATCH_PATH default /config/matches.json WEB_PORT default 8080 WEB_HOST default 0.0.0.0 + UPDATER_ENABLED default true (run the person updater on cron) + UPDATER_SCHEDULE cron expression for the person updater, + default "0 10 * * 0" = Sundays 10:00 + (local time — set TZ inside the container!) + PERSON_PERF_PATH JSON file for person updater timing. + Default /config/person_perf_stats.json """ from __future__ import annotations @@ -51,6 +57,15 @@ sys.path.insert(0, str(_BASE / "src" / "ln")) from MatchesCache import MatchesCache # noqa: E402 from LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402 from MatchesWebApp import MatchesWebApp # noqa: E402 +from PerfStats import PerfStats # noqa: E402 +from CronRunner import CronRunner # noqa: E402 + + +def _env_bool(name: str, default: bool) -> bool: + raw = os.environ.get(name) + if raw is None: + return default + return raw.strip().lower() in ("1", "true", "yes", "y", "on") def _env_str(name: str, default: "str | None" = None, @@ -98,6 +113,10 @@ def main() -> int: web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0" web_port = _env_int("WEB_PORT", 8080) library_ids = _env_int_list("LIBRARY_IDS") + updater_enabled = _env_bool("UPDATER_ENABLED", True) + updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0") + person_perf_path = _env_str("PERSON_PERF_PATH", + "/config/person_perf_stats.json") or None print(f"[main] kavita url = {kavita_url}", flush=True) print(f"[main] language = {language}", flush=True) @@ -107,6 +126,7 @@ def main() -> int: print(f"[main] web = {web_host}:{web_port}", flush=True) cache = MatchesCache(match_path) + person_perf = PerfStats(person_perf_path) orchestrator = LightNovelOrchestrator( kavita_url=kavita_url, kavita_api_key=kavita_api_key, @@ -118,9 +138,22 @@ def main() -> int: app = MatchesWebApp( cache, orchestrator=orchestrator, default_library_ids=library_ids, + person_perf=person_perf, host=web_host, port=web_port, ) app.start() + + if updater_enabled: + try: + CronRunner( + updater_schedule, + lambda: orchestrator.sync_persons(trigger="cron", + perf=person_perf), + name="person-updater").start() + except ValueError as exc: + print(f"[main] UPDATER_SCHEDULE invalid ({exc}); " + f"scheduled person sync DISABLED", flush=True) + app.wait() return 0 diff --git a/main_manga.py b/main_manga.py index a055700..b8dc0d0 100644 --- a/main_manga.py +++ b/main_manga.py @@ -28,13 +28,19 @@ Environment variables MATCH_PATH default /config/matches.json WEB_PORT default 8080 (Flask web UI for matches.json) WEB_HOST default 0.0.0.0 - UPDATER_ENABLED default true (volume/cover back-fill cron) - UPDATER_SCHEDULE cron expression for the updater scans, - default "0 19 * * 1,4" = 19:00 every Mon + Thu + UPDATER_ENABLED default true (run volume/cover + person updaters on cron) + UPDATER_SCHEDULE cron expression for the periodic updaters, + default "0 10 * * 0" = Sundays 10:00 (local time — set TZ inside the container!) UPDATER_LOG default /config/volume_updater.log COVER_CACHE_PATH directory for the persistent cover cache; empty (default) = temporary cache, deleted on exit + PERF_PATH JSON file for per-step move timing stats. + Default /config/perf_stats.json (empty disables it) + VOLUME_PERF_PATH JSON file for volume/cover updater timing. + Default /config/volume_perf_stats.json + PERSON_PERF_PATH JSON file for person updater timing. + Default /config/person_perf_stats.json """ from __future__ import annotations @@ -42,7 +48,6 @@ from __future__ import annotations import os import sys from pathlib import Path - try: from dotenv import load_dotenv load_dotenv() @@ -61,6 +66,10 @@ from SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402,F401 from MatchesCache import MatchesCache # noqa: E402 from MatchesWebApp import MatchesWebApp # noqa: E402 from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402 +from KavitaClient import KavitaClient # noqa: E402 +from KavitaPersonUpdater import KavitaPersonUpdater # noqa: E402 +from PerfStats import PerfStats # noqa: E402 +from CronRunner import CronRunner # noqa: E402 def _env_str(name: str, default: "str | None" = None, @@ -104,9 +113,14 @@ def main() -> int: web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0" web_port = _env_int("WEB_PORT", 8080) updater_enabled = _env_bool("UPDATER_ENABLED", True) - updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4") + updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0") updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log") cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None + perf_path = _env_str("PERF_PATH", "/config/perf_stats.json") or None + volume_perf_path = _env_str("VOLUME_PERF_PATH", + "/config/volume_perf_stats.json") or None + person_perf_path = _env_str("PERSON_PERF_PATH", + "/config/person_perf_stats.json") or None print(f"[main] suwayomi = {suwayomi_path}", flush=True) print(f"[main] kavita = {kavita_path}", flush=True) @@ -118,40 +132,62 @@ def main() -> int: print(f"[main] web = {web_host}:{web_port}", flush=True) matches_cache = MatchesCache(match_path) + perf_move = PerfStats(perf_path) + perf_volume = PerfStats(volume_perf_path) + perf_person = PerfStats(person_perf_path) mover = SuwayomiMover( suwayomi_path, kavita_path, - kavita_base_url=kavita_url, - kavita_api_key=kavita_api_key, language=language, request_timeout=request_timeout, delete_source=delete_source, matches_cache=matches_cache, cover_cache_dir=cover_cache_path, + perf_stats=perf_move, ) + # Standalone, global, id-based person updater (manga + LN libraries). + person_updater = None + if kavita_api_key: + kavita_client = KavitaClient(kavita_url, kavita_api_key, + request_timeout=request_timeout) + person_updater = KavitaPersonUpdater(kavita_client) + # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds) - web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port) + web_app = MatchesWebApp( + matches_cache, mover=mover, + person_updater=person_updater, person_trigger="web", + perf_stats={"move": perf_move, "volume": perf_volume, + "person": perf_person}, + host=web_host, port=web_port) web_app.start() if updater_enabled: + updater = KavitaVolumeCoverUpdater( + kavita_path, + matches_cache=matches_cache, + language=language, + request_timeout=request_timeout, + log_path=updater_log, + cover_cache_dir=cover_cache_path, + perf_stats=perf_volume, + ) + + def _scheduled_job(): + updater.update_all() + if person_updater is not None: + person_updater.update_all_persons(trigger="cron", + perf=perf_person) + try: - updater = KavitaVolumeCoverUpdater( - kavita_path, - matches_cache=matches_cache, - language=language, - request_timeout=request_timeout, - log_path=updater_log, - schedule=updater_schedule, - cover_cache_dir=cover_cache_path, - ) - updater.start() + CronRunner(updater_schedule, _scheduled_job, + name="updaters").start() except ValueError as exc: # Invalid cron expression — keep the service up, just without - # the updater, and make the config error obvious in the logs. + # the scheduled updaters, and surface the config error. print(f"[main] UPDATER_SCHEDULE invalid ({exc}); " - f"volume/cover updater DISABLED", flush=True) + f"scheduled updaters DISABLED", flush=True) # watcher.start() # watcher.wait() # blocks until stop() is called via a signal diff --git a/src/CronRunner.py b/src/CronRunner.py new file mode 100644 index 0000000..e8e2a50 --- /dev/null +++ b/src/CronRunner.py @@ -0,0 +1,87 @@ +""" +cron_runner.py +============== + +Runs a single callable on a cron schedule on a background thread. + +Decouples *what* runs from *when*: both the manga container (volume/cover +updater + person updater) and the LN container (person updater) schedule +their work through this one helper, using a shared ``CronSchedule`` for the +``next_after`` arithmetic. + +Usage:: + + runner = CronRunner("0 10 * * 0", job=my_callable) # Sundays 10:00 + runner.start() + ... + runner.stop() + +When the schedule string is invalid, the CronSchedule constructor raises +ValueError — the caller decides whether to disable the runner or fall back. +The schedule is evaluated in local time (set TZ inside the container). +""" + +from __future__ import annotations + +import threading +from datetime import datetime + +from CronSchedule import CronSchedule + + +def _now() -> str: + return datetime.now().isoformat(timespec="seconds") + + +class CronRunner: + """ + Fires ``job()`` whenever the cron ``schedule`` elapses. + + Parameters + ---------- + schedule : 5-field cron expression (see CronSchedule). + job : Zero-arg callable invoked on each scheduled tick. Exceptions + are caught and logged so a failing run does not kill the loop. + name : Thread name (for logs). + """ + + def __init__(self, schedule: str, job, *, name: str = "CronRunner"): + self._cron = CronSchedule(schedule) + self._job = job + self._name = name + self._stop = threading.Event() + self._thread: "threading.Thread | None" = None + + def start(self) -> None: + """Starts the scheduling thread. Non-blocking.""" + if self._thread is not None and self._thread.is_alive(): + return + self._stop.clear() + self._thread = threading.Thread( + target=self._loop, name=self._name, daemon=True) + self._thread.start() + print(f"[{_now()}] [{self._name}] scheduled on " + f"cron '{self._cron.expression}'", flush=True) + + def stop(self) -> None: + """Signals the loop to stop (a job already running finishes first).""" + self._stop.set() + if self._thread is not None: + self._thread.join(timeout=10) + + def wait(self) -> None: + """Blocks the calling thread until stop() is invoked.""" + self._stop.wait() + + def _loop(self) -> None: + while not self._stop.is_set(): + next_run = self._cron.next_after(datetime.now()) + wait = max(0.0, (next_run - datetime.now()).total_seconds()) + print(f"[{_now()}] [{self._name}] next run: " + f"{next_run.isoformat(timespec='minutes')}", flush=True) + if self._stop.wait(wait): + break + try: + self._job() + except Exception as exc: + print(f"[{_now()}] [{self._name}] job ERROR: {exc}", flush=True) diff --git a/src/KavitaClient.py b/src/KavitaClient.py index 6a980f6..400e87f 100644 --- a/src/KavitaClient.py +++ b/src/KavitaClient.py @@ -204,6 +204,31 @@ class KavitaClient: r.raise_for_status() return r.json() or [] + def list_all_persons(self, *, page_size: int = 200) -> list[dict]: + """ + Returns every PersonDto in the instance. + + Pages through POST /api/Person/all (the browse endpoint) with an + empty filter until an empty page is returned — same paging pattern + as list_series_in_library. + """ + results: list[dict] = [] + page = 1 + while True: + r = self._session.post( + f"{self._base}/api/Person/all", + params={"PageNumber": page, "PageSize": page_size}, + json={}, timeout=self._timeout) + r.raise_for_status() + chunk = r.json() or [] + if not chunk: + break + results.extend(chunk) + if len(chunk) < page_size: + break + page += 1 + return results + def update_person(self, payload: dict) -> None: """Writes a person record (malId, aniListId, description, …).""" r = self._session.post(f"{self._base}/api/Person/update", diff --git a/src/KavitaPersonUpdater.py b/src/KavitaPersonUpdater.py index ae0036b..5d2e30c 100644 --- a/src/KavitaPersonUpdater.py +++ b/src/KavitaPersonUpdater.py @@ -2,407 +2,220 @@ kavita_person_updater.py ======================== -Synchronises Kavita person / character records with MyAnimeList data. +Synchronises Kavita character person-records with MyAnimeList / AniList data. -For every character and staff member that MAL knows about for a given manga -the updater: - 1. Searches Kavita for a matching Person record (by name similarity / - alias match, configurable threshold). - 2. Sets the MAL ID on the Kavita person if it is not yet linked. - 3. Uploads the MAL profile image when the cover is not locked and has - not been set in a previous sync run. - 4. Populates the description field when Kavita has none and MAL provides - an 'about' text (requires an extra Jikan request per character; only - performed when update_descriptions=True). +Global, id-based mode +--------------------- +Kavita person-records are created with a disambiguated name carrying the +tracker *character* id, e.g. ``Rem (MAL 118737)`` (manga: written into +ComicInfo ; light novels: written by the metadata builder). +``update_all_persons`` walks **every** person in the Kavita instance, reads +that id from the name, looks the character up on MAL / AniList by id, and +writes back: + + * the tracker id into the ``malId`` / ``aniListId`` field (when still empty), + * a description (when the record has none), + * the profile image (when not locked and not already set). + +Persons whose name carries no id (authors / staff, which are not +disambiguated) are skipped. A record already linked to a *different* +tracker id than its name says is reported as a conflict and left untouched. + +This mode is format-independent (it only does id lookups, never title +searches) so a single pass covers both the manga and light-novel libraries. All HTTP traffic to Kavita goes through the shared :class:`KavitaClient` -(`/api/Person/search`, `/api/Person/update`, `/api/Upload/person`). +(`/api/Person/all`, `/api/Person/update`, `/api/Upload/person`). Tested against Kavita 0.9.0.2. """ from __future__ import annotations -import datetime - import requests from KavitaClient import KavitaClient from MALResolver import MALResolver from AniListResolver import AniListResolver -from TextUtils import best_similarity, paragraphs_to_html, person_name_with_id +from PerfStats import PerfStats +from TextUtils import paragraphs_to_html, parse_person_tracker_id class KavitaPersonUpdater: """ - Syncs Kavita Person records with MyAnimeList data. + Syncs Kavita character person-records with MAL / AniList data, keyed by + the tracker id embedded in each person's name. Parameters ---------- - client : Shared KavitaClient (session, auth, cover uploads) - mal_resolver : Shared MALResolver singleton (created automatically if omitted) - al_resolver : Shared AniListResolver singleton (created automatically if omitted) - min_name_score : Minimum difflib similarity ratio (0–1) required to accept a - Kavita person as a match for a MAL name. Default 0.80. + client : Shared KavitaClient (session, auth, cover uploads). + mal_resolver : Shared MALResolver singleton (created if omitted). + al_resolver : Shared AniListResolver singleton (created if omitted). """ def __init__(self, client: KavitaClient, *, mal_resolver: "MALResolver | None" = None, - al_resolver: "AniListResolver | None" = None, - min_name_score: float = 0.80): + al_resolver: "AniListResolver | None" = None): self._client = client - self._min_score = min_name_score self._mal = mal_resolver or MALResolver() self._al = al_resolver or AniListResolver() - # Cache: normalised name -> list of PersonDto dicts (best matches first) - self._person_search_cache: dict[str, list[dict]] = {} - # ------------------------------------------------------------------ - # Public: combined update + # Public: global person sync # ------------------------------------------------------------------ - def update_for_manga(self, mal_manga_id: "int | None", *, - al_manga_id: "int | None" = None, - update_covers: bool = True, - update_descriptions: bool = True) -> dict: + def update_all_persons(self, *, + trigger: str = "cron", + perf: "PerfStats | None" = None, + update_covers: bool = True, + update_descriptions: bool = True) -> dict: """ - Runs a full update pass for both characters and staff of the manga. - MAL is tried first; AniList is used as fallback when MAL returns nothing. + Walks every Kavita person, syncing the ones whose name carries a + tracker character id. - Returns - ------- - { - "characters": {"updated": n, "skipped": n, "not_found": n}, - "staff": {"updated": n, "skipped": n, "not_found": n}, - } + Parameters + ---------- + trigger : Source that started this run ("cron" | "web" | "ln") — + recorded in the perf-stats run meta. + perf : Optional PerfStats for per-person step timing. + + Returns {"trigger", "updated", "skipped", "not_found", + "conflicts", "errors"}. """ - return { - "characters": self.update_characters( - mal_manga_id, al_manga_id=al_manga_id, - update_covers=update_covers, - update_descriptions=update_descriptions), - "staff": self.update_staff( - mal_manga_id, al_manga_id=al_manga_id, - update_covers=update_covers, - update_descriptions=update_descriptions), - } + perf = perf or PerfStats(None) + run = perf.begin_run(meta={"trigger": trigger}) + result: dict = {"trigger": trigger, "updated": 0, "skipped": 0, + "not_found": 0, "conflicts": 0, "errors": []} - # ------------------------------------------------------------------ - # Public: character update - # ------------------------------------------------------------------ - def update_characters(self, mal_manga_id: "int | None", *, - al_manga_id: "int | None" = None, - update_covers: bool = True, - update_descriptions: bool = True) -> dict: - """ - Updates Kavita persons that match MAL/AniList characters for the manga. - MAL is tried first; AniList is the fallback when MAL returns nothing. + try: + persons = self._client.list_all_persons() + except requests.RequestException as exc: + result["errors"].append(f"list persons failed: {exc}") + run.finish() + return result - Returns {"updated": n, "skipped": n, "not_found": n}. - """ - entries = self._mal.get_characters_detailed(mal_manga_id) if mal_manga_id else [] - resolver = self._mal - if not entries and al_manga_id: - entries = self._al.get_characters_detailed(al_manga_id) - resolver = self._al - return self._sync_entries(entries, "character", resolver, - update_covers=update_covers, - update_descriptions=update_descriptions) - - # ------------------------------------------------------------------ - # Public: staff update - # ------------------------------------------------------------------ - def update_staff(self, mal_manga_id: "int | None", *, - al_manga_id: "int | None" = None, - update_covers: bool = True, - update_descriptions: bool = True) -> dict: - """ - Updates Kavita persons that match MAL/AniList staff for the manga. - MAL is tried first; AniList is the fallback when MAL returns nothing. - - Returns {"updated": n, "skipped": n, "not_found": n}. - """ - entries = self._mal.get_staff_detailed(mal_manga_id) if mal_manga_id else [] - resolver = self._mal - if not entries and al_manga_id: - entries = self._al.get_staff_detailed(al_manga_id) - resolver = self._al - return self._sync_entries(entries, "staff", resolver, - update_covers=update_covers, - update_descriptions=update_descriptions) - - # ------------------------------------------------------------------ - # Public: cache management - # ------------------------------------------------------------------ - def clear_cache(self) -> None: - """Clears the Kavita person search cache.""" - self._person_search_cache.clear() - - # ------------------------------------------------------------------ - # Internal: main sync loop - # ------------------------------------------------------------------ - def _sync_entries(self, entries: list[dict], kind: str, resolver, *, - update_covers: bool, - update_descriptions: bool) -> dict: - result: dict = {"updated": 0, "skipped": 0, "not_found": 0, - "errors": []} - for entry in entries: - name = (entry.get("name") or "").strip() - raw_name = (entry.get("raw_name") or "").strip() - if not name and not raw_name: + for person in persons: + name = (person.get("name") or "").strip() + parsed = parse_person_tracker_id(name) + if not parsed: + result["skipped"] += 1 # author/staff or un-tagged continue - if kind == "character": - # Characters are stored under their disambiguated name - # ("Rem (MAL 118737)") — see person_name_with_id. The - # series metadata write creates the person under exactly - # this name, so only that form is searched. - search_names = [person_name_with_id( - name, mal_id=entry.get("mal_id"), - al_id=entry.get("al_id"))] - else: - # Staff: cleaned (XML-safe) name first; if Kavita stores - # the legacy comma form, retry with the raw MAL name. - search_names = [name] - if raw_name and raw_name != name: - search_names.append(raw_name) - - matches: list[dict] = [] - for search_name in search_names: - if not search_name: - continue - matches = self._find_kavita_person(search_name) - if matches: - break - - if not matches: - result["not_found"] += 1 - continue - - changed = self._apply_mal_data( - matches[0], entry, kind, resolver, - update_cover=update_covers, - update_desc=update_descriptions, - errors=result["errors"]) - result["updated" if changed else "skipped"] += 1 + source, tracker_id = parsed + item = run.begin_item(name) + ok = True + try: + category = self._apply_to_person( + person, source, tracker_id, item, + update_cover=update_covers, + update_desc=update_descriptions, + errors=result["errors"]) + result[category] += 1 + ok = category != "conflicts" + except Exception as exc: + result["errors"].append(f"{name}: {exc}") + ok = False + finally: + item.finish(ok=ok) + run.finish() + print(f"[persons] trigger={trigger} updated={result['updated']} " + f"skipped={result['skipped']} not_found={result['not_found']} " + f"conflicts={result['conflicts']} errors={len(result['errors'])}", + flush=True) return result # ------------------------------------------------------------------ - # Internal: Kavita person search + # Internal: apply tracker data to one person # ------------------------------------------------------------------ - def _find_kavita_person(self, name: str) -> list[dict]: + def _apply_to_person(self, person: dict, source: str, tracker_id: int, + item, *, update_cover: bool, update_desc: bool, + errors: list) -> str: """ - Searches Kavita for persons matching `name`. - - Checks both the main name and any stored aliases. - Returns persons sorted by similarity, filtered by min_name_score. - Results are cached per (normalised) query name. + Applies MAL/AniList character data to one Kavita person. + Returns the result category: "updated" | "skipped" | "not_found" + | "conflicts". """ - key = name.lower().strip() - if key in self._person_search_cache: - return self._person_search_cache[key] - - try: - persons = self._client.search_persons(name) - except requests.RequestException: - self._person_search_cache[key] = [] - return [] - - scored = [] - for p in persons: - candidates = [p.get("name")] + list(p.get("aliases") or []) - scored.append((best_similarity(key, candidates), p)) - scored.sort(key=lambda pair: pair[0], reverse=True) - filtered = [p for score, p in scored if score >= self._min_score] - self._person_search_cache[key] = filtered - return filtered - - # ------------------------------------------------------------------ - # Internal: apply MAL data to a single Kavita person - # ------------------------------------------------------------------ - def _apply_mal_data(self, person: dict, mal_entry: dict, kind: str, - resolver, *, - update_cover: bool, update_desc: bool, - errors: "list | None" = None) -> bool: - """ - Applies tracker data (MAL or AniList) to one Kavita person record. - - Fields updated - -------------- - - malId : set when the entry carries a MAL ID and it differs - - aniListId : set when the entry carries an AniList ID and it differs - - description: set when empty and the tracker provides a description - - cover image: uploaded when not locked and no prior sync cover exists - - Returns True if any change was made. Failures are appended to the - `errors` list (if provided) instead of being silently swallowed. - """ - person_id: "int | None" = person.get("id") + person_id = person.get("id") if not person_id: - return False + return "skipped" - person_name = person.get("name") or "" + resolver = self._mal if source == "mal" else self._al + id_field = "malId" if source == "mal" else "aniListId" + current = person.get(id_field) or 0 - # Tracker IDs — a MAL entry has mal_id set; an AniList entry has al_id. - mal_id: "int | None" = mal_entry.get("mal_id") - al_id: "int | None" = mal_entry.get("al_id") - entity_id = mal_id or al_id # used for resolver detail calls + # The name is authoritative; a record linked to a different id is a + # data conflict — never overwrite it. + if current and current != tracker_id: + errors.append( + f"conflict: '{person.get('name')}' (#{person_id}) has " + f"{id_field}={current} but name says {tracker_id} — skipped") + return "conflicts" - current_mal_id: int = person.get("malId") or 0 - current_al_id: int = person.get("aniListId") or 0 + with item.measure("detail_fetch"): + details = resolver.get_character_details(tracker_id) + if not details: + return "not_found" - # Collision guard: the Kavita person is already linked to a - # *different* tracker entity — same display name, different - # character/person. Never overwrite; first writer wins. - if ((mal_id and current_mal_id and current_mal_id != mal_id) - or (al_id and current_al_id and current_al_id != al_id)): - if errors is not None: - errors.append( - f"conflict: '{person_name}' (#{person_id}) is linked to " - f"malId={current_mal_id or '-'}/aniListId={current_al_id or '-'} " - f"but this entry has malId={mal_id or '-'}/aniListId={al_id or '-'} " - f"— skipped") - return False - - needs_mal_id = bool(mal_id and current_mal_id != mal_id) - needs_al_id = bool(al_id and current_al_id != al_id) - - # ------ Lazy description fetch ----------------------------------- - description: "str | None" = None + need_id = not current # write id when still missing + description = None if update_desc and not (person.get("description") or "").strip(): - if entity_id: - if kind == "character": - details = resolver.get_character_details(entity_id) - if details: - description = _build_character_description(details) or None - else: - details = resolver.get_person_details(entity_id) - if details: - description = _build_person_description(details) or None + description = _build_character_description(details) or None + need_desc = bool(description) - needs_desc = bool(description) - - # ------ Metadata update ------------------------------------------ changed = False - if needs_mal_id or needs_al_id or needs_desc: - payload: dict = { + if need_id or need_desc: + payload = { "id": person_id, - "name": person_name, - # MUST stay a boolean — the cover image itself is uploaded - # separately via POST /api/Upload/person (below). Putting a - # URL here makes Kavita reject the whole payload with HTTP 400. + "name": person.get("name") or "", + # MUST stay a boolean — the cover is uploaded separately. "coverImageLocked": bool(person.get("coverImageLocked", False)), "aliases": person.get("aliases") or [], "description": description or person.get("description"), - "malId": mal_id if needs_mal_id else (current_mal_id or None), - "aniListId": al_id if needs_al_id else (current_al_id or None), + "malId": tracker_id if source == "mal" + else (person.get("malId") or None), + "aniListId": tracker_id if source == "al" + else (person.get("aniListId") or None), } try: - self._client.update_person(payload) + with item.measure("person_update"): + self._client.update_person(payload) changed = True - except requests.RequestException as e: - if errors is not None: - errors.append( - f"Person/update failed for #{person_id} " - f"'{person_name}': {e}") + except requests.RequestException as exc: + errors.append(f"update failed #{person_id} " + f"'{person.get('name')}': {exc}") - # ------ Cover image upload ---------------------------------------- - # Upload whenever: - # - caller requested cover updates - # - cover is NOT locked (user did not manually pin it) - # - we have not already uploaded this exact tracker entity's image - # (i.e. the tracked ID differs OR there is no cover yet). + # Cover: upload when not locked and not already set for this id. if update_cover and not person.get("coverImageLocked"): - image_url = mal_entry.get("image_url") - already_uploaded = ( - entity_id is not None - and (current_mal_id == mal_id or current_al_id == al_id) - and bool(person.get("coverImage")) - ) - if image_url and not already_uploaded: + image_url = details.get("image_url") + already = bool(current) and bool(person.get("coverImage")) + if image_url and not already: try: - self._client.upload_person_cover(person_id, image_url) + with item.measure("cover_upload"): + self._client.upload_person_cover(person_id, image_url) changed = True - except requests.RequestException as e: - if errors is not None: - errors.append( - f"cover upload failed for #{person_id} " - f"'{person_name}' ({image_url}): {e}") + except requests.RequestException as exc: + errors.append(f"cover upload failed #{person_id} " + f"'{person.get('name')}': {exc}") - return changed + return "updated" if changed else "skipped" # -------------------------------------------------------------------------- -# Module helpers: description builders +# Module helper: character description builder # -------------------------------------------------------------------------- -def _format_birthday(birthday: str) -> str: - """Converts an ISO 8601 birthday string to "D Month YYYY".""" - if not birthday: - return "" - try: - dt = datetime.date.fromisoformat(birthday.split("T")[0]) - return f"{dt.day} {dt.strftime('%B %Y')}" - except (ValueError, AttributeError): - return "" - - def _build_character_description(details: dict) -> str: """ - Builds a Kavita-safe HTML description for a MAL character. + Builds a Kavita-safe HTML description for a MAL / AniList character. - Top line: "Favorites: N" as a link to the character's MAL page. + Top line: "Favorites: N" linked to the character page (when available). Remainder: the character's `about` text converted to HTML paragraphs. """ parts: list[str] = [] url = details.get("url") or "" favorites = details.get("favorites") if url and favorites is not None: - parts.append(f'

Favorites: {favorites:,}

') - about = (details.get("about") or "").strip() - if about: - parts.append(paragraphs_to_html(about)) - return "
".join(parts) - - -def _build_person_description(details: dict) -> str: - """ - Builds a Kavita-safe HTML description for a MAL person (mangaka / staff). - - Renders a summary table (given name, family name, birthday, website, - member favorites) followed by the `about` biography as HTML paragraphs. - """ - _TD = 'style="padding-right:1.5em"' - rows: list[str] = [] - - given = (details.get("given_name") or "").strip() - family = (details.get("family_name") or "").strip() - birthday = details.get("birthday") or "" - favorites = details.get("favorites") - website = (details.get("website_url") or "").strip() - url = (details.get("url") or "").strip() - - if given: - rows.append(f"Given name{given}") - if family: - rows.append(f"Family name{family}") - bday_str = _format_birthday(birthday) - if bday_str: - rows.append(f"Birthday{bday_str}") - if website: - rows.append( - f'Website' - f'{website}' - ) - if favorites is not None: - fav_cell = (f'{favorites:,}' if url - else f"{favorites:,}") - rows.append( - f"Member Favorites{fav_cell}") - - parts: list[str] = [] - if rows: - parts.append(f'{"".join(rows)}
') + parts.append(f'

' + f'Favorites: {favorites:,}

') about = (details.get("about") or "").strip() if about: parts.append(paragraphs_to_html(about)) @@ -418,18 +231,7 @@ if __name__ == "__main__": client = KavitaClient(os.environ["KAVITA_URL"], os.environ["KAVITA_API_KEY"]) updater = KavitaPersonUpdater(client) - - mal = MALResolver() - mal_id = mal.find_mal_id("よふかしのうた") - print("MAL ID:", mal_id) - - if mal_id: - result = updater.update_for_manga(mal_id) - print("Characters:", {k: v for k, v in result["characters"].items() - if k != "errors"}) - print("Staff :", {k: v for k, v in result["staff"].items() - if k != "errors"}) - # Surface any non-fatal upload / API errors for debugging - for section in ("characters", "staff"): - for err in result[section].get("errors", []): - print(f"[{section}] {err}") + report = updater.update_all_persons(trigger="cron") + print(report) + for err in report["errors"]: + print(" ", err) diff --git a/src/MangaBakaWorksResolver.py b/src/MangaBakaWorksResolver.py index 5415071..fa3fbc5 100644 --- a/src/MangaBakaWorksResolver.py +++ b/src/MangaBakaWorksResolver.py @@ -151,9 +151,10 @@ class MangaBakaWorksResolver: Returns volume-level works for a series, filtered to those that have a usable cover image. - Non-empty results are cached per series; empty results are not, so - works added on MangaBaka later become visible without restarting - the (long-running) process. + Results are cached per series — including empty results, so a series + without works is not re-paginated for every chapter of a move run. + The periodic cover updater calls clear_cache() before each scan, so + works added on MangaBaka later are still picked up there. """ if not series_id: return [] @@ -165,8 +166,7 @@ class MangaBakaWorksResolver: # Discard works that carry no usable cover works_with_cover = [w for w in all_works if w.get("images")] - if works_with_cover: - self._cache[series_id] = works_with_cover + self._cache[series_id] = works_with_cover return works_with_cover def get_work_for_volume(self, series_id: str, volume) -> "dict | None": @@ -228,10 +228,10 @@ class MangaBakaWorksResolver: if url: result[norm] = url - # Empty results are not cached — covers added on MangaBaka later - # become visible without restarting the long-running process. - if result: - self._images_cache[series_id] = result + # Cache even an empty result so a series without volume images is not + # re-paginated for every chapter. The periodic cover updater clears + # this cache before each scan, so newly added images are still found. + self._images_cache[series_id] = result return result def get_cover_for_volume_from_images(self, series_id: str, diff --git a/src/PerfStats.py b/src/PerfStats.py new file mode 100644 index 0000000..03b016d --- /dev/null +++ b/src/PerfStats.py @@ -0,0 +1,254 @@ +""" +perf_stats.py +============= + +Generic run/step performance profiler with JSON persistence, shared by the +move pipeline and the periodic updaters (volume/cover, persons). + +Each run is a tree of *items* (e.g. series -> chapter, or one person) and +every item carries named *step* timings. A run also carries free-form +``meta`` (e.g. the trigger source ``"cron" | "web" | "ln"`` for the person +updater). + +Data model (one entry per run, newest first):: + + { + "runs": [ + { + "runId": "…", + "startedAt": 1700000000, + "finishedAt": 1700000123, + "totalSeconds": 123.4, + "meta": {"trigger": "cron"}, + "itemCount": 2, # top-level items + "leafCount": 31, # items without children + "stepTotals": {"cover": 41.2, "image_dimensions": 55.8, ...}, + "items": [ + {"label": "Call of the Night", "totalSeconds": 60.2, "ok": true, + "steps": {"fetch_metadata": 1.2}, + "items": [ + {"label": "1", "totalSeconds": 11.5, "ok": true, + "steps": {"cover": 1.8, "pack_cbz": 2.9}, "items": []} + ]} + ] + } + ] + } + +Usage:: + + perf = PerfStats(path) # path=None -> disabled + run = perf.begin_run(meta={"trigger": "cron"}) + item = run.begin_item("Call of the Night") + with item.measure("fetch_metadata"): + ... + chap = item.begin_item("1") + with chap.measure("pack_cbz"): + ... + chap.finish() + item.finish() # flushes the run to disk + run.finish() + +When ``path`` is None every recorder is a no-op and nothing is written, so +the profiler can be left permanently wired in at negligible cost. The run +is flushed after every top-level item finishes, so a long run is observable +live and survives a crash mid-run. +""" + +from __future__ import annotations + +import json +import threading +import time +import uuid +from contextlib import contextmanager +from pathlib import Path + + +# Keep the JSON small: only the most recent runs are retained on disk. +_MAX_RUNS = 30 + + +class _StepTimer: + """ + Base recorder: accumulates ``{step_name: seconds}`` and tracks its own + wall-clock lifetime. ``enabled=False`` turns every method into a no-op. + """ + + def __init__(self, enabled: bool = True): + self.steps: dict[str, float] = {} + self._enabled = enabled + self._t0 = time.monotonic() + + @contextmanager + def measure(self, name: str): + """Context manager timing a named step (accumulates on repeat use).""" + if not self._enabled: + yield + return + start = time.monotonic() + try: + yield + finally: + self.steps[name] = round( + self.steps.get(name, 0.0) + (time.monotonic() - start), 4) + + def elapsed(self) -> float: + return round(time.monotonic() - self._t0, 4) + + +class ItemRecorder(_StepTimer): + """ + One node in a run's item tree. Has its own step timings and may contain + nested child items (e.g. a series item containing chapter items). + """ + + def __init__(self, run: "RunRecorder", label: str, *, + parent: "ItemRecorder | None" = None, + enabled: bool = True): + super().__init__(enabled) + self._run = run + self._label = label + self._parent = parent + self._children: list[dict] = [] + + def begin_item(self, label: str) -> "ItemRecorder": + return ItemRecorder(self._run, label, parent=self, + enabled=self._enabled) + + def finish(self, *, ok: bool = True) -> None: + if not self._enabled: + return + node = { + "label": self._label, + "totalSeconds": self.elapsed(), + "ok": ok, + "steps": self.steps, + "items": self._children, + } + if self._parent is not None: + self._parent._children.append(node) + else: + # Top-level item: attach to the run and persist progress. + self._run._items.append(node) + self._run.flush() + + +class RunRecorder: + """Top-level recorder for one full run.""" + + def __init__(self, stats: "PerfStats", meta: "dict | None" = None, + enabled: bool = True): + self._stats = stats + self._enabled = enabled + self._meta = meta or {} + self._items: list[dict] = [] + self._started = time.time() + self._t0 = time.monotonic() + self._run_id = uuid.uuid4().hex + + def begin_item(self, label: str) -> ItemRecorder: + return ItemRecorder(self, label, parent=None, enabled=self._enabled) + + def _snapshot(self) -> dict: + step_totals: dict[str, float] = {} + leaf_count = 0 + + def walk(node: dict) -> None: + nonlocal leaf_count + for step, secs in node["steps"].items(): + step_totals[step] = round(step_totals.get(step, 0.0) + secs, 4) + if node["items"]: + for child in node["items"]: + walk(child) + else: + leaf_count += 1 + + for item in self._items: + walk(item) + + return { + "runId": self._run_id, + "startedAt": round(self._started), + "finishedAt": round(time.time()), + "totalSeconds": round(time.monotonic() - self._t0, 4), + "meta": self._meta, + "itemCount": len(self._items), + "leafCount": leaf_count, + "stepTotals": step_totals, + "items": self._items, + } + + def flush(self) -> "dict | None": + """Writes the run's current state to disk (upsert by runId).""" + if not self._enabled: + return None + run = self._snapshot() + self._stats._upsert_run(run) + return run + + def finish(self) -> "dict | None": + """Persists the final run state. Returns the run dict.""" + return self.flush() + + +class PerfStats: + """ + Profiler facade + JSON persistence. + + Parameters + ---------- + path : Destination JSON file. None disables the profiler entirely + (every recorder becomes a no-op and nothing is written). + """ + + def __init__(self, path=None): + self._path = Path(path) if path else None + self._lock = threading.Lock() + + @property + def enabled(self) -> bool: + return self._path is not None + + def begin_run(self, meta: "dict | None" = None) -> RunRecorder: + return RunRecorder(self, meta=meta, enabled=self.enabled) + + # ------------------------------------------------------------------ + # Read / write + # ------------------------------------------------------------------ + def all(self) -> dict: + """Returns the persisted runs ({"runs": [...]}); newest first.""" + if not self._path or not self._path.is_file(): + return {"runs": []} + try: + with self._path.open("r", encoding="utf-8") as f: + data = json.load(f) + except (OSError, json.JSONDecodeError): + return {"runs": []} + if not isinstance(data, dict) or not isinstance(data.get("runs"), list): + return {"runs": []} + return data + + def _upsert_run(self, run: dict) -> None: + """ + Inserts a new run (newest first) or replaces the existing entry with + the same runId — so incremental flushes during a run update one entry + rather than appending a duplicate after every item. + """ + if not self._path: + return + with self._lock: + runs = self.all()["runs"] + run_id = run.get("runId") + for i, existing in enumerate(runs): + if existing.get("runId") == run_id: + runs[i] = run + break + else: + runs.insert(0, run) # newest first + del runs[_MAX_RUNS:] # cap history + self._path.parent.mkdir(parents=True, exist_ok=True) + tmp = self._path.with_suffix(self._path.suffix + ".tmp") + with tmp.open("w", encoding="utf-8") as f: + json.dump({"runs": runs}, f, ensure_ascii=False, indent=2) + tmp.replace(self._path) diff --git a/src/PerfWebPage.py b/src/PerfWebPage.py new file mode 100644 index 0000000..619de73 --- /dev/null +++ b/src/PerfWebPage.py @@ -0,0 +1,160 @@ +""" +perf_web_page.py +================ + +Shared HTML page for browsing PerfStats output, used by both container web +UIs. ``render_perf_page(name, tabs)`` returns a standalone page that loads +``/api/perf/`` and renders each run's step totals plus the nested item +tree (series -> chapter, or one person, …) and the run trigger from meta. + +``tabs`` is a list of ``(label, name)`` pairs for cross-links between the +available perf datasets in that container. +""" + +from __future__ import annotations + + +_PERF_PAGE = """ + + + + __PERF_NAME__ performance + + + +

Performance: __PERF_NAME__ ◂ back

+
__PERF_TABS__
+
+ + + +
+ +
+ + + + +""" + + +def render_perf_page(name: str, tabs: "list[tuple[str, str]]") -> str: + """ + Returns the perf page HTML for dataset ``name``. + + tabs : list of (label, dataset_name) for the cross-link bar. + """ + tab_html = " ".join( + f'{label}' for label, n in tabs) + return (_PERF_PAGE + .replace("__PERF_TABS__", tab_html) + .replace("__PERF_NAME__", name)) diff --git a/src/TextUtils.py b/src/TextUtils.py index 2bfe7ec..2a5e50e 100644 --- a/src/TextUtils.py +++ b/src/TextUtils.py @@ -70,3 +70,30 @@ def person_name_with_id(name: str, *, if al_id: return f"{name} (AL {al_id})" return name + + +# Matches the suffix produced by person_name_with_id at the end of a name. +_TRACKER_ID_RE = re.compile(r"\s*\((MAL|AL)\s+(\d+)\)\s*$", re.IGNORECASE) + + +def parse_person_tracker_id(name: str) -> "tuple[str, int] | None": + """ + Inverse of person_name_with_id: extracts the tracker id from a + disambiguated Kavita person name. + + "Rem (MAL 118737)" -> ("mal", 118737) + "Subaru (AL 88311)" -> ("al", 88311) + "Kotoyama" -> None (no id suffix — e.g. an author/staff record) + + Returns ("mal" | "al", id) or None. + """ + if not name: + return None + m = _TRACKER_ID_RE.search(name) + if not m: + return None + source = "mal" if m.group(1).upper() == "MAL" else "al" + try: + return source, int(m.group(2)) + except ValueError: + return None diff --git a/src/ln/LightNovelOrchestrator.py b/src/ln/LightNovelOrchestrator.py index 339819e..f866490 100644 --- a/src/ln/LightNovelOrchestrator.py +++ b/src/ln/LightNovelOrchestrator.py @@ -192,14 +192,9 @@ class LightNovelOrchestrator: except Exception as exc: return {"ok": False, "error": f"series update failed: {exc}"} - # Persons - try: - person_report = self._person_updater.update_for_manga( - built.get("malId"), - al_manga_id=built.get("anilistId"), - ) - except Exception as exc: - person_report = {"error": str(exc)} + # Person sync no longer runs per series — it has its own global, + # id-based updater (sync_persons / KavitaPersonUpdater.update_all_persons) + # on a separate cron schedule. # Relationships + collection try: @@ -221,10 +216,20 @@ class LightNovelOrchestrator: "title": cached_title, "mangabakaId": built.get("mangabakaId"), "series": series_report, - "persons": person_report, "relationships": relation_report, } + # ------------------------------------------------------------------ + # Person sync (global, id-based — independent of series updates) + # ------------------------------------------------------------------ + def sync_persons(self, *, trigger: str = "ln", perf=None) -> dict: + """ + Runs the global, id-based person updater over every Kavita person. + Covers both manga and light-novel libraries in one pass. + """ + return self._person_updater.update_all_persons( + trigger=trigger, perf=perf) + def update_all(self, library_ids: "list[int] | None") -> dict: """Updates every cached series in the given libraries.""" if library_ids is None: diff --git a/src/ln/MatchesWebApp.py b/src/ln/MatchesWebApp.py index faa7caa..b57bf86 100644 --- a/src/ln/MatchesWebApp.py +++ b/src/ln/MatchesWebApp.py @@ -37,6 +37,10 @@ from flask import Flask, jsonify, request, Response from MatchesCache import MatchesCache from LightNovelMetadataBuilder import pick_thumbnail_url +from PerfWebPage import render_perf_page + +# Only the person dataset exists in the LN container. +_PERF_TABS = [("persons", "person")] def _int_list(values) -> list[int]: @@ -97,7 +101,9 @@ _INDEX_HTML = r""" + + Performance ▸ @@ -497,12 +503,25 @@ async function startUpdateAll() { } } +async function startSyncPersons() { + if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return; + setStatus("Person sync started"); + try { + const r = await fetch("/api/persons/sync", { method: "POST" }); + if (!r.ok) throw new Error(await r.text()); + startPolling(); + } catch (err) { + setStatus("Person sync failed: " + err.message); + } +} + document.getElementById("filter").addEventListener("input", applyFilter); document.getElementById("libraries").addEventListener("change", applyFilter); document.getElementById("reload").addEventListener("click", load); document.getElementById("batchSave").addEventListener("click", batchSave); document.getElementById("build").addEventListener("click", startBuild); document.getElementById("updateAll").addEventListener("click", startUpdateAll); +document.getElementById("syncPersons").addEventListener("click", startSyncPersons); for (const th of document.querySelectorAll("th.sortable")) { th.addEventListener("click", () => { const col = th.dataset.col; @@ -581,11 +600,13 @@ class MatchesWebApp: def __init__(self, cache: MatchesCache, *, orchestrator=None, default_library_ids: "list[int] | None" = None, + person_perf=None, host: str = "0.0.0.0", port: int = 8080): self._cache = cache self._orchestrator = orchestrator self._defaults = list(default_library_ids or []) + self._person_perf = person_perf self._host = host self._port = port self._job = _JobState() @@ -757,8 +778,38 @@ class MatchesWebApp: return Response("a job is already running", status=409) return jsonify({"started": label}) + @app.post("/api/persons/sync") + def api_persons_sync(): + if self._orchestrator is None: + return Response("no orchestrator configured", status=503) + + def task(job: _JobState): + report = self._orchestrator.sync_persons( + trigger="ln", perf=self._person_perf) + job.append(f"updated={report['updated']} " + f"skipped={report['skipped']} " + f"not_found={report['not_found']} " + f"conflicts={report['conflicts']}") + for err in report.get("errors", []): + job.append(f" {err}") + + if not self._job.start("person sync", task): + return Response("a job is already running", status=409) + return jsonify({"started": "person sync"}) + @app.get("/api/status") def api_status(): snap = self._job.snapshot() snap["defaults"] = self._defaults return jsonify(snap) + + @app.get("/perf") + @app.get("/perf/") + def perf_page(name: str = "person") -> Response: + return Response(render_perf_page(name, _PERF_TABS), + mimetype="text/html; charset=utf-8") + + @app.get("/api/perf/") + def api_perf(name: str): + stats = self._person_perf if name == "person" else None + return jsonify(stats.all() if stats is not None else {"runs": []}) diff --git a/src/manga/ComicInfoBuilder.py b/src/manga/ComicInfoBuilder.py index d337f36..50d09a0 100644 --- a/src/manga/ComicInfoBuilder.py +++ b/src/manga/ComicInfoBuilder.py @@ -40,6 +40,7 @@ from __future__ import annotations import re import sys import xml.etree.ElementTree as ET +from contextlib import contextmanager from pathlib import Path import requests @@ -65,6 +66,16 @@ except ImportError: _HAS_PIL = False +@contextmanager +def _no_measure(): + """No-op stand-in for a perf recorder's measure() context manager.""" + yield + + +# Sentinel marking a per-chapter memo slot as "not computed yet". +_UNSET = object() + + # -------------------------------------------------------------------------- # Constants # -------------------------------------------------------------------------- @@ -218,10 +229,22 @@ class ComicInfoBuilder: self._matches_cache = matches_cache self._cover_cache = cover_cache or _default_cover_cache() + # Optional performance recorder (duck-typed: any object with a + # .measure(name) context manager). The mover sets this per chapter; + # when None, _measure() is a no-op so the builder stays decoupled + # from PerfStats and works standalone (e.g. the cover updater). + self.perf = None + self._metadata: "dict | None" = None self._pages: list[dict] = [] self._cover_path: "Path | None" = None self._suwayomi_data: dict = {} + # Per-chapter memo for _determine_volume (resolved up to 3x/chapter + # otherwise: cover download, explicit volume step, XML build). + self._volume_memo = _UNSET + # Per-series cache for full series fetches by id (parent series for + # SeriesGroup, merged-series redirects) — reused across all chapters. + self._series_by_id_cache: dict[str, dict] = {} # ----- Repr ----------------------------------------------------------- def __repr__(self) -> str: @@ -261,6 +284,13 @@ class ComicInfoBuilder: self._pages = [] self._cover_path = None self._suwayomi_data = {} + self._volume_memo = _UNSET + + def _measure(self, name: str): + """Times a named step on the attached recorder; no-op when unset.""" + if self.perf is not None: + return self.perf.measure(name) + return _no_measure() # ====================================================================== # Public XML functions @@ -305,11 +335,13 @@ class ComicInfoBuilder: if not folder.is_dir(): raise NotADirectoryError(f"Folder not found: {folder}") - self._suwayomi_data = self._read_existing_comicinfo(folder) + with self._measure("read_comicinfo"): + self._suwayomi_data = self._read_existing_comicinfo(folder) self._cover_path = None if download_cover: - self._cover_path = self._download_cover(folder, cover_filename) + with self._measure("cover"): + self._cover_path = self._download_cover(folder, cover_filename) cover_resolved = self._cover_path.resolve() if self._cover_path else None story_images: list[Path] = [] @@ -329,20 +361,23 @@ class ComicInfoBuilder: ordered.extend((img, "Story") for img in story_images) self._pages = [] - for index, (img_path, page_type) in enumerate(ordered): - width, height = self._image_dimensions(img_path) - try: - size = img_path.stat().st_size - except OSError: - size = None - self._pages.append({ - "image": index, - "type": page_type, - "width": width, - "height": height, - "size": size, - "double": bool(width and height and width > height), - }) + # Probing every page for its pixel dimensions reads each file — on a + # network share this is often the dominant per-chapter cost. + with self._measure("image_dimensions"): + for index, (img_path, page_type) in enumerate(ordered): + width, height = self._image_dimensions(img_path) + try: + size = img_path.stat().st_size + except OSError: + size = None + self._pages.append({ + "image": index, + "type": page_type, + "width": width, + "height": height, + "size": size, + "double": bool(width and height and width > height), + }) return { "page_count": len(self._pages), @@ -413,12 +448,20 @@ class ComicInfoBuilder: return series def _fetch_series_by_id(self, series_id) -> dict: + # Cached per builder (i.e. per series): SeriesGroup resolution calls + # this for the parent on every chapter — without the cache that is + # one MangaBaka request per chapter for the same parent id. + key = str(series_id) + cached = self._series_by_id_cache.get(key) + if cached is not None: + return cached url = f"{self.api_base_url}/series/{series_id}" resp = self._session.get(url, timeout=self.request_timeout) resp.raise_for_status() data = resp.json().get("data") if not data: raise RuntimeError(f"Series with ID {series_id} not found.") + self._series_by_id_cache[key] = data return data # ====================================================================== @@ -554,6 +597,18 @@ class ComicInfoBuilder: # Volume determination # ====================================================================== def _determine_volume(self) -> "str | None": + """ + Resolves the volume for the current chapter, memoized per chapter. + + The result is reused across the three call sites per chapter (cover + download, explicit volume step, XML build); the memo is cleared + whenever the chapter or manga title changes (see _clear_results). + """ + if self._volume_memo is _UNSET: + self._volume_memo = self._resolve_volume() + return self._volume_memo + + def _resolve_volume(self) -> "str | None": """ Resolves the volume for the current chapter via MangaDex. Falls back to estimation when the chapter is absent from MangaDex. diff --git a/src/manga/KavitaVolumeCoverUpdater.py b/src/manga/KavitaVolumeCoverUpdater.py index aba084e..6273724 100644 --- a/src/manga/KavitaVolumeCoverUpdater.py +++ b/src/manga/KavitaVolumeCoverUpdater.py @@ -45,7 +45,6 @@ from __future__ import annotations import io import sys -import threading import xml.etree.ElementTree as ET import zipfile from datetime import datetime @@ -67,7 +66,7 @@ from MatchesCache import MatchesCache from SuwayomiMover import (_load_chapter_index, _save_chapter_index, _sanitize_dirname, _normalise_volume_value) from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit -from CronSchedule import CronSchedule +from PerfStats import PerfStats from CoverCache import CoverCache, _IMAGE_EXTS try: @@ -136,12 +135,12 @@ class KavitaVolumeCoverUpdater: request_timeout : HTTP timeout in seconds. log_path : File that receives one line per updated chapter. Default: /volume_updater.log - schedule : Cron expression (5 fields) defining when scans run, - e.g. "0 19 * * 1,4" = 19:00 every Monday and - Thursday. Evaluated in local time — set the TZ env - var inside Docker. Default: "0 19 * * 1,4". cover_cache_dir : Directory for the persistent cover cache. None -> temporary cache, deleted at process exit. + perf_stats : Optional PerfStats instance for per-step timing. + + Scheduling lives outside this class (see CronRunner); call update_all() + on whatever cadence you like. """ def __init__(self, @@ -152,8 +151,8 @@ class KavitaVolumeCoverUpdater: request_timeout: int = 30, api_base_url: str = "https://api.mangabaka.dev/v1", log_path=None, - schedule: str = "0 19 * * 1,4", - cover_cache_dir=None): + cover_cache_dir=None, + perf_stats: "PerfStats | None" = None): self._dst = Path(kavita_path) self._matches_cache = matches_cache self._language = language @@ -161,7 +160,7 @@ class KavitaVolumeCoverUpdater: self._api_base_url = api_base_url.rstrip("/") self._log_path = (Path(log_path) if log_path else self._dst / "volume_updater.log") - self._cron = CronSchedule(schedule) + self._perf = perf_stats or PerfStats(None) session = requests.Session() session.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0") @@ -178,51 +177,6 @@ class KavitaVolumeCoverUpdater: self._cover_cache = CoverCache( cover_cache_dir, session=session, request_timeout=request_timeout) - self._stop = threading.Event() - self._thread: "threading.Thread | None" = None - - # ------------------------------------------------------------------ - # Cron API (mirrors SuwayomiFolderWatcher) - # ------------------------------------------------------------------ - def start(self) -> None: - """Starts the periodic scan thread. Non-blocking.""" - if self._thread is not None and self._thread.is_alive(): - return - self._stop.clear() - self._thread = threading.Thread( - target=self._loop, name="KavitaVolumeCoverUpdater", daemon=True) - self._thread.start() - print(f"[{_now()}] [updater] scanning {self._dst} " - f"on cron '{self._cron.expression}'", flush=True) - - def stop(self) -> None: - """Stops the scan thread (current scan finishes its series first).""" - self._stop.set() - if self._thread is not None: - self._thread.join(timeout=10) - - def wait(self) -> None: - """Blocks the calling thread until stop() is invoked.""" - self._stop.wait() - - def _loop(self) -> None: - while not self._stop.is_set(): - next_run = self._cron.next_after(datetime.now()) - wait = max(0.0, (next_run - datetime.now()).total_seconds()) - print(f"[{_now()}] [updater] next scheduled scan: " - f"{next_run.isoformat(timespec='minutes')}", flush=True) - if self._stop.wait(wait): - break - - try: - summary = self.update_all() - print(f"[{_now()}] [updater] scan done: " - f"{summary['series_updated']} series / " - f"{summary['chapters_updated']} chapters updated", - flush=True) - except Exception as exc: - print(f"[{_now()}] [updater] scan ERROR: {exc}", flush=True) - # ------------------------------------------------------------------ # Public scan API # ------------------------------------------------------------------ @@ -243,23 +197,25 @@ class KavitaVolumeCoverUpdater: self._vol_resolver.clear_cache() self._works_resolver.clear_cache() - for series_dir in sorted(self._dst.iterdir()): - if self._stop.is_set(): - break - if not series_dir.is_dir(): - continue - summary["series_scanned"] += 1 - try: - updated = self.update_series(series_dir) - except Exception as exc: - print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True) - continue - if updated: - summary["series_updated"] += 1 - summary["chapters_updated"] += updated + run = self._perf.begin_run() + try: + for series_dir in sorted(self._dst.iterdir()): + if not series_dir.is_dir(): + continue + summary["series_scanned"] += 1 + try: + updated = self.update_series(series_dir, run) + except Exception as exc: + print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True) + continue + if updated: + summary["series_updated"] += 1 + summary["chapters_updated"] += updated + finally: + run.finish() return summary - def update_series(self, series_dir: Path) -> int: + def update_series(self, series_dir: Path, run=None) -> int: """ Updates one series folder. Returns the number of updated chapters. @@ -300,20 +256,24 @@ class KavitaVolumeCoverUpdater: md = builder.fetch_metadata() series_id = str(md.get("id") or "") + series_rec = (run or self._perf.begin_run()).begin_item(series_dir.name) + # Resolve volumes for all null-volume chapters first (API only). updates: dict[str, dict] = {} # num -> {"volume": str, "cover": tuple|None} - for num in sorted(missing, key=_chapter_sort_value): - builder.chapter = num - try: - volume = builder._determine_volume() - except Exception: - volume = None - if not volume: - continue - updates[num] = {"volume": volume, - "cover": self._fetch_cover(series_id, volume)} + with series_rec.measure("resolve_volumes"): + for num in sorted(missing, key=_chapter_sort_value): + builder.chapter = num + try: + volume = builder._determine_volume() + except Exception: + volume = None + if not volume: + continue + updates[num] = {"volume": volume, + "cover": self._fetch_cover(series_id, volume)} if not updates: + series_rec.finish(ok=True) return 0 first = min(chapters, key=_chapter_sort_value) @@ -328,10 +288,13 @@ class KavitaVolumeCoverUpdater: continue # The first chapter gets a full metadata rebuild (Kavita reads # series metadata from it); other chapters only a volume edit. - ok, cover_swapped = self._apply_update( - cbz, builder, num, - volume=up["volume"], cover=up["cover"], - full_rebuild=(num == first)) + chap_rec = series_rec.begin_item(num) + with chap_rec.measure("archive_rewrite"): + ok, cover_swapped = self._apply_update( + cbz, builder, num, + volume=up["volume"], cover=up["cover"], + full_rebuild=(num == first)) + chap_rec.finish(ok=ok) if not ok: continue entry["volume"] = _normalise_volume_value(up["volume"]) @@ -346,15 +309,19 @@ class KavitaVolumeCoverUpdater: first_entry = chapters.get(first) or {} cbz = series_dir / (first_entry.get("archiveName") or "") if first_entry.get("archiveName") and cbz.is_file(): - ok, _ = self._apply_update( - cbz, builder, first, - volume=None, cover=None, full_rebuild=True) + chap_rec = series_rec.begin_item(f"{first} (refresh)") + with chap_rec.measure("archive_rewrite"): + ok, _ = self._apply_update( + cbz, builder, first, + volume=None, cover=None, full_rebuild=True) + chap_rec.finish(ok=ok) if ok: self._log(f"{series_dir.name} | chapter {first} | " f"first-chapter metadata refreshed | {cbz.name}") if updated: _save_chapter_index(series_dir, index) + series_rec.finish(ok=True) return updated # ------------------------------------------------------------------ @@ -545,10 +512,7 @@ if __name__ == "__main__": matches_cache=MatchesCache(MATCHES_PATH), ) - # One-shot scan (no cron thread): + # One-shot scan. Scheduling is handled externally via CronRunner + # (see main_manga.py). summary = updater.update_all() print(f"\n[updater] {summary}") - - # Or run on the cron schedule (default: 19:00 every Mon + Thu): - # updater.start() - # updater.wait() diff --git a/src/manga/MangadexVolumeResolver.py b/src/manga/MangadexVolumeResolver.py index 1c7c544..b121233 100644 --- a/src/manga/MangadexVolumeResolver.py +++ b/src/manga/MangadexVolumeResolver.py @@ -93,6 +93,9 @@ class MangaDexVolumeResolver: self._cache: dict[str, dict] = {} # Cache: manga_id -> {relation_type: [title, ...]} self._relations_cache: dict[str, dict] = {} + # Cache: title_lower -> manga_id (or None) — avoids repeating the + # MangaDex search for every chapter of the same series. + self._id_cache: dict[str, "str | None"] = {} # ---------------------------------------------------------------------- # Locate the manga ID @@ -105,15 +108,25 @@ class MangaDexVolumeResolver: if not title or not title.strip(): return None - resp = self._session.get( - f"{self.base_url}/manga", - params={"title": title, "limit": 5, - "contentRating[]": ["safe", "suggestive", - "erotica", "pornographic"]}, - timeout=self.request_timeout) - resp.raise_for_status() - results = resp.json().get("data") or [] + key = title.strip().lower() + if key in self._id_cache: + return self._id_cache[key] + + try: + resp = self._session.get( + f"{self.base_url}/manga", + params={"title": title, "limit": 5, + "contentRating[]": ["safe", "suggestive", + "erotica", "pornographic"]}, + timeout=self.request_timeout) + resp.raise_for_status() + results = resp.json().get("data") or [] + except requests.RequestException: + # Don't cache transient failures — allow a retry next time. + return None + if not results: + self._id_cache[key] = None return None def score(entry) -> float: @@ -130,7 +143,9 @@ class MangaDexVolumeResolver: return best results.sort(key=score, reverse=True) - return results[0].get("id") + manga_id = results[0].get("id") + self._id_cache[key] = manga_id + return manga_id # ---------------------------------------------------------------------- # Main function: retrieve and return volume / chapter data diff --git a/src/manga/MatchesWebApp.py b/src/manga/MatchesWebApp.py index b4734c3..748f8c2 100644 --- a/src/manga/MatchesWebApp.py +++ b/src/manga/MatchesWebApp.py @@ -30,6 +30,10 @@ from flask import Flask, jsonify, request, Response from MatchesCache import MatchesCache from ComicInfoBuilder import _pick_thumbnail_url +from PerfWebPage import render_perf_page + +# Cross-link tabs shown on every perf page in the manga container. +_PERF_TABS = [("move", "move"), ("volume/cover", "volume"), ("persons", "person")] _INDEX_HTML = """ @@ -71,6 +75,8 @@ _INDEX_HTML = """ + + Performance ▸ @@ -341,6 +347,23 @@ document.getElementById("move").addEventListener("click", async () => { btn.disabled = false; } }); +document.getElementById("syncPersons").addEventListener("click", async () => { + if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return; + const btn = document.getElementById("syncPersons"); + btn.disabled = true; + setStatus("Syncing persons… (running on the server)"); + try { + const r = await fetch("/api/persons/sync", { method: "POST" }); + if (!r.ok) throw new Error(await r.text()); + const d = await r.json(); + setStatus("Persons: " + d.updated + " updated, " + d.skipped + " skipped, " + + d.not_found + " not found, " + d.conflicts + " conflicts"); + } catch (err) { + setStatus("Person sync failed: " + err.message); + } finally { + btn.disabled = false; + } +}); for (const th of document.querySelectorAll("th.sortable")) { th.addEventListener("click", () => { const col = th.dataset.col; @@ -357,6 +380,8 @@ load(); """ + + class MatchesWebApp: """ Flask app exposing the MatchesCache. `mover` is required when you want @@ -367,14 +392,22 @@ class MatchesWebApp: def __init__(self, cache: MatchesCache, *, mover=None, + person_updater=None, + person_trigger: str = "web", + perf_stats=None, host: str = "0.0.0.0", port: int = 8080): self._cache = cache self._mover = mover + self._person_updater = person_updater + self._person_trigger = person_trigger + # perf_stats: dict {name -> PerfStats}, e.g. {"move", "volume", "person"}. + self._perf = perf_stats or {} self._host = host self._port = port self._build_lock = threading.Lock() self._move_lock = threading.Lock() + self._person_lock = threading.Lock() self._app = Flask(__name__) self._thread: "threading.Thread | None" = None self._register_routes() @@ -498,3 +531,31 @@ class MatchesWebApp: finally: self._move_lock.release() return jsonify({"results": results}) + + @app.post("/api/persons/sync") + def api_persons_sync(): + if self._person_updater is None: + return Response("no person updater configured", status=503) + if not self._person_lock.acquire(blocking=False): + return Response("person sync already running", status=409) + try: + report = self._person_updater.update_all_persons( + trigger=self._person_trigger, + perf=self._perf.get("person")) + except Exception as exc: + return Response(f"person sync failed: {exc}", status=500) + finally: + self._person_lock.release() + return jsonify(report) + + # Perf pages: /perf (move) + /perf/ for the updaters. + @app.get("/perf") + @app.get("/perf/") + def perf_page(name: str = "move") -> Response: + return Response(render_perf_page(name, _PERF_TABS), + mimetype="text/html; charset=utf-8") + + @app.get("/api/perf/") + def api_perf(name: str): + stats = self._perf.get(name) + return jsonify(stats.all() if stats is not None else {"runs": []}) diff --git a/src/manga/SuwayomiMover.py b/src/manga/SuwayomiMover.py index 51ae101..5cd31ad 100644 --- a/src/manga/SuwayomiMover.py +++ b/src/manga/SuwayomiMover.py @@ -64,11 +64,10 @@ from MangadexVolumeResolver import MangaDexVolumeResolver from MangaBakaWorksResolver import MangaBakaWorksResolver from MALResolver import MALResolver from AniListResolver import AniListResolver -from KavitaClient import KavitaClient -from KavitaPersonUpdater import KavitaPersonUpdater from MatchesCache import MatchesCache from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit from CoverCache import CoverCache, _IMAGE_EXTS +from PerfStats import PerfStats _CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)') @@ -305,28 +304,30 @@ class SuwayomiMover: Expected layout: ///<Chapter N>/ kavita_path : Root of the Kavita library. Series sub-directories are created automatically. - kavita_base_url : Kavita server URL — required only for person sync, - e.g. "http://192.168.2.2:5000". - kavita_api_key : Kavita API key — required only for person sync. language : ComicInfo LanguageISO and SeriesSort language ("en"). request_timeout : HTTP timeout in seconds for all API / image requests. delete_source : Remove the source chapter folder after successful pack. cover_cache_dir : Directory for the persistent cover cache. None -> temporary cache, deleted at process exit. + perf_stats : Optional PerfStats instance for per-step timing. None + (default) disables profiling. + + Note: Kavita person sync is no longer done here — it runs as a separate, + global, id-based updater on its own cron schedule (KavitaPersonUpdater). + The mover only touches MangaBaka / MangaDex / MAL / AniList. """ def __init__(self, suwayomi_path, kavita_path, *, - kavita_base_url: "str | None" = None, - kavita_api_key: "str | None" = None, language: str = "en", request_timeout: int = 30, delete_source: bool = True, matches_cache: "MatchesCache | None" = None, api_base_url: str = "https://api.mangabaka.dev/v1", - cover_cache_dir=None): + cover_cache_dir=None, + perf_stats: "PerfStats | None" = None): self._src = Path(suwayomi_path) self._dst = Path(kavita_path) self._language = language @@ -334,6 +335,7 @@ class SuwayomiMover: self._delete_source = delete_source self._matches_cache = matches_cache self._api_base_url = api_base_url.rstrip("/") + self._perf = perf_stats or PerfStats(None) # Shared HTTP session and resolvers — reused across all series/chapters # to maximise cache hits and minimise API round-trips. @@ -352,16 +354,6 @@ class SuwayomiMover: self._cover_cache = CoverCache( cover_cache_dir, session=session, request_timeout=request_timeout) - self._person_updater: "KavitaPersonUpdater | None" = None - if kavita_base_url and kavita_api_key: - kavita_client = KavitaClient( - kavita_base_url, kavita_api_key, - request_timeout=request_timeout) - self._person_updater = KavitaPersonUpdater( - kavita_client, - mal_resolver=self._mal, - al_resolver=self._al) - # ------------------------------------------------------------------ # Public API # ------------------------------------------------------------------ @@ -376,15 +368,19 @@ class SuwayomiMover: dict from _process_series_dir. """ results: dict = {} - for source_dir in sorted(self._src.iterdir()): - if not source_dir.is_dir(): - continue - for manga_dir in sorted(source_dir.iterdir()): - if not manga_dir.is_dir(): + run = self._perf.begin_run() + try: + for source_dir in sorted(self._src.iterdir()): + if not source_dir.is_dir(): continue - title = manga_dir.name - print(f"[SuwayomiMover] {title}") - results[title] = self._process_series_dir(manga_dir) + for manga_dir in sorted(source_dir.iterdir()): + if not manga_dir.is_dir(): + continue + title = manga_dir.name + print(f"[SuwayomiMover] {title}") + results[title] = self._process_series_dir(manga_dir, run) + finally: + run.finish() return results def process_series(self, manga_title: str) -> dict: @@ -400,7 +396,11 @@ class SuwayomiMover: continue candidate = source_dir / manga_title if candidate.is_dir(): - return self._process_series_dir(candidate) + run = self._perf.begin_run() + try: + return self._process_series_dir(candidate, run) + finally: + run.finish() raise FileNotFoundError( f"No Suwayomi directory found for '{manga_title}' under {self._src}") @@ -487,8 +487,9 @@ class SuwayomiMover: # ------------------------------------------------------------------ # Internal: series # ------------------------------------------------------------------ - def _process_series_dir(self, manga_dir: Path) -> dict: + def _process_series_dir(self, manga_dir: Path, run=None) -> dict: manga_title = manga_dir.name + series_rec = (run or self._perf.begin_run()).begin_item(manga_title) chapter_dirs = sorted( (d for d in manga_dir.iterdir() if d.is_dir()), @@ -539,7 +540,8 @@ class SuwayomiMover: md: "dict | None" = None mangabaka_title = manga_title try: - md = builder.fetch_metadata() + with series_rec.measure("fetch_metadata"): + md = builder.fetch_metadata() mangabaka_title = md.get("title") or manga_title except Exception as exc: print(f" [warn] metadata fetch failed: {exc}") @@ -571,7 +573,7 @@ class SuwayomiMover: chapter_results: list[dict] = [] for chapter_dir, _fields, chapter_num in pending: result = self._process_chapter( - builder, chapter_num, chapter_dir, dest_series) + builder, chapter_num, chapter_dir, dest_series, series_rec) chapter_results.append(result) status = "ok" if result["ok"] else f"ERROR: {result.get('error')}" print(f" Chapter {chapter_num}: {status}") @@ -582,25 +584,11 @@ class SuwayomiMover: } _save_chapter_index(dest_series, chapter_index) - # Sync Kavita persons once per series. - # Both MAL and AniList IDs come from MangaBaka's source map; - # AniList is used as fallback when MAL returns no characters/staff. - person_result: "dict | None" = None - if self._person_updater: - mal_id = ((ComicInfoBuilder._mal_id_from_source(md) if md else None) - or self._mal.find_mal_id(builder_title)) - al_id = ComicInfoBuilder._al_id_from_source(md) if md else None - if mal_id or al_id: - try: - person_result = self._person_updater.update_for_manga( - mal_id, al_manga_id=al_id) - print(f" Persons: chars={person_result['characters'].get('updated')} " - f"staff={person_result['staff'].get('updated')}") - except Exception as exc: - person_result = {"error": str(exc)} - print(f" Persons: ERROR {exc}") - - return {"chapters": chapter_results, "persons": person_result} + # Person sync no longer runs here — it has its own global, + # id-based updater on a separate cron schedule (see + # KavitaPersonUpdater.update_all_persons). + series_rec.finish() + return {"chapters": chapter_results} # ------------------------------------------------------------------ # Internal: chapter @@ -609,7 +597,8 @@ class SuwayomiMover: builder: ComicInfoBuilder, chapter_num: str, chapter_dir: Path, - dest_series: Path) -> dict: + dest_series: Path, + series_rec=None) -> dict: """ Generates ComicInfo.xml for one chapter, packs it to CBZ, and optionally removes the source folder. @@ -619,6 +608,11 @@ class SuwayomiMover: <Pages> element correctly points to the front cover). """ cbz_path = dest_series / f"{chapter_dir.name}.cbz" + chap_rec = (series_rec or self._perf.begin_run().begin_item("") + ).begin_item(chapter_num) + # add_pages_from_folder records its own sub-steps on this recorder. + builder.perf = chap_rec + ok = False try: builder.chapter = chapter_num builder.add_pages_from_folder(chapter_dir, cover_filename="000") @@ -626,32 +620,35 @@ class SuwayomiMover: # by add_pages_from_folder, so it's effectively free. Used by # the chapter index in the Kavita destination folder. try: - volume = builder._determine_volume() + with chap_rec.measure("volume"): + volume = builder._determine_volume() except Exception: volume = None - builder.save_xml(chapter_dir) - _pack_to_cbz(chapter_dir, cbz_path) + with chap_rec.measure("save_xml"): + builder.save_xml(chapter_dir) + with chap_rec.measure("pack_cbz"): + _pack_to_cbz(chapter_dir, cbz_path) if self._delete_source: - shutil.rmtree(chapter_dir) + with chap_rec.measure("delete_source"): + shutil.rmtree(chapter_dir) + ok = True return {"chapter": chapter_num, "cbz": str(cbz_path), "ok": True, "volume": volume} except Exception as exc: return {"chapter": chapter_num, "cbz": str(cbz_path), "ok": False, "error": str(exc)} + finally: + builder.perf = None + chap_rec.finish(ok=ok) # -------------------------------------------------------------------------- # Usage example # -------------------------------------------------------------------------- if __name__ == "__main__": - import os - # Local (no-Docker) smoke test. Adjust paths to your environment. - # Set the KAVITA_API_KEY env var — never commit API keys to the repo. SUWAYOMI_PATH = r"M:\config\downloads\mangas" KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test" - KAVITA_URL = "http://192.168.2.2:5000" - KAVITA_KEY = os.environ.get("KAVITA_API_KEY", "") # matches.json lives next to this script during local testing. MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json" @@ -660,8 +657,6 @@ if __name__ == "__main__": mover = SuwayomiMover( SUWAYOMI_PATH, KAVITA_PATH, - kavita_base_url=KAVITA_URL, - kavita_api_key=KAVITA_KEY, delete_source=False, matches_cache=matches_cache, )