Performance and Person Updater Improvements #7

Merged
johannesbot merged 4 commits from time-measurement into master 2026-06-16 18:46:56 +02:00
16 changed files with 984 additions and 920 deletions
Showing only changes of commit 6ca1a245a3 - Show all commits
+6 -1
View File
@@ -11,13 +11,18 @@ HOST_MANGA_CONFIG_PATH=/path/to/manga-config
MANGA_WEB_PORT=8080 MANGA_WEB_PORT=8080
SETTLE_SECONDS=600 SETTLE_SECONDS=600
DELETE_SOURCE=true DELETE_SOURCE=true
# Periodic updaters (volume/cover + global person sync) run together on
# this cron. Sundays 10:00. Person updater also covers LN libraries.
UPDATER_ENABLED=true UPDATER_ENABLED=true
UPDATER_SCHEDULE=0 19 * * 1,4 UPDATER_SCHEDULE=0 10 * * 0
COVER_CACHE_PATH=/config/covers COVER_CACHE_PATH=/config/covers
PERF_PATH=/config/perf_stats.json PERF_PATH=/config/perf_stats.json
VOLUME_PERF_PATH=/config/volume_perf_stats.json
PERSON_PERF_PATH=/config/person_perf_stats.json
# Light-novel container (kavita-lightnovel-metadata-fetcher) # Light-novel container (kavita-lightnovel-metadata-fetcher)
HOST_LN_CONFIG_PATH=/path/to/ln-config HOST_LN_CONFIG_PATH=/path/to/ln-config
LN_WEB_PORT=8081 LN_WEB_PORT=8081
LN_LIBRARY_IDS=3,5 LN_LIBRARY_IDS=3,5
LN_UPDATER_ENABLED=true
+11 -6
View File
@@ -13,17 +13,18 @@ services:
SETTLE_SECONDS: "${SETTLE_SECONDS:-600}" SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
DELETE_SOURCE: "${DELETE_SOURCE:-true}" DELETE_SOURCE: "${DELETE_SOURCE:-true}"
MATCH_PATH: "/config/matches.json" MATCH_PATH: "/config/matches.json"
# Volume/cover back-fill updater # Periodic updaters (volume/cover back-fill + global person sync) run
# together on this cron. "0 10 * * 0" = Sundays 10:00 (local time, see TZ)
UPDATER_ENABLED: "${UPDATER_ENABLED:-true}" UPDATER_ENABLED: "${UPDATER_ENABLED:-true}"
# Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
# (local time, see TZ)
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
UPDATER_LOG: "/config/volume_updater.log" UPDATER_LOG: "/config/volume_updater.log"
# Persistent cover cache (empty = temp dir, deleted on container stop) # Persistent cover cache (empty = temp dir, deleted on container stop)
COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}" COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}"
# Per-step move timing stats (viewable at /perf); empty disables it # Per-step timing stats (viewable at /perf, /perf/volume, /perf/person)
PERF_PATH: "${PERF_PATH:-/config/perf_stats.json}" PERF_PATH: "${PERF_PATH:-/config/perf_stats.json}"
# Timezone for the cron schedule — without this 19:00 means 19:00 UTC VOLUME_PERF_PATH: "${VOLUME_PERF_PATH:-/config/volume_perf_stats.json}"
PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
# Timezone for the cron schedule — without this 10:00 means 10:00 UTC
TZ: "${TZ:-Europe/Berlin}" TZ: "${TZ:-Europe/Berlin}"
ports: ports:
- "${MANGA_WEB_PORT:-8080}:8080" - "${MANGA_WEB_PORT:-8080}:8080"
@@ -45,6 +46,10 @@ services:
LIBRARY_IDS: "${LN_LIBRARY_IDS}" LIBRARY_IDS: "${LN_LIBRARY_IDS}"
LANGUAGE: "${LANGUAGE:-en}" LANGUAGE: "${LANGUAGE:-en}"
MATCH_PATH: "/config/matches.json" MATCH_PATH: "/config/matches.json"
# Global person sync on cron (same default cadence as the manga side)
UPDATER_ENABLED: "${LN_UPDATER_ENABLED:-true}"
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
TZ: "${TZ:-Europe/Berlin}" TZ: "${TZ:-Europe/Berlin}"
ports: ports:
- "${LN_WEB_PORT:-8081}:8080" - "${LN_WEB_PORT:-8081}:8080"
+33
View File
@@ -27,6 +27,12 @@ Environment variables
MATCH_PATH default /config/matches.json MATCH_PATH default /config/matches.json
WEB_PORT default 8080 WEB_PORT default 8080
WEB_HOST default 0.0.0.0 WEB_HOST default 0.0.0.0
UPDATER_ENABLED default true (run the person updater on cron)
UPDATER_SCHEDULE cron expression for the person updater,
default "0 10 * * 0" = Sundays 10:00
(local time — set TZ inside the container!)
PERSON_PERF_PATH JSON file for person updater timing.
Default /config/person_perf_stats.json
""" """
from __future__ import annotations from __future__ import annotations
@@ -51,6 +57,15 @@ sys.path.insert(0, str(_BASE / "src" / "ln"))
from MatchesCache import MatchesCache # noqa: E402 from MatchesCache import MatchesCache # noqa: E402
from LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402 from LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402
from MatchesWebApp import MatchesWebApp # noqa: E402 from MatchesWebApp import MatchesWebApp # noqa: E402
from PerfStats import PerfStats # noqa: E402
from CronRunner import CronRunner # noqa: E402
def _env_bool(name: str, default: bool) -> bool:
raw = os.environ.get(name)
if raw is None:
return default
return raw.strip().lower() in ("1", "true", "yes", "y", "on")
def _env_str(name: str, default: "str | None" = None, def _env_str(name: str, default: "str | None" = None,
@@ -98,6 +113,10 @@ def main() -> int:
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0" web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
web_port = _env_int("WEB_PORT", 8080) web_port = _env_int("WEB_PORT", 8080)
library_ids = _env_int_list("LIBRARY_IDS") library_ids = _env_int_list("LIBRARY_IDS")
updater_enabled = _env_bool("UPDATER_ENABLED", True)
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
person_perf_path = _env_str("PERSON_PERF_PATH",
"/config/person_perf_stats.json") or None
print(f"[main] kavita url = {kavita_url}", flush=True) print(f"[main] kavita url = {kavita_url}", flush=True)
print(f"[main] language = {language}", flush=True) print(f"[main] language = {language}", flush=True)
@@ -107,6 +126,7 @@ def main() -> int:
print(f"[main] web = {web_host}:{web_port}", flush=True) print(f"[main] web = {web_host}:{web_port}", flush=True)
cache = MatchesCache(match_path) cache = MatchesCache(match_path)
person_perf = PerfStats(person_perf_path)
orchestrator = LightNovelOrchestrator( orchestrator = LightNovelOrchestrator(
kavita_url=kavita_url, kavita_url=kavita_url,
kavita_api_key=kavita_api_key, kavita_api_key=kavita_api_key,
@@ -118,9 +138,22 @@ def main() -> int:
app = MatchesWebApp( app = MatchesWebApp(
cache, orchestrator=orchestrator, cache, orchestrator=orchestrator,
default_library_ids=library_ids, default_library_ids=library_ids,
person_perf=person_perf,
host=web_host, port=web_port, host=web_host, port=web_port,
) )
app.start() app.start()
if updater_enabled:
try:
CronRunner(
updater_schedule,
lambda: orchestrator.sync_persons(trigger="cron",
perf=person_perf),
name="person-updater").start()
except ValueError as exc:
print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
f"scheduled person sync DISABLED", flush=True)
app.wait() app.wait()
return 0 return 0
+46 -16
View File
@@ -28,15 +28,19 @@ Environment variables
MATCH_PATH default /config/matches.json MATCH_PATH default /config/matches.json
WEB_PORT default 8080 (Flask web UI for matches.json) WEB_PORT default 8080 (Flask web UI for matches.json)
WEB_HOST default 0.0.0.0 WEB_HOST default 0.0.0.0
UPDATER_ENABLED default true (volume/cover back-fill cron) UPDATER_ENABLED default true (run volume/cover + person updaters on cron)
UPDATER_SCHEDULE cron expression for the updater scans, UPDATER_SCHEDULE cron expression for the periodic updaters,
default "0 19 * * 1,4" = 19:00 every Mon + Thu default "0 10 * * 0" = Sundays 10:00
(local time — set TZ inside the container!) (local time — set TZ inside the container!)
UPDATER_LOG default /config/volume_updater.log UPDATER_LOG default /config/volume_updater.log
COVER_CACHE_PATH directory for the persistent cover cache; COVER_CACHE_PATH directory for the persistent cover cache;
empty (default) = temporary cache, deleted on exit empty (default) = temporary cache, deleted on exit
PERF_PATH JSON file for per-step move timing stats; PERF_PATH JSON file for per-step move timing stats.
empty disables profiling. Default /config/perf_stats.json Default /config/perf_stats.json (empty disables it)
VOLUME_PERF_PATH JSON file for volume/cover updater timing.
Default /config/volume_perf_stats.json
PERSON_PERF_PATH JSON file for person updater timing.
Default /config/person_perf_stats.json
""" """
from __future__ import annotations from __future__ import annotations
@@ -62,7 +66,10 @@ from SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402,F401
from MatchesCache import MatchesCache # noqa: E402 from MatchesCache import MatchesCache # noqa: E402
from MatchesWebApp import MatchesWebApp # noqa: E402 from MatchesWebApp import MatchesWebApp # noqa: E402
from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402 from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402
from KavitaClient import KavitaClient # noqa: E402
from KavitaPersonUpdater import KavitaPersonUpdater # noqa: E402
from PerfStats import PerfStats # noqa: E402 from PerfStats import PerfStats # noqa: E402
from CronRunner import CronRunner # noqa: E402
def _env_str(name: str, default: "str | None" = None, def _env_str(name: str, default: "str | None" = None,
@@ -106,10 +113,14 @@ def main() -> int:
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0" web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
web_port = _env_int("WEB_PORT", 8080) web_port = _env_int("WEB_PORT", 8080)
updater_enabled = _env_bool("UPDATER_ENABLED", True) updater_enabled = _env_bool("UPDATER_ENABLED", True)
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 1,4") updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log") updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
perf_path = _env_str("PERF_PATH", "/config/perf_stats.json") or None perf_path = _env_str("PERF_PATH", "/config/perf_stats.json") or None
volume_perf_path = _env_str("VOLUME_PERF_PATH",
"/config/volume_perf_stats.json") or None
person_perf_path = _env_str("PERSON_PERF_PATH",
"/config/person_perf_stats.json") or None
print(f"[main] suwayomi = {suwayomi_path}", flush=True) print(f"[main] suwayomi = {suwayomi_path}", flush=True)
print(f"[main] kavita = {kavita_path}", flush=True) print(f"[main] kavita = {kavita_path}", flush=True)
@@ -121,43 +132,62 @@ def main() -> int:
print(f"[main] web = {web_host}:{web_port}", flush=True) print(f"[main] web = {web_host}:{web_port}", flush=True)
matches_cache = MatchesCache(match_path) matches_cache = MatchesCache(match_path)
perf_stats = PerfStats(perf_path) perf_move = PerfStats(perf_path)
perf_volume = PerfStats(volume_perf_path)
perf_person = PerfStats(person_perf_path)
mover = SuwayomiMover( mover = SuwayomiMover(
suwayomi_path, kavita_path, suwayomi_path, kavita_path,
kavita_base_url=kavita_url,
kavita_api_key=kavita_api_key,
language=language, language=language,
request_timeout=request_timeout, request_timeout=request_timeout,
delete_source=delete_source, delete_source=delete_source,
matches_cache=matches_cache, matches_cache=matches_cache,
cover_cache_dir=cover_cache_path, cover_cache_dir=cover_cache_path,
perf_stats=perf_stats, perf_stats=perf_move,
) )
# Standalone, global, id-based person updater (manga + LN libraries).
person_updater = None
if kavita_api_key:
kavita_client = KavitaClient(kavita_url, kavita_api_key,
request_timeout=request_timeout)
person_updater = KavitaPersonUpdater(kavita_client)
# watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds) # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
web_app = MatchesWebApp(matches_cache, mover=mover, perf_stats=perf_stats, web_app = MatchesWebApp(
matches_cache, mover=mover,
person_updater=person_updater, person_trigger="web",
perf_stats={"move": perf_move, "volume": perf_volume,
"person": perf_person},
host=web_host, port=web_port) host=web_host, port=web_port)
web_app.start() web_app.start()
if updater_enabled: if updater_enabled:
try:
updater = KavitaVolumeCoverUpdater( updater = KavitaVolumeCoverUpdater(
kavita_path, kavita_path,
matches_cache=matches_cache, matches_cache=matches_cache,
language=language, language=language,
request_timeout=request_timeout, request_timeout=request_timeout,
log_path=updater_log, log_path=updater_log,
schedule=updater_schedule,
cover_cache_dir=cover_cache_path, cover_cache_dir=cover_cache_path,
perf_stats=perf_volume,
) )
updater.start()
def _scheduled_job():
updater.update_all()
if person_updater is not None:
person_updater.update_all_persons(trigger="cron",
perf=perf_person)
try:
CronRunner(updater_schedule, _scheduled_job,
name="updaters").start()
except ValueError as exc: except ValueError as exc:
# Invalid cron expression — keep the service up, just without # Invalid cron expression — keep the service up, just without
# the updater, and make the config error obvious in the logs. # the scheduled updaters, and surface the config error.
print(f"[main] UPDATER_SCHEDULE invalid ({exc}); " print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
f"volume/cover updater DISABLED", flush=True) f"scheduled updaters DISABLED", flush=True)
# watcher.start() # watcher.start()
# watcher.wait() # blocks until stop() is called via a signal # watcher.wait() # blocks until stop() is called via a signal
+87
View File
@@ -0,0 +1,87 @@
"""
cron_runner.py
==============
Runs a single callable on a cron schedule on a background thread.
Decouples *what* runs from *when*: both the manga container (volume/cover
updater + person updater) and the LN container (person updater) schedule
their work through this one helper, using a shared ``CronSchedule`` for the
``next_after`` arithmetic.
Usage::
runner = CronRunner("0 10 * * 0", job=my_callable) # Sundays 10:00
runner.start()
...
runner.stop()
When the schedule string is invalid, the CronSchedule constructor raises
ValueError — the caller decides whether to disable the runner or fall back.
The schedule is evaluated in local time (set TZ inside the container).
"""
from __future__ import annotations
import threading
from datetime import datetime
from CronSchedule import CronSchedule
def _now() -> str:
return datetime.now().isoformat(timespec="seconds")
class CronRunner:
"""
Fires ``job()`` whenever the cron ``schedule`` elapses.
Parameters
----------
schedule : 5-field cron expression (see CronSchedule).
job : Zero-arg callable invoked on each scheduled tick. Exceptions
are caught and logged so a failing run does not kill the loop.
name : Thread name (for logs).
"""
def __init__(self, schedule: str, job, *, name: str = "CronRunner"):
self._cron = CronSchedule(schedule)
self._job = job
self._name = name
self._stop = threading.Event()
self._thread: "threading.Thread | None" = None
def start(self) -> None:
"""Starts the scheduling thread. Non-blocking."""
if self._thread is not None and self._thread.is_alive():
return
self._stop.clear()
self._thread = threading.Thread(
target=self._loop, name=self._name, daemon=True)
self._thread.start()
print(f"[{_now()}] [{self._name}] scheduled on "
f"cron '{self._cron.expression}'", flush=True)
def stop(self) -> None:
"""Signals the loop to stop (a job already running finishes first)."""
self._stop.set()
if self._thread is not None:
self._thread.join(timeout=10)
def wait(self) -> None:
"""Blocks the calling thread until stop() is invoked."""
self._stop.wait()
def _loop(self) -> None:
while not self._stop.is_set():
next_run = self._cron.next_after(datetime.now())
wait = max(0.0, (next_run - datetime.now()).total_seconds())
print(f"[{_now()}] [{self._name}] next run: "
f"{next_run.isoformat(timespec='minutes')}", flush=True)
if self._stop.wait(wait):
break
try:
self._job()
except Exception as exc:
print(f"[{_now()}] [{self._name}] job ERROR: {exc}", flush=True)
+25
View File
@@ -204,6 +204,31 @@ class KavitaClient:
r.raise_for_status() r.raise_for_status()
return r.json() or [] return r.json() or []
def list_all_persons(self, *, page_size: int = 200) -> list[dict]:
"""
Returns every PersonDto in the instance.
Pages through POST /api/Person/all (the browse endpoint) with an
empty filter until an empty page is returned — same paging pattern
as list_series_in_library.
"""
results: list[dict] = []
page = 1
while True:
r = self._session.post(
f"{self._base}/api/Person/all",
params={"PageNumber": page, "PageSize": page_size},
json={}, timeout=self._timeout)
r.raise_for_status()
chunk = r.json() or []
if not chunk:
break
results.extend(chunk)
if len(chunk) < page_size:
break
page += 1
return results
def update_person(self, payload: dict) -> None: def update_person(self, payload: dict) -> None:
"""Writes a person record (malId, aniListId, description, …).""" """Writes a person record (malId, aniListId, description, …)."""
r = self._session.post(f"{self._base}/api/Person/update", r = self._session.post(f"{self._base}/api/Person/update",
+131 -329
View File
@@ -2,407 +2,220 @@
kavita_person_updater.py kavita_person_updater.py
======================== ========================
Synchronises Kavita person / character records with MyAnimeList data. Synchronises Kavita character person-records with MyAnimeList / AniList data.
For every character and staff member that MAL knows about for a given manga Global, id-based mode
the updater: ---------------------
1. Searches Kavita for a matching Person record (by name similarity / Kavita person-records are created with a disambiguated name carrying the
alias match, configurable threshold). tracker *character* id, e.g. ``Rem (MAL 118737)`` (manga: written into
2. Sets the MAL ID on the Kavita person if it is not yet linked. ComicInfo <Characters>; light novels: written by the metadata builder).
3. Uploads the MAL profile image when the cover is not locked and has ``update_all_persons`` walks **every** person in the Kavita instance, reads
not been set in a previous sync run. that id from the name, looks the character up on MAL / AniList by id, and
4. Populates the description field when Kavita has none and MAL provides writes back:
an 'about' text (requires an extra Jikan request per character; only
performed when update_descriptions=True). * the tracker id into the ``malId`` / ``aniListId`` field (when still empty),
* a description (when the record has none),
* the profile image (when not locked and not already set).
Persons whose name carries no id (authors / staff, which are not
disambiguated) are skipped. A record already linked to a *different*
tracker id than its name says is reported as a conflict and left untouched.
This mode is format-independent (it only does id lookups, never title
searches) so a single pass covers both the manga and light-novel libraries.
All HTTP traffic to Kavita goes through the shared :class:`KavitaClient` All HTTP traffic to Kavita goes through the shared :class:`KavitaClient`
(`/api/Person/search`, `/api/Person/update`, `/api/Upload/person`). (`/api/Person/all`, `/api/Person/update`, `/api/Upload/person`).
Tested against Kavita 0.9.0.2. Tested against Kavita 0.9.0.2.
""" """
from __future__ import annotations from __future__ import annotations
import datetime
import requests import requests
from KavitaClient import KavitaClient from KavitaClient import KavitaClient
from MALResolver import MALResolver from MALResolver import MALResolver
from AniListResolver import AniListResolver from AniListResolver import AniListResolver
from TextUtils import best_similarity, paragraphs_to_html, person_name_with_id from PerfStats import PerfStats
from TextUtils import paragraphs_to_html, parse_person_tracker_id
class KavitaPersonUpdater: class KavitaPersonUpdater:
""" """
Syncs Kavita Person records with MyAnimeList data. Syncs Kavita character person-records with MAL / AniList data, keyed by
the tracker id embedded in each person's name.
Parameters Parameters
---------- ----------
client : Shared KavitaClient (session, auth, cover uploads) client : Shared KavitaClient (session, auth, cover uploads).
mal_resolver : Shared MALResolver singleton (created automatically if omitted) mal_resolver : Shared MALResolver singleton (created if omitted).
al_resolver : Shared AniListResolver singleton (created automatically if omitted) al_resolver : Shared AniListResolver singleton (created if omitted).
min_name_score : Minimum difflib similarity ratio (01) required to accept a
Kavita person as a match for a MAL name. Default 0.80.
""" """
def __init__(self, client: KavitaClient, *, def __init__(self, client: KavitaClient, *,
mal_resolver: "MALResolver | None" = None, mal_resolver: "MALResolver | None" = None,
al_resolver: "AniListResolver | None" = None, al_resolver: "AniListResolver | None" = None):
min_name_score: float = 0.80):
self._client = client self._client = client
self._min_score = min_name_score
self._mal = mal_resolver or MALResolver() self._mal = mal_resolver or MALResolver()
self._al = al_resolver or AniListResolver() self._al = al_resolver or AniListResolver()
# Cache: normalised name -> list of PersonDto dicts (best matches first)
self._person_search_cache: dict[str, list[dict]] = {}
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Public: combined update # Public: global person sync
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def update_for_manga(self, mal_manga_id: "int | None", *, def update_all_persons(self, *,
al_manga_id: "int | None" = None, trigger: str = "cron",
perf: "PerfStats | None" = None,
update_covers: bool = True, update_covers: bool = True,
update_descriptions: bool = True) -> dict: update_descriptions: bool = True) -> dict:
""" """
Runs a full update pass for both characters and staff of the manga. Walks every Kavita person, syncing the ones whose name carries a
MAL is tried first; AniList is used as fallback when MAL returns nothing. tracker character id.
Returns Parameters
------- ----------
{ trigger : Source that started this run ("cron" | "web" | "ln") —
"characters": {"updated": n, "skipped": n, "not_found": n}, recorded in the perf-stats run meta.
"staff": {"updated": n, "skipped": n, "not_found": n}, perf : Optional PerfStats for per-person step timing.
}
Returns {"trigger", "updated", "skipped", "not_found",
"conflicts", "errors"}.
""" """
return { perf = perf or PerfStats(None)
"characters": self.update_characters( run = perf.begin_run(meta={"trigger": trigger})
mal_manga_id, al_manga_id=al_manga_id, result: dict = {"trigger": trigger, "updated": 0, "skipped": 0,
update_covers=update_covers, "not_found": 0, "conflicts": 0, "errors": []}
update_descriptions=update_descriptions),
"staff": self.update_staff(
mal_manga_id, al_manga_id=al_manga_id,
update_covers=update_covers,
update_descriptions=update_descriptions),
}
# ------------------------------------------------------------------ try:
# Public: character update persons = self._client.list_all_persons()
# ------------------------------------------------------------------ except requests.RequestException as exc:
def update_characters(self, mal_manga_id: "int | None", *, result["errors"].append(f"list persons failed: {exc}")
al_manga_id: "int | None" = None, run.finish()
update_covers: bool = True, return result
update_descriptions: bool = True) -> dict:
"""
Updates Kavita persons that match MAL/AniList characters for the manga.
MAL is tried first; AniList is the fallback when MAL returns nothing.
Returns {"updated": n, "skipped": n, "not_found": n}. for person in persons:
""" name = (person.get("name") or "").strip()
entries = self._mal.get_characters_detailed(mal_manga_id) if mal_manga_id else [] parsed = parse_person_tracker_id(name)
resolver = self._mal if not parsed:
if not entries and al_manga_id: result["skipped"] += 1 # author/staff or un-tagged
entries = self._al.get_characters_detailed(al_manga_id)
resolver = self._al
return self._sync_entries(entries, "character", resolver,
update_covers=update_covers,
update_descriptions=update_descriptions)
# ------------------------------------------------------------------
# Public: staff update
# ------------------------------------------------------------------
def update_staff(self, mal_manga_id: "int | None", *,
al_manga_id: "int | None" = None,
update_covers: bool = True,
update_descriptions: bool = True) -> dict:
"""
Updates Kavita persons that match MAL/AniList staff for the manga.
MAL is tried first; AniList is the fallback when MAL returns nothing.
Returns {"updated": n, "skipped": n, "not_found": n}.
"""
entries = self._mal.get_staff_detailed(mal_manga_id) if mal_manga_id else []
resolver = self._mal
if not entries and al_manga_id:
entries = self._al.get_staff_detailed(al_manga_id)
resolver = self._al
return self._sync_entries(entries, "staff", resolver,
update_covers=update_covers,
update_descriptions=update_descriptions)
# ------------------------------------------------------------------
# Public: cache management
# ------------------------------------------------------------------
def clear_cache(self) -> None:
"""Clears the Kavita person search cache."""
self._person_search_cache.clear()
# ------------------------------------------------------------------
# Internal: main sync loop
# ------------------------------------------------------------------
def _sync_entries(self, entries: list[dict], kind: str, resolver, *,
update_covers: bool,
update_descriptions: bool) -> dict:
result: dict = {"updated": 0, "skipped": 0, "not_found": 0,
"errors": []}
for entry in entries:
name = (entry.get("name") or "").strip()
raw_name = (entry.get("raw_name") or "").strip()
if not name and not raw_name:
continue continue
if kind == "character": source, tracker_id = parsed
# Characters are stored under their disambiguated name item = run.begin_item(name)
# ("Rem (MAL 118737)") — see person_name_with_id. The ok = True
# series metadata write creates the person under exactly try:
# this name, so only that form is searched. category = self._apply_to_person(
search_names = [person_name_with_id( person, source, tracker_id, item,
name, mal_id=entry.get("mal_id"),
al_id=entry.get("al_id"))]
else:
# Staff: cleaned (XML-safe) name first; if Kavita stores
# the legacy comma form, retry with the raw MAL name.
search_names = [name]
if raw_name and raw_name != name:
search_names.append(raw_name)
matches: list[dict] = []
for search_name in search_names:
if not search_name:
continue
matches = self._find_kavita_person(search_name)
if matches:
break
if not matches:
result["not_found"] += 1
continue
changed = self._apply_mal_data(
matches[0], entry, kind, resolver,
update_cover=update_covers, update_cover=update_covers,
update_desc=update_descriptions, update_desc=update_descriptions,
errors=result["errors"]) errors=result["errors"])
result["updated" if changed else "skipped"] += 1 result[category] += 1
ok = category != "conflicts"
except Exception as exc:
result["errors"].append(f"{name}: {exc}")
ok = False
finally:
item.finish(ok=ok)
run.finish()
print(f"[persons] trigger={trigger} updated={result['updated']} "
f"skipped={result['skipped']} not_found={result['not_found']} "
f"conflicts={result['conflicts']} errors={len(result['errors'])}",
flush=True)
return result return result
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Internal: Kavita person search # Internal: apply tracker data to one person
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def _find_kavita_person(self, name: str) -> list[dict]: def _apply_to_person(self, person: dict, source: str, tracker_id: int,
item, *, update_cover: bool, update_desc: bool,
errors: list) -> str:
""" """
Searches Kavita for persons matching `name`. Applies MAL/AniList character data to one Kavita person.
Returns the result category: "updated" | "skipped" | "not_found"
Checks both the main name and any stored aliases. | "conflicts".
Returns persons sorted by similarity, filtered by min_name_score.
Results are cached per (normalised) query name.
""" """
key = name.lower().strip() person_id = person.get("id")
if key in self._person_search_cache:
return self._person_search_cache[key]
try:
persons = self._client.search_persons(name)
except requests.RequestException:
self._person_search_cache[key] = []
return []
scored = []
for p in persons:
candidates = [p.get("name")] + list(p.get("aliases") or [])
scored.append((best_similarity(key, candidates), p))
scored.sort(key=lambda pair: pair[0], reverse=True)
filtered = [p for score, p in scored if score >= self._min_score]
self._person_search_cache[key] = filtered
return filtered
# ------------------------------------------------------------------
# Internal: apply MAL data to a single Kavita person
# ------------------------------------------------------------------
def _apply_mal_data(self, person: dict, mal_entry: dict, kind: str,
resolver, *,
update_cover: bool, update_desc: bool,
errors: "list | None" = None) -> bool:
"""
Applies tracker data (MAL or AniList) to one Kavita person record.
Fields updated
--------------
- malId : set when the entry carries a MAL ID and it differs
- aniListId : set when the entry carries an AniList ID and it differs
- description: set when empty and the tracker provides a description
- cover image: uploaded when not locked and no prior sync cover exists
Returns True if any change was made. Failures are appended to the
`errors` list (if provided) instead of being silently swallowed.
"""
person_id: "int | None" = person.get("id")
if not person_id: if not person_id:
return False return "skipped"
person_name = person.get("name") or "" resolver = self._mal if source == "mal" else self._al
id_field = "malId" if source == "mal" else "aniListId"
current = person.get(id_field) or 0
# Tracker IDs — a MAL entry has mal_id set; an AniList entry has al_id. # The name is authoritative; a record linked to a different id is a
mal_id: "int | None" = mal_entry.get("mal_id") # data conflict — never overwrite it.
al_id: "int | None" = mal_entry.get("al_id") if current and current != tracker_id:
entity_id = mal_id or al_id # used for resolver detail calls
current_mal_id: int = person.get("malId") or 0
current_al_id: int = person.get("aniListId") or 0
# Collision guard: the Kavita person is already linked to a
# *different* tracker entity — same display name, different
# character/person. Never overwrite; first writer wins.
if ((mal_id and current_mal_id and current_mal_id != mal_id)
or (al_id and current_al_id and current_al_id != al_id)):
if errors is not None:
errors.append( errors.append(
f"conflict: '{person_name}' (#{person_id}) is linked to " f"conflict: '{person.get('name')}' (#{person_id}) has "
f"malId={current_mal_id or '-'}/aniListId={current_al_id or '-'} " f"{id_field}={current} but name says {tracker_id} — skipped")
f"but this entry has malId={mal_id or '-'}/aniListId={al_id or '-'} " return "conflicts"
f"— skipped")
return False
needs_mal_id = bool(mal_id and current_mal_id != mal_id) with item.measure("detail_fetch"):
needs_al_id = bool(al_id and current_al_id != al_id) details = resolver.get_character_details(tracker_id)
if not details:
return "not_found"
# ------ Lazy description fetch ----------------------------------- need_id = not current # write id when still missing
description: "str | None" = None description = None
if update_desc and not (person.get("description") or "").strip(): if update_desc and not (person.get("description") or "").strip():
if entity_id:
if kind == "character":
details = resolver.get_character_details(entity_id)
if details:
description = _build_character_description(details) or None description = _build_character_description(details) or None
else: need_desc = bool(description)
details = resolver.get_person_details(entity_id)
if details:
description = _build_person_description(details) or None
needs_desc = bool(description)
# ------ Metadata update ------------------------------------------
changed = False changed = False
if needs_mal_id or needs_al_id or needs_desc: if need_id or need_desc:
payload: dict = { payload = {
"id": person_id, "id": person_id,
"name": person_name, "name": person.get("name") or "",
# MUST stay a boolean — the cover image itself is uploaded # MUST stay a boolean — the cover is uploaded separately.
# separately via POST /api/Upload/person (below). Putting a
# URL here makes Kavita reject the whole payload with HTTP 400.
"coverImageLocked": bool(person.get("coverImageLocked", False)), "coverImageLocked": bool(person.get("coverImageLocked", False)),
"aliases": person.get("aliases") or [], "aliases": person.get("aliases") or [],
"description": description or person.get("description"), "description": description or person.get("description"),
"malId": mal_id if needs_mal_id else (current_mal_id or None), "malId": tracker_id if source == "mal"
"aniListId": al_id if needs_al_id else (current_al_id or None), else (person.get("malId") or None),
"aniListId": tracker_id if source == "al"
else (person.get("aniListId") or None),
} }
try: try:
with item.measure("person_update"):
self._client.update_person(payload) self._client.update_person(payload)
changed = True changed = True
except requests.RequestException as e: except requests.RequestException as exc:
if errors is not None: errors.append(f"update failed #{person_id} "
errors.append( f"'{person.get('name')}': {exc}")
f"Person/update failed for #{person_id} "
f"'{person_name}': {e}")
# ------ Cover image upload ---------------------------------------- # Cover: upload when not locked and not already set for this id.
# Upload whenever:
# - caller requested cover updates
# - cover is NOT locked (user did not manually pin it)
# - we have not already uploaded this exact tracker entity's image
# (i.e. the tracked ID differs OR there is no cover yet).
if update_cover and not person.get("coverImageLocked"): if update_cover and not person.get("coverImageLocked"):
image_url = mal_entry.get("image_url") image_url = details.get("image_url")
already_uploaded = ( already = bool(current) and bool(person.get("coverImage"))
entity_id is not None if image_url and not already:
and (current_mal_id == mal_id or current_al_id == al_id)
and bool(person.get("coverImage"))
)
if image_url and not already_uploaded:
try: try:
with item.measure("cover_upload"):
self._client.upload_person_cover(person_id, image_url) self._client.upload_person_cover(person_id, image_url)
changed = True changed = True
except requests.RequestException as e: except requests.RequestException as exc:
if errors is not None: errors.append(f"cover upload failed #{person_id} "
errors.append( f"'{person.get('name')}': {exc}")
f"cover upload failed for #{person_id} "
f"'{person_name}' ({image_url}): {e}")
return changed return "updated" if changed else "skipped"
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
# Module helpers: description builders # Module helper: character description builder
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
def _format_birthday(birthday: str) -> str:
"""Converts an ISO 8601 birthday string to "D Month YYYY"."""
if not birthday:
return ""
try:
dt = datetime.date.fromisoformat(birthday.split("T")[0])
return f"{dt.day} {dt.strftime('%B %Y')}"
except (ValueError, AttributeError):
return ""
def _build_character_description(details: dict) -> str: def _build_character_description(details: dict) -> str:
""" """
Builds a Kavita-safe HTML description for a MAL character. Builds a Kavita-safe HTML description for a MAL / AniList character.
Top line: "Favorites: N" as a link to the character's MAL page. Top line: "Favorites: N" linked to the character page (when available).
Remainder: the character's `about` text converted to HTML paragraphs. Remainder: the character's `about` text converted to HTML paragraphs.
""" """
parts: list[str] = [] parts: list[str] = []
url = details.get("url") or "" url = details.get("url") or ""
favorites = details.get("favorites") favorites = details.get("favorites")
if url and favorites is not None: if url and favorites is not None:
parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>') parts.append(f'<p><a href="{url}" target="_blank">'
about = (details.get("about") or "").strip() f'Favorites: {favorites:,}</a></p>')
if about:
parts.append(paragraphs_to_html(about))
return "<br>".join(parts)
def _build_person_description(details: dict) -> str:
"""
Builds a Kavita-safe HTML description for a MAL person (mangaka / staff).
Renders a summary table (given name, family name, birthday, website,
member favorites) followed by the `about` biography as HTML paragraphs.
"""
_TD = 'style="padding-right:1.5em"'
rows: list[str] = []
given = (details.get("given_name") or "").strip()
family = (details.get("family_name") or "").strip()
birthday = details.get("birthday") or ""
favorites = details.get("favorites")
website = (details.get("website_url") or "").strip()
url = (details.get("url") or "").strip()
if given:
rows.append(f"<tr><td {_TD}>Given name</td><td>{given}</td></tr>")
if family:
rows.append(f"<tr><td {_TD}>Family name</td><td>{family}</td></tr>")
bday_str = _format_birthday(birthday)
if bday_str:
rows.append(f"<tr><td {_TD}>Birthday</td><td>{bday_str}</td></tr>")
if website:
rows.append(
f'<tr><td {_TD}>Website</td>'
f'<td><a href="{website}">{website}</a></td></tr>'
)
if favorites is not None:
fav_cell = (f'<a href="{url}" target="_blank">{favorites:,}</a>' if url
else f"{favorites:,}")
rows.append(
f"<tr><td {_TD}>Member Favorites</td><td>{fav_cell}</td></tr>")
parts: list[str] = []
if rows:
parts.append(f'<table>{"".join(rows)}</table>')
about = (details.get("about") or "").strip() about = (details.get("about") or "").strip()
if about: if about:
parts.append(paragraphs_to_html(about)) parts.append(paragraphs_to_html(about))
@@ -418,18 +231,7 @@ if __name__ == "__main__":
client = KavitaClient(os.environ["KAVITA_URL"], client = KavitaClient(os.environ["KAVITA_URL"],
os.environ["KAVITA_API_KEY"]) os.environ["KAVITA_API_KEY"])
updater = KavitaPersonUpdater(client) updater = KavitaPersonUpdater(client)
report = updater.update_all_persons(trigger="cron")
mal = MALResolver() print(report)
mal_id = mal.find_mal_id("よふかしのうた") for err in report["errors"]:
print("MAL ID:", mal_id) print(" ", err)
if mal_id:
result = updater.update_for_manga(mal_id)
print("Characters:", {k: v for k, v in result["characters"].items()
if k != "errors"})
print("Staff :", {k: v for k, v in result["staff"].items()
if k != "errors"})
# Surface any non-fatal upload / API errors for debugging
for section in ("characters", "staff"):
for err in result[section].get("errors", []):
print(f"[{section}] {err}")
+254
View File
@@ -0,0 +1,254 @@
"""
perf_stats.py
=============
Generic run/step performance profiler with JSON persistence, shared by the
move pipeline and the periodic updaters (volume/cover, persons).
Each run is a tree of *items* (e.g. series -> chapter, or one person) and
every item carries named *step* timings. A run also carries free-form
``meta`` (e.g. the trigger source ``"cron" | "web" | "ln"`` for the person
updater).
Data model (one entry per run, newest first)::
{
"runs": [
{
"runId": "",
"startedAt": 1700000000,
"finishedAt": 1700000123,
"totalSeconds": 123.4,
"meta": {"trigger": "cron"},
"itemCount": 2, # top-level items
"leafCount": 31, # items without children
"stepTotals": {"cover": 41.2, "image_dimensions": 55.8, ...},
"items": [
{"label": "Call of the Night", "totalSeconds": 60.2, "ok": true,
"steps": {"fetch_metadata": 1.2},
"items": [
{"label": "1", "totalSeconds": 11.5, "ok": true,
"steps": {"cover": 1.8, "pack_cbz": 2.9}, "items": []}
]}
]
}
]
}
Usage::
perf = PerfStats(path) # path=None -> disabled
run = perf.begin_run(meta={"trigger": "cron"})
item = run.begin_item("Call of the Night")
with item.measure("fetch_metadata"):
...
chap = item.begin_item("1")
with chap.measure("pack_cbz"):
...
chap.finish()
item.finish() # flushes the run to disk
run.finish()
When ``path`` is None every recorder is a no-op and nothing is written, so
the profiler can be left permanently wired in at negligible cost. The run
is flushed after every top-level item finishes, so a long run is observable
live and survives a crash mid-run.
"""
from __future__ import annotations
import json
import threading
import time
import uuid
from contextlib import contextmanager
from pathlib import Path
# Keep the JSON small: only the most recent runs are retained on disk.
_MAX_RUNS = 30
class _StepTimer:
"""
Base recorder: accumulates ``{step_name: seconds}`` and tracks its own
wall-clock lifetime. ``enabled=False`` turns every method into a no-op.
"""
def __init__(self, enabled: bool = True):
self.steps: dict[str, float] = {}
self._enabled = enabled
self._t0 = time.monotonic()
@contextmanager
def measure(self, name: str):
"""Context manager timing a named step (accumulates on repeat use)."""
if not self._enabled:
yield
return
start = time.monotonic()
try:
yield
finally:
self.steps[name] = round(
self.steps.get(name, 0.0) + (time.monotonic() - start), 4)
def elapsed(self) -> float:
return round(time.monotonic() - self._t0, 4)
class ItemRecorder(_StepTimer):
"""
One node in a run's item tree. Has its own step timings and may contain
nested child items (e.g. a series item containing chapter items).
"""
def __init__(self, run: "RunRecorder", label: str, *,
parent: "ItemRecorder | None" = None,
enabled: bool = True):
super().__init__(enabled)
self._run = run
self._label = label
self._parent = parent
self._children: list[dict] = []
def begin_item(self, label: str) -> "ItemRecorder":
return ItemRecorder(self._run, label, parent=self,
enabled=self._enabled)
def finish(self, *, ok: bool = True) -> None:
if not self._enabled:
return
node = {
"label": self._label,
"totalSeconds": self.elapsed(),
"ok": ok,
"steps": self.steps,
"items": self._children,
}
if self._parent is not None:
self._parent._children.append(node)
else:
# Top-level item: attach to the run and persist progress.
self._run._items.append(node)
self._run.flush()
class RunRecorder:
"""Top-level recorder for one full run."""
def __init__(self, stats: "PerfStats", meta: "dict | None" = None,
enabled: bool = True):
self._stats = stats
self._enabled = enabled
self._meta = meta or {}
self._items: list[dict] = []
self._started = time.time()
self._t0 = time.monotonic()
self._run_id = uuid.uuid4().hex
def begin_item(self, label: str) -> ItemRecorder:
return ItemRecorder(self, label, parent=None, enabled=self._enabled)
def _snapshot(self) -> dict:
step_totals: dict[str, float] = {}
leaf_count = 0
def walk(node: dict) -> None:
nonlocal leaf_count
for step, secs in node["steps"].items():
step_totals[step] = round(step_totals.get(step, 0.0) + secs, 4)
if node["items"]:
for child in node["items"]:
walk(child)
else:
leaf_count += 1
for item in self._items:
walk(item)
return {
"runId": self._run_id,
"startedAt": round(self._started),
"finishedAt": round(time.time()),
"totalSeconds": round(time.monotonic() - self._t0, 4),
"meta": self._meta,
"itemCount": len(self._items),
"leafCount": leaf_count,
"stepTotals": step_totals,
"items": self._items,
}
def flush(self) -> "dict | None":
"""Writes the run's current state to disk (upsert by runId)."""
if not self._enabled:
return None
run = self._snapshot()
self._stats._upsert_run(run)
return run
def finish(self) -> "dict | None":
"""Persists the final run state. Returns the run dict."""
return self.flush()
class PerfStats:
"""
Profiler facade + JSON persistence.
Parameters
----------
path : Destination JSON file. None disables the profiler entirely
(every recorder becomes a no-op and nothing is written).
"""
def __init__(self, path=None):
self._path = Path(path) if path else None
self._lock = threading.Lock()
@property
def enabled(self) -> bool:
return self._path is not None
def begin_run(self, meta: "dict | None" = None) -> RunRecorder:
return RunRecorder(self, meta=meta, enabled=self.enabled)
# ------------------------------------------------------------------
# Read / write
# ------------------------------------------------------------------
def all(self) -> dict:
"""Returns the persisted runs ({"runs": [...]}); newest first."""
if not self._path or not self._path.is_file():
return {"runs": []}
try:
with self._path.open("r", encoding="utf-8") as f:
data = json.load(f)
except (OSError, json.JSONDecodeError):
return {"runs": []}
if not isinstance(data, dict) or not isinstance(data.get("runs"), list):
return {"runs": []}
return data
def _upsert_run(self, run: dict) -> None:
"""
Inserts a new run (newest first) or replaces the existing entry with
the same runId — so incremental flushes during a run update one entry
rather than appending a duplicate after every item.
"""
if not self._path:
return
with self._lock:
runs = self.all()["runs"]
run_id = run.get("runId")
for i, existing in enumerate(runs):
if existing.get("runId") == run_id:
runs[i] = run
break
else:
runs.insert(0, run) # newest first
del runs[_MAX_RUNS:] # cap history
self._path.parent.mkdir(parents=True, exist_ok=True)
tmp = self._path.with_suffix(self._path.suffix + ".tmp")
with tmp.open("w", encoding="utf-8") as f:
json.dump({"runs": runs}, f, ensure_ascii=False, indent=2)
tmp.replace(self._path)
+160
View File
@@ -0,0 +1,160 @@
"""
perf_web_page.py
================
Shared HTML page for browsing PerfStats output, used by both container web
UIs. ``render_perf_page(name, tabs)`` returns a standalone page that loads
``/api/perf/<name>`` and renders each run's step totals plus the nested item
tree (series -> chapter, or one person, …) and the run trigger from meta.
``tabs`` is a list of ``(label, name)`` pairs for cross-links between the
available perf datasets in that container.
"""
from __future__ import annotations
_PERF_PAGE = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>__PERF_NAME__ performance</title>
<style>
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
h2 { font-size: 1.05rem; margin: 1.4rem 0 .5rem; color:#cbd5e1; }
a { color:#60a5fa; text-decoration:none; }
a:hover { text-decoration:underline; }
.tabs { margin-bottom:1rem; }
.tabs a { margin-right:1rem; }
.tabs a.active { font-weight:bold; text-decoration:underline; }
.bar { display:flex; gap:.6rem; align-items:center; margin-bottom:1rem; flex-wrap:wrap; }
select, button { padding:.35rem .6rem; background:#222; color:#eee; border:1px solid #555; }
.summary { color:#9ca3af; margin:.3rem 0 1rem; }
table { border-collapse: collapse; width: 100%; margin-bottom:.5rem; }
th, td { border: 1px solid #333; padding: .35rem .6rem; text-align: left; }
th { background:#1d1d1d; }
td.num { text-align:right; font-variant-numeric: tabular-nums; white-space:nowrap; }
.barcell { position:relative; }
.barfill { position:absolute; left:0; top:0; bottom:0; background:#2563eb33; z-index:0; }
.barcell span { position:relative; z-index:1; }
details { margin:.2rem 0 .2rem 1rem; }
summary { cursor:pointer; padding:.2rem 0; }
.chip { color:#9ca3af; font-size:.85rem; }
.err { color:#f87171; }
</style>
</head>
<body>
<h1>Performance: __PERF_NAME__ <a href="/" style="font-size:.9rem;">&#9666; back</a></h1>
<div class="tabs">__PERF_TABS__</div>
<div class="bar">
<label>Run: <select id="runSelect"></select></label>
<button id="reload">Reload</button>
<span class="summary" id="summary"></span>
</div>
<div id="content"></div>
<script>
const PERF_NAME = "__PERF_NAME__";
let runs = [];
for (const a of document.querySelectorAll(".tabs a")) {
if (a.getAttribute("href") === "/perf/" + PERF_NAME) a.classList.add("active");
}
function fmtSecs(s) { return (s || 0).toFixed(2) + "s"; }
function fmtTime(unix) { return unix ? new Date(unix * 1000).toLocaleString() : ""; }
function esc(s) {
return String(s).replace(/[&<>]/g, c => ({"&":"&amp;","<":"&lt;",">":"&gt;"}[c]));
}
function stepTable(totals, grandTotal) {
const entries = Object.entries(totals || {}).sort((a, b) => b[1] - a[1]);
if (!entries.length) return "<p class=chip>(no steps recorded)</p>";
const max = entries[0][1] || 1;
let rows = "";
for (const [name, secs] of entries) {
const pct = grandTotal ? (secs / grandTotal * 100) : 0;
const w = (secs / max * 100);
rows += "<tr><td>" + esc(name) + "</td>"
+ "<td class='num'>" + fmtSecs(secs) + "</td>"
+ "<td class='num'>" + pct.toFixed(1) + "%</td>"
+ "<td class='barcell'><div class='barfill' style='width:" + w + "%'></div>"
+ "<span>&nbsp;</span></td></tr>";
}
return "<table><thead><tr><th>Step</th><th class=num>Total</th>"
+ "<th class=num>% of run</th><th>&nbsp;</th></tr></thead><tbody>"
+ rows + "</tbody></table>";
}
// Renders one item node (and its children) as a nested <details> block.
function itemNode(it) {
const steps = Object.entries(it.steps || {}).sort((a, b) => b[1] - a[1])
.map(([n, v]) => esc(n) + " " + fmtSecs(v)).join(", ") || "";
const head = "<summary><b>" + esc(it.label) + "</b>"
+ (it.ok === false ? " <span class=err>(failed)</span>" : "")
+ " <span class=chip>" + fmtSecs(it.totalSeconds) + " · " + steps + "</span></summary>";
const kids = (it.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
const body = kids.map(itemNode).join("");
return "<details>" + head + body + "</details>";
}
function renderRun(run) {
const c = document.getElementById("content");
if (!run) { c.innerHTML = "<p class=chip>No runs recorded yet.</p>"; return; }
const trigger = (run.meta && run.meta.trigger) ? " · trigger: " + run.meta.trigger : "";
document.getElementById("summary").textContent =
fmtTime(run.startedAt) + " · " + fmtSecs(run.totalSeconds) + " · "
+ run.itemCount + " items · " + run.leafCount + " leaves" + trigger;
let html = "<h2>Steps (summed over all items)</h2>"
+ stepTable(run.stepTotals, run.totalSeconds)
+ "<h2>Detail</h2>";
const items = (run.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
html += items.map(itemNode).join("") || "<p class=chip>(no items)</p>";
c.innerHTML = html;
}
function renderSelect() {
const sel = document.getElementById("runSelect");
sel.innerHTML = "";
runs.forEach((r, i) => {
const o = document.createElement("option");
o.value = i;
const trig = (r.meta && r.meta.trigger) ? " " + r.meta.trigger : "";
o.textContent = fmtTime(r.startedAt) + " (" + fmtSecs(r.totalSeconds) + ")" + trig;
sel.appendChild(o);
});
}
async function load() {
const r = await fetch("/api/perf/" + PERF_NAME);
const data = await r.json();
runs = data.runs || [];
renderSelect();
renderRun(runs[0]);
}
document.getElementById("runSelect").addEventListener("change", e => {
renderRun(runs[e.target.value]);
});
document.getElementById("reload").addEventListener("click", load);
load();
</script>
</body>
</html>
"""
def render_perf_page(name: str, tabs: "list[tuple[str, str]]") -> str:
"""
Returns the perf page HTML for dataset ``name``.
tabs : list of (label, dataset_name) for the cross-link bar.
"""
tab_html = " ".join(
f'<a href="/perf/{n}">{label}</a>' for label, n in tabs)
return (_PERF_PAGE
.replace("__PERF_TABS__", tab_html)
.replace("__PERF_NAME__", name))
+27
View File
@@ -70,3 +70,30 @@ def person_name_with_id(name: str, *,
if al_id: if al_id:
return f"{name} (AL {al_id})" return f"{name} (AL {al_id})"
return name return name
# Matches the suffix produced by person_name_with_id at the end of a name.
_TRACKER_ID_RE = re.compile(r"\s*\((MAL|AL)\s+(\d+)\)\s*$", re.IGNORECASE)
def parse_person_tracker_id(name: str) -> "tuple[str, int] | None":
"""
Inverse of person_name_with_id: extracts the tracker id from a
disambiguated Kavita person name.
"Rem (MAL 118737)" -> ("mal", 118737)
"Subaru (AL 88311)" -> ("al", 88311)
"Kotoyama" -> None (no id suffix — e.g. an author/staff record)
Returns ("mal" | "al", id) or None.
"""
if not name:
return None
m = _TRACKER_ID_RE.search(name)
if not m:
return None
source = "mal" if m.group(1).upper() == "MAL" else "al"
try:
return source, int(m.group(2))
except ValueError:
return None
+14 -9
View File
@@ -192,14 +192,9 @@ class LightNovelOrchestrator:
except Exception as exc: except Exception as exc:
return {"ok": False, "error": f"series update failed: {exc}"} return {"ok": False, "error": f"series update failed: {exc}"}
# Persons # Person sync no longer runs per series — it has its own global,
try: # id-based updater (sync_persons / KavitaPersonUpdater.update_all_persons)
person_report = self._person_updater.update_for_manga( # on a separate cron schedule.
built.get("malId"),
al_manga_id=built.get("anilistId"),
)
except Exception as exc:
person_report = {"error": str(exc)}
# Relationships + collection # Relationships + collection
try: try:
@@ -221,10 +216,20 @@ class LightNovelOrchestrator:
"title": cached_title, "title": cached_title,
"mangabakaId": built.get("mangabakaId"), "mangabakaId": built.get("mangabakaId"),
"series": series_report, "series": series_report,
"persons": person_report,
"relationships": relation_report, "relationships": relation_report,
} }
# ------------------------------------------------------------------
# Person sync (global, id-based — independent of series updates)
# ------------------------------------------------------------------
def sync_persons(self, *, trigger: str = "ln", perf=None) -> dict:
"""
Runs the global, id-based person updater over every Kavita person.
Covers both manga and light-novel libraries in one pass.
"""
return self._person_updater.update_all_persons(
trigger=trigger, perf=perf)
def update_all(self, library_ids: "list[int] | None") -> dict: def update_all(self, library_ids: "list[int] | None") -> dict:
"""Updates every cached series in the given libraries.""" """Updates every cached series in the given libraries."""
if library_ids is None: if library_ids is None:
+51
View File
@@ -37,6 +37,10 @@ from flask import Flask, jsonify, request, Response
from MatchesCache import MatchesCache from MatchesCache import MatchesCache
from LightNovelMetadataBuilder import pick_thumbnail_url from LightNovelMetadataBuilder import pick_thumbnail_url
from PerfWebPage import render_perf_page
# Only the person dataset exists in the LN container.
_PERF_TABS = [("persons", "person")]
def _int_list(values) -> list[int]: def _int_list(values) -> list[int]:
@@ -97,7 +101,9 @@ _INDEX_HTML = r"""<!doctype html>
<button id="reload">Reload</button> <button id="reload">Reload</button>
<button id="build">Match all in libraries</button> <button id="build">Match all in libraries</button>
<button id="updateAll" class="success">Update all in libraries</button> <button id="updateAll" class="success">Update all in libraries</button>
<button id="syncPersons">Sync persons</button>
<button id="batchSave" class="primary">Save dirty (0)</button> <button id="batchSave" class="primary">Save dirty (0)</button>
<a href="/perf/person" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
<span class="status" id="status"></span> <span class="status" id="status"></span>
</div> </div>
@@ -497,12 +503,25 @@ async function startUpdateAll() {
} }
} }
async function startSyncPersons() {
if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return;
setStatus("Person sync started");
try {
const r = await fetch("/api/persons/sync", { method: "POST" });
if (!r.ok) throw new Error(await r.text());
startPolling();
} catch (err) {
setStatus("Person sync failed: " + err.message);
}
}
document.getElementById("filter").addEventListener("input", applyFilter); document.getElementById("filter").addEventListener("input", applyFilter);
document.getElementById("libraries").addEventListener("change", applyFilter); document.getElementById("libraries").addEventListener("change", applyFilter);
document.getElementById("reload").addEventListener("click", load); document.getElementById("reload").addEventListener("click", load);
document.getElementById("batchSave").addEventListener("click", batchSave); document.getElementById("batchSave").addEventListener("click", batchSave);
document.getElementById("build").addEventListener("click", startBuild); document.getElementById("build").addEventListener("click", startBuild);
document.getElementById("updateAll").addEventListener("click", startUpdateAll); document.getElementById("updateAll").addEventListener("click", startUpdateAll);
document.getElementById("syncPersons").addEventListener("click", startSyncPersons);
for (const th of document.querySelectorAll("th.sortable")) { for (const th of document.querySelectorAll("th.sortable")) {
th.addEventListener("click", () => { th.addEventListener("click", () => {
const col = th.dataset.col; const col = th.dataset.col;
@@ -581,11 +600,13 @@ class MatchesWebApp:
def __init__(self, cache: MatchesCache, *, def __init__(self, cache: MatchesCache, *,
orchestrator=None, orchestrator=None,
default_library_ids: "list[int] | None" = None, default_library_ids: "list[int] | None" = None,
person_perf=None,
host: str = "0.0.0.0", host: str = "0.0.0.0",
port: int = 8080): port: int = 8080):
self._cache = cache self._cache = cache
self._orchestrator = orchestrator self._orchestrator = orchestrator
self._defaults = list(default_library_ids or []) self._defaults = list(default_library_ids or [])
self._person_perf = person_perf
self._host = host self._host = host
self._port = port self._port = port
self._job = _JobState() self._job = _JobState()
@@ -757,8 +778,38 @@ class MatchesWebApp:
return Response("a job is already running", status=409) return Response("a job is already running", status=409)
return jsonify({"started": label}) return jsonify({"started": label})
@app.post("/api/persons/sync")
def api_persons_sync():
if self._orchestrator is None:
return Response("no orchestrator configured", status=503)
def task(job: _JobState):
report = self._orchestrator.sync_persons(
trigger="ln", perf=self._person_perf)
job.append(f"updated={report['updated']} "
f"skipped={report['skipped']} "
f"not_found={report['not_found']} "
f"conflicts={report['conflicts']}")
for err in report.get("errors", []):
job.append(f" {err}")
if not self._job.start("person sync", task):
return Response("a job is already running", status=409)
return jsonify({"started": "person sync"})
@app.get("/api/status") @app.get("/api/status")
def api_status(): def api_status():
snap = self._job.snapshot() snap = self._job.snapshot()
snap["defaults"] = self._defaults snap["defaults"] = self._defaults
return jsonify(snap) return jsonify(snap)
@app.get("/perf")
@app.get("/perf/<name>")
def perf_page(name: str = "person") -> Response:
return Response(render_perf_page(name, _PERF_TABS),
mimetype="text/html; charset=utf-8")
@app.get("/api/perf/<name>")
def api_perf(name: str):
stats = self._person_perf if name == "person" else None
return jsonify(stats.all() if stats is not None else {"runs": []})
+27 -63
View File
@@ -45,7 +45,6 @@ from __future__ import annotations
import io import io
import sys import sys
import threading
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import zipfile import zipfile
from datetime import datetime from datetime import datetime
@@ -67,7 +66,7 @@ from MatchesCache import MatchesCache
from SuwayomiMover import (_load_chapter_index, _save_chapter_index, from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
_sanitize_dirname, _normalise_volume_value) _sanitize_dirname, _normalise_volume_value)
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CronSchedule import CronSchedule from PerfStats import PerfStats
from CoverCache import CoverCache, _IMAGE_EXTS from CoverCache import CoverCache, _IMAGE_EXTS
try: try:
@@ -136,12 +135,12 @@ class KavitaVolumeCoverUpdater:
request_timeout : HTTP timeout in seconds. request_timeout : HTTP timeout in seconds.
log_path : File that receives one line per updated chapter. log_path : File that receives one line per updated chapter.
Default: <kavita_path>/volume_updater.log Default: <kavita_path>/volume_updater.log
schedule : Cron expression (5 fields) defining when scans run,
e.g. "0 19 * * 1,4" = 19:00 every Monday and
Thursday. Evaluated in local time — set the TZ env
var inside Docker. Default: "0 19 * * 1,4".
cover_cache_dir : Directory for the persistent cover cache. None -> cover_cache_dir : Directory for the persistent cover cache. None ->
temporary cache, deleted at process exit. temporary cache, deleted at process exit.
perf_stats : Optional PerfStats instance for per-step timing.
Scheduling lives outside this class (see CronRunner); call update_all()
on whatever cadence you like.
""" """
def __init__(self, def __init__(self,
@@ -152,8 +151,8 @@ class KavitaVolumeCoverUpdater:
request_timeout: int = 30, request_timeout: int = 30,
api_base_url: str = "https://api.mangabaka.dev/v1", api_base_url: str = "https://api.mangabaka.dev/v1",
log_path=None, log_path=None,
schedule: str = "0 19 * * 1,4", cover_cache_dir=None,
cover_cache_dir=None): perf_stats: "PerfStats | None" = None):
self._dst = Path(kavita_path) self._dst = Path(kavita_path)
self._matches_cache = matches_cache self._matches_cache = matches_cache
self._language = language self._language = language
@@ -161,7 +160,7 @@ class KavitaVolumeCoverUpdater:
self._api_base_url = api_base_url.rstrip("/") self._api_base_url = api_base_url.rstrip("/")
self._log_path = (Path(log_path) if log_path self._log_path = (Path(log_path) if log_path
else self._dst / "volume_updater.log") else self._dst / "volume_updater.log")
self._cron = CronSchedule(schedule) self._perf = perf_stats or PerfStats(None)
session = requests.Session() session = requests.Session()
session.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0") session.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0")
@@ -178,51 +177,6 @@ class KavitaVolumeCoverUpdater:
self._cover_cache = CoverCache( self._cover_cache = CoverCache(
cover_cache_dir, session=session, request_timeout=request_timeout) cover_cache_dir, session=session, request_timeout=request_timeout)
self._stop = threading.Event()
self._thread: "threading.Thread | None" = None
# ------------------------------------------------------------------
# Cron API (mirrors SuwayomiFolderWatcher)
# ------------------------------------------------------------------
def start(self) -> None:
"""Starts the periodic scan thread. Non-blocking."""
if self._thread is not None and self._thread.is_alive():
return
self._stop.clear()
self._thread = threading.Thread(
target=self._loop, name="KavitaVolumeCoverUpdater", daemon=True)
self._thread.start()
print(f"[{_now()}] [updater] scanning {self._dst} "
f"on cron '{self._cron.expression}'", flush=True)
def stop(self) -> None:
"""Stops the scan thread (current scan finishes its series first)."""
self._stop.set()
if self._thread is not None:
self._thread.join(timeout=10)
def wait(self) -> None:
"""Blocks the calling thread until stop() is invoked."""
self._stop.wait()
def _loop(self) -> None:
while not self._stop.is_set():
next_run = self._cron.next_after(datetime.now())
wait = max(0.0, (next_run - datetime.now()).total_seconds())
print(f"[{_now()}] [updater] next scheduled scan: "
f"{next_run.isoformat(timespec='minutes')}", flush=True)
if self._stop.wait(wait):
break
try:
summary = self.update_all()
print(f"[{_now()}] [updater] scan done: "
f"{summary['series_updated']} series / "
f"{summary['chapters_updated']} chapters updated",
flush=True)
except Exception as exc:
print(f"[{_now()}] [updater] scan ERROR: {exc}", flush=True)
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Public scan API # Public scan API
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -243,23 +197,25 @@ class KavitaVolumeCoverUpdater:
self._vol_resolver.clear_cache() self._vol_resolver.clear_cache()
self._works_resolver.clear_cache() self._works_resolver.clear_cache()
run = self._perf.begin_run()
try:
for series_dir in sorted(self._dst.iterdir()): for series_dir in sorted(self._dst.iterdir()):
if self._stop.is_set():
break
if not series_dir.is_dir(): if not series_dir.is_dir():
continue continue
summary["series_scanned"] += 1 summary["series_scanned"] += 1
try: try:
updated = self.update_series(series_dir) updated = self.update_series(series_dir, run)
except Exception as exc: except Exception as exc:
print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True) print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
continue continue
if updated: if updated:
summary["series_updated"] += 1 summary["series_updated"] += 1
summary["chapters_updated"] += updated summary["chapters_updated"] += updated
finally:
run.finish()
return summary return summary
def update_series(self, series_dir: Path) -> int: def update_series(self, series_dir: Path, run=None) -> int:
""" """
Updates one series folder. Returns the number of updated chapters. Updates one series folder. Returns the number of updated chapters.
@@ -300,8 +256,11 @@ class KavitaVolumeCoverUpdater:
md = builder.fetch_metadata() md = builder.fetch_metadata()
series_id = str(md.get("id") or "") series_id = str(md.get("id") or "")
series_rec = (run or self._perf.begin_run()).begin_item(series_dir.name)
# Resolve volumes for all null-volume chapters first (API only). # Resolve volumes for all null-volume chapters first (API only).
updates: dict[str, dict] = {} # num -> {"volume": str, "cover": tuple|None} updates: dict[str, dict] = {} # num -> {"volume": str, "cover": tuple|None}
with series_rec.measure("resolve_volumes"):
for num in sorted(missing, key=_chapter_sort_value): for num in sorted(missing, key=_chapter_sort_value):
builder.chapter = num builder.chapter = num
try: try:
@@ -314,6 +273,7 @@ class KavitaVolumeCoverUpdater:
"cover": self._fetch_cover(series_id, volume)} "cover": self._fetch_cover(series_id, volume)}
if not updates: if not updates:
series_rec.finish(ok=True)
return 0 return 0
first = min(chapters, key=_chapter_sort_value) first = min(chapters, key=_chapter_sort_value)
@@ -328,10 +288,13 @@ class KavitaVolumeCoverUpdater:
continue continue
# The first chapter gets a full metadata rebuild (Kavita reads # The first chapter gets a full metadata rebuild (Kavita reads
# series metadata from it); other chapters only a volume edit. # series metadata from it); other chapters only a volume edit.
chap_rec = series_rec.begin_item(num)
with chap_rec.measure("archive_rewrite"):
ok, cover_swapped = self._apply_update( ok, cover_swapped = self._apply_update(
cbz, builder, num, cbz, builder, num,
volume=up["volume"], cover=up["cover"], volume=up["volume"], cover=up["cover"],
full_rebuild=(num == first)) full_rebuild=(num == first))
chap_rec.finish(ok=ok)
if not ok: if not ok:
continue continue
entry["volume"] = _normalise_volume_value(up["volume"]) entry["volume"] = _normalise_volume_value(up["volume"])
@@ -346,15 +309,19 @@ class KavitaVolumeCoverUpdater:
first_entry = chapters.get(first) or {} first_entry = chapters.get(first) or {}
cbz = series_dir / (first_entry.get("archiveName") or "") cbz = series_dir / (first_entry.get("archiveName") or "")
if first_entry.get("archiveName") and cbz.is_file(): if first_entry.get("archiveName") and cbz.is_file():
chap_rec = series_rec.begin_item(f"{first} (refresh)")
with chap_rec.measure("archive_rewrite"):
ok, _ = self._apply_update( ok, _ = self._apply_update(
cbz, builder, first, cbz, builder, first,
volume=None, cover=None, full_rebuild=True) volume=None, cover=None, full_rebuild=True)
chap_rec.finish(ok=ok)
if ok: if ok:
self._log(f"{series_dir.name} | chapter {first} | " self._log(f"{series_dir.name} | chapter {first} | "
f"first-chapter metadata refreshed | {cbz.name}") f"first-chapter metadata refreshed | {cbz.name}")
if updated: if updated:
_save_chapter_index(series_dir, index) _save_chapter_index(series_dir, index)
series_rec.finish(ok=True)
return updated return updated
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -545,10 +512,7 @@ if __name__ == "__main__":
matches_cache=MatchesCache(MATCHES_PATH), matches_cache=MatchesCache(MATCHES_PATH),
) )
# One-shot scan (no cron thread): # One-shot scan. Scheduling is handled externally via CronRunner
# (see main_manga.py).
summary = updater.update_all() summary = updater.update_all()
print(f"\n[updater] {summary}") print(f"\n[updater] {summary}")
# Or run on the cron schedule (default: 19:00 every Mon + Thu):
# updater.start()
# updater.wait()
+56 -137
View File
@@ -30,6 +30,10 @@ from flask import Flask, jsonify, request, Response
from MatchesCache import MatchesCache from MatchesCache import MatchesCache
from ComicInfoBuilder import _pick_thumbnail_url from ComicInfoBuilder import _pick_thumbnail_url
from PerfWebPage import render_perf_page
# Cross-link tabs shown on every perf page in the manga container.
_PERF_TABS = [("move", "move"), ("volume/cover", "volume"), ("persons", "person")]
_INDEX_HTML = """<!doctype html> _INDEX_HTML = """<!doctype html>
@@ -71,7 +75,8 @@ _INDEX_HTML = """<!doctype html>
<button id="batchSave" class="primary">Save dirty (0)</button> <button id="batchSave" class="primary">Save dirty (0)</button>
<button id="build">Build all (rescan)</button> <button id="build">Build all (rescan)</button>
<button id="move">Start move</button> <button id="move">Start move</button>
<a href="/perf" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a> <button id="syncPersons">Sync persons</button>
<a href="/perf/move" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
<span class="status" id="status"></span> <span class="status" id="status"></span>
</div> </div>
@@ -342,6 +347,23 @@ document.getElementById("move").addEventListener("click", async () => {
btn.disabled = false; btn.disabled = false;
} }
}); });
document.getElementById("syncPersons").addEventListener("click", async () => {
if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return;
const btn = document.getElementById("syncPersons");
btn.disabled = true;
setStatus("Syncing persons… (running on the server)");
try {
const r = await fetch("/api/persons/sync", { method: "POST" });
if (!r.ok) throw new Error(await r.text());
const d = await r.json();
setStatus("Persons: " + d.updated + " updated, " + d.skipped + " skipped, "
+ d.not_found + " not found, " + d.conflicts + " conflicts");
} catch (err) {
setStatus("Person sync failed: " + err.message);
} finally {
btn.disabled = false;
}
});
for (const th of document.querySelectorAll("th.sortable")) { for (const th of document.querySelectorAll("th.sortable")) {
th.addEventListener("click", () => { th.addEventListener("click", () => {
const col = th.dataset.col; const col = th.dataset.col;
@@ -358,133 +380,6 @@ load();
""" """
_PERF_HTML = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Move performance</title>
<style>
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
h2 { font-size: 1.05rem; margin: 1.4rem 0 .5rem; color:#cbd5e1; }
a { color:#60a5fa; text-decoration:none; }
a:hover { text-decoration:underline; }
.bar { display:flex; gap:.6rem; align-items:center; margin-bottom:1rem; flex-wrap:wrap; }
select, button { padding:.35rem .6rem; background:#222; color:#eee; border:1px solid #555; }
.summary { color:#9ca3af; margin:.3rem 0 1rem; }
table { border-collapse: collapse; width: 100%; margin-bottom:.5rem; }
th, td { border: 1px solid #333; padding: .35rem .6rem; text-align: left; }
th { background:#1d1d1d; }
td.num { text-align:right; font-variant-numeric: tabular-nums; white-space:nowrap; }
.barcell { position:relative; }
.barfill { position:absolute; left:0; top:0; bottom:0; background:#2563eb33; z-index:0; }
.barcell span { position:relative; z-index:1; }
details { margin:.3rem 0; }
summary { cursor:pointer; padding:.25rem 0; }
.chip { color:#9ca3af; font-size:.85rem; }
.err { color:#f87171; }
</style>
</head>
<body>
<h1>Move performance <a href="/" style="font-size:.9rem;">◂ back to matches</a></h1>
<div class="bar">
<label>Run: <select id="runSelect"></select></label>
<button id="reload">Reload</button>
<span class="summary" id="summary"></span>
</div>
<div id="content"></div>
<script>
let runs = [];
function fmtSecs(s) { return (s || 0).toFixed(2) + "s"; }
function fmtTime(unix) { return unix ? new Date(unix * 1000).toLocaleString() : ""; }
function stepTable(totals, grandTotal) {
const entries = Object.entries(totals || {}).sort((a, b) => b[1] - a[1]);
if (!entries.length) return "<p class=chip>(no steps recorded)</p>";
const max = entries[0][1] || 1;
let rows = "";
for (const [name, secs] of entries) {
const pct = grandTotal ? (secs / grandTotal * 100) : 0;
const w = (secs / max * 100);
rows += "<tr><td>" + name + "</td>"
+ "<td class='num'>" + fmtSecs(secs) + "</td>"
+ "<td class='num'>" + pct.toFixed(1) + "%</td>"
+ "<td class='barcell'><div class='barfill' style='width:" + w + "%'></div>"
+ "<span>&nbsp;</span></td></tr>";
}
return "<table><thead><tr><th>Step</th><th class=num>Total</th>"
+ "<th class=num>% of run</th><th>&nbsp;</th></tr></thead><tbody>"
+ rows + "</tbody></table>";
}
function seriesBlock(s) {
let chapters = "";
// Chapters sorted slowest first to surface outliers.
const chs = (s.chapters || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
for (const c of chs) {
const steps = Object.entries(c.steps || {}).sort((a, b) => b[1] - a[1])
.map(([n, v]) => n + " " + fmtSecs(v)).join(", ");
chapters += "<tr><td>" + c.chapter + (c.ok ? "" : " <span class=err>(failed)</span>") + "</td>"
+ "<td class='num'>" + fmtSecs(c.totalSeconds) + "</td>"
+ "<td>" + steps + "</td></tr>";
}
const seriesSteps = Object.entries(s.steps || {})
.map(([n, v]) => n + " " + fmtSecs(v)).join(", ") || "";
return "<details><summary><b>" + s.title + "</b> "
+ "<span class=chip>" + fmtSecs(s.totalSeconds) + " · "
+ (s.chapterCount || 0) + " chapters · " + seriesSteps + "</span></summary>"
+ "<table><thead><tr><th>Chapter</th><th class=num>Total</th>"
+ "<th>Steps</th></tr></thead><tbody>" + chapters + "</tbody></table></details>";
}
function renderRun(run) {
const c = document.getElementById("content");
if (!run) { c.innerHTML = "<p class=chip>No runs recorded yet.</p>"; return; }
document.getElementById("summary").textContent =
fmtTime(run.startedAt) + " · " + fmtSecs(run.totalSeconds) + " · "
+ run.seriesCount + " series · " + run.chapterCount + " chapters";
let html = "<h2>Chapter steps (summed over all chapters)</h2>"
+ stepTable(run.stepTotals, run.totalSeconds)
+ "<h2>Series steps (metadata / person sync)</h2>"
+ stepTable(run.seriesStepTotals, run.totalSeconds)
+ "<h2>Series detail</h2>";
const series = (run.series || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
html += series.map(seriesBlock).join("");
c.innerHTML = html;
}
function renderSelect() {
const sel = document.getElementById("runSelect");
sel.innerHTML = "";
runs.forEach((r, i) => {
const o = document.createElement("option");
o.value = i;
o.textContent = fmtTime(r.startedAt) + " (" + fmtSecs(r.totalSeconds) + ")";
sel.appendChild(o);
});
}
async function load() {
const r = await fetch("/api/perf");
const data = await r.json();
runs = data.runs || [];
renderSelect();
renderRun(runs[0]);
}
document.getElementById("runSelect").addEventListener("change", e => {
renderRun(runs[e.target.value]);
});
document.getElementById("reload").addEventListener("click", load);
load();
</script>
</body>
</html>
"""
class MatchesWebApp: class MatchesWebApp:
@@ -497,16 +392,22 @@ class MatchesWebApp:
def __init__(self, cache: MatchesCache, *, def __init__(self, cache: MatchesCache, *,
mover=None, mover=None,
person_updater=None,
person_trigger: str = "web",
perf_stats=None, perf_stats=None,
host: str = "0.0.0.0", host: str = "0.0.0.0",
port: int = 8080): port: int = 8080):
self._cache = cache self._cache = cache
self._mover = mover self._mover = mover
self._perf = perf_stats self._person_updater = person_updater
self._person_trigger = person_trigger
# perf_stats: dict {name -> PerfStats}, e.g. {"move", "volume", "person"}.
self._perf = perf_stats or {}
self._host = host self._host = host
self._port = port self._port = port
self._build_lock = threading.Lock() self._build_lock = threading.Lock()
self._move_lock = threading.Lock() self._move_lock = threading.Lock()
self._person_lock = threading.Lock()
self._app = Flask(__name__) self._app = Flask(__name__)
self._thread: "threading.Thread | None" = None self._thread: "threading.Thread | None" = None
self._register_routes() self._register_routes()
@@ -631,12 +532,30 @@ class MatchesWebApp:
self._move_lock.release() self._move_lock.release()
return jsonify({"results": results}) return jsonify({"results": results})
@app.get("/perf") @app.post("/api/persons/sync")
def perf_page() -> Response: def api_persons_sync():
return Response(_PERF_HTML, mimetype="text/html; charset=utf-8") if self._person_updater is None:
return Response("no person updater configured", status=503)
if not self._person_lock.acquire(blocking=False):
return Response("person sync already running", status=409)
try:
report = self._person_updater.update_all_persons(
trigger=self._person_trigger,
perf=self._perf.get("person"))
except Exception as exc:
return Response(f"person sync failed: {exc}", status=500)
finally:
self._person_lock.release()
return jsonify(report)
@app.get("/api/perf") # Perf pages: /perf (move) + /perf/<name> for the updaters.
def api_perf(): @app.get("/perf")
if self._perf is None: @app.get("/perf/<name>")
return jsonify({"runs": []}) def perf_page(name: str = "move") -> Response:
return jsonify(self._perf.all()) return Response(render_perf_page(name, _PERF_TABS),
mimetype="text/html; charset=utf-8")
@app.get("/api/perf/<name>")
def api_perf(name: str):
stats = self._perf.get(name)
return jsonify(stats.all() if stats is not None else {"runs": []})
-267
View File
@@ -1,267 +0,0 @@
"""
perf_stats.py
=============
Lightweight performance profiler for the Suwayomi -> Kavita move pipeline.
It records, per move run, how long each step of every chapter takes plus
per-series and per-run totals, so a slowdown can be traced to the step
responsible (cover download, image-dimension probing, CBZ packing, …).
Data model (one entry per run, newest first)::
{
"runs": [
{
"startedAt": 1700000000, # unix seconds
"finishedAt": 1700000123,
"totalSeconds": 123.4, # wall clock of the whole run
"seriesCount": 2,
"chapterCount": 31,
"stepTotals": { # summed over ALL chapters
"cover": 41.2, "image_dimensions": 55.8, "pack_cbz": 18.1, ...
},
"seriesStepTotals": { # summed over ALL series
"fetch_metadata": 2.4, "person_sync": 9.7
},
"series": [
{
"title": "Call of the Night",
"totalSeconds": 60.2,
"chapterCount": 20,
"steps": {"fetch_metadata": 1.2, "person_sync": 3.4},
"chapters": [
{"chapter": "1", "ok": true, "totalSeconds": 11.5,
"steps": {"cover": 1.8, "image_dimensions": 4.2, ...}}
]
}
]
}
]
}
Usage from the mover::
perf = PerfStats(path) # path=None -> disabled (no-op)
run = perf.begin_run()
series = run.begin_series("Title")
with series.measure("fetch_metadata"):
...
chap = series.begin_chapter("1")
with chap.measure("pack_cbz"):
...
chap.finish(ok=True)
series.finish()
run.finish() # persists the run to disk
When ``path`` is None every recorder is a no-op and nothing is written,
so the profiler can be left permanently wired in with negligible cost.
"""
from __future__ import annotations
import json
import threading
import time
import uuid
from contextlib import contextmanager
from pathlib import Path
# Keep the JSON small: only the most recent runs are retained on disk.
_MAX_RUNS = 30
class _StepTimer:
"""
Base recorder: accumulates ``{step_name: seconds}`` and tracks its own
wall-clock lifetime. ``enabled=False`` turns every method into a no-op.
"""
def __init__(self, enabled: bool = True):
self.steps: dict[str, float] = {}
self._enabled = enabled
self._t0 = time.monotonic()
@contextmanager
def measure(self, name: str):
"""Context manager timing a named step (accumulates on repeat use)."""
if not self._enabled:
yield
return
start = time.monotonic()
try:
yield
finally:
self.steps[name] = round(
self.steps.get(name, 0.0) + (time.monotonic() - start), 4)
def elapsed(self) -> float:
return round(time.monotonic() - self._t0, 4)
class ChapterRecorder(_StepTimer):
"""Per-chapter step timer."""
def __init__(self, series: "SeriesRecorder", chapter: str,
enabled: bool = True):
super().__init__(enabled)
self._series = series
self._chapter = chapter
self._ok = True
def finish(self, *, ok: bool = True) -> None:
self._ok = ok
if not self._enabled:
return
self._series._chapters.append({
"chapter": self._chapter,
"ok": ok,
"totalSeconds": self.elapsed(),
"steps": self.steps,
})
class SeriesRecorder(_StepTimer):
"""Per-series step timer; also collects its chapters."""
def __init__(self, run: "RunRecorder", title: str, enabled: bool = True):
super().__init__(enabled)
self._run = run
self._title = title
self._chapters: list[dict] = []
def begin_chapter(self, chapter: str) -> ChapterRecorder:
return ChapterRecorder(self, chapter, enabled=self._enabled)
def finish(self) -> None:
if not self._enabled:
return
self._run._series.append({
"title": self._title,
"totalSeconds": self.elapsed(),
"chapterCount": len(self._chapters),
"steps": self.steps,
"chapters": self._chapters,
})
# Persist the run's progress after every series so a long run is
# observable live and survives a crash mid-run.
self._run.flush()
class RunRecorder:
"""Top-level recorder for one full move run."""
def __init__(self, stats: "PerfStats", enabled: bool = True):
self._stats = stats
self._enabled = enabled
self._series: list[dict] = []
self._started = time.time()
self._t0 = time.monotonic()
# Stable identity so incremental flushes update the same run entry
# instead of inserting a duplicate on every series.
self._run_id = uuid.uuid4().hex
def begin_series(self, title: str) -> SeriesRecorder:
return SeriesRecorder(self, title, enabled=self._enabled)
def _snapshot(self) -> dict:
"""Aggregates the run's current state into a serialisable dict."""
step_totals: dict[str, float] = {}
series_step_totals: dict[str, float] = {}
chapter_count = 0
for s in self._series:
for step, secs in s["steps"].items():
series_step_totals[step] = round(
series_step_totals.get(step, 0.0) + secs, 4)
for ch in s["chapters"]:
chapter_count += 1
for step, secs in ch["steps"].items():
step_totals[step] = round(
step_totals.get(step, 0.0) + secs, 4)
return {
"runId": self._run_id,
"startedAt": round(self._started),
"finishedAt": round(time.time()),
"totalSeconds": round(time.monotonic() - self._t0, 4),
"seriesCount": len(self._series),
"chapterCount": chapter_count,
"stepTotals": step_totals,
"seriesStepTotals": series_step_totals,
"series": self._series,
}
def flush(self) -> dict | None:
"""Writes the run's current state to disk (upsert by runId)."""
if not self._enabled:
return None
run = self._snapshot()
self._stats._upsert_run(run)
return run
def finish(self) -> dict | None:
"""Persists the final run state. Returns the run dict."""
return self.flush()
class PerfStats:
"""
Profiler facade + JSON persistence.
Parameters
----------
path : Destination JSON file. None disables the profiler entirely
(every recorder becomes a no-op and nothing is written).
"""
def __init__(self, path=None):
self._path = Path(path) if path else None
self._lock = threading.Lock()
@property
def enabled(self) -> bool:
return self._path is not None
def begin_run(self) -> RunRecorder:
return RunRecorder(self, enabled=self.enabled)
# ------------------------------------------------------------------
# Read / write
# ------------------------------------------------------------------
def all(self) -> dict:
"""Returns the persisted runs ({"runs": [...]}); newest first."""
if not self._path or not self._path.is_file():
return {"runs": []}
try:
with self._path.open("r", encoding="utf-8") as f:
data = json.load(f)
except (OSError, json.JSONDecodeError):
return {"runs": []}
if not isinstance(data, dict) or not isinstance(data.get("runs"), list):
return {"runs": []}
return data
def _upsert_run(self, run: dict) -> None:
"""
Inserts a new run (newest first) or replaces the existing entry with
the same runId — so incremental flushes during a run update one entry
rather than appending a duplicate after every series.
"""
if not self._path:
return
with self._lock:
runs = self.all()["runs"]
run_id = run.get("runId")
for i, existing in enumerate(runs):
if existing.get("runId") == run_id:
runs[i] = run
break
else:
runs.insert(0, run) # newest first
del runs[_MAX_RUNS:] # cap history
self._path.parent.mkdir(parents=True, exist_ok=True)
tmp = self._path.with_suffix(self._path.suffix + ".tmp")
with tmp.open("w", encoding="utf-8") as f:
json.dump({"runs": runs}, f, ensure_ascii=False, indent=2)
tmp.replace(self._path)
+11 -47
View File
@@ -64,8 +64,6 @@ from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver from MALResolver import MALResolver
from AniListResolver import AniListResolver from AniListResolver import AniListResolver
from KavitaClient import KavitaClient
from KavitaPersonUpdater import KavitaPersonUpdater
from MatchesCache import MatchesCache from MatchesCache import MatchesCache
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CoverCache import CoverCache, _IMAGE_EXTS from CoverCache import CoverCache, _IMAGE_EXTS
@@ -306,9 +304,6 @@ class SuwayomiMover:
Expected layout: <root>/<Source>/<Title>/<Chapter N>/ Expected layout: <root>/<Source>/<Title>/<Chapter N>/
kavita_path : Root of the Kavita library. kavita_path : Root of the Kavita library.
Series sub-directories are created automatically. Series sub-directories are created automatically.
kavita_base_url : Kavita server URL — required only for person sync,
e.g. "http://192.168.2.2:5000".
kavita_api_key : Kavita API key — required only for person sync.
language : ComicInfo LanguageISO and SeriesSort language ("en"). language : ComicInfo LanguageISO and SeriesSort language ("en").
request_timeout : HTTP timeout in seconds for all API / image requests. request_timeout : HTTP timeout in seconds for all API / image requests.
delete_source : Remove the source chapter folder after successful pack. delete_source : Remove the source chapter folder after successful pack.
@@ -316,14 +311,16 @@ class SuwayomiMover:
temporary cache, deleted at process exit. temporary cache, deleted at process exit.
perf_stats : Optional PerfStats instance for per-step timing. None perf_stats : Optional PerfStats instance for per-step timing. None
(default) disables profiling. (default) disables profiling.
Note: Kavita person sync is no longer done here — it runs as a separate,
global, id-based updater on its own cron schedule (KavitaPersonUpdater).
The mover only touches MangaBaka / MangaDex / MAL / AniList.
""" """
def __init__(self, def __init__(self,
suwayomi_path, suwayomi_path,
kavita_path, kavita_path,
*, *,
kavita_base_url: "str | None" = None,
kavita_api_key: "str | None" = None,
language: str = "en", language: str = "en",
request_timeout: int = 30, request_timeout: int = 30,
delete_source: bool = True, delete_source: bool = True,
@@ -357,16 +354,6 @@ class SuwayomiMover:
self._cover_cache = CoverCache( self._cover_cache = CoverCache(
cover_cache_dir, session=session, request_timeout=request_timeout) cover_cache_dir, session=session, request_timeout=request_timeout)
self._person_updater: "KavitaPersonUpdater | None" = None
if kavita_base_url and kavita_api_key:
kavita_client = KavitaClient(
kavita_base_url, kavita_api_key,
request_timeout=request_timeout)
self._person_updater = KavitaPersonUpdater(
kavita_client,
mal_resolver=self._mal,
al_resolver=self._al)
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Public API # Public API
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -502,7 +489,7 @@ class SuwayomiMover:
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def _process_series_dir(self, manga_dir: Path, run=None) -> dict: def _process_series_dir(self, manga_dir: Path, run=None) -> dict:
manga_title = manga_dir.name manga_title = manga_dir.name
series_rec = (run or self._perf.begin_run()).begin_series(manga_title) series_rec = (run or self._perf.begin_run()).begin_item(manga_title)
chapter_dirs = sorted( chapter_dirs = sorted(
(d for d in manga_dir.iterdir() if d.is_dir()), (d for d in manga_dir.iterdir() if d.is_dir()),
@@ -597,27 +584,11 @@ class SuwayomiMover:
} }
_save_chapter_index(dest_series, chapter_index) _save_chapter_index(dest_series, chapter_index)
# Sync Kavita persons once per series. # Person sync no longer runs here — it has its own global,
# Both MAL and AniList IDs come from MangaBaka's source map; # id-based updater on a separate cron schedule (see
# AniList is used as fallback when MAL returns no characters/staff. # KavitaPersonUpdater.update_all_persons).
person_result: "dict | None" = None
if self._person_updater:
mal_id = ((ComicInfoBuilder._mal_id_from_source(md) if md else None)
or self._mal.find_mal_id(builder_title))
al_id = ComicInfoBuilder._al_id_from_source(md) if md else None
if mal_id or al_id:
try:
with series_rec.measure("person_sync"):
person_result = self._person_updater.update_for_manga(
mal_id, al_manga_id=al_id)
print(f" Persons: chars={person_result['characters'].get('updated')} "
f"staff={person_result['staff'].get('updated')}")
except Exception as exc:
person_result = {"error": str(exc)}
print(f" Persons: ERROR {exc}")
series_rec.finish() series_rec.finish()
return {"chapters": chapter_results, "persons": person_result} return {"chapters": chapter_results}
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Internal: chapter # Internal: chapter
@@ -637,8 +608,8 @@ class SuwayomiMover:
<Pages> element correctly points to the front cover). <Pages> element correctly points to the front cover).
""" """
cbz_path = dest_series / f"{chapter_dir.name}.cbz" cbz_path = dest_series / f"{chapter_dir.name}.cbz"
chap_rec = (series_rec or self._perf.begin_run().begin_series("") chap_rec = (series_rec or self._perf.begin_run().begin_item("")
).begin_chapter(chapter_num) ).begin_item(chapter_num)
# add_pages_from_folder records its own sub-steps on this recorder. # add_pages_from_folder records its own sub-steps on this recorder.
builder.perf = chap_rec builder.perf = chap_rec
ok = False ok = False
@@ -675,14 +646,9 @@ class SuwayomiMover:
# Usage example # Usage example
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
if __name__ == "__main__": if __name__ == "__main__":
import os
# Local (no-Docker) smoke test. Adjust paths to your environment. # Local (no-Docker) smoke test. Adjust paths to your environment.
# Set the KAVITA_API_KEY env var — never commit API keys to the repo.
SUWAYOMI_PATH = r"M:\config\downloads\mangas" SUWAYOMI_PATH = r"M:\config\downloads\mangas"
KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test" KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
KAVITA_URL = "http://192.168.2.2:5000"
KAVITA_KEY = os.environ.get("KAVITA_API_KEY", "")
# matches.json lives next to this script during local testing. # matches.json lives next to this script during local testing.
MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json" MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"
@@ -691,8 +657,6 @@ if __name__ == "__main__":
mover = SuwayomiMover( mover = SuwayomiMover(
SUWAYOMI_PATH, SUWAYOMI_PATH,
KAVITA_PATH, KAVITA_PATH,
kavita_base_url=KAVITA_URL,
kavita_api_key=KAVITA_KEY,
delete_source=False, delete_source=False,
matches_cache=matches_cache, matches_cache=matches_cache,
) )