Performance and Person Updater Improvements #7

Merged
johannesbot merged 4 commits from time-measurement into master 2026-06-16 18:46:56 +02:00
18 changed files with 1142 additions and 559 deletions
+7 -1
View File
@@ -11,12 +11,18 @@ HOST_MANGA_CONFIG_PATH=/path/to/manga-config
MANGA_WEB_PORT=8080 MANGA_WEB_PORT=8080
SETTLE_SECONDS=600 SETTLE_SECONDS=600
DELETE_SOURCE=true DELETE_SOURCE=true
# Periodic updaters (volume/cover + global person sync) run together on
# this cron. Sundays 10:00. Person updater also covers LN libraries.
UPDATER_ENABLED=true UPDATER_ENABLED=true
UPDATER_SCHEDULE=0 19 * * 1,4 UPDATER_SCHEDULE=0 10 * * 0
COVER_CACHE_PATH=/config/covers COVER_CACHE_PATH=/config/covers
PERF_PATH=/config/perf_stats.json
VOLUME_PERF_PATH=/config/volume_perf_stats.json
PERSON_PERF_PATH=/config/person_perf_stats.json
# Light-novel container (kavita-lightnovel-metadata-fetcher) # Light-novel container (kavita-lightnovel-metadata-fetcher)
HOST_LN_CONFIG_PATH=/path/to/ln-config HOST_LN_CONFIG_PATH=/path/to/ln-config
LN_WEB_PORT=8081 LN_WEB_PORT=8081
LN_LIBRARY_IDS=3,5 LN_LIBRARY_IDS=3,5
LN_UPDATER_ENABLED=true
+12 -5
View File
@@ -13,15 +13,18 @@ services:
SETTLE_SECONDS: "${SETTLE_SECONDS:-600}" SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
DELETE_SOURCE: "${DELETE_SOURCE:-true}" DELETE_SOURCE: "${DELETE_SOURCE:-true}"
MATCH_PATH: "/config/matches.json" MATCH_PATH: "/config/matches.json"
# Volume/cover back-fill updater # Periodic updaters (volume/cover back-fill + global person sync) run
# together on this cron. "0 10 * * 0" = Sundays 10:00 (local time, see TZ)
UPDATER_ENABLED: "${UPDATER_ENABLED:-true}" UPDATER_ENABLED: "${UPDATER_ENABLED:-true}"
# Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
# (local time, see TZ)
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
UPDATER_LOG: "/config/volume_updater.log" UPDATER_LOG: "/config/volume_updater.log"
# Persistent cover cache (empty = temp dir, deleted on container stop) # Persistent cover cache (empty = temp dir, deleted on container stop)
COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}" COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}"
# Timezone for the cron schedule — without this 19:00 means 19:00 UTC # Per-step timing stats (viewable at /perf, /perf/volume, /perf/person)
PERF_PATH: "${PERF_PATH:-/config/perf_stats.json}"
VOLUME_PERF_PATH: "${VOLUME_PERF_PATH:-/config/volume_perf_stats.json}"
PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
# Timezone for the cron schedule — without this 10:00 means 10:00 UTC
TZ: "${TZ:-Europe/Berlin}" TZ: "${TZ:-Europe/Berlin}"
ports: ports:
- "${MANGA_WEB_PORT:-8080}:8080" - "${MANGA_WEB_PORT:-8080}:8080"
@@ -43,6 +46,10 @@ services:
LIBRARY_IDS: "${LN_LIBRARY_IDS}" LIBRARY_IDS: "${LN_LIBRARY_IDS}"
LANGUAGE: "${LANGUAGE:-en}" LANGUAGE: "${LANGUAGE:-en}"
MATCH_PATH: "/config/matches.json" MATCH_PATH: "/config/matches.json"
# Global person sync on cron (same default cadence as the manga side)
UPDATER_ENABLED: "${LN_UPDATER_ENABLED:-true}"
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
TZ: "${TZ:-Europe/Berlin}" TZ: "${TZ:-Europe/Berlin}"
ports: ports:
- "${LN_WEB_PORT:-8081}:8080" - "${LN_WEB_PORT:-8081}:8080"
+33
View File
@@ -27,6 +27,12 @@ Environment variables
MATCH_PATH default /config/matches.json MATCH_PATH default /config/matches.json
WEB_PORT default 8080 WEB_PORT default 8080
WEB_HOST default 0.0.0.0 WEB_HOST default 0.0.0.0
UPDATER_ENABLED default true (run the person updater on cron)
UPDATER_SCHEDULE cron expression for the person updater,
default "0 10 * * 0" = Sundays 10:00
(local time — set TZ inside the container!)
PERSON_PERF_PATH JSON file for person updater timing.
Default /config/person_perf_stats.json
""" """
from __future__ import annotations from __future__ import annotations
@@ -51,6 +57,15 @@ sys.path.insert(0, str(_BASE / "src" / "ln"))
from MatchesCache import MatchesCache # noqa: E402 from MatchesCache import MatchesCache # noqa: E402
from LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402 from LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402
from MatchesWebApp import MatchesWebApp # noqa: E402 from MatchesWebApp import MatchesWebApp # noqa: E402
from PerfStats import PerfStats # noqa: E402
from CronRunner import CronRunner # noqa: E402
def _env_bool(name: str, default: bool) -> bool:
raw = os.environ.get(name)
if raw is None:
return default
return raw.strip().lower() in ("1", "true", "yes", "y", "on")
def _env_str(name: str, default: "str | None" = None, def _env_str(name: str, default: "str | None" = None,
@@ -98,6 +113,10 @@ def main() -> int:
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0" web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
web_port = _env_int("WEB_PORT", 8080) web_port = _env_int("WEB_PORT", 8080)
library_ids = _env_int_list("LIBRARY_IDS") library_ids = _env_int_list("LIBRARY_IDS")
updater_enabled = _env_bool("UPDATER_ENABLED", True)
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
person_perf_path = _env_str("PERSON_PERF_PATH",
"/config/person_perf_stats.json") or None
print(f"[main] kavita url = {kavita_url}", flush=True) print(f"[main] kavita url = {kavita_url}", flush=True)
print(f"[main] language = {language}", flush=True) print(f"[main] language = {language}", flush=True)
@@ -107,6 +126,7 @@ def main() -> int:
print(f"[main] web = {web_host}:{web_port}", flush=True) print(f"[main] web = {web_host}:{web_port}", flush=True)
cache = MatchesCache(match_path) cache = MatchesCache(match_path)
person_perf = PerfStats(person_perf_path)
orchestrator = LightNovelOrchestrator( orchestrator = LightNovelOrchestrator(
kavita_url=kavita_url, kavita_url=kavita_url,
kavita_api_key=kavita_api_key, kavita_api_key=kavita_api_key,
@@ -118,9 +138,22 @@ def main() -> int:
app = MatchesWebApp( app = MatchesWebApp(
cache, orchestrator=orchestrator, cache, orchestrator=orchestrator,
default_library_ids=library_ids, default_library_ids=library_ids,
person_perf=person_perf,
host=web_host, port=web_port, host=web_host, port=web_port,
) )
app.start() app.start()
if updater_enabled:
try:
CronRunner(
updater_schedule,
lambda: orchestrator.sync_persons(trigger="cron",
perf=person_perf),
name="person-updater").start()
except ValueError as exc:
print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
f"scheduled person sync DISABLED", flush=True)
app.wait() app.wait()
return 0 return 0
+49 -13
View File
@@ -28,13 +28,19 @@ Environment variables
MATCH_PATH default /config/matches.json MATCH_PATH default /config/matches.json
WEB_PORT default 8080 (Flask web UI for matches.json) WEB_PORT default 8080 (Flask web UI for matches.json)
WEB_HOST default 0.0.0.0 WEB_HOST default 0.0.0.0
UPDATER_ENABLED default true (volume/cover back-fill cron) UPDATER_ENABLED default true (run volume/cover + person updaters on cron)
UPDATER_SCHEDULE cron expression for the updater scans, UPDATER_SCHEDULE cron expression for the periodic updaters,
default "0 19 * * 1,4" = 19:00 every Mon + Thu default "0 10 * * 0" = Sundays 10:00
(local time — set TZ inside the container!) (local time — set TZ inside the container!)
UPDATER_LOG default /config/volume_updater.log UPDATER_LOG default /config/volume_updater.log
COVER_CACHE_PATH directory for the persistent cover cache; COVER_CACHE_PATH directory for the persistent cover cache;
empty (default) = temporary cache, deleted on exit empty (default) = temporary cache, deleted on exit
PERF_PATH JSON file for per-step move timing stats.
Default /config/perf_stats.json (empty disables it)
VOLUME_PERF_PATH JSON file for volume/cover updater timing.
Default /config/volume_perf_stats.json
PERSON_PERF_PATH JSON file for person updater timing.
Default /config/person_perf_stats.json
""" """
from __future__ import annotations from __future__ import annotations
@@ -42,7 +48,6 @@ from __future__ import annotations
import os import os
import sys import sys
from pathlib import Path from pathlib import Path
try: try:
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
@@ -61,6 +66,10 @@ from SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402,F401
from MatchesCache import MatchesCache # noqa: E402 from MatchesCache import MatchesCache # noqa: E402
from MatchesWebApp import MatchesWebApp # noqa: E402 from MatchesWebApp import MatchesWebApp # noqa: E402
from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402 from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402
from KavitaClient import KavitaClient # noqa: E402
from KavitaPersonUpdater import KavitaPersonUpdater # noqa: E402
from PerfStats import PerfStats # noqa: E402
from CronRunner import CronRunner # noqa: E402
def _env_str(name: str, default: "str | None" = None, def _env_str(name: str, default: "str | None" = None,
@@ -104,9 +113,14 @@ def main() -> int:
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0" web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
web_port = _env_int("WEB_PORT", 8080) web_port = _env_int("WEB_PORT", 8080)
updater_enabled = _env_bool("UPDATER_ENABLED", True) updater_enabled = _env_bool("UPDATER_ENABLED", True)
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4") updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log") updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
perf_path = _env_str("PERF_PATH", "/config/perf_stats.json") or None
volume_perf_path = _env_str("VOLUME_PERF_PATH",
"/config/volume_perf_stats.json") or None
person_perf_path = _env_str("PERSON_PERF_PATH",
"/config/person_perf_stats.json") or None
print(f"[main] suwayomi = {suwayomi_path}", flush=True) print(f"[main] suwayomi = {suwayomi_path}", flush=True)
print(f"[main] kavita = {kavita_path}", flush=True) print(f"[main] kavita = {kavita_path}", flush=True)
@@ -118,40 +132,62 @@ def main() -> int:
print(f"[main] web = {web_host}:{web_port}", flush=True) print(f"[main] web = {web_host}:{web_port}", flush=True)
matches_cache = MatchesCache(match_path) matches_cache = MatchesCache(match_path)
perf_move = PerfStats(perf_path)
perf_volume = PerfStats(volume_perf_path)
perf_person = PerfStats(person_perf_path)
mover = SuwayomiMover( mover = SuwayomiMover(
suwayomi_path, kavita_path, suwayomi_path, kavita_path,
kavita_base_url=kavita_url,
kavita_api_key=kavita_api_key,
language=language, language=language,
request_timeout=request_timeout, request_timeout=request_timeout,
delete_source=delete_source, delete_source=delete_source,
matches_cache=matches_cache, matches_cache=matches_cache,
cover_cache_dir=cover_cache_path, cover_cache_dir=cover_cache_path,
perf_stats=perf_move,
) )
# Standalone, global, id-based person updater (manga + LN libraries).
person_updater = None
if kavita_api_key:
kavita_client = KavitaClient(kavita_url, kavita_api_key,
request_timeout=request_timeout)
person_updater = KavitaPersonUpdater(kavita_client)
# watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds) # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port) web_app = MatchesWebApp(
matches_cache, mover=mover,
person_updater=person_updater, person_trigger="web",
perf_stats={"move": perf_move, "volume": perf_volume,
"person": perf_person},
host=web_host, port=web_port)
web_app.start() web_app.start()
if updater_enabled: if updater_enabled:
try:
updater = KavitaVolumeCoverUpdater( updater = KavitaVolumeCoverUpdater(
kavita_path, kavita_path,
matches_cache=matches_cache, matches_cache=matches_cache,
language=language, language=language,
request_timeout=request_timeout, request_timeout=request_timeout,
log_path=updater_log, log_path=updater_log,
schedule=updater_schedule,
cover_cache_dir=cover_cache_path, cover_cache_dir=cover_cache_path,
perf_stats=perf_volume,
) )
updater.start()
def _scheduled_job():
updater.update_all()
if person_updater is not None:
person_updater.update_all_persons(trigger="cron",
perf=perf_person)
try:
CronRunner(updater_schedule, _scheduled_job,
name="updaters").start()
except ValueError as exc: except ValueError as exc:
# Invalid cron expression — keep the service up, just without # Invalid cron expression — keep the service up, just without
# the updater, and make the config error obvious in the logs. # the scheduled updaters, and surface the config error.
print(f"[main] UPDATER_SCHEDULE invalid ({exc}); " print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
f"volume/cover updater DISABLED", flush=True) f"scheduled updaters DISABLED", flush=True)
# watcher.start() # watcher.start()
# watcher.wait() # blocks until stop() is called via a signal # watcher.wait() # blocks until stop() is called via a signal
+87
View File
@@ -0,0 +1,87 @@
"""
cron_runner.py
==============
Runs a single callable on a cron schedule on a background thread.
Decouples *what* runs from *when*: both the manga container (volume/cover
updater + person updater) and the LN container (person updater) schedule
their work through this one helper, using a shared ``CronSchedule`` for the
``next_after`` arithmetic.
Usage::
runner = CronRunner("0 10 * * 0", job=my_callable) # Sundays 10:00
runner.start()
...
runner.stop()
When the schedule string is invalid, the CronSchedule constructor raises
ValueError — the caller decides whether to disable the runner or fall back.
The schedule is evaluated in local time (set TZ inside the container).
"""
from __future__ import annotations
import threading
from datetime import datetime
from CronSchedule import CronSchedule
def _now() -> str:
return datetime.now().isoformat(timespec="seconds")
class CronRunner:
"""
Fires ``job()`` whenever the cron ``schedule`` elapses.
Parameters
----------
schedule : 5-field cron expression (see CronSchedule).
job : Zero-arg callable invoked on each scheduled tick. Exceptions
are caught and logged so a failing run does not kill the loop.
name : Thread name (for logs).
"""
def __init__(self, schedule: str, job, *, name: str = "CronRunner"):
self._cron = CronSchedule(schedule)
self._job = job
self._name = name
self._stop = threading.Event()
self._thread: "threading.Thread | None" = None
def start(self) -> None:
"""Starts the scheduling thread. Non-blocking."""
if self._thread is not None and self._thread.is_alive():
return
self._stop.clear()
self._thread = threading.Thread(
target=self._loop, name=self._name, daemon=True)
self._thread.start()
print(f"[{_now()}] [{self._name}] scheduled on "
f"cron '{self._cron.expression}'", flush=True)
def stop(self) -> None:
"""Signals the loop to stop (a job already running finishes first)."""
self._stop.set()
if self._thread is not None:
self._thread.join(timeout=10)
def wait(self) -> None:
"""Blocks the calling thread until stop() is invoked."""
self._stop.wait()
def _loop(self) -> None:
while not self._stop.is_set():
next_run = self._cron.next_after(datetime.now())
wait = max(0.0, (next_run - datetime.now()).total_seconds())
print(f"[{_now()}] [{self._name}] next run: "
f"{next_run.isoformat(timespec='minutes')}", flush=True)
if self._stop.wait(wait):
break
try:
self._job()
except Exception as exc:
print(f"[{_now()}] [{self._name}] job ERROR: {exc}", flush=True)
+25
View File
@@ -204,6 +204,31 @@ class KavitaClient:
r.raise_for_status() r.raise_for_status()
return r.json() or [] return r.json() or []
def list_all_persons(self, *, page_size: int = 200) -> list[dict]:
"""
Returns every PersonDto in the instance.
Pages through POST /api/Person/all (the browse endpoint) with an
empty filter until an empty page is returned — same paging pattern
as list_series_in_library.
"""
results: list[dict] = []
page = 1
while True:
r = self._session.post(
f"{self._base}/api/Person/all",
params={"PageNumber": page, "PageSize": page_size},
json={}, timeout=self._timeout)
r.raise_for_status()
chunk = r.json() or []
if not chunk:
break
results.extend(chunk)
if len(chunk) < page_size:
break
page += 1
return results
def update_person(self, payload: dict) -> None: def update_person(self, payload: dict) -> None:
"""Writes a person record (malId, aniListId, description, …).""" """Writes a person record (malId, aniListId, description, …)."""
r = self._session.post(f"{self._base}/api/Person/update", r = self._session.post(f"{self._base}/api/Person/update",
+131 -329
View File
@@ -2,407 +2,220 @@
kavita_person_updater.py kavita_person_updater.py
======================== ========================
Synchronises Kavita person / character records with MyAnimeList data. Synchronises Kavita character person-records with MyAnimeList / AniList data.
For every character and staff member that MAL knows about for a given manga Global, id-based mode
the updater: ---------------------
1. Searches Kavita for a matching Person record (by name similarity / Kavita person-records are created with a disambiguated name carrying the
alias match, configurable threshold). tracker *character* id, e.g. ``Rem (MAL 118737)`` (manga: written into
2. Sets the MAL ID on the Kavita person if it is not yet linked. ComicInfo <Characters>; light novels: written by the metadata builder).
3. Uploads the MAL profile image when the cover is not locked and has ``update_all_persons`` walks **every** person in the Kavita instance, reads
not been set in a previous sync run. that id from the name, looks the character up on MAL / AniList by id, and
4. Populates the description field when Kavita has none and MAL provides writes back:
an 'about' text (requires an extra Jikan request per character; only
performed when update_descriptions=True). * the tracker id into the ``malId`` / ``aniListId`` field (when still empty),
* a description (when the record has none),
* the profile image (when not locked and not already set).
Persons whose name carries no id (authors / staff, which are not
disambiguated) are skipped. A record already linked to a *different*
tracker id than its name says is reported as a conflict and left untouched.
This mode is format-independent (it only does id lookups, never title
searches) so a single pass covers both the manga and light-novel libraries.
All HTTP traffic to Kavita goes through the shared :class:`KavitaClient` All HTTP traffic to Kavita goes through the shared :class:`KavitaClient`
(`/api/Person/search`, `/api/Person/update`, `/api/Upload/person`). (`/api/Person/all`, `/api/Person/update`, `/api/Upload/person`).
Tested against Kavita 0.9.0.2. Tested against Kavita 0.9.0.2.
""" """
from __future__ import annotations from __future__ import annotations
import datetime
import requests import requests
from KavitaClient import KavitaClient from KavitaClient import KavitaClient
from MALResolver import MALResolver from MALResolver import MALResolver
from AniListResolver import AniListResolver from AniListResolver import AniListResolver
from TextUtils import best_similarity, paragraphs_to_html, person_name_with_id from PerfStats import PerfStats
from TextUtils import paragraphs_to_html, parse_person_tracker_id
class KavitaPersonUpdater: class KavitaPersonUpdater:
""" """
Syncs Kavita Person records with MyAnimeList data. Syncs Kavita character person-records with MAL / AniList data, keyed by
the tracker id embedded in each person's name.
Parameters Parameters
---------- ----------
client : Shared KavitaClient (session, auth, cover uploads) client : Shared KavitaClient (session, auth, cover uploads).
mal_resolver : Shared MALResolver singleton (created automatically if omitted) mal_resolver : Shared MALResolver singleton (created if omitted).
al_resolver : Shared AniListResolver singleton (created automatically if omitted) al_resolver : Shared AniListResolver singleton (created if omitted).
min_name_score : Minimum difflib similarity ratio (01) required to accept a
Kavita person as a match for a MAL name. Default 0.80.
""" """
def __init__(self, client: KavitaClient, *, def __init__(self, client: KavitaClient, *,
mal_resolver: "MALResolver | None" = None, mal_resolver: "MALResolver | None" = None,
al_resolver: "AniListResolver | None" = None, al_resolver: "AniListResolver | None" = None):
min_name_score: float = 0.80):
self._client = client self._client = client
self._min_score = min_name_score
self._mal = mal_resolver or MALResolver() self._mal = mal_resolver or MALResolver()
self._al = al_resolver or AniListResolver() self._al = al_resolver or AniListResolver()
# Cache: normalised name -> list of PersonDto dicts (best matches first)
self._person_search_cache: dict[str, list[dict]] = {}
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Public: combined update # Public: global person sync
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def update_for_manga(self, mal_manga_id: "int | None", *, def update_all_persons(self, *,
al_manga_id: "int | None" = None, trigger: str = "cron",
perf: "PerfStats | None" = None,
update_covers: bool = True, update_covers: bool = True,
update_descriptions: bool = True) -> dict: update_descriptions: bool = True) -> dict:
""" """
Runs a full update pass for both characters and staff of the manga. Walks every Kavita person, syncing the ones whose name carries a
MAL is tried first; AniList is used as fallback when MAL returns nothing. tracker character id.
Returns Parameters
------- ----------
{ trigger : Source that started this run ("cron" | "web" | "ln") —
"characters": {"updated": n, "skipped": n, "not_found": n}, recorded in the perf-stats run meta.
"staff": {"updated": n, "skipped": n, "not_found": n}, perf : Optional PerfStats for per-person step timing.
}
Returns {"trigger", "updated", "skipped", "not_found",
"conflicts", "errors"}.
""" """
return { perf = perf or PerfStats(None)
"characters": self.update_characters( run = perf.begin_run(meta={"trigger": trigger})
mal_manga_id, al_manga_id=al_manga_id, result: dict = {"trigger": trigger, "updated": 0, "skipped": 0,
update_covers=update_covers, "not_found": 0, "conflicts": 0, "errors": []}
update_descriptions=update_descriptions),
"staff": self.update_staff(
mal_manga_id, al_manga_id=al_manga_id,
update_covers=update_covers,
update_descriptions=update_descriptions),
}
# ------------------------------------------------------------------ try:
# Public: character update persons = self._client.list_all_persons()
# ------------------------------------------------------------------ except requests.RequestException as exc:
def update_characters(self, mal_manga_id: "int | None", *, result["errors"].append(f"list persons failed: {exc}")
al_manga_id: "int | None" = None, run.finish()
update_covers: bool = True, return result
update_descriptions: bool = True) -> dict:
"""
Updates Kavita persons that match MAL/AniList characters for the manga.
MAL is tried first; AniList is the fallback when MAL returns nothing.
Returns {"updated": n, "skipped": n, "not_found": n}. for person in persons:
""" name = (person.get("name") or "").strip()
entries = self._mal.get_characters_detailed(mal_manga_id) if mal_manga_id else [] parsed = parse_person_tracker_id(name)
resolver = self._mal if not parsed:
if not entries and al_manga_id: result["skipped"] += 1 # author/staff or un-tagged
entries = self._al.get_characters_detailed(al_manga_id)
resolver = self._al
return self._sync_entries(entries, "character", resolver,
update_covers=update_covers,
update_descriptions=update_descriptions)
# ------------------------------------------------------------------
# Public: staff update
# ------------------------------------------------------------------
def update_staff(self, mal_manga_id: "int | None", *,
al_manga_id: "int | None" = None,
update_covers: bool = True,
update_descriptions: bool = True) -> dict:
"""
Updates Kavita persons that match MAL/AniList staff for the manga.
MAL is tried first; AniList is the fallback when MAL returns nothing.
Returns {"updated": n, "skipped": n, "not_found": n}.
"""
entries = self._mal.get_staff_detailed(mal_manga_id) if mal_manga_id else []
resolver = self._mal
if not entries and al_manga_id:
entries = self._al.get_staff_detailed(al_manga_id)
resolver = self._al
return self._sync_entries(entries, "staff", resolver,
update_covers=update_covers,
update_descriptions=update_descriptions)
# ------------------------------------------------------------------
# Public: cache management
# ------------------------------------------------------------------
def clear_cache(self) -> None:
"""Clears the Kavita person search cache."""
self._person_search_cache.clear()
# ------------------------------------------------------------------
# Internal: main sync loop
# ------------------------------------------------------------------
def _sync_entries(self, entries: list[dict], kind: str, resolver, *,
update_covers: bool,
update_descriptions: bool) -> dict:
result: dict = {"updated": 0, "skipped": 0, "not_found": 0,
"errors": []}
for entry in entries:
name = (entry.get("name") or "").strip()
raw_name = (entry.get("raw_name") or "").strip()
if not name and not raw_name:
continue continue
if kind == "character": source, tracker_id = parsed
# Characters are stored under their disambiguated name item = run.begin_item(name)
# ("Rem (MAL 118737)") — see person_name_with_id. The ok = True
# series metadata write creates the person under exactly try:
# this name, so only that form is searched. category = self._apply_to_person(
search_names = [person_name_with_id( person, source, tracker_id, item,
name, mal_id=entry.get("mal_id"),
al_id=entry.get("al_id"))]
else:
# Staff: cleaned (XML-safe) name first; if Kavita stores
# the legacy comma form, retry with the raw MAL name.
search_names = [name]
if raw_name and raw_name != name:
search_names.append(raw_name)
matches: list[dict] = []
for search_name in search_names:
if not search_name:
continue
matches = self._find_kavita_person(search_name)
if matches:
break
if not matches:
result["not_found"] += 1
continue
changed = self._apply_mal_data(
matches[0], entry, kind, resolver,
update_cover=update_covers, update_cover=update_covers,
update_desc=update_descriptions, update_desc=update_descriptions,
errors=result["errors"]) errors=result["errors"])
result["updated" if changed else "skipped"] += 1 result[category] += 1
ok = category != "conflicts"
except Exception as exc:
result["errors"].append(f"{name}: {exc}")
ok = False
finally:
item.finish(ok=ok)
run.finish()
print(f"[persons] trigger={trigger} updated={result['updated']} "
f"skipped={result['skipped']} not_found={result['not_found']} "
f"conflicts={result['conflicts']} errors={len(result['errors'])}",
flush=True)
return result return result
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Internal: Kavita person search # Internal: apply tracker data to one person
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def _find_kavita_person(self, name: str) -> list[dict]: def _apply_to_person(self, person: dict, source: str, tracker_id: int,
item, *, update_cover: bool, update_desc: bool,
errors: list) -> str:
""" """
Searches Kavita for persons matching `name`. Applies MAL/AniList character data to one Kavita person.
Returns the result category: "updated" | "skipped" | "not_found"
Checks both the main name and any stored aliases. | "conflicts".
Returns persons sorted by similarity, filtered by min_name_score.
Results are cached per (normalised) query name.
""" """
key = name.lower().strip() person_id = person.get("id")
if key in self._person_search_cache:
return self._person_search_cache[key]
try:
persons = self._client.search_persons(name)
except requests.RequestException:
self._person_search_cache[key] = []
return []
scored = []
for p in persons:
candidates = [p.get("name")] + list(p.get("aliases") or [])
scored.append((best_similarity(key, candidates), p))
scored.sort(key=lambda pair: pair[0], reverse=True)
filtered = [p for score, p in scored if score >= self._min_score]
self._person_search_cache[key] = filtered
return filtered
# ------------------------------------------------------------------
# Internal: apply MAL data to a single Kavita person
# ------------------------------------------------------------------
def _apply_mal_data(self, person: dict, mal_entry: dict, kind: str,
resolver, *,
update_cover: bool, update_desc: bool,
errors: "list | None" = None) -> bool:
"""
Applies tracker data (MAL or AniList) to one Kavita person record.
Fields updated
--------------
- malId : set when the entry carries a MAL ID and it differs
- aniListId : set when the entry carries an AniList ID and it differs
- description: set when empty and the tracker provides a description
- cover image: uploaded when not locked and no prior sync cover exists
Returns True if any change was made. Failures are appended to the
`errors` list (if provided) instead of being silently swallowed.
"""
person_id: "int | None" = person.get("id")
if not person_id: if not person_id:
return False return "skipped"
person_name = person.get("name") or "" resolver = self._mal if source == "mal" else self._al
id_field = "malId" if source == "mal" else "aniListId"
current = person.get(id_field) or 0
# Tracker IDs — a MAL entry has mal_id set; an AniList entry has al_id. # The name is authoritative; a record linked to a different id is a
mal_id: "int | None" = mal_entry.get("mal_id") # data conflict — never overwrite it.
al_id: "int | None" = mal_entry.get("al_id") if current and current != tracker_id:
entity_id = mal_id or al_id # used for resolver detail calls
current_mal_id: int = person.get("malId") or 0
current_al_id: int = person.get("aniListId") or 0
# Collision guard: the Kavita person is already linked to a
# *different* tracker entity — same display name, different
# character/person. Never overwrite; first writer wins.
if ((mal_id and current_mal_id and current_mal_id != mal_id)
or (al_id and current_al_id and current_al_id != al_id)):
if errors is not None:
errors.append( errors.append(
f"conflict: '{person_name}' (#{person_id}) is linked to " f"conflict: '{person.get('name')}' (#{person_id}) has "
f"malId={current_mal_id or '-'}/aniListId={current_al_id or '-'} " f"{id_field}={current} but name says {tracker_id} — skipped")
f"but this entry has malId={mal_id or '-'}/aniListId={al_id or '-'} " return "conflicts"
f"— skipped")
return False
needs_mal_id = bool(mal_id and current_mal_id != mal_id) with item.measure("detail_fetch"):
needs_al_id = bool(al_id and current_al_id != al_id) details = resolver.get_character_details(tracker_id)
if not details:
return "not_found"
# ------ Lazy description fetch ----------------------------------- need_id = not current # write id when still missing
description: "str | None" = None description = None
if update_desc and not (person.get("description") or "").strip(): if update_desc and not (person.get("description") or "").strip():
if entity_id:
if kind == "character":
details = resolver.get_character_details(entity_id)
if details:
description = _build_character_description(details) or None description = _build_character_description(details) or None
else: need_desc = bool(description)
details = resolver.get_person_details(entity_id)
if details:
description = _build_person_description(details) or None
needs_desc = bool(description)
# ------ Metadata update ------------------------------------------
changed = False changed = False
if needs_mal_id or needs_al_id or needs_desc: if need_id or need_desc:
payload: dict = { payload = {
"id": person_id, "id": person_id,
"name": person_name, "name": person.get("name") or "",
# MUST stay a boolean — the cover image itself is uploaded # MUST stay a boolean — the cover is uploaded separately.
# separately via POST /api/Upload/person (below). Putting a
# URL here makes Kavita reject the whole payload with HTTP 400.
"coverImageLocked": bool(person.get("coverImageLocked", False)), "coverImageLocked": bool(person.get("coverImageLocked", False)),
"aliases": person.get("aliases") or [], "aliases": person.get("aliases") or [],
"description": description or person.get("description"), "description": description or person.get("description"),
"malId": mal_id if needs_mal_id else (current_mal_id or None), "malId": tracker_id if source == "mal"
"aniListId": al_id if needs_al_id else (current_al_id or None), else (person.get("malId") or None),
"aniListId": tracker_id if source == "al"
else (person.get("aniListId") or None),
} }
try: try:
with item.measure("person_update"):
self._client.update_person(payload) self._client.update_person(payload)
changed = True changed = True
except requests.RequestException as e: except requests.RequestException as exc:
if errors is not None: errors.append(f"update failed #{person_id} "
errors.append( f"'{person.get('name')}': {exc}")
f"Person/update failed for #{person_id} "
f"'{person_name}': {e}")
# ------ Cover image upload ---------------------------------------- # Cover: upload when not locked and not already set for this id.
# Upload whenever:
# - caller requested cover updates
# - cover is NOT locked (user did not manually pin it)
# - we have not already uploaded this exact tracker entity's image
# (i.e. the tracked ID differs OR there is no cover yet).
if update_cover and not person.get("coverImageLocked"): if update_cover and not person.get("coverImageLocked"):
image_url = mal_entry.get("image_url") image_url = details.get("image_url")
already_uploaded = ( already = bool(current) and bool(person.get("coverImage"))
entity_id is not None if image_url and not already:
and (current_mal_id == mal_id or current_al_id == al_id)
and bool(person.get("coverImage"))
)
if image_url and not already_uploaded:
try: try:
with item.measure("cover_upload"):
self._client.upload_person_cover(person_id, image_url) self._client.upload_person_cover(person_id, image_url)
changed = True changed = True
except requests.RequestException as e: except requests.RequestException as exc:
if errors is not None: errors.append(f"cover upload failed #{person_id} "
errors.append( f"'{person.get('name')}': {exc}")
f"cover upload failed for #{person_id} "
f"'{person_name}' ({image_url}): {e}")
return changed return "updated" if changed else "skipped"
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
# Module helpers: description builders # Module helper: character description builder
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
def _format_birthday(birthday: str) -> str:
"""Converts an ISO 8601 birthday string to "D Month YYYY"."""
if not birthday:
return ""
try:
dt = datetime.date.fromisoformat(birthday.split("T")[0])
return f"{dt.day} {dt.strftime('%B %Y')}"
except (ValueError, AttributeError):
return ""
def _build_character_description(details: dict) -> str: def _build_character_description(details: dict) -> str:
""" """
Builds a Kavita-safe HTML description for a MAL character. Builds a Kavita-safe HTML description for a MAL / AniList character.
Top line: "Favorites: N" as a link to the character's MAL page. Top line: "Favorites: N" linked to the character page (when available).
Remainder: the character's `about` text converted to HTML paragraphs. Remainder: the character's `about` text converted to HTML paragraphs.
""" """
parts: list[str] = [] parts: list[str] = []
url = details.get("url") or "" url = details.get("url") or ""
favorites = details.get("favorites") favorites = details.get("favorites")
if url and favorites is not None: if url and favorites is not None:
parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>') parts.append(f'<p><a href="{url}" target="_blank">'
about = (details.get("about") or "").strip() f'Favorites: {favorites:,}</a></p>')
if about:
parts.append(paragraphs_to_html(about))
return "<br>".join(parts)
def _build_person_description(details: dict) -> str:
"""
Builds a Kavita-safe HTML description for a MAL person (mangaka / staff).
Renders a summary table (given name, family name, birthday, website,
member favorites) followed by the `about` biography as HTML paragraphs.
"""
_TD = 'style="padding-right:1.5em"'
rows: list[str] = []
given = (details.get("given_name") or "").strip()
family = (details.get("family_name") or "").strip()
birthday = details.get("birthday") or ""
favorites = details.get("favorites")
website = (details.get("website_url") or "").strip()
url = (details.get("url") or "").strip()
if given:
rows.append(f"<tr><td {_TD}>Given name</td><td>{given}</td></tr>")
if family:
rows.append(f"<tr><td {_TD}>Family name</td><td>{family}</td></tr>")
bday_str = _format_birthday(birthday)
if bday_str:
rows.append(f"<tr><td {_TD}>Birthday</td><td>{bday_str}</td></tr>")
if website:
rows.append(
f'<tr><td {_TD}>Website</td>'
f'<td><a href="{website}">{website}</a></td></tr>'
)
if favorites is not None:
fav_cell = (f'<a href="{url}" target="_blank">{favorites:,}</a>' if url
else f"{favorites:,}")
rows.append(
f"<tr><td {_TD}>Member Favorites</td><td>{fav_cell}</td></tr>")
parts: list[str] = []
if rows:
parts.append(f'<table>{"".join(rows)}</table>')
about = (details.get("about") or "").strip() about = (details.get("about") or "").strip()
if about: if about:
parts.append(paragraphs_to_html(about)) parts.append(paragraphs_to_html(about))
@@ -418,18 +231,7 @@ if __name__ == "__main__":
client = KavitaClient(os.environ["KAVITA_URL"], client = KavitaClient(os.environ["KAVITA_URL"],
os.environ["KAVITA_API_KEY"]) os.environ["KAVITA_API_KEY"])
updater = KavitaPersonUpdater(client) updater = KavitaPersonUpdater(client)
report = updater.update_all_persons(trigger="cron")
mal = MALResolver() print(report)
mal_id = mal.find_mal_id("よふかしのうた") for err in report["errors"]:
print("MAL ID:", mal_id) print(" ", err)
if mal_id:
result = updater.update_for_manga(mal_id)
print("Characters:", {k: v for k, v in result["characters"].items()
if k != "errors"})
print("Staff :", {k: v for k, v in result["staff"].items()
if k != "errors"})
# Surface any non-fatal upload / API errors for debugging
for section in ("characters", "staff"):
for err in result[section].get("errors", []):
print(f"[{section}] {err}")
+7 -7
View File
@@ -151,9 +151,10 @@ class MangaBakaWorksResolver:
Returns volume-level works for a series, filtered to those that have Returns volume-level works for a series, filtered to those that have
a usable cover image. a usable cover image.
Non-empty results are cached per series; empty results are not, so Results are cached per series — including empty results, so a series
works added on MangaBaka later become visible without restarting without works is not re-paginated for every chapter of a move run.
the (long-running) process. The periodic cover updater calls clear_cache() before each scan, so
works added on MangaBaka later are still picked up there.
""" """
if not series_id: if not series_id:
return [] return []
@@ -165,7 +166,6 @@ class MangaBakaWorksResolver:
# Discard works that carry no usable cover # Discard works that carry no usable cover
works_with_cover = [w for w in all_works if w.get("images")] works_with_cover = [w for w in all_works if w.get("images")]
if works_with_cover:
self._cache[series_id] = works_with_cover self._cache[series_id] = works_with_cover
return works_with_cover return works_with_cover
@@ -228,9 +228,9 @@ class MangaBakaWorksResolver:
if url: if url:
result[norm] = url result[norm] = url
# Empty results are not cached — covers added on MangaBaka later # Cache even an empty result so a series without volume images is not
# become visible without restarting the long-running process. # re-paginated for every chapter. The periodic cover updater clears
if result: # this cache before each scan, so newly added images are still found.
self._images_cache[series_id] = result self._images_cache[series_id] = result
return result return result
+254
View File
@@ -0,0 +1,254 @@
"""
perf_stats.py
=============
Generic run/step performance profiler with JSON persistence, shared by the
move pipeline and the periodic updaters (volume/cover, persons).
Each run is a tree of *items* (e.g. series -> chapter, or one person) and
every item carries named *step* timings. A run also carries free-form
``meta`` (e.g. the trigger source ``"cron" | "web" | "ln"`` for the person
updater).
Data model (one entry per run, newest first)::
{
"runs": [
{
"runId": "",
"startedAt": 1700000000,
"finishedAt": 1700000123,
"totalSeconds": 123.4,
"meta": {"trigger": "cron"},
"itemCount": 2, # top-level items
"leafCount": 31, # items without children
"stepTotals": {"cover": 41.2, "image_dimensions": 55.8, ...},
"items": [
{"label": "Call of the Night", "totalSeconds": 60.2, "ok": true,
"steps": {"fetch_metadata": 1.2},
"items": [
{"label": "1", "totalSeconds": 11.5, "ok": true,
"steps": {"cover": 1.8, "pack_cbz": 2.9}, "items": []}
]}
]
}
]
}
Usage::
perf = PerfStats(path) # path=None -> disabled
run = perf.begin_run(meta={"trigger": "cron"})
item = run.begin_item("Call of the Night")
with item.measure("fetch_metadata"):
...
chap = item.begin_item("1")
with chap.measure("pack_cbz"):
...
chap.finish()
item.finish() # flushes the run to disk
run.finish()
When ``path`` is None every recorder is a no-op and nothing is written, so
the profiler can be left permanently wired in at negligible cost. The run
is flushed after every top-level item finishes, so a long run is observable
live and survives a crash mid-run.
"""
from __future__ import annotations
import json
import threading
import time
import uuid
from contextlib import contextmanager
from pathlib import Path
# Keep the JSON small: only the most recent runs are retained on disk.
_MAX_RUNS = 30
class _StepTimer:
"""
Base recorder: accumulates ``{step_name: seconds}`` and tracks its own
wall-clock lifetime. ``enabled=False`` turns every method into a no-op.
"""
def __init__(self, enabled: bool = True):
self.steps: dict[str, float] = {}
self._enabled = enabled
self._t0 = time.monotonic()
@contextmanager
def measure(self, name: str):
"""Context manager timing a named step (accumulates on repeat use)."""
if not self._enabled:
yield
return
start = time.monotonic()
try:
yield
finally:
self.steps[name] = round(
self.steps.get(name, 0.0) + (time.monotonic() - start), 4)
def elapsed(self) -> float:
return round(time.monotonic() - self._t0, 4)
class ItemRecorder(_StepTimer):
"""
One node in a run's item tree. Has its own step timings and may contain
nested child items (e.g. a series item containing chapter items).
"""
def __init__(self, run: "RunRecorder", label: str, *,
parent: "ItemRecorder | None" = None,
enabled: bool = True):
super().__init__(enabled)
self._run = run
self._label = label
self._parent = parent
self._children: list[dict] = []
def begin_item(self, label: str) -> "ItemRecorder":
return ItemRecorder(self._run, label, parent=self,
enabled=self._enabled)
def finish(self, *, ok: bool = True) -> None:
if not self._enabled:
return
node = {
"label": self._label,
"totalSeconds": self.elapsed(),
"ok": ok,
"steps": self.steps,
"items": self._children,
}
if self._parent is not None:
self._parent._children.append(node)
else:
# Top-level item: attach to the run and persist progress.
self._run._items.append(node)
self._run.flush()
class RunRecorder:
"""Top-level recorder for one full run."""
def __init__(self, stats: "PerfStats", meta: "dict | None" = None,
enabled: bool = True):
self._stats = stats
self._enabled = enabled
self._meta = meta or {}
self._items: list[dict] = []
self._started = time.time()
self._t0 = time.monotonic()
self._run_id = uuid.uuid4().hex
def begin_item(self, label: str) -> ItemRecorder:
return ItemRecorder(self, label, parent=None, enabled=self._enabled)
def _snapshot(self) -> dict:
step_totals: dict[str, float] = {}
leaf_count = 0
def walk(node: dict) -> None:
nonlocal leaf_count
for step, secs in node["steps"].items():
step_totals[step] = round(step_totals.get(step, 0.0) + secs, 4)
if node["items"]:
for child in node["items"]:
walk(child)
else:
leaf_count += 1
for item in self._items:
walk(item)
return {
"runId": self._run_id,
"startedAt": round(self._started),
"finishedAt": round(time.time()),
"totalSeconds": round(time.monotonic() - self._t0, 4),
"meta": self._meta,
"itemCount": len(self._items),
"leafCount": leaf_count,
"stepTotals": step_totals,
"items": self._items,
}
def flush(self) -> "dict | None":
"""Writes the run's current state to disk (upsert by runId)."""
if not self._enabled:
return None
run = self._snapshot()
self._stats._upsert_run(run)
return run
def finish(self) -> "dict | None":
"""Persists the final run state. Returns the run dict."""
return self.flush()
class PerfStats:
"""
Profiler facade + JSON persistence.
Parameters
----------
path : Destination JSON file. None disables the profiler entirely
(every recorder becomes a no-op and nothing is written).
"""
def __init__(self, path=None):
self._path = Path(path) if path else None
self._lock = threading.Lock()
@property
def enabled(self) -> bool:
return self._path is not None
def begin_run(self, meta: "dict | None" = None) -> RunRecorder:
return RunRecorder(self, meta=meta, enabled=self.enabled)
# ------------------------------------------------------------------
# Read / write
# ------------------------------------------------------------------
def all(self) -> dict:
"""Returns the persisted runs ({"runs": [...]}); newest first."""
if not self._path or not self._path.is_file():
return {"runs": []}
try:
with self._path.open("r", encoding="utf-8") as f:
data = json.load(f)
except (OSError, json.JSONDecodeError):
return {"runs": []}
if not isinstance(data, dict) or not isinstance(data.get("runs"), list):
return {"runs": []}
return data
def _upsert_run(self, run: dict) -> None:
"""
Inserts a new run (newest first) or replaces the existing entry with
the same runId — so incremental flushes during a run update one entry
rather than appending a duplicate after every item.
"""
if not self._path:
return
with self._lock:
runs = self.all()["runs"]
run_id = run.get("runId")
for i, existing in enumerate(runs):
if existing.get("runId") == run_id:
runs[i] = run
break
else:
runs.insert(0, run) # newest first
del runs[_MAX_RUNS:] # cap history
self._path.parent.mkdir(parents=True, exist_ok=True)
tmp = self._path.with_suffix(self._path.suffix + ".tmp")
with tmp.open("w", encoding="utf-8") as f:
json.dump({"runs": runs}, f, ensure_ascii=False, indent=2)
tmp.replace(self._path)
+160
View File
@@ -0,0 +1,160 @@
"""
perf_web_page.py
================
Shared HTML page for browsing PerfStats output, used by both container web
UIs. ``render_perf_page(name, tabs)`` returns a standalone page that loads
``/api/perf/<name>`` and renders each run's step totals plus the nested item
tree (series -> chapter, or one person, …) and the run trigger from meta.
``tabs`` is a list of ``(label, name)`` pairs for cross-links between the
available perf datasets in that container.
"""
from __future__ import annotations
_PERF_PAGE = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>__PERF_NAME__ performance</title>
<style>
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
h2 { font-size: 1.05rem; margin: 1.4rem 0 .5rem; color:#cbd5e1; }
a { color:#60a5fa; text-decoration:none; }
a:hover { text-decoration:underline; }
.tabs { margin-bottom:1rem; }
.tabs a { margin-right:1rem; }
.tabs a.active { font-weight:bold; text-decoration:underline; }
.bar { display:flex; gap:.6rem; align-items:center; margin-bottom:1rem; flex-wrap:wrap; }
select, button { padding:.35rem .6rem; background:#222; color:#eee; border:1px solid #555; }
.summary { color:#9ca3af; margin:.3rem 0 1rem; }
table { border-collapse: collapse; width: 100%; margin-bottom:.5rem; }
th, td { border: 1px solid #333; padding: .35rem .6rem; text-align: left; }
th { background:#1d1d1d; }
td.num { text-align:right; font-variant-numeric: tabular-nums; white-space:nowrap; }
.barcell { position:relative; }
.barfill { position:absolute; left:0; top:0; bottom:0; background:#2563eb33; z-index:0; }
.barcell span { position:relative; z-index:1; }
details { margin:.2rem 0 .2rem 1rem; }
summary { cursor:pointer; padding:.2rem 0; }
.chip { color:#9ca3af; font-size:.85rem; }
.err { color:#f87171; }
</style>
</head>
<body>
<h1>Performance: __PERF_NAME__ <a href="/" style="font-size:.9rem;">&#9666; back</a></h1>
<div class="tabs">__PERF_TABS__</div>
<div class="bar">
<label>Run: <select id="runSelect"></select></label>
<button id="reload">Reload</button>
<span class="summary" id="summary"></span>
</div>
<div id="content"></div>
<script>
const PERF_NAME = "__PERF_NAME__";
let runs = [];
for (const a of document.querySelectorAll(".tabs a")) {
if (a.getAttribute("href") === "/perf/" + PERF_NAME) a.classList.add("active");
}
function fmtSecs(s) { return (s || 0).toFixed(2) + "s"; }
function fmtTime(unix) { return unix ? new Date(unix * 1000).toLocaleString() : ""; }
function esc(s) {
return String(s).replace(/[&<>]/g, c => ({"&":"&amp;","<":"&lt;",">":"&gt;"}[c]));
}
function stepTable(totals, grandTotal) {
const entries = Object.entries(totals || {}).sort((a, b) => b[1] - a[1]);
if (!entries.length) return "<p class=chip>(no steps recorded)</p>";
const max = entries[0][1] || 1;
let rows = "";
for (const [name, secs] of entries) {
const pct = grandTotal ? (secs / grandTotal * 100) : 0;
const w = (secs / max * 100);
rows += "<tr><td>" + esc(name) + "</td>"
+ "<td class='num'>" + fmtSecs(secs) + "</td>"
+ "<td class='num'>" + pct.toFixed(1) + "%</td>"
+ "<td class='barcell'><div class='barfill' style='width:" + w + "%'></div>"
+ "<span>&nbsp;</span></td></tr>";
}
return "<table><thead><tr><th>Step</th><th class=num>Total</th>"
+ "<th class=num>% of run</th><th>&nbsp;</th></tr></thead><tbody>"
+ rows + "</tbody></table>";
}
// Renders one item node (and its children) as a nested <details> block.
function itemNode(it) {
const steps = Object.entries(it.steps || {}).sort((a, b) => b[1] - a[1])
.map(([n, v]) => esc(n) + " " + fmtSecs(v)).join(", ") || "";
const head = "<summary><b>" + esc(it.label) + "</b>"
+ (it.ok === false ? " <span class=err>(failed)</span>" : "")
+ " <span class=chip>" + fmtSecs(it.totalSeconds) + " · " + steps + "</span></summary>";
const kids = (it.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
const body = kids.map(itemNode).join("");
return "<details>" + head + body + "</details>";
}
function renderRun(run) {
const c = document.getElementById("content");
if (!run) { c.innerHTML = "<p class=chip>No runs recorded yet.</p>"; return; }
const trigger = (run.meta && run.meta.trigger) ? " · trigger: " + run.meta.trigger : "";
document.getElementById("summary").textContent =
fmtTime(run.startedAt) + " · " + fmtSecs(run.totalSeconds) + " · "
+ run.itemCount + " items · " + run.leafCount + " leaves" + trigger;
let html = "<h2>Steps (summed over all items)</h2>"
+ stepTable(run.stepTotals, run.totalSeconds)
+ "<h2>Detail</h2>";
const items = (run.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
html += items.map(itemNode).join("") || "<p class=chip>(no items)</p>";
c.innerHTML = html;
}
function renderSelect() {
const sel = document.getElementById("runSelect");
sel.innerHTML = "";
runs.forEach((r, i) => {
const o = document.createElement("option");
o.value = i;
const trig = (r.meta && r.meta.trigger) ? " " + r.meta.trigger : "";
o.textContent = fmtTime(r.startedAt) + " (" + fmtSecs(r.totalSeconds) + ")" + trig;
sel.appendChild(o);
});
}
async function load() {
const r = await fetch("/api/perf/" + PERF_NAME);
const data = await r.json();
runs = data.runs || [];
renderSelect();
renderRun(runs[0]);
}
document.getElementById("runSelect").addEventListener("change", e => {
renderRun(runs[e.target.value]);
});
document.getElementById("reload").addEventListener("click", load);
load();
</script>
</body>
</html>
"""
def render_perf_page(name: str, tabs: "list[tuple[str, str]]") -> str:
"""
Returns the perf page HTML for dataset ``name``.
tabs : list of (label, dataset_name) for the cross-link bar.
"""
tab_html = " ".join(
f'<a href="/perf/{n}">{label}</a>' for label, n in tabs)
return (_PERF_PAGE
.replace("__PERF_TABS__", tab_html)
.replace("__PERF_NAME__", name))
+27
View File
@@ -70,3 +70,30 @@ def person_name_with_id(name: str, *,
if al_id: if al_id:
return f"{name} (AL {al_id})" return f"{name} (AL {al_id})"
return name return name
# Matches the suffix produced by person_name_with_id at the end of a name.
_TRACKER_ID_RE = re.compile(r"\s*\((MAL|AL)\s+(\d+)\)\s*$", re.IGNORECASE)
def parse_person_tracker_id(name: str) -> "tuple[str, int] | None":
"""
Inverse of person_name_with_id: extracts the tracker id from a
disambiguated Kavita person name.
"Rem (MAL 118737)" -> ("mal", 118737)
"Subaru (AL 88311)" -> ("al", 88311)
"Kotoyama" -> None (no id suffix — e.g. an author/staff record)
Returns ("mal" | "al", id) or None.
"""
if not name:
return None
m = _TRACKER_ID_RE.search(name)
if not m:
return None
source = "mal" if m.group(1).upper() == "MAL" else "al"
try:
return source, int(m.group(2))
except ValueError:
return None
+14 -9
View File
@@ -192,14 +192,9 @@ class LightNovelOrchestrator:
except Exception as exc: except Exception as exc:
return {"ok": False, "error": f"series update failed: {exc}"} return {"ok": False, "error": f"series update failed: {exc}"}
# Persons # Person sync no longer runs per series — it has its own global,
try: # id-based updater (sync_persons / KavitaPersonUpdater.update_all_persons)
person_report = self._person_updater.update_for_manga( # on a separate cron schedule.
built.get("malId"),
al_manga_id=built.get("anilistId"),
)
except Exception as exc:
person_report = {"error": str(exc)}
# Relationships + collection # Relationships + collection
try: try:
@@ -221,10 +216,20 @@ class LightNovelOrchestrator:
"title": cached_title, "title": cached_title,
"mangabakaId": built.get("mangabakaId"), "mangabakaId": built.get("mangabakaId"),
"series": series_report, "series": series_report,
"persons": person_report,
"relationships": relation_report, "relationships": relation_report,
} }
# ------------------------------------------------------------------
# Person sync (global, id-based — independent of series updates)
# ------------------------------------------------------------------
def sync_persons(self, *, trigger: str = "ln", perf=None) -> dict:
"""
Runs the global, id-based person updater over every Kavita person.
Covers both manga and light-novel libraries in one pass.
"""
return self._person_updater.update_all_persons(
trigger=trigger, perf=perf)
def update_all(self, library_ids: "list[int] | None") -> dict: def update_all(self, library_ids: "list[int] | None") -> dict:
"""Updates every cached series in the given libraries.""" """Updates every cached series in the given libraries."""
if library_ids is None: if library_ids is None:
+51
View File
@@ -37,6 +37,10 @@ from flask import Flask, jsonify, request, Response
from MatchesCache import MatchesCache from MatchesCache import MatchesCache
from LightNovelMetadataBuilder import pick_thumbnail_url from LightNovelMetadataBuilder import pick_thumbnail_url
from PerfWebPage import render_perf_page
# Only the person dataset exists in the LN container.
_PERF_TABS = [("persons", "person")]
def _int_list(values) -> list[int]: def _int_list(values) -> list[int]:
@@ -97,7 +101,9 @@ _INDEX_HTML = r"""<!doctype html>
<button id="reload">Reload</button> <button id="reload">Reload</button>
<button id="build">Match all in libraries</button> <button id="build">Match all in libraries</button>
<button id="updateAll" class="success">Update all in libraries</button> <button id="updateAll" class="success">Update all in libraries</button>
<button id="syncPersons">Sync persons</button>
<button id="batchSave" class="primary">Save dirty (0)</button> <button id="batchSave" class="primary">Save dirty (0)</button>
<a href="/perf/person" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
<span class="status" id="status"></span> <span class="status" id="status"></span>
</div> </div>
@@ -497,12 +503,25 @@ async function startUpdateAll() {
} }
} }
async function startSyncPersons() {
if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return;
setStatus("Person sync started");
try {
const r = await fetch("/api/persons/sync", { method: "POST" });
if (!r.ok) throw new Error(await r.text());
startPolling();
} catch (err) {
setStatus("Person sync failed: " + err.message);
}
}
document.getElementById("filter").addEventListener("input", applyFilter); document.getElementById("filter").addEventListener("input", applyFilter);
document.getElementById("libraries").addEventListener("change", applyFilter); document.getElementById("libraries").addEventListener("change", applyFilter);
document.getElementById("reload").addEventListener("click", load); document.getElementById("reload").addEventListener("click", load);
document.getElementById("batchSave").addEventListener("click", batchSave); document.getElementById("batchSave").addEventListener("click", batchSave);
document.getElementById("build").addEventListener("click", startBuild); document.getElementById("build").addEventListener("click", startBuild);
document.getElementById("updateAll").addEventListener("click", startUpdateAll); document.getElementById("updateAll").addEventListener("click", startUpdateAll);
document.getElementById("syncPersons").addEventListener("click", startSyncPersons);
for (const th of document.querySelectorAll("th.sortable")) { for (const th of document.querySelectorAll("th.sortable")) {
th.addEventListener("click", () => { th.addEventListener("click", () => {
const col = th.dataset.col; const col = th.dataset.col;
@@ -581,11 +600,13 @@ class MatchesWebApp:
def __init__(self, cache: MatchesCache, *, def __init__(self, cache: MatchesCache, *,
orchestrator=None, orchestrator=None,
default_library_ids: "list[int] | None" = None, default_library_ids: "list[int] | None" = None,
person_perf=None,
host: str = "0.0.0.0", host: str = "0.0.0.0",
port: int = 8080): port: int = 8080):
self._cache = cache self._cache = cache
self._orchestrator = orchestrator self._orchestrator = orchestrator
self._defaults = list(default_library_ids or []) self._defaults = list(default_library_ids or [])
self._person_perf = person_perf
self._host = host self._host = host
self._port = port self._port = port
self._job = _JobState() self._job = _JobState()
@@ -757,8 +778,38 @@ class MatchesWebApp:
return Response("a job is already running", status=409) return Response("a job is already running", status=409)
return jsonify({"started": label}) return jsonify({"started": label})
@app.post("/api/persons/sync")
def api_persons_sync():
if self._orchestrator is None:
return Response("no orchestrator configured", status=503)
def task(job: _JobState):
report = self._orchestrator.sync_persons(
trigger="ln", perf=self._person_perf)
job.append(f"updated={report['updated']} "
f"skipped={report['skipped']} "
f"not_found={report['not_found']} "
f"conflicts={report['conflicts']}")
for err in report.get("errors", []):
job.append(f" {err}")
if not self._job.start("person sync", task):
return Response("a job is already running", status=409)
return jsonify({"started": "person sync"})
@app.get("/api/status") @app.get("/api/status")
def api_status(): def api_status():
snap = self._job.snapshot() snap = self._job.snapshot()
snap["defaults"] = self._defaults snap["defaults"] = self._defaults
return jsonify(snap) return jsonify(snap)
@app.get("/perf")
@app.get("/perf/<name>")
def perf_page(name: str = "person") -> Response:
return Response(render_perf_page(name, _PERF_TABS),
mimetype="text/html; charset=utf-8")
@app.get("/api/perf/<name>")
def api_perf(name: str):
stats = self._person_perf if name == "person" else None
return jsonify(stats.all() if stats is not None else {"runs": []})
+55
View File
@@ -40,6 +40,7 @@ from __future__ import annotations
import re import re
import sys import sys
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from contextlib import contextmanager
from pathlib import Path from pathlib import Path
import requests import requests
@@ -65,6 +66,16 @@ except ImportError:
_HAS_PIL = False _HAS_PIL = False
@contextmanager
def _no_measure():
"""No-op stand-in for a perf recorder's measure() context manager."""
yield
# Sentinel marking a per-chapter memo slot as "not computed yet".
_UNSET = object()
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
# Constants # Constants
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
@@ -218,10 +229,22 @@ class ComicInfoBuilder:
self._matches_cache = matches_cache self._matches_cache = matches_cache
self._cover_cache = cover_cache or _default_cover_cache() self._cover_cache = cover_cache or _default_cover_cache()
# Optional performance recorder (duck-typed: any object with a
# .measure(name) context manager). The mover sets this per chapter;
# when None, _measure() is a no-op so the builder stays decoupled
# from PerfStats and works standalone (e.g. the cover updater).
self.perf = None
self._metadata: "dict | None" = None self._metadata: "dict | None" = None
self._pages: list[dict] = [] self._pages: list[dict] = []
self._cover_path: "Path | None" = None self._cover_path: "Path | None" = None
self._suwayomi_data: dict = {} self._suwayomi_data: dict = {}
# Per-chapter memo for _determine_volume (resolved up to 3x/chapter
# otherwise: cover download, explicit volume step, XML build).
self._volume_memo = _UNSET
# Per-series cache for full series fetches by id (parent series for
# SeriesGroup, merged-series redirects) — reused across all chapters.
self._series_by_id_cache: dict[str, dict] = {}
# ----- Repr ----------------------------------------------------------- # ----- Repr -----------------------------------------------------------
def __repr__(self) -> str: def __repr__(self) -> str:
@@ -261,6 +284,13 @@ class ComicInfoBuilder:
self._pages = [] self._pages = []
self._cover_path = None self._cover_path = None
self._suwayomi_data = {} self._suwayomi_data = {}
self._volume_memo = _UNSET
def _measure(self, name: str):
"""Times a named step on the attached recorder; no-op when unset."""
if self.perf is not None:
return self.perf.measure(name)
return _no_measure()
# ====================================================================== # ======================================================================
# Public XML functions # Public XML functions
@@ -305,10 +335,12 @@ class ComicInfoBuilder:
if not folder.is_dir(): if not folder.is_dir():
raise NotADirectoryError(f"Folder not found: {folder}") raise NotADirectoryError(f"Folder not found: {folder}")
with self._measure("read_comicinfo"):
self._suwayomi_data = self._read_existing_comicinfo(folder) self._suwayomi_data = self._read_existing_comicinfo(folder)
self._cover_path = None self._cover_path = None
if download_cover: if download_cover:
with self._measure("cover"):
self._cover_path = self._download_cover(folder, cover_filename) self._cover_path = self._download_cover(folder, cover_filename)
cover_resolved = self._cover_path.resolve() if self._cover_path else None cover_resolved = self._cover_path.resolve() if self._cover_path else None
@@ -329,6 +361,9 @@ class ComicInfoBuilder:
ordered.extend((img, "Story") for img in story_images) ordered.extend((img, "Story") for img in story_images)
self._pages = [] self._pages = []
# Probing every page for its pixel dimensions reads each file — on a
# network share this is often the dominant per-chapter cost.
with self._measure("image_dimensions"):
for index, (img_path, page_type) in enumerate(ordered): for index, (img_path, page_type) in enumerate(ordered):
width, height = self._image_dimensions(img_path) width, height = self._image_dimensions(img_path)
try: try:
@@ -413,12 +448,20 @@ class ComicInfoBuilder:
return series return series
def _fetch_series_by_id(self, series_id) -> dict: def _fetch_series_by_id(self, series_id) -> dict:
# Cached per builder (i.e. per series): SeriesGroup resolution calls
# this for the parent on every chapter — without the cache that is
# one MangaBaka request per chapter for the same parent id.
key = str(series_id)
cached = self._series_by_id_cache.get(key)
if cached is not None:
return cached
url = f"{self.api_base_url}/series/{series_id}" url = f"{self.api_base_url}/series/{series_id}"
resp = self._session.get(url, timeout=self.request_timeout) resp = self._session.get(url, timeout=self.request_timeout)
resp.raise_for_status() resp.raise_for_status()
data = resp.json().get("data") data = resp.json().get("data")
if not data: if not data:
raise RuntimeError(f"Series with ID {series_id} not found.") raise RuntimeError(f"Series with ID {series_id} not found.")
self._series_by_id_cache[key] = data
return data return data
# ====================================================================== # ======================================================================
@@ -554,6 +597,18 @@ class ComicInfoBuilder:
# Volume determination # Volume determination
# ====================================================================== # ======================================================================
def _determine_volume(self) -> "str | None": def _determine_volume(self) -> "str | None":
"""
Resolves the volume for the current chapter, memoized per chapter.
The result is reused across the three call sites per chapter (cover
download, explicit volume step, XML build); the memo is cleared
whenever the chapter or manga title changes (see _clear_results).
"""
if self._volume_memo is _UNSET:
self._volume_memo = self._resolve_volume()
return self._volume_memo
def _resolve_volume(self) -> "str | None":
""" """
Resolves the volume for the current chapter via MangaDex. Resolves the volume for the current chapter via MangaDex.
Falls back to estimation when the chapter is absent from MangaDex. Falls back to estimation when the chapter is absent from MangaDex.
+27 -63
View File
@@ -45,7 +45,6 @@ from __future__ import annotations
import io import io
import sys import sys
import threading
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import zipfile import zipfile
from datetime import datetime from datetime import datetime
@@ -67,7 +66,7 @@ from MatchesCache import MatchesCache
from SuwayomiMover import (_load_chapter_index, _save_chapter_index, from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
_sanitize_dirname, _normalise_volume_value) _sanitize_dirname, _normalise_volume_value)
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CronSchedule import CronSchedule from PerfStats import PerfStats
from CoverCache import CoverCache, _IMAGE_EXTS from CoverCache import CoverCache, _IMAGE_EXTS
try: try:
@@ -136,12 +135,12 @@ class KavitaVolumeCoverUpdater:
request_timeout : HTTP timeout in seconds. request_timeout : HTTP timeout in seconds.
log_path : File that receives one line per updated chapter. log_path : File that receives one line per updated chapter.
Default: <kavita_path>/volume_updater.log Default: <kavita_path>/volume_updater.log
schedule : Cron expression (5 fields) defining when scans run,
e.g. "0 19 * * 1,4" = 19:00 every Monday and
Thursday. Evaluated in local time — set the TZ env
var inside Docker. Default: "0 19 * * 1,4".
cover_cache_dir : Directory for the persistent cover cache. None -> cover_cache_dir : Directory for the persistent cover cache. None ->
temporary cache, deleted at process exit. temporary cache, deleted at process exit.
perf_stats : Optional PerfStats instance for per-step timing.
Scheduling lives outside this class (see CronRunner); call update_all()
on whatever cadence you like.
""" """
def __init__(self, def __init__(self,
@@ -152,8 +151,8 @@ class KavitaVolumeCoverUpdater:
request_timeout: int = 30, request_timeout: int = 30,
api_base_url: str = "https://api.mangabaka.dev/v1", api_base_url: str = "https://api.mangabaka.dev/v1",
log_path=None, log_path=None,
schedule: str = "0 19 * * 1,4", cover_cache_dir=None,
cover_cache_dir=None): perf_stats: "PerfStats | None" = None):
self._dst = Path(kavita_path) self._dst = Path(kavita_path)
self._matches_cache = matches_cache self._matches_cache = matches_cache
self._language = language self._language = language
@@ -161,7 +160,7 @@ class KavitaVolumeCoverUpdater:
self._api_base_url = api_base_url.rstrip("/") self._api_base_url = api_base_url.rstrip("/")
self._log_path = (Path(log_path) if log_path self._log_path = (Path(log_path) if log_path
else self._dst / "volume_updater.log") else self._dst / "volume_updater.log")
self._cron = CronSchedule(schedule) self._perf = perf_stats or PerfStats(None)
session = requests.Session() session = requests.Session()
session.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0") session.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0")
@@ -178,51 +177,6 @@ class KavitaVolumeCoverUpdater:
self._cover_cache = CoverCache( self._cover_cache = CoverCache(
cover_cache_dir, session=session, request_timeout=request_timeout) cover_cache_dir, session=session, request_timeout=request_timeout)
self._stop = threading.Event()
self._thread: "threading.Thread | None" = None
# ------------------------------------------------------------------
# Cron API (mirrors SuwayomiFolderWatcher)
# ------------------------------------------------------------------
def start(self) -> None:
"""Starts the periodic scan thread. Non-blocking."""
if self._thread is not None and self._thread.is_alive():
return
self._stop.clear()
self._thread = threading.Thread(
target=self._loop, name="KavitaVolumeCoverUpdater", daemon=True)
self._thread.start()
print(f"[{_now()}] [updater] scanning {self._dst} "
f"on cron '{self._cron.expression}'", flush=True)
def stop(self) -> None:
"""Stops the scan thread (current scan finishes its series first)."""
self._stop.set()
if self._thread is not None:
self._thread.join(timeout=10)
def wait(self) -> None:
"""Blocks the calling thread until stop() is invoked."""
self._stop.wait()
def _loop(self) -> None:
while not self._stop.is_set():
next_run = self._cron.next_after(datetime.now())
wait = max(0.0, (next_run - datetime.now()).total_seconds())
print(f"[{_now()}] [updater] next scheduled scan: "
f"{next_run.isoformat(timespec='minutes')}", flush=True)
if self._stop.wait(wait):
break
try:
summary = self.update_all()
print(f"[{_now()}] [updater] scan done: "
f"{summary['series_updated']} series / "
f"{summary['chapters_updated']} chapters updated",
flush=True)
except Exception as exc:
print(f"[{_now()}] [updater] scan ERROR: {exc}", flush=True)
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Public scan API # Public scan API
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -243,23 +197,25 @@ class KavitaVolumeCoverUpdater:
self._vol_resolver.clear_cache() self._vol_resolver.clear_cache()
self._works_resolver.clear_cache() self._works_resolver.clear_cache()
run = self._perf.begin_run()
try:
for series_dir in sorted(self._dst.iterdir()): for series_dir in sorted(self._dst.iterdir()):
if self._stop.is_set():
break
if not series_dir.is_dir(): if not series_dir.is_dir():
continue continue
summary["series_scanned"] += 1 summary["series_scanned"] += 1
try: try:
updated = self.update_series(series_dir) updated = self.update_series(series_dir, run)
except Exception as exc: except Exception as exc:
print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True) print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
continue continue
if updated: if updated:
summary["series_updated"] += 1 summary["series_updated"] += 1
summary["chapters_updated"] += updated summary["chapters_updated"] += updated
finally:
run.finish()
return summary return summary
def update_series(self, series_dir: Path) -> int: def update_series(self, series_dir: Path, run=None) -> int:
""" """
Updates one series folder. Returns the number of updated chapters. Updates one series folder. Returns the number of updated chapters.
@@ -300,8 +256,11 @@ class KavitaVolumeCoverUpdater:
md = builder.fetch_metadata() md = builder.fetch_metadata()
series_id = str(md.get("id") or "") series_id = str(md.get("id") or "")
series_rec = (run or self._perf.begin_run()).begin_item(series_dir.name)
# Resolve volumes for all null-volume chapters first (API only). # Resolve volumes for all null-volume chapters first (API only).
updates: dict[str, dict] = {} # num -> {"volume": str, "cover": tuple|None} updates: dict[str, dict] = {} # num -> {"volume": str, "cover": tuple|None}
with series_rec.measure("resolve_volumes"):
for num in sorted(missing, key=_chapter_sort_value): for num in sorted(missing, key=_chapter_sort_value):
builder.chapter = num builder.chapter = num
try: try:
@@ -314,6 +273,7 @@ class KavitaVolumeCoverUpdater:
"cover": self._fetch_cover(series_id, volume)} "cover": self._fetch_cover(series_id, volume)}
if not updates: if not updates:
series_rec.finish(ok=True)
return 0 return 0
first = min(chapters, key=_chapter_sort_value) first = min(chapters, key=_chapter_sort_value)
@@ -328,10 +288,13 @@ class KavitaVolumeCoverUpdater:
continue continue
# The first chapter gets a full metadata rebuild (Kavita reads # The first chapter gets a full metadata rebuild (Kavita reads
# series metadata from it); other chapters only a volume edit. # series metadata from it); other chapters only a volume edit.
chap_rec = series_rec.begin_item(num)
with chap_rec.measure("archive_rewrite"):
ok, cover_swapped = self._apply_update( ok, cover_swapped = self._apply_update(
cbz, builder, num, cbz, builder, num,
volume=up["volume"], cover=up["cover"], volume=up["volume"], cover=up["cover"],
full_rebuild=(num == first)) full_rebuild=(num == first))
chap_rec.finish(ok=ok)
if not ok: if not ok:
continue continue
entry["volume"] = _normalise_volume_value(up["volume"]) entry["volume"] = _normalise_volume_value(up["volume"])
@@ -346,15 +309,19 @@ class KavitaVolumeCoverUpdater:
first_entry = chapters.get(first) or {} first_entry = chapters.get(first) or {}
cbz = series_dir / (first_entry.get("archiveName") or "") cbz = series_dir / (first_entry.get("archiveName") or "")
if first_entry.get("archiveName") and cbz.is_file(): if first_entry.get("archiveName") and cbz.is_file():
chap_rec = series_rec.begin_item(f"{first} (refresh)")
with chap_rec.measure("archive_rewrite"):
ok, _ = self._apply_update( ok, _ = self._apply_update(
cbz, builder, first, cbz, builder, first,
volume=None, cover=None, full_rebuild=True) volume=None, cover=None, full_rebuild=True)
chap_rec.finish(ok=ok)
if ok: if ok:
self._log(f"{series_dir.name} | chapter {first} | " self._log(f"{series_dir.name} | chapter {first} | "
f"first-chapter metadata refreshed | {cbz.name}") f"first-chapter metadata refreshed | {cbz.name}")
if updated: if updated:
_save_chapter_index(series_dir, index) _save_chapter_index(series_dir, index)
series_rec.finish(ok=True)
return updated return updated
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -545,10 +512,7 @@ if __name__ == "__main__":
matches_cache=MatchesCache(MATCHES_PATH), matches_cache=MatchesCache(MATCHES_PATH),
) )
# One-shot scan (no cron thread): # One-shot scan. Scheduling is handled externally via CronRunner
# (see main_manga.py).
summary = updater.update_all() summary = updater.update_all()
print(f"\n[updater] {summary}") print(f"\n[updater] {summary}")
# Or run on the cron schedule (default: 19:00 every Mon + Thu):
# updater.start()
# updater.wait()
+16 -1
View File
@@ -93,6 +93,9 @@ class MangaDexVolumeResolver:
self._cache: dict[str, dict] = {} self._cache: dict[str, dict] = {}
# Cache: manga_id -> {relation_type: [title, ...]} # Cache: manga_id -> {relation_type: [title, ...]}
self._relations_cache: dict[str, dict] = {} self._relations_cache: dict[str, dict] = {}
# Cache: title_lower -> manga_id (or None) — avoids repeating the
# MangaDex search for every chapter of the same series.
self._id_cache: dict[str, "str | None"] = {}
# ---------------------------------------------------------------------- # ----------------------------------------------------------------------
# Locate the manga ID # Locate the manga ID
@@ -105,6 +108,11 @@ class MangaDexVolumeResolver:
if not title or not title.strip(): if not title or not title.strip():
return None return None
key = title.strip().lower()
if key in self._id_cache:
return self._id_cache[key]
try:
resp = self._session.get( resp = self._session.get(
f"{self.base_url}/manga", f"{self.base_url}/manga",
params={"title": title, "limit": 5, params={"title": title, "limit": 5,
@@ -113,7 +121,12 @@ class MangaDexVolumeResolver:
timeout=self.request_timeout) timeout=self.request_timeout)
resp.raise_for_status() resp.raise_for_status()
results = resp.json().get("data") or [] results = resp.json().get("data") or []
except requests.RequestException:
# Don't cache transient failures — allow a retry next time.
return None
if not results: if not results:
self._id_cache[key] = None
return None return None
def score(entry) -> float: def score(entry) -> float:
@@ -130,7 +143,9 @@ class MangaDexVolumeResolver:
return best return best
results.sort(key=score, reverse=True) results.sort(key=score, reverse=True)
return results[0].get("id") manga_id = results[0].get("id")
self._id_cache[key] = manga_id
return manga_id
# ---------------------------------------------------------------------- # ----------------------------------------------------------------------
# Main function: retrieve and return volume / chapter data # Main function: retrieve and return volume / chapter data
+61
View File
@@ -30,6 +30,10 @@ from flask import Flask, jsonify, request, Response
from MatchesCache import MatchesCache from MatchesCache import MatchesCache
from ComicInfoBuilder import _pick_thumbnail_url from ComicInfoBuilder import _pick_thumbnail_url
from PerfWebPage import render_perf_page
# Cross-link tabs shown on every perf page in the manga container.
_PERF_TABS = [("move", "move"), ("volume/cover", "volume"), ("persons", "person")]
_INDEX_HTML = """<!doctype html> _INDEX_HTML = """<!doctype html>
@@ -71,6 +75,8 @@ _INDEX_HTML = """<!doctype html>
<button id="batchSave" class="primary">Save dirty (0)</button> <button id="batchSave" class="primary">Save dirty (0)</button>
<button id="build">Build all (rescan)</button> <button id="build">Build all (rescan)</button>
<button id="move">Start move</button> <button id="move">Start move</button>
<button id="syncPersons">Sync persons</button>
<a href="/perf/move" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
<span class="status" id="status"></span> <span class="status" id="status"></span>
</div> </div>
@@ -341,6 +347,23 @@ document.getElementById("move").addEventListener("click", async () => {
btn.disabled = false; btn.disabled = false;
} }
}); });
document.getElementById("syncPersons").addEventListener("click", async () => {
if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return;
const btn = document.getElementById("syncPersons");
btn.disabled = true;
setStatus("Syncing persons… (running on the server)");
try {
const r = await fetch("/api/persons/sync", { method: "POST" });
if (!r.ok) throw new Error(await r.text());
const d = await r.json();
setStatus("Persons: " + d.updated + " updated, " + d.skipped + " skipped, "
+ d.not_found + " not found, " + d.conflicts + " conflicts");
} catch (err) {
setStatus("Person sync failed: " + err.message);
} finally {
btn.disabled = false;
}
});
for (const th of document.querySelectorAll("th.sortable")) { for (const th of document.querySelectorAll("th.sortable")) {
th.addEventListener("click", () => { th.addEventListener("click", () => {
const col = th.dataset.col; const col = th.dataset.col;
@@ -357,6 +380,8 @@ load();
""" """
class MatchesWebApp: class MatchesWebApp:
""" """
Flask app exposing the MatchesCache. `mover` is required when you want Flask app exposing the MatchesCache. `mover` is required when you want
@@ -367,14 +392,22 @@ class MatchesWebApp:
def __init__(self, cache: MatchesCache, *, def __init__(self, cache: MatchesCache, *,
mover=None, mover=None,
person_updater=None,
person_trigger: str = "web",
perf_stats=None,
host: str = "0.0.0.0", host: str = "0.0.0.0",
port: int = 8080): port: int = 8080):
self._cache = cache self._cache = cache
self._mover = mover self._mover = mover
self._person_updater = person_updater
self._person_trigger = person_trigger
# perf_stats: dict {name -> PerfStats}, e.g. {"move", "volume", "person"}.
self._perf = perf_stats or {}
self._host = host self._host = host
self._port = port self._port = port
self._build_lock = threading.Lock() self._build_lock = threading.Lock()
self._move_lock = threading.Lock() self._move_lock = threading.Lock()
self._person_lock = threading.Lock()
self._app = Flask(__name__) self._app = Flask(__name__)
self._thread: "threading.Thread | None" = None self._thread: "threading.Thread | None" = None
self._register_routes() self._register_routes()
@@ -498,3 +531,31 @@ class MatchesWebApp:
finally: finally:
self._move_lock.release() self._move_lock.release()
return jsonify({"results": results}) return jsonify({"results": results})
@app.post("/api/persons/sync")
def api_persons_sync():
if self._person_updater is None:
return Response("no person updater configured", status=503)
if not self._person_lock.acquire(blocking=False):
return Response("person sync already running", status=409)
try:
report = self._person_updater.update_all_persons(
trigger=self._person_trigger,
perf=self._perf.get("person"))
except Exception as exc:
return Response(f"person sync failed: {exc}", status=500)
finally:
self._person_lock.release()
return jsonify(report)
# Perf pages: /perf (move) + /perf/<name> for the updaters.
@app.get("/perf")
@app.get("/perf/<name>")
def perf_page(name: str = "move") -> Response:
return Response(render_perf_page(name, _PERF_TABS),
mimetype="text/html; charset=utf-8")
@app.get("/api/perf/<name>")
def api_perf(name: str):
stats = self._perf.get(name)
return jsonify(stats.all() if stats is not None else {"runs": []})
+44 -49
View File
@@ -64,11 +64,10 @@ from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver from MALResolver import MALResolver
from AniListResolver import AniListResolver from AniListResolver import AniListResolver
from KavitaClient import KavitaClient
from KavitaPersonUpdater import KavitaPersonUpdater
from MatchesCache import MatchesCache from MatchesCache import MatchesCache
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CoverCache import CoverCache, _IMAGE_EXTS from CoverCache import CoverCache, _IMAGE_EXTS
from PerfStats import PerfStats
_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)') _CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
@@ -305,28 +304,30 @@ class SuwayomiMover:
Expected layout: <root>/<Source>/<Title>/<Chapter N>/ Expected layout: <root>/<Source>/<Title>/<Chapter N>/
kavita_path : Root of the Kavita library. kavita_path : Root of the Kavita library.
Series sub-directories are created automatically. Series sub-directories are created automatically.
kavita_base_url : Kavita server URL — required only for person sync,
e.g. "http://192.168.2.2:5000".
kavita_api_key : Kavita API key — required only for person sync.
language : ComicInfo LanguageISO and SeriesSort language ("en"). language : ComicInfo LanguageISO and SeriesSort language ("en").
request_timeout : HTTP timeout in seconds for all API / image requests. request_timeout : HTTP timeout in seconds for all API / image requests.
delete_source : Remove the source chapter folder after successful pack. delete_source : Remove the source chapter folder after successful pack.
cover_cache_dir : Directory for the persistent cover cache. None -> cover_cache_dir : Directory for the persistent cover cache. None ->
temporary cache, deleted at process exit. temporary cache, deleted at process exit.
perf_stats : Optional PerfStats instance for per-step timing. None
(default) disables profiling.
Note: Kavita person sync is no longer done here — it runs as a separate,
global, id-based updater on its own cron schedule (KavitaPersonUpdater).
The mover only touches MangaBaka / MangaDex / MAL / AniList.
""" """
def __init__(self, def __init__(self,
suwayomi_path, suwayomi_path,
kavita_path, kavita_path,
*, *,
kavita_base_url: "str | None" = None,
kavita_api_key: "str | None" = None,
language: str = "en", language: str = "en",
request_timeout: int = 30, request_timeout: int = 30,
delete_source: bool = True, delete_source: bool = True,
matches_cache: "MatchesCache | None" = None, matches_cache: "MatchesCache | None" = None,
api_base_url: str = "https://api.mangabaka.dev/v1", api_base_url: str = "https://api.mangabaka.dev/v1",
cover_cache_dir=None): cover_cache_dir=None,
perf_stats: "PerfStats | None" = None):
self._src = Path(suwayomi_path) self._src = Path(suwayomi_path)
self._dst = Path(kavita_path) self._dst = Path(kavita_path)
self._language = language self._language = language
@@ -334,6 +335,7 @@ class SuwayomiMover:
self._delete_source = delete_source self._delete_source = delete_source
self._matches_cache = matches_cache self._matches_cache = matches_cache
self._api_base_url = api_base_url.rstrip("/") self._api_base_url = api_base_url.rstrip("/")
self._perf = perf_stats or PerfStats(None)
# Shared HTTP session and resolvers — reused across all series/chapters # Shared HTTP session and resolvers — reused across all series/chapters
# to maximise cache hits and minimise API round-trips. # to maximise cache hits and minimise API round-trips.
@@ -352,16 +354,6 @@ class SuwayomiMover:
self._cover_cache = CoverCache( self._cover_cache = CoverCache(
cover_cache_dir, session=session, request_timeout=request_timeout) cover_cache_dir, session=session, request_timeout=request_timeout)
self._person_updater: "KavitaPersonUpdater | None" = None
if kavita_base_url and kavita_api_key:
kavita_client = KavitaClient(
kavita_base_url, kavita_api_key,
request_timeout=request_timeout)
self._person_updater = KavitaPersonUpdater(
kavita_client,
mal_resolver=self._mal,
al_resolver=self._al)
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Public API # Public API
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -376,6 +368,8 @@ class SuwayomiMover:
dict from _process_series_dir. dict from _process_series_dir.
""" """
results: dict = {} results: dict = {}
run = self._perf.begin_run()
try:
for source_dir in sorted(self._src.iterdir()): for source_dir in sorted(self._src.iterdir()):
if not source_dir.is_dir(): if not source_dir.is_dir():
continue continue
@@ -384,7 +378,9 @@ class SuwayomiMover:
continue continue
title = manga_dir.name title = manga_dir.name
print(f"[SuwayomiMover] {title}") print(f"[SuwayomiMover] {title}")
results[title] = self._process_series_dir(manga_dir) results[title] = self._process_series_dir(manga_dir, run)
finally:
run.finish()
return results return results
def process_series(self, manga_title: str) -> dict: def process_series(self, manga_title: str) -> dict:
@@ -400,7 +396,11 @@ class SuwayomiMover:
continue continue
candidate = source_dir / manga_title candidate = source_dir / manga_title
if candidate.is_dir(): if candidate.is_dir():
return self._process_series_dir(candidate) run = self._perf.begin_run()
try:
return self._process_series_dir(candidate, run)
finally:
run.finish()
raise FileNotFoundError( raise FileNotFoundError(
f"No Suwayomi directory found for '{manga_title}' under {self._src}") f"No Suwayomi directory found for '{manga_title}' under {self._src}")
@@ -487,8 +487,9 @@ class SuwayomiMover:
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Internal: series # Internal: series
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def _process_series_dir(self, manga_dir: Path) -> dict: def _process_series_dir(self, manga_dir: Path, run=None) -> dict:
manga_title = manga_dir.name manga_title = manga_dir.name
series_rec = (run or self._perf.begin_run()).begin_item(manga_title)
chapter_dirs = sorted( chapter_dirs = sorted(
(d for d in manga_dir.iterdir() if d.is_dir()), (d for d in manga_dir.iterdir() if d.is_dir()),
@@ -539,6 +540,7 @@ class SuwayomiMover:
md: "dict | None" = None md: "dict | None" = None
mangabaka_title = manga_title mangabaka_title = manga_title
try: try:
with series_rec.measure("fetch_metadata"):
md = builder.fetch_metadata() md = builder.fetch_metadata()
mangabaka_title = md.get("title") or manga_title mangabaka_title = md.get("title") or manga_title
except Exception as exc: except Exception as exc:
@@ -571,7 +573,7 @@ class SuwayomiMover:
chapter_results: list[dict] = [] chapter_results: list[dict] = []
for chapter_dir, _fields, chapter_num in pending: for chapter_dir, _fields, chapter_num in pending:
result = self._process_chapter( result = self._process_chapter(
builder, chapter_num, chapter_dir, dest_series) builder, chapter_num, chapter_dir, dest_series, series_rec)
chapter_results.append(result) chapter_results.append(result)
status = "ok" if result["ok"] else f"ERROR: {result.get('error')}" status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
print(f" Chapter {chapter_num}: {status}") print(f" Chapter {chapter_num}: {status}")
@@ -582,25 +584,11 @@ class SuwayomiMover:
} }
_save_chapter_index(dest_series, chapter_index) _save_chapter_index(dest_series, chapter_index)
# Sync Kavita persons once per series. # Person sync no longer runs here — it has its own global,
# Both MAL and AniList IDs come from MangaBaka's source map; # id-based updater on a separate cron schedule (see
# AniList is used as fallback when MAL returns no characters/staff. # KavitaPersonUpdater.update_all_persons).
person_result: "dict | None" = None series_rec.finish()
if self._person_updater: return {"chapters": chapter_results}
mal_id = ((ComicInfoBuilder._mal_id_from_source(md) if md else None)
or self._mal.find_mal_id(builder_title))
al_id = ComicInfoBuilder._al_id_from_source(md) if md else None
if mal_id or al_id:
try:
person_result = self._person_updater.update_for_manga(
mal_id, al_manga_id=al_id)
print(f" Persons: chars={person_result['characters'].get('updated')} "
f"staff={person_result['staff'].get('updated')}")
except Exception as exc:
person_result = {"error": str(exc)}
print(f" Persons: ERROR {exc}")
return {"chapters": chapter_results, "persons": person_result}
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Internal: chapter # Internal: chapter
@@ -609,7 +597,8 @@ class SuwayomiMover:
builder: ComicInfoBuilder, builder: ComicInfoBuilder,
chapter_num: str, chapter_num: str,
chapter_dir: Path, chapter_dir: Path,
dest_series: Path) -> dict: dest_series: Path,
series_rec=None) -> dict:
""" """
Generates ComicInfo.xml for one chapter, packs it to CBZ, and Generates ComicInfo.xml for one chapter, packs it to CBZ, and
optionally removes the source folder. optionally removes the source folder.
@@ -619,6 +608,11 @@ class SuwayomiMover:
<Pages> element correctly points to the front cover). <Pages> element correctly points to the front cover).
""" """
cbz_path = dest_series / f"{chapter_dir.name}.cbz" cbz_path = dest_series / f"{chapter_dir.name}.cbz"
chap_rec = (series_rec or self._perf.begin_run().begin_item("")
).begin_item(chapter_num)
# add_pages_from_folder records its own sub-steps on this recorder.
builder.perf = chap_rec
ok = False
try: try:
builder.chapter = chapter_num builder.chapter = chapter_num
builder.add_pages_from_folder(chapter_dir, cover_filename="000") builder.add_pages_from_folder(chapter_dir, cover_filename="000")
@@ -626,32 +620,35 @@ class SuwayomiMover:
# by add_pages_from_folder, so it's effectively free. Used by # by add_pages_from_folder, so it's effectively free. Used by
# the chapter index in the Kavita destination folder. # the chapter index in the Kavita destination folder.
try: try:
with chap_rec.measure("volume"):
volume = builder._determine_volume() volume = builder._determine_volume()
except Exception: except Exception:
volume = None volume = None
with chap_rec.measure("save_xml"):
builder.save_xml(chapter_dir) builder.save_xml(chapter_dir)
with chap_rec.measure("pack_cbz"):
_pack_to_cbz(chapter_dir, cbz_path) _pack_to_cbz(chapter_dir, cbz_path)
if self._delete_source: if self._delete_source:
with chap_rec.measure("delete_source"):
shutil.rmtree(chapter_dir) shutil.rmtree(chapter_dir)
ok = True
return {"chapter": chapter_num, "cbz": str(cbz_path), return {"chapter": chapter_num, "cbz": str(cbz_path),
"ok": True, "volume": volume} "ok": True, "volume": volume}
except Exception as exc: except Exception as exc:
return {"chapter": chapter_num, "cbz": str(cbz_path), return {"chapter": chapter_num, "cbz": str(cbz_path),
"ok": False, "error": str(exc)} "ok": False, "error": str(exc)}
finally:
builder.perf = None
chap_rec.finish(ok=ok)
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
# Usage example # Usage example
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
if __name__ == "__main__": if __name__ == "__main__":
import os
# Local (no-Docker) smoke test. Adjust paths to your environment. # Local (no-Docker) smoke test. Adjust paths to your environment.
# Set the KAVITA_API_KEY env var — never commit API keys to the repo.
SUWAYOMI_PATH = r"M:\config\downloads\mangas" SUWAYOMI_PATH = r"M:\config\downloads\mangas"
KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test" KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
KAVITA_URL = "http://192.168.2.2:5000"
KAVITA_KEY = os.environ.get("KAVITA_API_KEY", "")
# matches.json lives next to this script during local testing. # matches.json lives next to this script during local testing.
MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json" MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"
@@ -660,8 +657,6 @@ if __name__ == "__main__":
mover = SuwayomiMover( mover = SuwayomiMover(
SUWAYOMI_PATH, SUWAYOMI_PATH,
KAVITA_PATH, KAVITA_PATH,
kavita_base_url=KAVITA_URL,
kavita_api_key=KAVITA_KEY,
delete_source=False, delete_source=False,
matches_cache=matches_cache, matches_cache=matches_cache,
) )