Performance and Person Updater Improvements #7

Merged
johannesbot merged 4 commits from time-measurement into master 2026-06-16 18:46:56 +02:00
16 changed files with 984 additions and 920 deletions
Showing only changes of commit 6ca1a245a3 - Show all commits
+6 -1
View File
@@ -11,13 +11,18 @@ HOST_MANGA_CONFIG_PATH=/path/to/manga-config
MANGA_WEB_PORT=8080
SETTLE_SECONDS=600
DELETE_SOURCE=true
# Periodic updaters (volume/cover + global person sync) run together on
# this cron. Sundays 10:00. Person updater also covers LN libraries.
UPDATER_ENABLED=true
UPDATER_SCHEDULE=0 19 * * 1,4
UPDATER_SCHEDULE=0 10 * * 0
COVER_CACHE_PATH=/config/covers
PERF_PATH=/config/perf_stats.json
VOLUME_PERF_PATH=/config/volume_perf_stats.json
PERSON_PERF_PATH=/config/person_perf_stats.json
# Light-novel container (kavita-lightnovel-metadata-fetcher)
HOST_LN_CONFIG_PATH=/path/to/ln-config
LN_WEB_PORT=8081
LN_LIBRARY_IDS=3,5
LN_UPDATER_ENABLED=true
+11 -6
View File
@@ -13,17 +13,18 @@ services:
SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
DELETE_SOURCE: "${DELETE_SOURCE:-true}"
MATCH_PATH: "/config/matches.json"
# Volume/cover back-fill updater
# Periodic updaters (volume/cover back-fill + global person sync) run
# together on this cron. "0 10 * * 0" = Sundays 10:00 (local time, see TZ)
UPDATER_ENABLED: "${UPDATER_ENABLED:-true}"
# Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday
# (local time, see TZ)
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
UPDATER_LOG: "/config/volume_updater.log"
# Persistent cover cache (empty = temp dir, deleted on container stop)
COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}"
# Per-step move timing stats (viewable at /perf); empty disables it
# Per-step timing stats (viewable at /perf, /perf/volume, /perf/person)
PERF_PATH: "${PERF_PATH:-/config/perf_stats.json}"
# Timezone for the cron schedule — without this 19:00 means 19:00 UTC
VOLUME_PERF_PATH: "${VOLUME_PERF_PATH:-/config/volume_perf_stats.json}"
PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
# Timezone for the cron schedule — without this 10:00 means 10:00 UTC
TZ: "${TZ:-Europe/Berlin}"
ports:
- "${MANGA_WEB_PORT:-8080}:8080"
@@ -45,6 +46,10 @@ services:
LIBRARY_IDS: "${LN_LIBRARY_IDS}"
LANGUAGE: "${LANGUAGE:-en}"
MATCH_PATH: "/config/matches.json"
# Global person sync on cron (same default cadence as the manga side)
UPDATER_ENABLED: "${LN_UPDATER_ENABLED:-true}"
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
TZ: "${TZ:-Europe/Berlin}"
ports:
- "${LN_WEB_PORT:-8081}:8080"
+33
View File
@@ -27,6 +27,12 @@ Environment variables
MATCH_PATH default /config/matches.json
WEB_PORT default 8080
WEB_HOST default 0.0.0.0
UPDATER_ENABLED default true (run the person updater on cron)
UPDATER_SCHEDULE cron expression for the person updater,
default "0 10 * * 0" = Sundays 10:00
(local time — set TZ inside the container!)
PERSON_PERF_PATH JSON file for person updater timing.
Default /config/person_perf_stats.json
"""
from __future__ import annotations
@@ -51,6 +57,15 @@ sys.path.insert(0, str(_BASE / "src" / "ln"))
from MatchesCache import MatchesCache # noqa: E402
from LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402
from MatchesWebApp import MatchesWebApp # noqa: E402
from PerfStats import PerfStats # noqa: E402
from CronRunner import CronRunner # noqa: E402
def _env_bool(name: str, default: bool) -> bool:
raw = os.environ.get(name)
if raw is None:
return default
return raw.strip().lower() in ("1", "true", "yes", "y", "on")
def _env_str(name: str, default: "str | None" = None,
@@ -98,6 +113,10 @@ def main() -> int:
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
web_port = _env_int("WEB_PORT", 8080)
library_ids = _env_int_list("LIBRARY_IDS")
updater_enabled = _env_bool("UPDATER_ENABLED", True)
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
person_perf_path = _env_str("PERSON_PERF_PATH",
"/config/person_perf_stats.json") or None
print(f"[main] kavita url = {kavita_url}", flush=True)
print(f"[main] language = {language}", flush=True)
@@ -107,6 +126,7 @@ def main() -> int:
print(f"[main] web = {web_host}:{web_port}", flush=True)
cache = MatchesCache(match_path)
person_perf = PerfStats(person_perf_path)
orchestrator = LightNovelOrchestrator(
kavita_url=kavita_url,
kavita_api_key=kavita_api_key,
@@ -118,9 +138,22 @@ def main() -> int:
app = MatchesWebApp(
cache, orchestrator=orchestrator,
default_library_ids=library_ids,
person_perf=person_perf,
host=web_host, port=web_port,
)
app.start()
if updater_enabled:
try:
CronRunner(
updater_schedule,
lambda: orchestrator.sync_persons(trigger="cron",
perf=person_perf),
name="person-updater").start()
except ValueError as exc:
print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
f"scheduled person sync DISABLED", flush=True)
app.wait()
return 0
+54 -24
View File
@@ -28,15 +28,19 @@ Environment variables
MATCH_PATH default /config/matches.json
WEB_PORT default 8080 (Flask web UI for matches.json)
WEB_HOST default 0.0.0.0
UPDATER_ENABLED default true (volume/cover back-fill cron)
UPDATER_SCHEDULE cron expression for the updater scans,
default "0 19 * * 1,4" = 19:00 every Mon + Thu
UPDATER_ENABLED default true (run volume/cover + person updaters on cron)
UPDATER_SCHEDULE cron expression for the periodic updaters,
default "0 10 * * 0" = Sundays 10:00
(local time — set TZ inside the container!)
UPDATER_LOG default /config/volume_updater.log
COVER_CACHE_PATH directory for the persistent cover cache;
empty (default) = temporary cache, deleted on exit
PERF_PATH JSON file for per-step move timing stats;
empty disables profiling. Default /config/perf_stats.json
PERF_PATH JSON file for per-step move timing stats.
Default /config/perf_stats.json (empty disables it)
VOLUME_PERF_PATH JSON file for volume/cover updater timing.
Default /config/volume_perf_stats.json
PERSON_PERF_PATH JSON file for person updater timing.
Default /config/person_perf_stats.json
"""
from __future__ import annotations
@@ -62,7 +66,10 @@ from SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402,F401
from MatchesCache import MatchesCache # noqa: E402
from MatchesWebApp import MatchesWebApp # noqa: E402
from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402
from KavitaClient import KavitaClient # noqa: E402
from KavitaPersonUpdater import KavitaPersonUpdater # noqa: E402
from PerfStats import PerfStats # noqa: E402
from CronRunner import CronRunner # noqa: E402
def _env_str(name: str, default: "str | None" = None,
@@ -106,10 +113,14 @@ def main() -> int:
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
web_port = _env_int("WEB_PORT", 8080)
updater_enabled = _env_bool("UPDATER_ENABLED", True)
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 1,4")
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
perf_path = _env_str("PERF_PATH", "/config/perf_stats.json") or None
volume_perf_path = _env_str("VOLUME_PERF_PATH",
"/config/volume_perf_stats.json") or None
person_perf_path = _env_str("PERSON_PERF_PATH",
"/config/person_perf_stats.json") or None
print(f"[main] suwayomi = {suwayomi_path}", flush=True)
print(f"[main] kavita = {kavita_path}", flush=True)
@@ -121,43 +132,62 @@ def main() -> int:
print(f"[main] web = {web_host}:{web_port}", flush=True)
matches_cache = MatchesCache(match_path)
perf_stats = PerfStats(perf_path)
perf_move = PerfStats(perf_path)
perf_volume = PerfStats(volume_perf_path)
perf_person = PerfStats(person_perf_path)
mover = SuwayomiMover(
suwayomi_path, kavita_path,
kavita_base_url=kavita_url,
kavita_api_key=kavita_api_key,
language=language,
request_timeout=request_timeout,
delete_source=delete_source,
matches_cache=matches_cache,
cover_cache_dir=cover_cache_path,
perf_stats=perf_stats,
perf_stats=perf_move,
)
# Standalone, global, id-based person updater (manga + LN libraries).
person_updater = None
if kavita_api_key:
kavita_client = KavitaClient(kavita_url, kavita_api_key,
request_timeout=request_timeout)
person_updater = KavitaPersonUpdater(kavita_client)
# watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
web_app = MatchesWebApp(matches_cache, mover=mover, perf_stats=perf_stats,
host=web_host, port=web_port)
web_app = MatchesWebApp(
matches_cache, mover=mover,
person_updater=person_updater, person_trigger="web",
perf_stats={"move": perf_move, "volume": perf_volume,
"person": perf_person},
host=web_host, port=web_port)
web_app.start()
if updater_enabled:
updater = KavitaVolumeCoverUpdater(
kavita_path,
matches_cache=matches_cache,
language=language,
request_timeout=request_timeout,
log_path=updater_log,
cover_cache_dir=cover_cache_path,
perf_stats=perf_volume,
)
def _scheduled_job():
updater.update_all()
if person_updater is not None:
person_updater.update_all_persons(trigger="cron",
perf=perf_person)
try:
updater = KavitaVolumeCoverUpdater(
kavita_path,
matches_cache=matches_cache,
language=language,
request_timeout=request_timeout,
log_path=updater_log,
schedule=updater_schedule,
cover_cache_dir=cover_cache_path,
)
updater.start()
CronRunner(updater_schedule, _scheduled_job,
name="updaters").start()
except ValueError as exc:
# Invalid cron expression — keep the service up, just without
# the updater, and make the config error obvious in the logs.
# the scheduled updaters, and surface the config error.
print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
f"volume/cover updater DISABLED", flush=True)
f"scheduled updaters DISABLED", flush=True)
# watcher.start()
# watcher.wait() # blocks until stop() is called via a signal
+87
View File
@@ -0,0 +1,87 @@
"""
cron_runner.py
==============
Runs a single callable on a cron schedule on a background thread.
Decouples *what* runs from *when*: both the manga container (volume/cover
updater + person updater) and the LN container (person updater) schedule
their work through this one helper, using a shared ``CronSchedule`` for the
``next_after`` arithmetic.
Usage::
runner = CronRunner("0 10 * * 0", job=my_callable) # Sundays 10:00
runner.start()
...
runner.stop()
When the schedule string is invalid, the CronSchedule constructor raises
ValueError — the caller decides whether to disable the runner or fall back.
The schedule is evaluated in local time (set TZ inside the container).
"""
from __future__ import annotations
import threading
from datetime import datetime
from CronSchedule import CronSchedule
def _now() -> str:
return datetime.now().isoformat(timespec="seconds")
class CronRunner:
"""
Fires ``job()`` whenever the cron ``schedule`` elapses.
Parameters
----------
schedule : 5-field cron expression (see CronSchedule).
job : Zero-arg callable invoked on each scheduled tick. Exceptions
are caught and logged so a failing run does not kill the loop.
name : Thread name (for logs).
"""
def __init__(self, schedule: str, job, *, name: str = "CronRunner"):
self._cron = CronSchedule(schedule)
self._job = job
self._name = name
self._stop = threading.Event()
self._thread: "threading.Thread | None" = None
def start(self) -> None:
"""Starts the scheduling thread. Non-blocking."""
if self._thread is not None and self._thread.is_alive():
return
self._stop.clear()
self._thread = threading.Thread(
target=self._loop, name=self._name, daemon=True)
self._thread.start()
print(f"[{_now()}] [{self._name}] scheduled on "
f"cron '{self._cron.expression}'", flush=True)
def stop(self) -> None:
"""Signals the loop to stop (a job already running finishes first)."""
self._stop.set()
if self._thread is not None:
self._thread.join(timeout=10)
def wait(self) -> None:
"""Blocks the calling thread until stop() is invoked."""
self._stop.wait()
def _loop(self) -> None:
while not self._stop.is_set():
next_run = self._cron.next_after(datetime.now())
wait = max(0.0, (next_run - datetime.now()).total_seconds())
print(f"[{_now()}] [{self._name}] next run: "
f"{next_run.isoformat(timespec='minutes')}", flush=True)
if self._stop.wait(wait):
break
try:
self._job()
except Exception as exc:
print(f"[{_now()}] [{self._name}] job ERROR: {exc}", flush=True)
+25
View File
@@ -204,6 +204,31 @@ class KavitaClient:
r.raise_for_status()
return r.json() or []
def list_all_persons(self, *, page_size: int = 200) -> list[dict]:
"""
Returns every PersonDto in the instance.
Pages through POST /api/Person/all (the browse endpoint) with an
empty filter until an empty page is returned — same paging pattern
as list_series_in_library.
"""
results: list[dict] = []
page = 1
while True:
r = self._session.post(
f"{self._base}/api/Person/all",
params={"PageNumber": page, "PageSize": page_size},
json={}, timeout=self._timeout)
r.raise_for_status()
chunk = r.json() or []
if not chunk:
break
results.extend(chunk)
if len(chunk) < page_size:
break
page += 1
return results
def update_person(self, payload: dict) -> None:
"""Writes a person record (malId, aniListId, description, …)."""
r = self._session.post(f"{self._base}/api/Person/update",
+140 -338
View File
@@ -2,407 +2,220 @@
kavita_person_updater.py
========================
Synchronises Kavita person / character records with MyAnimeList data.
Synchronises Kavita character person-records with MyAnimeList / AniList data.
For every character and staff member that MAL knows about for a given manga
the updater:
1. Searches Kavita for a matching Person record (by name similarity /
alias match, configurable threshold).
2. Sets the MAL ID on the Kavita person if it is not yet linked.
3. Uploads the MAL profile image when the cover is not locked and has
not been set in a previous sync run.
4. Populates the description field when Kavita has none and MAL provides
an 'about' text (requires an extra Jikan request per character; only
performed when update_descriptions=True).
Global, id-based mode
---------------------
Kavita person-records are created with a disambiguated name carrying the
tracker *character* id, e.g. ``Rem (MAL 118737)`` (manga: written into
ComicInfo <Characters>; light novels: written by the metadata builder).
``update_all_persons`` walks **every** person in the Kavita instance, reads
that id from the name, looks the character up on MAL / AniList by id, and
writes back:
* the tracker id into the ``malId`` / ``aniListId`` field (when still empty),
* a description (when the record has none),
* the profile image (when not locked and not already set).
Persons whose name carries no id (authors / staff, which are not
disambiguated) are skipped. A record already linked to a *different*
tracker id than its name says is reported as a conflict and left untouched.
This mode is format-independent (it only does id lookups, never title
searches) so a single pass covers both the manga and light-novel libraries.
All HTTP traffic to Kavita goes through the shared :class:`KavitaClient`
(`/api/Person/search`, `/api/Person/update`, `/api/Upload/person`).
(`/api/Person/all`, `/api/Person/update`, `/api/Upload/person`).
Tested against Kavita 0.9.0.2.
"""
from __future__ import annotations
import datetime
import requests
from KavitaClient import KavitaClient
from MALResolver import MALResolver
from AniListResolver import AniListResolver
from TextUtils import best_similarity, paragraphs_to_html, person_name_with_id
from PerfStats import PerfStats
from TextUtils import paragraphs_to_html, parse_person_tracker_id
class KavitaPersonUpdater:
"""
Syncs Kavita Person records with MyAnimeList data.
Syncs Kavita character person-records with MAL / AniList data, keyed by
the tracker id embedded in each person's name.
Parameters
----------
client : Shared KavitaClient (session, auth, cover uploads)
mal_resolver : Shared MALResolver singleton (created automatically if omitted)
al_resolver : Shared AniListResolver singleton (created automatically if omitted)
min_name_score : Minimum difflib similarity ratio (01) required to accept a
Kavita person as a match for a MAL name. Default 0.80.
client : Shared KavitaClient (session, auth, cover uploads).
mal_resolver : Shared MALResolver singleton (created if omitted).
al_resolver : Shared AniListResolver singleton (created if omitted).
"""
def __init__(self, client: KavitaClient, *,
mal_resolver: "MALResolver | None" = None,
al_resolver: "AniListResolver | None" = None,
min_name_score: float = 0.80):
al_resolver: "AniListResolver | None" = None):
self._client = client
self._min_score = min_name_score
self._mal = mal_resolver or MALResolver()
self._al = al_resolver or AniListResolver()
# Cache: normalised name -> list of PersonDto dicts (best matches first)
self._person_search_cache: dict[str, list[dict]] = {}
# ------------------------------------------------------------------
# Public: combined update
# Public: global person sync
# ------------------------------------------------------------------
def update_for_manga(self, mal_manga_id: "int | None", *,
al_manga_id: "int | None" = None,
update_covers: bool = True,
update_descriptions: bool = True) -> dict:
def update_all_persons(self, *,
trigger: str = "cron",
perf: "PerfStats | None" = None,
update_covers: bool = True,
update_descriptions: bool = True) -> dict:
"""
Runs a full update pass for both characters and staff of the manga.
MAL is tried first; AniList is used as fallback when MAL returns nothing.
Walks every Kavita person, syncing the ones whose name carries a
tracker character id.
Returns
-------
{
"characters": {"updated": n, "skipped": n, "not_found": n},
"staff": {"updated": n, "skipped": n, "not_found": n},
}
Parameters
----------
trigger : Source that started this run ("cron" | "web" | "ln") —
recorded in the perf-stats run meta.
perf : Optional PerfStats for per-person step timing.
Returns {"trigger", "updated", "skipped", "not_found",
"conflicts", "errors"}.
"""
return {
"characters": self.update_characters(
mal_manga_id, al_manga_id=al_manga_id,
update_covers=update_covers,
update_descriptions=update_descriptions),
"staff": self.update_staff(
mal_manga_id, al_manga_id=al_manga_id,
update_covers=update_covers,
update_descriptions=update_descriptions),
}
perf = perf or PerfStats(None)
run = perf.begin_run(meta={"trigger": trigger})
result: dict = {"trigger": trigger, "updated": 0, "skipped": 0,
"not_found": 0, "conflicts": 0, "errors": []}
# ------------------------------------------------------------------
# Public: character update
# ------------------------------------------------------------------
def update_characters(self, mal_manga_id: "int | None", *,
al_manga_id: "int | None" = None,
update_covers: bool = True,
update_descriptions: bool = True) -> dict:
"""
Updates Kavita persons that match MAL/AniList characters for the manga.
MAL is tried first; AniList is the fallback when MAL returns nothing.
try:
persons = self._client.list_all_persons()
except requests.RequestException as exc:
result["errors"].append(f"list persons failed: {exc}")
run.finish()
return result
Returns {"updated": n, "skipped": n, "not_found": n}.
"""
entries = self._mal.get_characters_detailed(mal_manga_id) if mal_manga_id else []
resolver = self._mal
if not entries and al_manga_id:
entries = self._al.get_characters_detailed(al_manga_id)
resolver = self._al
return self._sync_entries(entries, "character", resolver,
update_covers=update_covers,
update_descriptions=update_descriptions)
# ------------------------------------------------------------------
# Public: staff update
# ------------------------------------------------------------------
def update_staff(self, mal_manga_id: "int | None", *,
al_manga_id: "int | None" = None,
update_covers: bool = True,
update_descriptions: bool = True) -> dict:
"""
Updates Kavita persons that match MAL/AniList staff for the manga.
MAL is tried first; AniList is the fallback when MAL returns nothing.
Returns {"updated": n, "skipped": n, "not_found": n}.
"""
entries = self._mal.get_staff_detailed(mal_manga_id) if mal_manga_id else []
resolver = self._mal
if not entries and al_manga_id:
entries = self._al.get_staff_detailed(al_manga_id)
resolver = self._al
return self._sync_entries(entries, "staff", resolver,
update_covers=update_covers,
update_descriptions=update_descriptions)
# ------------------------------------------------------------------
# Public: cache management
# ------------------------------------------------------------------
def clear_cache(self) -> None:
"""Clears the Kavita person search cache."""
self._person_search_cache.clear()
# ------------------------------------------------------------------
# Internal: main sync loop
# ------------------------------------------------------------------
def _sync_entries(self, entries: list[dict], kind: str, resolver, *,
update_covers: bool,
update_descriptions: bool) -> dict:
result: dict = {"updated": 0, "skipped": 0, "not_found": 0,
"errors": []}
for entry in entries:
name = (entry.get("name") or "").strip()
raw_name = (entry.get("raw_name") or "").strip()
if not name and not raw_name:
for person in persons:
name = (person.get("name") or "").strip()
parsed = parse_person_tracker_id(name)
if not parsed:
result["skipped"] += 1 # author/staff or un-tagged
continue
if kind == "character":
# Characters are stored under their disambiguated name
# ("Rem (MAL 118737)") — see person_name_with_id. The
# series metadata write creates the person under exactly
# this name, so only that form is searched.
search_names = [person_name_with_id(
name, mal_id=entry.get("mal_id"),
al_id=entry.get("al_id"))]
else:
# Staff: cleaned (XML-safe) name first; if Kavita stores
# the legacy comma form, retry with the raw MAL name.
search_names = [name]
if raw_name and raw_name != name:
search_names.append(raw_name)
matches: list[dict] = []
for search_name in search_names:
if not search_name:
continue
matches = self._find_kavita_person(search_name)
if matches:
break
if not matches:
result["not_found"] += 1
continue
changed = self._apply_mal_data(
matches[0], entry, kind, resolver,
update_cover=update_covers,
update_desc=update_descriptions,
errors=result["errors"])
result["updated" if changed else "skipped"] += 1
source, tracker_id = parsed
item = run.begin_item(name)
ok = True
try:
category = self._apply_to_person(
person, source, tracker_id, item,
update_cover=update_covers,
update_desc=update_descriptions,
errors=result["errors"])
result[category] += 1
ok = category != "conflicts"
except Exception as exc:
result["errors"].append(f"{name}: {exc}")
ok = False
finally:
item.finish(ok=ok)
run.finish()
print(f"[persons] trigger={trigger} updated={result['updated']} "
f"skipped={result['skipped']} not_found={result['not_found']} "
f"conflicts={result['conflicts']} errors={len(result['errors'])}",
flush=True)
return result
# ------------------------------------------------------------------
# Internal: Kavita person search
# Internal: apply tracker data to one person
# ------------------------------------------------------------------
def _find_kavita_person(self, name: str) -> list[dict]:
def _apply_to_person(self, person: dict, source: str, tracker_id: int,
item, *, update_cover: bool, update_desc: bool,
errors: list) -> str:
"""
Searches Kavita for persons matching `name`.
Checks both the main name and any stored aliases.
Returns persons sorted by similarity, filtered by min_name_score.
Results are cached per (normalised) query name.
Applies MAL/AniList character data to one Kavita person.
Returns the result category: "updated" | "skipped" | "not_found"
| "conflicts".
"""
key = name.lower().strip()
if key in self._person_search_cache:
return self._person_search_cache[key]
try:
persons = self._client.search_persons(name)
except requests.RequestException:
self._person_search_cache[key] = []
return []
scored = []
for p in persons:
candidates = [p.get("name")] + list(p.get("aliases") or [])
scored.append((best_similarity(key, candidates), p))
scored.sort(key=lambda pair: pair[0], reverse=True)
filtered = [p for score, p in scored if score >= self._min_score]
self._person_search_cache[key] = filtered
return filtered
# ------------------------------------------------------------------
# Internal: apply MAL data to a single Kavita person
# ------------------------------------------------------------------
def _apply_mal_data(self, person: dict, mal_entry: dict, kind: str,
resolver, *,
update_cover: bool, update_desc: bool,
errors: "list | None" = None) -> bool:
"""
Applies tracker data (MAL or AniList) to one Kavita person record.
Fields updated
--------------
- malId : set when the entry carries a MAL ID and it differs
- aniListId : set when the entry carries an AniList ID and it differs
- description: set when empty and the tracker provides a description
- cover image: uploaded when not locked and no prior sync cover exists
Returns True if any change was made. Failures are appended to the
`errors` list (if provided) instead of being silently swallowed.
"""
person_id: "int | None" = person.get("id")
person_id = person.get("id")
if not person_id:
return False
return "skipped"
person_name = person.get("name") or ""
resolver = self._mal if source == "mal" else self._al
id_field = "malId" if source == "mal" else "aniListId"
current = person.get(id_field) or 0
# Tracker IDs — a MAL entry has mal_id set; an AniList entry has al_id.
mal_id: "int | None" = mal_entry.get("mal_id")
al_id: "int | None" = mal_entry.get("al_id")
entity_id = mal_id or al_id # used for resolver detail calls
# The name is authoritative; a record linked to a different id is a
# data conflict — never overwrite it.
if current and current != tracker_id:
errors.append(
f"conflict: '{person.get('name')}' (#{person_id}) has "
f"{id_field}={current} but name says {tracker_id} — skipped")
return "conflicts"
current_mal_id: int = person.get("malId") or 0
current_al_id: int = person.get("aniListId") or 0
with item.measure("detail_fetch"):
details = resolver.get_character_details(tracker_id)
if not details:
return "not_found"
# Collision guard: the Kavita person is already linked to a
# *different* tracker entity — same display name, different
# character/person. Never overwrite; first writer wins.
if ((mal_id and current_mal_id and current_mal_id != mal_id)
or (al_id and current_al_id and current_al_id != al_id)):
if errors is not None:
errors.append(
f"conflict: '{person_name}' (#{person_id}) is linked to "
f"malId={current_mal_id or '-'}/aniListId={current_al_id or '-'} "
f"but this entry has malId={mal_id or '-'}/aniListId={al_id or '-'} "
f"— skipped")
return False
needs_mal_id = bool(mal_id and current_mal_id != mal_id)
needs_al_id = bool(al_id and current_al_id != al_id)
# ------ Lazy description fetch -----------------------------------
description: "str | None" = None
need_id = not current # write id when still missing
description = None
if update_desc and not (person.get("description") or "").strip():
if entity_id:
if kind == "character":
details = resolver.get_character_details(entity_id)
if details:
description = _build_character_description(details) or None
else:
details = resolver.get_person_details(entity_id)
if details:
description = _build_person_description(details) or None
description = _build_character_description(details) or None
need_desc = bool(description)
needs_desc = bool(description)
# ------ Metadata update ------------------------------------------
changed = False
if needs_mal_id or needs_al_id or needs_desc:
payload: dict = {
if need_id or need_desc:
payload = {
"id": person_id,
"name": person_name,
# MUST stay a boolean — the cover image itself is uploaded
# separately via POST /api/Upload/person (below). Putting a
# URL here makes Kavita reject the whole payload with HTTP 400.
"name": person.get("name") or "",
# MUST stay a boolean — the cover is uploaded separately.
"coverImageLocked": bool(person.get("coverImageLocked", False)),
"aliases": person.get("aliases") or [],
"description": description or person.get("description"),
"malId": mal_id if needs_mal_id else (current_mal_id or None),
"aniListId": al_id if needs_al_id else (current_al_id or None),
"malId": tracker_id if source == "mal"
else (person.get("malId") or None),
"aniListId": tracker_id if source == "al"
else (person.get("aniListId") or None),
}
try:
self._client.update_person(payload)
with item.measure("person_update"):
self._client.update_person(payload)
changed = True
except requests.RequestException as e:
if errors is not None:
errors.append(
f"Person/update failed for #{person_id} "
f"'{person_name}': {e}")
except requests.RequestException as exc:
errors.append(f"update failed #{person_id} "
f"'{person.get('name')}': {exc}")
# ------ Cover image upload ----------------------------------------
# Upload whenever:
# - caller requested cover updates
# - cover is NOT locked (user did not manually pin it)
# - we have not already uploaded this exact tracker entity's image
# (i.e. the tracked ID differs OR there is no cover yet).
# Cover: upload when not locked and not already set for this id.
if update_cover and not person.get("coverImageLocked"):
image_url = mal_entry.get("image_url")
already_uploaded = (
entity_id is not None
and (current_mal_id == mal_id or current_al_id == al_id)
and bool(person.get("coverImage"))
)
if image_url and not already_uploaded:
image_url = details.get("image_url")
already = bool(current) and bool(person.get("coverImage"))
if image_url and not already:
try:
self._client.upload_person_cover(person_id, image_url)
with item.measure("cover_upload"):
self._client.upload_person_cover(person_id, image_url)
changed = True
except requests.RequestException as e:
if errors is not None:
errors.append(
f"cover upload failed for #{person_id} "
f"'{person_name}' ({image_url}): {e}")
except requests.RequestException as exc:
errors.append(f"cover upload failed #{person_id} "
f"'{person.get('name')}': {exc}")
return changed
return "updated" if changed else "skipped"
# --------------------------------------------------------------------------
# Module helpers: description builders
# Module helper: character description builder
# --------------------------------------------------------------------------
def _format_birthday(birthday: str) -> str:
"""Converts an ISO 8601 birthday string to "D Month YYYY"."""
if not birthday:
return ""
try:
dt = datetime.date.fromisoformat(birthday.split("T")[0])
return f"{dt.day} {dt.strftime('%B %Y')}"
except (ValueError, AttributeError):
return ""
def _build_character_description(details: dict) -> str:
"""
Builds a Kavita-safe HTML description for a MAL character.
Builds a Kavita-safe HTML description for a MAL / AniList character.
Top line: "Favorites: N" as a link to the character's MAL page.
Top line: "Favorites: N" linked to the character page (when available).
Remainder: the character's `about` text converted to HTML paragraphs.
"""
parts: list[str] = []
url = details.get("url") or ""
favorites = details.get("favorites")
if url and favorites is not None:
parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>')
about = (details.get("about") or "").strip()
if about:
parts.append(paragraphs_to_html(about))
return "<br>".join(parts)
def _build_person_description(details: dict) -> str:
"""
Builds a Kavita-safe HTML description for a MAL person (mangaka / staff).
Renders a summary table (given name, family name, birthday, website,
member favorites) followed by the `about` biography as HTML paragraphs.
"""
_TD = 'style="padding-right:1.5em"'
rows: list[str] = []
given = (details.get("given_name") or "").strip()
family = (details.get("family_name") or "").strip()
birthday = details.get("birthday") or ""
favorites = details.get("favorites")
website = (details.get("website_url") or "").strip()
url = (details.get("url") or "").strip()
if given:
rows.append(f"<tr><td {_TD}>Given name</td><td>{given}</td></tr>")
if family:
rows.append(f"<tr><td {_TD}>Family name</td><td>{family}</td></tr>")
bday_str = _format_birthday(birthday)
if bday_str:
rows.append(f"<tr><td {_TD}>Birthday</td><td>{bday_str}</td></tr>")
if website:
rows.append(
f'<tr><td {_TD}>Website</td>'
f'<td><a href="{website}">{website}</a></td></tr>'
)
if favorites is not None:
fav_cell = (f'<a href="{url}" target="_blank">{favorites:,}</a>' if url
else f"{favorites:,}")
rows.append(
f"<tr><td {_TD}>Member Favorites</td><td>{fav_cell}</td></tr>")
parts: list[str] = []
if rows:
parts.append(f'<table>{"".join(rows)}</table>')
parts.append(f'<p><a href="{url}" target="_blank">'
f'Favorites: {favorites:,}</a></p>')
about = (details.get("about") or "").strip()
if about:
parts.append(paragraphs_to_html(about))
@@ -418,18 +231,7 @@ if __name__ == "__main__":
client = KavitaClient(os.environ["KAVITA_URL"],
os.environ["KAVITA_API_KEY"])
updater = KavitaPersonUpdater(client)
mal = MALResolver()
mal_id = mal.find_mal_id("よふかしのうた")
print("MAL ID:", mal_id)
if mal_id:
result = updater.update_for_manga(mal_id)
print("Characters:", {k: v for k, v in result["characters"].items()
if k != "errors"})
print("Staff :", {k: v for k, v in result["staff"].items()
if k != "errors"})
# Surface any non-fatal upload / API errors for debugging
for section in ("characters", "staff"):
for err in result[section].get("errors", []):
print(f"[{section}] {err}")
report = updater.update_all_persons(trigger="cron")
print(report)
for err in report["errors"]:
print(" ", err)
+254
View File
@@ -0,0 +1,254 @@
"""
perf_stats.py
=============
Generic run/step performance profiler with JSON persistence, shared by the
move pipeline and the periodic updaters (volume/cover, persons).
Each run is a tree of *items* (e.g. series -> chapter, or one person) and
every item carries named *step* timings. A run also carries free-form
``meta`` (e.g. the trigger source ``"cron" | "web" | "ln"`` for the person
updater).
Data model (one entry per run, newest first)::
{
"runs": [
{
"runId": "",
"startedAt": 1700000000,
"finishedAt": 1700000123,
"totalSeconds": 123.4,
"meta": {"trigger": "cron"},
"itemCount": 2, # top-level items
"leafCount": 31, # items without children
"stepTotals": {"cover": 41.2, "image_dimensions": 55.8, ...},
"items": [
{"label": "Call of the Night", "totalSeconds": 60.2, "ok": true,
"steps": {"fetch_metadata": 1.2},
"items": [
{"label": "1", "totalSeconds": 11.5, "ok": true,
"steps": {"cover": 1.8, "pack_cbz": 2.9}, "items": []}
]}
]
}
]
}
Usage::
perf = PerfStats(path) # path=None -> disabled
run = perf.begin_run(meta={"trigger": "cron"})
item = run.begin_item("Call of the Night")
with item.measure("fetch_metadata"):
...
chap = item.begin_item("1")
with chap.measure("pack_cbz"):
...
chap.finish()
item.finish() # flushes the run to disk
run.finish()
When ``path`` is None every recorder is a no-op and nothing is written, so
the profiler can be left permanently wired in at negligible cost. The run
is flushed after every top-level item finishes, so a long run is observable
live and survives a crash mid-run.
"""
from __future__ import annotations
import json
import threading
import time
import uuid
from contextlib import contextmanager
from pathlib import Path
# Keep the JSON small: only the most recent runs are retained on disk.
_MAX_RUNS = 30
class _StepTimer:
"""
Base recorder: accumulates ``{step_name: seconds}`` and tracks its own
wall-clock lifetime. ``enabled=False`` turns every method into a no-op.
"""
def __init__(self, enabled: bool = True):
self.steps: dict[str, float] = {}
self._enabled = enabled
self._t0 = time.monotonic()
@contextmanager
def measure(self, name: str):
"""Context manager timing a named step (accumulates on repeat use)."""
if not self._enabled:
yield
return
start = time.monotonic()
try:
yield
finally:
self.steps[name] = round(
self.steps.get(name, 0.0) + (time.monotonic() - start), 4)
def elapsed(self) -> float:
return round(time.monotonic() - self._t0, 4)
class ItemRecorder(_StepTimer):
"""
One node in a run's item tree. Has its own step timings and may contain
nested child items (e.g. a series item containing chapter items).
"""
def __init__(self, run: "RunRecorder", label: str, *,
parent: "ItemRecorder | None" = None,
enabled: bool = True):
super().__init__(enabled)
self._run = run
self._label = label
self._parent = parent
self._children: list[dict] = []
def begin_item(self, label: str) -> "ItemRecorder":
return ItemRecorder(self._run, label, parent=self,
enabled=self._enabled)
def finish(self, *, ok: bool = True) -> None:
if not self._enabled:
return
node = {
"label": self._label,
"totalSeconds": self.elapsed(),
"ok": ok,
"steps": self.steps,
"items": self._children,
}
if self._parent is not None:
self._parent._children.append(node)
else:
# Top-level item: attach to the run and persist progress.
self._run._items.append(node)
self._run.flush()
class RunRecorder:
"""Top-level recorder for one full run."""
def __init__(self, stats: "PerfStats", meta: "dict | None" = None,
enabled: bool = True):
self._stats = stats
self._enabled = enabled
self._meta = meta or {}
self._items: list[dict] = []
self._started = time.time()
self._t0 = time.monotonic()
self._run_id = uuid.uuid4().hex
def begin_item(self, label: str) -> ItemRecorder:
return ItemRecorder(self, label, parent=None, enabled=self._enabled)
def _snapshot(self) -> dict:
step_totals: dict[str, float] = {}
leaf_count = 0
def walk(node: dict) -> None:
nonlocal leaf_count
for step, secs in node["steps"].items():
step_totals[step] = round(step_totals.get(step, 0.0) + secs, 4)
if node["items"]:
for child in node["items"]:
walk(child)
else:
leaf_count += 1
for item in self._items:
walk(item)
return {
"runId": self._run_id,
"startedAt": round(self._started),
"finishedAt": round(time.time()),
"totalSeconds": round(time.monotonic() - self._t0, 4),
"meta": self._meta,
"itemCount": len(self._items),
"leafCount": leaf_count,
"stepTotals": step_totals,
"items": self._items,
}
def flush(self) -> "dict | None":
"""Writes the run's current state to disk (upsert by runId)."""
if not self._enabled:
return None
run = self._snapshot()
self._stats._upsert_run(run)
return run
def finish(self) -> "dict | None":
"""Persists the final run state. Returns the run dict."""
return self.flush()
class PerfStats:
"""
Profiler facade + JSON persistence.
Parameters
----------
path : Destination JSON file. None disables the profiler entirely
(every recorder becomes a no-op and nothing is written).
"""
def __init__(self, path=None):
self._path = Path(path) if path else None
self._lock = threading.Lock()
@property
def enabled(self) -> bool:
return self._path is not None
def begin_run(self, meta: "dict | None" = None) -> RunRecorder:
return RunRecorder(self, meta=meta, enabled=self.enabled)
# ------------------------------------------------------------------
# Read / write
# ------------------------------------------------------------------
def all(self) -> dict:
"""Returns the persisted runs ({"runs": [...]}); newest first."""
if not self._path or not self._path.is_file():
return {"runs": []}
try:
with self._path.open("r", encoding="utf-8") as f:
data = json.load(f)
except (OSError, json.JSONDecodeError):
return {"runs": []}
if not isinstance(data, dict) or not isinstance(data.get("runs"), list):
return {"runs": []}
return data
def _upsert_run(self, run: dict) -> None:
"""
Inserts a new run (newest first) or replaces the existing entry with
the same runId — so incremental flushes during a run update one entry
rather than appending a duplicate after every item.
"""
if not self._path:
return
with self._lock:
runs = self.all()["runs"]
run_id = run.get("runId")
for i, existing in enumerate(runs):
if existing.get("runId") == run_id:
runs[i] = run
break
else:
runs.insert(0, run) # newest first
del runs[_MAX_RUNS:] # cap history
self._path.parent.mkdir(parents=True, exist_ok=True)
tmp = self._path.with_suffix(self._path.suffix + ".tmp")
with tmp.open("w", encoding="utf-8") as f:
json.dump({"runs": runs}, f, ensure_ascii=False, indent=2)
tmp.replace(self._path)
+160
View File
@@ -0,0 +1,160 @@
"""
perf_web_page.py
================
Shared HTML page for browsing PerfStats output, used by both container web
UIs. ``render_perf_page(name, tabs)`` returns a standalone page that loads
``/api/perf/<name>`` and renders each run's step totals plus the nested item
tree (series -> chapter, or one person, …) and the run trigger from meta.
``tabs`` is a list of ``(label, name)`` pairs for cross-links between the
available perf datasets in that container.
"""
from __future__ import annotations
_PERF_PAGE = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>__PERF_NAME__ performance</title>
<style>
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
h2 { font-size: 1.05rem; margin: 1.4rem 0 .5rem; color:#cbd5e1; }
a { color:#60a5fa; text-decoration:none; }
a:hover { text-decoration:underline; }
.tabs { margin-bottom:1rem; }
.tabs a { margin-right:1rem; }
.tabs a.active { font-weight:bold; text-decoration:underline; }
.bar { display:flex; gap:.6rem; align-items:center; margin-bottom:1rem; flex-wrap:wrap; }
select, button { padding:.35rem .6rem; background:#222; color:#eee; border:1px solid #555; }
.summary { color:#9ca3af; margin:.3rem 0 1rem; }
table { border-collapse: collapse; width: 100%; margin-bottom:.5rem; }
th, td { border: 1px solid #333; padding: .35rem .6rem; text-align: left; }
th { background:#1d1d1d; }
td.num { text-align:right; font-variant-numeric: tabular-nums; white-space:nowrap; }
.barcell { position:relative; }
.barfill { position:absolute; left:0; top:0; bottom:0; background:#2563eb33; z-index:0; }
.barcell span { position:relative; z-index:1; }
details { margin:.2rem 0 .2rem 1rem; }
summary { cursor:pointer; padding:.2rem 0; }
.chip { color:#9ca3af; font-size:.85rem; }
.err { color:#f87171; }
</style>
</head>
<body>
<h1>Performance: __PERF_NAME__ <a href="/" style="font-size:.9rem;">&#9666; back</a></h1>
<div class="tabs">__PERF_TABS__</div>
<div class="bar">
<label>Run: <select id="runSelect"></select></label>
<button id="reload">Reload</button>
<span class="summary" id="summary"></span>
</div>
<div id="content"></div>
<script>
const PERF_NAME = "__PERF_NAME__";
let runs = [];
for (const a of document.querySelectorAll(".tabs a")) {
if (a.getAttribute("href") === "/perf/" + PERF_NAME) a.classList.add("active");
}
function fmtSecs(s) { return (s || 0).toFixed(2) + "s"; }
function fmtTime(unix) { return unix ? new Date(unix * 1000).toLocaleString() : ""; }
function esc(s) {
return String(s).replace(/[&<>]/g, c => ({"&":"&amp;","<":"&lt;",">":"&gt;"}[c]));
}
function stepTable(totals, grandTotal) {
const entries = Object.entries(totals || {}).sort((a, b) => b[1] - a[1]);
if (!entries.length) return "<p class=chip>(no steps recorded)</p>";
const max = entries[0][1] || 1;
let rows = "";
for (const [name, secs] of entries) {
const pct = grandTotal ? (secs / grandTotal * 100) : 0;
const w = (secs / max * 100);
rows += "<tr><td>" + esc(name) + "</td>"
+ "<td class='num'>" + fmtSecs(secs) + "</td>"
+ "<td class='num'>" + pct.toFixed(1) + "%</td>"
+ "<td class='barcell'><div class='barfill' style='width:" + w + "%'></div>"
+ "<span>&nbsp;</span></td></tr>";
}
return "<table><thead><tr><th>Step</th><th class=num>Total</th>"
+ "<th class=num>% of run</th><th>&nbsp;</th></tr></thead><tbody>"
+ rows + "</tbody></table>";
}
// Renders one item node (and its children) as a nested <details> block.
function itemNode(it) {
const steps = Object.entries(it.steps || {}).sort((a, b) => b[1] - a[1])
.map(([n, v]) => esc(n) + " " + fmtSecs(v)).join(", ") || "";
const head = "<summary><b>" + esc(it.label) + "</b>"
+ (it.ok === false ? " <span class=err>(failed)</span>" : "")
+ " <span class=chip>" + fmtSecs(it.totalSeconds) + " · " + steps + "</span></summary>";
const kids = (it.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
const body = kids.map(itemNode).join("");
return "<details>" + head + body + "</details>";
}
function renderRun(run) {
const c = document.getElementById("content");
if (!run) { c.innerHTML = "<p class=chip>No runs recorded yet.</p>"; return; }
const trigger = (run.meta && run.meta.trigger) ? " · trigger: " + run.meta.trigger : "";
document.getElementById("summary").textContent =
fmtTime(run.startedAt) + " · " + fmtSecs(run.totalSeconds) + " · "
+ run.itemCount + " items · " + run.leafCount + " leaves" + trigger;
let html = "<h2>Steps (summed over all items)</h2>"
+ stepTable(run.stepTotals, run.totalSeconds)
+ "<h2>Detail</h2>";
const items = (run.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
html += items.map(itemNode).join("") || "<p class=chip>(no items)</p>";
c.innerHTML = html;
}
function renderSelect() {
const sel = document.getElementById("runSelect");
sel.innerHTML = "";
runs.forEach((r, i) => {
const o = document.createElement("option");
o.value = i;
const trig = (r.meta && r.meta.trigger) ? " " + r.meta.trigger : "";
o.textContent = fmtTime(r.startedAt) + " (" + fmtSecs(r.totalSeconds) + ")" + trig;
sel.appendChild(o);
});
}
async function load() {
const r = await fetch("/api/perf/" + PERF_NAME);
const data = await r.json();
runs = data.runs || [];
renderSelect();
renderRun(runs[0]);
}
document.getElementById("runSelect").addEventListener("change", e => {
renderRun(runs[e.target.value]);
});
document.getElementById("reload").addEventListener("click", load);
load();
</script>
</body>
</html>
"""
def render_perf_page(name: str, tabs: "list[tuple[str, str]]") -> str:
"""
Returns the perf page HTML for dataset ``name``.
tabs : list of (label, dataset_name) for the cross-link bar.
"""
tab_html = " ".join(
f'<a href="/perf/{n}">{label}</a>' for label, n in tabs)
return (_PERF_PAGE
.replace("__PERF_TABS__", tab_html)
.replace("__PERF_NAME__", name))
+27
View File
@@ -70,3 +70,30 @@ def person_name_with_id(name: str, *,
if al_id:
return f"{name} (AL {al_id})"
return name
# Matches the suffix produced by person_name_with_id at the end of a name.
_TRACKER_ID_RE = re.compile(r"\s*\((MAL|AL)\s+(\d+)\)\s*$", re.IGNORECASE)
def parse_person_tracker_id(name: str) -> "tuple[str, int] | None":
"""
Inverse of person_name_with_id: extracts the tracker id from a
disambiguated Kavita person name.
"Rem (MAL 118737)" -> ("mal", 118737)
"Subaru (AL 88311)" -> ("al", 88311)
"Kotoyama" -> None (no id suffix — e.g. an author/staff record)
Returns ("mal" | "al", id) or None.
"""
if not name:
return None
m = _TRACKER_ID_RE.search(name)
if not m:
return None
source = "mal" if m.group(1).upper() == "MAL" else "al"
try:
return source, int(m.group(2))
except ValueError:
return None
+14 -9
View File
@@ -192,14 +192,9 @@ class LightNovelOrchestrator:
except Exception as exc:
return {"ok": False, "error": f"series update failed: {exc}"}
# Persons
try:
person_report = self._person_updater.update_for_manga(
built.get("malId"),
al_manga_id=built.get("anilistId"),
)
except Exception as exc:
person_report = {"error": str(exc)}
# Person sync no longer runs per series — it has its own global,
# id-based updater (sync_persons / KavitaPersonUpdater.update_all_persons)
# on a separate cron schedule.
# Relationships + collection
try:
@@ -221,10 +216,20 @@ class LightNovelOrchestrator:
"title": cached_title,
"mangabakaId": built.get("mangabakaId"),
"series": series_report,
"persons": person_report,
"relationships": relation_report,
}
# ------------------------------------------------------------------
# Person sync (global, id-based — independent of series updates)
# ------------------------------------------------------------------
def sync_persons(self, *, trigger: str = "ln", perf=None) -> dict:
"""
Runs the global, id-based person updater over every Kavita person.
Covers both manga and light-novel libraries in one pass.
"""
return self._person_updater.update_all_persons(
trigger=trigger, perf=perf)
def update_all(self, library_ids: "list[int] | None") -> dict:
"""Updates every cached series in the given libraries."""
if library_ids is None:
+51
View File
@@ -37,6 +37,10 @@ from flask import Flask, jsonify, request, Response
from MatchesCache import MatchesCache
from LightNovelMetadataBuilder import pick_thumbnail_url
from PerfWebPage import render_perf_page
# Only the person dataset exists in the LN container.
_PERF_TABS = [("persons", "person")]
def _int_list(values) -> list[int]:
@@ -97,7 +101,9 @@ _INDEX_HTML = r"""<!doctype html>
<button id="reload">Reload</button>
<button id="build">Match all in libraries</button>
<button id="updateAll" class="success">Update all in libraries</button>
<button id="syncPersons">Sync persons</button>
<button id="batchSave" class="primary">Save dirty (0)</button>
<a href="/perf/person" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
<span class="status" id="status"></span>
</div>
@@ -497,12 +503,25 @@ async function startUpdateAll() {
}
}
async function startSyncPersons() {
if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return;
setStatus("Person sync started");
try {
const r = await fetch("/api/persons/sync", { method: "POST" });
if (!r.ok) throw new Error(await r.text());
startPolling();
} catch (err) {
setStatus("Person sync failed: " + err.message);
}
}
document.getElementById("filter").addEventListener("input", applyFilter);
document.getElementById("libraries").addEventListener("change", applyFilter);
document.getElementById("reload").addEventListener("click", load);
document.getElementById("batchSave").addEventListener("click", batchSave);
document.getElementById("build").addEventListener("click", startBuild);
document.getElementById("updateAll").addEventListener("click", startUpdateAll);
document.getElementById("syncPersons").addEventListener("click", startSyncPersons);
for (const th of document.querySelectorAll("th.sortable")) {
th.addEventListener("click", () => {
const col = th.dataset.col;
@@ -581,11 +600,13 @@ class MatchesWebApp:
def __init__(self, cache: MatchesCache, *,
orchestrator=None,
default_library_ids: "list[int] | None" = None,
person_perf=None,
host: str = "0.0.0.0",
port: int = 8080):
self._cache = cache
self._orchestrator = orchestrator
self._defaults = list(default_library_ids or [])
self._person_perf = person_perf
self._host = host
self._port = port
self._job = _JobState()
@@ -757,8 +778,38 @@ class MatchesWebApp:
return Response("a job is already running", status=409)
return jsonify({"started": label})
@app.post("/api/persons/sync")
def api_persons_sync():
if self._orchestrator is None:
return Response("no orchestrator configured", status=503)
def task(job: _JobState):
report = self._orchestrator.sync_persons(
trigger="ln", perf=self._person_perf)
job.append(f"updated={report['updated']} "
f"skipped={report['skipped']} "
f"not_found={report['not_found']} "
f"conflicts={report['conflicts']}")
for err in report.get("errors", []):
job.append(f" {err}")
if not self._job.start("person sync", task):
return Response("a job is already running", status=409)
return jsonify({"started": "person sync"})
@app.get("/api/status")
def api_status():
snap = self._job.snapshot()
snap["defaults"] = self._defaults
return jsonify(snap)
@app.get("/perf")
@app.get("/perf/<name>")
def perf_page(name: str = "person") -> Response:
return Response(render_perf_page(name, _PERF_TABS),
mimetype="text/html; charset=utf-8")
@app.get("/api/perf/<name>")
def api_perf(name: str):
stats = self._person_perf if name == "person" else None
return jsonify(stats.all() if stats is not None else {"runs": []})
+55 -91
View File
@@ -45,7 +45,6 @@ from __future__ import annotations
import io
import sys
import threading
import xml.etree.ElementTree as ET
import zipfile
from datetime import datetime
@@ -67,7 +66,7 @@ from MatchesCache import MatchesCache
from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
_sanitize_dirname, _normalise_volume_value)
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CronSchedule import CronSchedule
from PerfStats import PerfStats
from CoverCache import CoverCache, _IMAGE_EXTS
try:
@@ -136,12 +135,12 @@ class KavitaVolumeCoverUpdater:
request_timeout : HTTP timeout in seconds.
log_path : File that receives one line per updated chapter.
Default: <kavita_path>/volume_updater.log
schedule : Cron expression (5 fields) defining when scans run,
e.g. "0 19 * * 1,4" = 19:00 every Monday and
Thursday. Evaluated in local time — set the TZ env
var inside Docker. Default: "0 19 * * 1,4".
cover_cache_dir : Directory for the persistent cover cache. None ->
temporary cache, deleted at process exit.
perf_stats : Optional PerfStats instance for per-step timing.
Scheduling lives outside this class (see CronRunner); call update_all()
on whatever cadence you like.
"""
def __init__(self,
@@ -152,8 +151,8 @@ class KavitaVolumeCoverUpdater:
request_timeout: int = 30,
api_base_url: str = "https://api.mangabaka.dev/v1",
log_path=None,
schedule: str = "0 19 * * 1,4",
cover_cache_dir=None):
cover_cache_dir=None,
perf_stats: "PerfStats | None" = None):
self._dst = Path(kavita_path)
self._matches_cache = matches_cache
self._language = language
@@ -161,7 +160,7 @@ class KavitaVolumeCoverUpdater:
self._api_base_url = api_base_url.rstrip("/")
self._log_path = (Path(log_path) if log_path
else self._dst / "volume_updater.log")
self._cron = CronSchedule(schedule)
self._perf = perf_stats or PerfStats(None)
session = requests.Session()
session.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0")
@@ -178,51 +177,6 @@ class KavitaVolumeCoverUpdater:
self._cover_cache = CoverCache(
cover_cache_dir, session=session, request_timeout=request_timeout)
self._stop = threading.Event()
self._thread: "threading.Thread | None" = None
# ------------------------------------------------------------------
# Cron API (mirrors SuwayomiFolderWatcher)
# ------------------------------------------------------------------
def start(self) -> None:
"""Starts the periodic scan thread. Non-blocking."""
if self._thread is not None and self._thread.is_alive():
return
self._stop.clear()
self._thread = threading.Thread(
target=self._loop, name="KavitaVolumeCoverUpdater", daemon=True)
self._thread.start()
print(f"[{_now()}] [updater] scanning {self._dst} "
f"on cron '{self._cron.expression}'", flush=True)
def stop(self) -> None:
"""Stops the scan thread (current scan finishes its series first)."""
self._stop.set()
if self._thread is not None:
self._thread.join(timeout=10)
def wait(self) -> None:
"""Blocks the calling thread until stop() is invoked."""
self._stop.wait()
def _loop(self) -> None:
while not self._stop.is_set():
next_run = self._cron.next_after(datetime.now())
wait = max(0.0, (next_run - datetime.now()).total_seconds())
print(f"[{_now()}] [updater] next scheduled scan: "
f"{next_run.isoformat(timespec='minutes')}", flush=True)
if self._stop.wait(wait):
break
try:
summary = self.update_all()
print(f"[{_now()}] [updater] scan done: "
f"{summary['series_updated']} series / "
f"{summary['chapters_updated']} chapters updated",
flush=True)
except Exception as exc:
print(f"[{_now()}] [updater] scan ERROR: {exc}", flush=True)
# ------------------------------------------------------------------
# Public scan API
# ------------------------------------------------------------------
@@ -243,23 +197,25 @@ class KavitaVolumeCoverUpdater:
self._vol_resolver.clear_cache()
self._works_resolver.clear_cache()
for series_dir in sorted(self._dst.iterdir()):
if self._stop.is_set():
break
if not series_dir.is_dir():
continue
summary["series_scanned"] += 1
try:
updated = self.update_series(series_dir)
except Exception as exc:
print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
continue
if updated:
summary["series_updated"] += 1
summary["chapters_updated"] += updated
run = self._perf.begin_run()
try:
for series_dir in sorted(self._dst.iterdir()):
if not series_dir.is_dir():
continue
summary["series_scanned"] += 1
try:
updated = self.update_series(series_dir, run)
except Exception as exc:
print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
continue
if updated:
summary["series_updated"] += 1
summary["chapters_updated"] += updated
finally:
run.finish()
return summary
def update_series(self, series_dir: Path) -> int:
def update_series(self, series_dir: Path, run=None) -> int:
"""
Updates one series folder. Returns the number of updated chapters.
@@ -300,20 +256,24 @@ class KavitaVolumeCoverUpdater:
md = builder.fetch_metadata()
series_id = str(md.get("id") or "")
series_rec = (run or self._perf.begin_run()).begin_item(series_dir.name)
# Resolve volumes for all null-volume chapters first (API only).
updates: dict[str, dict] = {} # num -> {"volume": str, "cover": tuple|None}
for num in sorted(missing, key=_chapter_sort_value):
builder.chapter = num
try:
volume = builder._determine_volume()
except Exception:
volume = None
if not volume:
continue
updates[num] = {"volume": volume,
"cover": self._fetch_cover(series_id, volume)}
with series_rec.measure("resolve_volumes"):
for num in sorted(missing, key=_chapter_sort_value):
builder.chapter = num
try:
volume = builder._determine_volume()
except Exception:
volume = None
if not volume:
continue
updates[num] = {"volume": volume,
"cover": self._fetch_cover(series_id, volume)}
if not updates:
series_rec.finish(ok=True)
return 0
first = min(chapters, key=_chapter_sort_value)
@@ -328,10 +288,13 @@ class KavitaVolumeCoverUpdater:
continue
# The first chapter gets a full metadata rebuild (Kavita reads
# series metadata from it); other chapters only a volume edit.
ok, cover_swapped = self._apply_update(
cbz, builder, num,
volume=up["volume"], cover=up["cover"],
full_rebuild=(num == first))
chap_rec = series_rec.begin_item(num)
with chap_rec.measure("archive_rewrite"):
ok, cover_swapped = self._apply_update(
cbz, builder, num,
volume=up["volume"], cover=up["cover"],
full_rebuild=(num == first))
chap_rec.finish(ok=ok)
if not ok:
continue
entry["volume"] = _normalise_volume_value(up["volume"])
@@ -346,15 +309,19 @@ class KavitaVolumeCoverUpdater:
first_entry = chapters.get(first) or {}
cbz = series_dir / (first_entry.get("archiveName") or "")
if first_entry.get("archiveName") and cbz.is_file():
ok, _ = self._apply_update(
cbz, builder, first,
volume=None, cover=None, full_rebuild=True)
chap_rec = series_rec.begin_item(f"{first} (refresh)")
with chap_rec.measure("archive_rewrite"):
ok, _ = self._apply_update(
cbz, builder, first,
volume=None, cover=None, full_rebuild=True)
chap_rec.finish(ok=ok)
if ok:
self._log(f"{series_dir.name} | chapter {first} | "
f"first-chapter metadata refreshed | {cbz.name}")
if updated:
_save_chapter_index(series_dir, index)
series_rec.finish(ok=True)
return updated
# ------------------------------------------------------------------
@@ -545,10 +512,7 @@ if __name__ == "__main__":
matches_cache=MatchesCache(MATCHES_PATH),
)
# One-shot scan (no cron thread):
# One-shot scan. Scheduling is handled externally via CronRunner
# (see main_manga.py).
summary = updater.update_all()
print(f"\n[updater] {summary}")
# Or run on the cron schedule (default: 19:00 every Mon + Thu):
# updater.start()
# updater.wait()
+56 -137
View File
@@ -30,6 +30,10 @@ from flask import Flask, jsonify, request, Response
from MatchesCache import MatchesCache
from ComicInfoBuilder import _pick_thumbnail_url
from PerfWebPage import render_perf_page
# Cross-link tabs shown on every perf page in the manga container.
_PERF_TABS = [("move", "move"), ("volume/cover", "volume"), ("persons", "person")]
_INDEX_HTML = """<!doctype html>
@@ -71,7 +75,8 @@ _INDEX_HTML = """<!doctype html>
<button id="batchSave" class="primary">Save dirty (0)</button>
<button id="build">Build all (rescan)</button>
<button id="move">Start move</button>
<a href="/perf" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
<button id="syncPersons">Sync persons</button>
<a href="/perf/move" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
<span class="status" id="status"></span>
</div>
@@ -342,6 +347,23 @@ document.getElementById("move").addEventListener("click", async () => {
btn.disabled = false;
}
});
document.getElementById("syncPersons").addEventListener("click", async () => {
if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return;
const btn = document.getElementById("syncPersons");
btn.disabled = true;
setStatus("Syncing persons… (running on the server)");
try {
const r = await fetch("/api/persons/sync", { method: "POST" });
if (!r.ok) throw new Error(await r.text());
const d = await r.json();
setStatus("Persons: " + d.updated + " updated, " + d.skipped + " skipped, "
+ d.not_found + " not found, " + d.conflicts + " conflicts");
} catch (err) {
setStatus("Person sync failed: " + err.message);
} finally {
btn.disabled = false;
}
});
for (const th of document.querySelectorAll("th.sortable")) {
th.addEventListener("click", () => {
const col = th.dataset.col;
@@ -358,133 +380,6 @@ load();
"""
_PERF_HTML = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Move performance</title>
<style>
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
h2 { font-size: 1.05rem; margin: 1.4rem 0 .5rem; color:#cbd5e1; }
a { color:#60a5fa; text-decoration:none; }
a:hover { text-decoration:underline; }
.bar { display:flex; gap:.6rem; align-items:center; margin-bottom:1rem; flex-wrap:wrap; }
select, button { padding:.35rem .6rem; background:#222; color:#eee; border:1px solid #555; }
.summary { color:#9ca3af; margin:.3rem 0 1rem; }
table { border-collapse: collapse; width: 100%; margin-bottom:.5rem; }
th, td { border: 1px solid #333; padding: .35rem .6rem; text-align: left; }
th { background:#1d1d1d; }
td.num { text-align:right; font-variant-numeric: tabular-nums; white-space:nowrap; }
.barcell { position:relative; }
.barfill { position:absolute; left:0; top:0; bottom:0; background:#2563eb33; z-index:0; }
.barcell span { position:relative; z-index:1; }
details { margin:.3rem 0; }
summary { cursor:pointer; padding:.25rem 0; }
.chip { color:#9ca3af; font-size:.85rem; }
.err { color:#f87171; }
</style>
</head>
<body>
<h1>Move performance <a href="/" style="font-size:.9rem;">◂ back to matches</a></h1>
<div class="bar">
<label>Run: <select id="runSelect"></select></label>
<button id="reload">Reload</button>
<span class="summary" id="summary"></span>
</div>
<div id="content"></div>
<script>
let runs = [];
function fmtSecs(s) { return (s || 0).toFixed(2) + "s"; }
function fmtTime(unix) { return unix ? new Date(unix * 1000).toLocaleString() : ""; }
function stepTable(totals, grandTotal) {
const entries = Object.entries(totals || {}).sort((a, b) => b[1] - a[1]);
if (!entries.length) return "<p class=chip>(no steps recorded)</p>";
const max = entries[0][1] || 1;
let rows = "";
for (const [name, secs] of entries) {
const pct = grandTotal ? (secs / grandTotal * 100) : 0;
const w = (secs / max * 100);
rows += "<tr><td>" + name + "</td>"
+ "<td class='num'>" + fmtSecs(secs) + "</td>"
+ "<td class='num'>" + pct.toFixed(1) + "%</td>"
+ "<td class='barcell'><div class='barfill' style='width:" + w + "%'></div>"
+ "<span>&nbsp;</span></td></tr>";
}
return "<table><thead><tr><th>Step</th><th class=num>Total</th>"
+ "<th class=num>% of run</th><th>&nbsp;</th></tr></thead><tbody>"
+ rows + "</tbody></table>";
}
function seriesBlock(s) {
let chapters = "";
// Chapters sorted slowest first to surface outliers.
const chs = (s.chapters || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
for (const c of chs) {
const steps = Object.entries(c.steps || {}).sort((a, b) => b[1] - a[1])
.map(([n, v]) => n + " " + fmtSecs(v)).join(", ");
chapters += "<tr><td>" + c.chapter + (c.ok ? "" : " <span class=err>(failed)</span>") + "</td>"
+ "<td class='num'>" + fmtSecs(c.totalSeconds) + "</td>"
+ "<td>" + steps + "</td></tr>";
}
const seriesSteps = Object.entries(s.steps || {})
.map(([n, v]) => n + " " + fmtSecs(v)).join(", ") || "";
return "<details><summary><b>" + s.title + "</b> "
+ "<span class=chip>" + fmtSecs(s.totalSeconds) + " · "
+ (s.chapterCount || 0) + " chapters · " + seriesSteps + "</span></summary>"
+ "<table><thead><tr><th>Chapter</th><th class=num>Total</th>"
+ "<th>Steps</th></tr></thead><tbody>" + chapters + "</tbody></table></details>";
}
function renderRun(run) {
const c = document.getElementById("content");
if (!run) { c.innerHTML = "<p class=chip>No runs recorded yet.</p>"; return; }
document.getElementById("summary").textContent =
fmtTime(run.startedAt) + " · " + fmtSecs(run.totalSeconds) + " · "
+ run.seriesCount + " series · " + run.chapterCount + " chapters";
let html = "<h2>Chapter steps (summed over all chapters)</h2>"
+ stepTable(run.stepTotals, run.totalSeconds)
+ "<h2>Series steps (metadata / person sync)</h2>"
+ stepTable(run.seriesStepTotals, run.totalSeconds)
+ "<h2>Series detail</h2>";
const series = (run.series || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
html += series.map(seriesBlock).join("");
c.innerHTML = html;
}
function renderSelect() {
const sel = document.getElementById("runSelect");
sel.innerHTML = "";
runs.forEach((r, i) => {
const o = document.createElement("option");
o.value = i;
o.textContent = fmtTime(r.startedAt) + " (" + fmtSecs(r.totalSeconds) + ")";
sel.appendChild(o);
});
}
async function load() {
const r = await fetch("/api/perf");
const data = await r.json();
runs = data.runs || [];
renderSelect();
renderRun(runs[0]);
}
document.getElementById("runSelect").addEventListener("change", e => {
renderRun(runs[e.target.value]);
});
document.getElementById("reload").addEventListener("click", load);
load();
</script>
</body>
</html>
"""
class MatchesWebApp:
@@ -497,16 +392,22 @@ class MatchesWebApp:
def __init__(self, cache: MatchesCache, *,
mover=None,
person_updater=None,
person_trigger: str = "web",
perf_stats=None,
host: str = "0.0.0.0",
port: int = 8080):
self._cache = cache
self._mover = mover
self._perf = perf_stats
self._person_updater = person_updater
self._person_trigger = person_trigger
# perf_stats: dict {name -> PerfStats}, e.g. {"move", "volume", "person"}.
self._perf = perf_stats or {}
self._host = host
self._port = port
self._build_lock = threading.Lock()
self._move_lock = threading.Lock()
self._person_lock = threading.Lock()
self._app = Flask(__name__)
self._thread: "threading.Thread | None" = None
self._register_routes()
@@ -631,12 +532,30 @@ class MatchesWebApp:
self._move_lock.release()
return jsonify({"results": results})
@app.get("/perf")
def perf_page() -> Response:
return Response(_PERF_HTML, mimetype="text/html; charset=utf-8")
@app.post("/api/persons/sync")
def api_persons_sync():
if self._person_updater is None:
return Response("no person updater configured", status=503)
if not self._person_lock.acquire(blocking=False):
return Response("person sync already running", status=409)
try:
report = self._person_updater.update_all_persons(
trigger=self._person_trigger,
perf=self._perf.get("person"))
except Exception as exc:
return Response(f"person sync failed: {exc}", status=500)
finally:
self._person_lock.release()
return jsonify(report)
@app.get("/api/perf")
def api_perf():
if self._perf is None:
return jsonify({"runs": []})
return jsonify(self._perf.all())
# Perf pages: /perf (move) + /perf/<name> for the updaters.
@app.get("/perf")
@app.get("/perf/<name>")
def perf_page(name: str = "move") -> Response:
return Response(render_perf_page(name, _PERF_TABS),
mimetype="text/html; charset=utf-8")
@app.get("/api/perf/<name>")
def api_perf(name: str):
stats = self._perf.get(name)
return jsonify(stats.all() if stats is not None else {"runs": []})
-267
View File
@@ -1,267 +0,0 @@
"""
perf_stats.py
=============
Lightweight performance profiler for the Suwayomi -> Kavita move pipeline.
It records, per move run, how long each step of every chapter takes plus
per-series and per-run totals, so a slowdown can be traced to the step
responsible (cover download, image-dimension probing, CBZ packing, …).
Data model (one entry per run, newest first)::
{
"runs": [
{
"startedAt": 1700000000, # unix seconds
"finishedAt": 1700000123,
"totalSeconds": 123.4, # wall clock of the whole run
"seriesCount": 2,
"chapterCount": 31,
"stepTotals": { # summed over ALL chapters
"cover": 41.2, "image_dimensions": 55.8, "pack_cbz": 18.1, ...
},
"seriesStepTotals": { # summed over ALL series
"fetch_metadata": 2.4, "person_sync": 9.7
},
"series": [
{
"title": "Call of the Night",
"totalSeconds": 60.2,
"chapterCount": 20,
"steps": {"fetch_metadata": 1.2, "person_sync": 3.4},
"chapters": [
{"chapter": "1", "ok": true, "totalSeconds": 11.5,
"steps": {"cover": 1.8, "image_dimensions": 4.2, ...}}
]
}
]
}
]
}
Usage from the mover::
perf = PerfStats(path) # path=None -> disabled (no-op)
run = perf.begin_run()
series = run.begin_series("Title")
with series.measure("fetch_metadata"):
...
chap = series.begin_chapter("1")
with chap.measure("pack_cbz"):
...
chap.finish(ok=True)
series.finish()
run.finish() # persists the run to disk
When ``path`` is None every recorder is a no-op and nothing is written,
so the profiler can be left permanently wired in with negligible cost.
"""
from __future__ import annotations
import json
import threading
import time
import uuid
from contextlib import contextmanager
from pathlib import Path
# Keep the JSON small: only the most recent runs are retained on disk.
_MAX_RUNS = 30
class _StepTimer:
"""
Base recorder: accumulates ``{step_name: seconds}`` and tracks its own
wall-clock lifetime. ``enabled=False`` turns every method into a no-op.
"""
def __init__(self, enabled: bool = True):
self.steps: dict[str, float] = {}
self._enabled = enabled
self._t0 = time.monotonic()
@contextmanager
def measure(self, name: str):
"""Context manager timing a named step (accumulates on repeat use)."""
if not self._enabled:
yield
return
start = time.monotonic()
try:
yield
finally:
self.steps[name] = round(
self.steps.get(name, 0.0) + (time.monotonic() - start), 4)
def elapsed(self) -> float:
return round(time.monotonic() - self._t0, 4)
class ChapterRecorder(_StepTimer):
"""Per-chapter step timer."""
def __init__(self, series: "SeriesRecorder", chapter: str,
enabled: bool = True):
super().__init__(enabled)
self._series = series
self._chapter = chapter
self._ok = True
def finish(self, *, ok: bool = True) -> None:
self._ok = ok
if not self._enabled:
return
self._series._chapters.append({
"chapter": self._chapter,
"ok": ok,
"totalSeconds": self.elapsed(),
"steps": self.steps,
})
class SeriesRecorder(_StepTimer):
"""Per-series step timer; also collects its chapters."""
def __init__(self, run: "RunRecorder", title: str, enabled: bool = True):
super().__init__(enabled)
self._run = run
self._title = title
self._chapters: list[dict] = []
def begin_chapter(self, chapter: str) -> ChapterRecorder:
return ChapterRecorder(self, chapter, enabled=self._enabled)
def finish(self) -> None:
if not self._enabled:
return
self._run._series.append({
"title": self._title,
"totalSeconds": self.elapsed(),
"chapterCount": len(self._chapters),
"steps": self.steps,
"chapters": self._chapters,
})
# Persist the run's progress after every series so a long run is
# observable live and survives a crash mid-run.
self._run.flush()
class RunRecorder:
"""Top-level recorder for one full move run."""
def __init__(self, stats: "PerfStats", enabled: bool = True):
self._stats = stats
self._enabled = enabled
self._series: list[dict] = []
self._started = time.time()
self._t0 = time.monotonic()
# Stable identity so incremental flushes update the same run entry
# instead of inserting a duplicate on every series.
self._run_id = uuid.uuid4().hex
def begin_series(self, title: str) -> SeriesRecorder:
return SeriesRecorder(self, title, enabled=self._enabled)
def _snapshot(self) -> dict:
"""Aggregates the run's current state into a serialisable dict."""
step_totals: dict[str, float] = {}
series_step_totals: dict[str, float] = {}
chapter_count = 0
for s in self._series:
for step, secs in s["steps"].items():
series_step_totals[step] = round(
series_step_totals.get(step, 0.0) + secs, 4)
for ch in s["chapters"]:
chapter_count += 1
for step, secs in ch["steps"].items():
step_totals[step] = round(
step_totals.get(step, 0.0) + secs, 4)
return {
"runId": self._run_id,
"startedAt": round(self._started),
"finishedAt": round(time.time()),
"totalSeconds": round(time.monotonic() - self._t0, 4),
"seriesCount": len(self._series),
"chapterCount": chapter_count,
"stepTotals": step_totals,
"seriesStepTotals": series_step_totals,
"series": self._series,
}
def flush(self) -> dict | None:
"""Writes the run's current state to disk (upsert by runId)."""
if not self._enabled:
return None
run = self._snapshot()
self._stats._upsert_run(run)
return run
def finish(self) -> dict | None:
"""Persists the final run state. Returns the run dict."""
return self.flush()
class PerfStats:
"""
Profiler facade + JSON persistence.
Parameters
----------
path : Destination JSON file. None disables the profiler entirely
(every recorder becomes a no-op and nothing is written).
"""
def __init__(self, path=None):
self._path = Path(path) if path else None
self._lock = threading.Lock()
@property
def enabled(self) -> bool:
return self._path is not None
def begin_run(self) -> RunRecorder:
return RunRecorder(self, enabled=self.enabled)
# ------------------------------------------------------------------
# Read / write
# ------------------------------------------------------------------
def all(self) -> dict:
"""Returns the persisted runs ({"runs": [...]}); newest first."""
if not self._path or not self._path.is_file():
return {"runs": []}
try:
with self._path.open("r", encoding="utf-8") as f:
data = json.load(f)
except (OSError, json.JSONDecodeError):
return {"runs": []}
if not isinstance(data, dict) or not isinstance(data.get("runs"), list):
return {"runs": []}
return data
def _upsert_run(self, run: dict) -> None:
"""
Inserts a new run (newest first) or replaces the existing entry with
the same runId — so incremental flushes during a run update one entry
rather than appending a duplicate after every series.
"""
if not self._path:
return
with self._lock:
runs = self.all()["runs"]
run_id = run.get("runId")
for i, existing in enumerate(runs):
if existing.get("runId") == run_id:
runs[i] = run
break
else:
runs.insert(0, run) # newest first
del runs[_MAX_RUNS:] # cap history
self._path.parent.mkdir(parents=True, exist_ok=True)
tmp = self._path.with_suffix(self._path.suffix + ".tmp")
with tmp.open("w", encoding="utf-8") as f:
json.dump({"runs": runs}, f, ensure_ascii=False, indent=2)
tmp.replace(self._path)
+11 -47
View File
@@ -64,8 +64,6 @@ from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver
from AniListResolver import AniListResolver
from KavitaClient import KavitaClient
from KavitaPersonUpdater import KavitaPersonUpdater
from MatchesCache import MatchesCache
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CoverCache import CoverCache, _IMAGE_EXTS
@@ -306,9 +304,6 @@ class SuwayomiMover:
Expected layout: <root>/<Source>/<Title>/<Chapter N>/
kavita_path : Root of the Kavita library.
Series sub-directories are created automatically.
kavita_base_url : Kavita server URL — required only for person sync,
e.g. "http://192.168.2.2:5000".
kavita_api_key : Kavita API key — required only for person sync.
language : ComicInfo LanguageISO and SeriesSort language ("en").
request_timeout : HTTP timeout in seconds for all API / image requests.
delete_source : Remove the source chapter folder after successful pack.
@@ -316,14 +311,16 @@ class SuwayomiMover:
temporary cache, deleted at process exit.
perf_stats : Optional PerfStats instance for per-step timing. None
(default) disables profiling.
Note: Kavita person sync is no longer done here — it runs as a separate,
global, id-based updater on its own cron schedule (KavitaPersonUpdater).
The mover only touches MangaBaka / MangaDex / MAL / AniList.
"""
def __init__(self,
suwayomi_path,
kavita_path,
*,
kavita_base_url: "str | None" = None,
kavita_api_key: "str | None" = None,
language: str = "en",
request_timeout: int = 30,
delete_source: bool = True,
@@ -357,16 +354,6 @@ class SuwayomiMover:
self._cover_cache = CoverCache(
cover_cache_dir, session=session, request_timeout=request_timeout)
self._person_updater: "KavitaPersonUpdater | None" = None
if kavita_base_url and kavita_api_key:
kavita_client = KavitaClient(
kavita_base_url, kavita_api_key,
request_timeout=request_timeout)
self._person_updater = KavitaPersonUpdater(
kavita_client,
mal_resolver=self._mal,
al_resolver=self._al)
# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------
@@ -502,7 +489,7 @@ class SuwayomiMover:
# ------------------------------------------------------------------
def _process_series_dir(self, manga_dir: Path, run=None) -> dict:
manga_title = manga_dir.name
series_rec = (run or self._perf.begin_run()).begin_series(manga_title)
series_rec = (run or self._perf.begin_run()).begin_item(manga_title)
chapter_dirs = sorted(
(d for d in manga_dir.iterdir() if d.is_dir()),
@@ -597,27 +584,11 @@ class SuwayomiMover:
}
_save_chapter_index(dest_series, chapter_index)
# Sync Kavita persons once per series.
# Both MAL and AniList IDs come from MangaBaka's source map;
# AniList is used as fallback when MAL returns no characters/staff.
person_result: "dict | None" = None
if self._person_updater:
mal_id = ((ComicInfoBuilder._mal_id_from_source(md) if md else None)
or self._mal.find_mal_id(builder_title))
al_id = ComicInfoBuilder._al_id_from_source(md) if md else None
if mal_id or al_id:
try:
with series_rec.measure("person_sync"):
person_result = self._person_updater.update_for_manga(
mal_id, al_manga_id=al_id)
print(f" Persons: chars={person_result['characters'].get('updated')} "
f"staff={person_result['staff'].get('updated')}")
except Exception as exc:
person_result = {"error": str(exc)}
print(f" Persons: ERROR {exc}")
# Person sync no longer runs here — it has its own global,
# id-based updater on a separate cron schedule (see
# KavitaPersonUpdater.update_all_persons).
series_rec.finish()
return {"chapters": chapter_results, "persons": person_result}
return {"chapters": chapter_results}
# ------------------------------------------------------------------
# Internal: chapter
@@ -637,8 +608,8 @@ class SuwayomiMover:
<Pages> element correctly points to the front cover).
"""
cbz_path = dest_series / f"{chapter_dir.name}.cbz"
chap_rec = (series_rec or self._perf.begin_run().begin_series("")
).begin_chapter(chapter_num)
chap_rec = (series_rec or self._perf.begin_run().begin_item("")
).begin_item(chapter_num)
# add_pages_from_folder records its own sub-steps on this recorder.
builder.perf = chap_rec
ok = False
@@ -675,14 +646,9 @@ class SuwayomiMover:
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
import os
# Local (no-Docker) smoke test. Adjust paths to your environment.
# Set the KAVITA_API_KEY env var — never commit API keys to the repo.
SUWAYOMI_PATH = r"M:\config\downloads\mangas"
KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
KAVITA_URL = "http://192.168.2.2:5000"
KAVITA_KEY = os.environ.get("KAVITA_API_KEY", "")
# matches.json lives next to this script during local testing.
MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"
@@ -691,8 +657,6 @@ if __name__ == "__main__":
mover = SuwayomiMover(
SUWAYOMI_PATH,
KAVITA_PATH,
kavita_base_url=KAVITA_URL,
kavita_api_key=KAVITA_KEY,
delete_source=False,
matches_cache=matches_cache,
)