Merge pull request 'Performance and Person Updater Improvements' (#7) from time-measurement into master
Reviewed-on: #7
This commit was merged in pull request #7.
This commit is contained in:
+7
-1
@@ -11,12 +11,18 @@ HOST_MANGA_CONFIG_PATH=/path/to/manga-config
|
|||||||
MANGA_WEB_PORT=8080
|
MANGA_WEB_PORT=8080
|
||||||
SETTLE_SECONDS=600
|
SETTLE_SECONDS=600
|
||||||
DELETE_SOURCE=true
|
DELETE_SOURCE=true
|
||||||
|
# Periodic updaters (volume/cover + global person sync) run together on
|
||||||
|
# this cron. Sundays 10:00. Person updater also covers LN libraries.
|
||||||
UPDATER_ENABLED=true
|
UPDATER_ENABLED=true
|
||||||
UPDATER_SCHEDULE=0 19 * * 1,4
|
UPDATER_SCHEDULE=0 10 * * 0
|
||||||
COVER_CACHE_PATH=/config/covers
|
COVER_CACHE_PATH=/config/covers
|
||||||
|
PERF_PATH=/config/perf_stats.json
|
||||||
|
VOLUME_PERF_PATH=/config/volume_perf_stats.json
|
||||||
|
PERSON_PERF_PATH=/config/person_perf_stats.json
|
||||||
|
|
||||||
# Light-novel container (kavita-lightnovel-metadata-fetcher)
|
# Light-novel container (kavita-lightnovel-metadata-fetcher)
|
||||||
HOST_LN_CONFIG_PATH=/path/to/ln-config
|
HOST_LN_CONFIG_PATH=/path/to/ln-config
|
||||||
LN_WEB_PORT=8081
|
LN_WEB_PORT=8081
|
||||||
LN_LIBRARY_IDS=3,5
|
LN_LIBRARY_IDS=3,5
|
||||||
|
LN_UPDATER_ENABLED=true
|
||||||
|
|
||||||
|
|||||||
+12
-5
@@ -13,15 +13,18 @@ services:
|
|||||||
SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
|
SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
|
||||||
DELETE_SOURCE: "${DELETE_SOURCE:-true}"
|
DELETE_SOURCE: "${DELETE_SOURCE:-true}"
|
||||||
MATCH_PATH: "/config/matches.json"
|
MATCH_PATH: "/config/matches.json"
|
||||||
# Volume/cover back-fill updater
|
# Periodic updaters (volume/cover back-fill + global person sync) run
|
||||||
|
# together on this cron. "0 10 * * 0" = Sundays 10:00 (local time, see TZ)
|
||||||
UPDATER_ENABLED: "${UPDATER_ENABLED:-true}"
|
UPDATER_ENABLED: "${UPDATER_ENABLED:-true}"
|
||||||
# Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday
|
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
|
||||||
# (local time, see TZ)
|
|
||||||
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
|
|
||||||
UPDATER_LOG: "/config/volume_updater.log"
|
UPDATER_LOG: "/config/volume_updater.log"
|
||||||
# Persistent cover cache (empty = temp dir, deleted on container stop)
|
# Persistent cover cache (empty = temp dir, deleted on container stop)
|
||||||
COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}"
|
COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}"
|
||||||
# Timezone for the cron schedule — without this 19:00 means 19:00 UTC
|
# Per-step timing stats (viewable at /perf, /perf/volume, /perf/person)
|
||||||
|
PERF_PATH: "${PERF_PATH:-/config/perf_stats.json}"
|
||||||
|
VOLUME_PERF_PATH: "${VOLUME_PERF_PATH:-/config/volume_perf_stats.json}"
|
||||||
|
PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
|
||||||
|
# Timezone for the cron schedule — without this 10:00 means 10:00 UTC
|
||||||
TZ: "${TZ:-Europe/Berlin}"
|
TZ: "${TZ:-Europe/Berlin}"
|
||||||
ports:
|
ports:
|
||||||
- "${MANGA_WEB_PORT:-8080}:8080"
|
- "${MANGA_WEB_PORT:-8080}:8080"
|
||||||
@@ -43,6 +46,10 @@ services:
|
|||||||
LIBRARY_IDS: "${LN_LIBRARY_IDS}"
|
LIBRARY_IDS: "${LN_LIBRARY_IDS}"
|
||||||
LANGUAGE: "${LANGUAGE:-en}"
|
LANGUAGE: "${LANGUAGE:-en}"
|
||||||
MATCH_PATH: "/config/matches.json"
|
MATCH_PATH: "/config/matches.json"
|
||||||
|
# Global person sync on cron (same default cadence as the manga side)
|
||||||
|
UPDATER_ENABLED: "${LN_UPDATER_ENABLED:-true}"
|
||||||
|
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
|
||||||
|
PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
|
||||||
TZ: "${TZ:-Europe/Berlin}"
|
TZ: "${TZ:-Europe/Berlin}"
|
||||||
ports:
|
ports:
|
||||||
- "${LN_WEB_PORT:-8081}:8080"
|
- "${LN_WEB_PORT:-8081}:8080"
|
||||||
|
|||||||
+33
@@ -27,6 +27,12 @@ Environment variables
|
|||||||
MATCH_PATH default /config/matches.json
|
MATCH_PATH default /config/matches.json
|
||||||
WEB_PORT default 8080
|
WEB_PORT default 8080
|
||||||
WEB_HOST default 0.0.0.0
|
WEB_HOST default 0.0.0.0
|
||||||
|
UPDATER_ENABLED default true (run the person updater on cron)
|
||||||
|
UPDATER_SCHEDULE cron expression for the person updater,
|
||||||
|
default "0 10 * * 0" = Sundays 10:00
|
||||||
|
(local time — set TZ inside the container!)
|
||||||
|
PERSON_PERF_PATH JSON file for person updater timing.
|
||||||
|
Default /config/person_perf_stats.json
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -51,6 +57,15 @@ sys.path.insert(0, str(_BASE / "src" / "ln"))
|
|||||||
from MatchesCache import MatchesCache # noqa: E402
|
from MatchesCache import MatchesCache # noqa: E402
|
||||||
from LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402
|
from LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402
|
||||||
from MatchesWebApp import MatchesWebApp # noqa: E402
|
from MatchesWebApp import MatchesWebApp # noqa: E402
|
||||||
|
from PerfStats import PerfStats # noqa: E402
|
||||||
|
from CronRunner import CronRunner # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
def _env_bool(name: str, default: bool) -> bool:
    """
    Reads a boolean flag from the environment.

    Parameters
    ----------
    name    : Environment variable to look up.
    default : Value returned when the variable is not set at all.

    Returns
    -------
    True when the value (stripped, lower-cased) is one of
    "1", "true", "yes", "y", "on". Any other value that *is* set —
    including an empty string — yields False, not ``default``.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("1", "true", "yes", "y", "on")
|
||||||
|
|
||||||
|
|
||||||
def _env_str(name: str, default: "str | None" = None,
|
def _env_str(name: str, default: "str | None" = None,
|
||||||
@@ -98,6 +113,10 @@ def main() -> int:
|
|||||||
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
|
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
|
||||||
web_port = _env_int("WEB_PORT", 8080)
|
web_port = _env_int("WEB_PORT", 8080)
|
||||||
library_ids = _env_int_list("LIBRARY_IDS")
|
library_ids = _env_int_list("LIBRARY_IDS")
|
||||||
|
updater_enabled = _env_bool("UPDATER_ENABLED", True)
|
||||||
|
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
|
||||||
|
person_perf_path = _env_str("PERSON_PERF_PATH",
|
||||||
|
"/config/person_perf_stats.json") or None
|
||||||
|
|
||||||
print(f"[main] kavita url = {kavita_url}", flush=True)
|
print(f"[main] kavita url = {kavita_url}", flush=True)
|
||||||
print(f"[main] language = {language}", flush=True)
|
print(f"[main] language = {language}", flush=True)
|
||||||
@@ -107,6 +126,7 @@ def main() -> int:
|
|||||||
print(f"[main] web = {web_host}:{web_port}", flush=True)
|
print(f"[main] web = {web_host}:{web_port}", flush=True)
|
||||||
|
|
||||||
cache = MatchesCache(match_path)
|
cache = MatchesCache(match_path)
|
||||||
|
person_perf = PerfStats(person_perf_path)
|
||||||
orchestrator = LightNovelOrchestrator(
|
orchestrator = LightNovelOrchestrator(
|
||||||
kavita_url=kavita_url,
|
kavita_url=kavita_url,
|
||||||
kavita_api_key=kavita_api_key,
|
kavita_api_key=kavita_api_key,
|
||||||
@@ -118,9 +138,22 @@ def main() -> int:
|
|||||||
app = MatchesWebApp(
|
app = MatchesWebApp(
|
||||||
cache, orchestrator=orchestrator,
|
cache, orchestrator=orchestrator,
|
||||||
default_library_ids=library_ids,
|
default_library_ids=library_ids,
|
||||||
|
person_perf=person_perf,
|
||||||
host=web_host, port=web_port,
|
host=web_host, port=web_port,
|
||||||
)
|
)
|
||||||
app.start()
|
app.start()
|
||||||
|
|
||||||
|
if updater_enabled:
|
||||||
|
try:
|
||||||
|
CronRunner(
|
||||||
|
updater_schedule,
|
||||||
|
lambda: orchestrator.sync_persons(trigger="cron",
|
||||||
|
perf=person_perf),
|
||||||
|
name="person-updater").start()
|
||||||
|
except ValueError as exc:
|
||||||
|
print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
|
||||||
|
f"scheduled person sync DISABLED", flush=True)
|
||||||
|
|
||||||
app.wait()
|
app.wait()
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|||||||
+56
-20
@@ -28,13 +28,19 @@ Environment variables
|
|||||||
MATCH_PATH default /config/matches.json
|
MATCH_PATH default /config/matches.json
|
||||||
WEB_PORT default 8080 (Flask web UI for matches.json)
|
WEB_PORT default 8080 (Flask web UI for matches.json)
|
||||||
WEB_HOST default 0.0.0.0
|
WEB_HOST default 0.0.0.0
|
||||||
UPDATER_ENABLED default true (volume/cover back-fill cron)
|
UPDATER_ENABLED default true (run volume/cover + person updaters on cron)
|
||||||
UPDATER_SCHEDULE cron expression for the updater scans,
|
UPDATER_SCHEDULE cron expression for the periodic updaters,
|
||||||
default "0 19 * * 1,4" = 19:00 every Mon + Thu
|
default "0 10 * * 0" = Sundays 10:00
|
||||||
(local time — set TZ inside the container!)
|
(local time — set TZ inside the container!)
|
||||||
UPDATER_LOG default /config/volume_updater.log
|
UPDATER_LOG default /config/volume_updater.log
|
||||||
COVER_CACHE_PATH directory for the persistent cover cache;
|
COVER_CACHE_PATH directory for the persistent cover cache;
|
||||||
empty (default) = temporary cache, deleted on exit
|
empty (default) = temporary cache, deleted on exit
|
||||||
|
PERF_PATH JSON file for per-step move timing stats.
|
||||||
|
Default /config/perf_stats.json (empty disables it)
|
||||||
|
VOLUME_PERF_PATH JSON file for volume/cover updater timing.
|
||||||
|
Default /config/volume_perf_stats.json
|
||||||
|
PERSON_PERF_PATH JSON file for person updater timing.
|
||||||
|
Default /config/person_perf_stats.json
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -42,7 +48,6 @@ from __future__ import annotations
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
@@ -61,6 +66,10 @@ from SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402,F401
|
|||||||
from MatchesCache import MatchesCache # noqa: E402
|
from MatchesCache import MatchesCache # noqa: E402
|
||||||
from MatchesWebApp import MatchesWebApp # noqa: E402
|
from MatchesWebApp import MatchesWebApp # noqa: E402
|
||||||
from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402
|
from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402
|
||||||
|
from KavitaClient import KavitaClient # noqa: E402
|
||||||
|
from KavitaPersonUpdater import KavitaPersonUpdater # noqa: E402
|
||||||
|
from PerfStats import PerfStats # noqa: E402
|
||||||
|
from CronRunner import CronRunner # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
def _env_str(name: str, default: "str | None" = None,
|
def _env_str(name: str, default: "str | None" = None,
|
||||||
@@ -104,9 +113,14 @@ def main() -> int:
|
|||||||
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
|
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
|
||||||
web_port = _env_int("WEB_PORT", 8080)
|
web_port = _env_int("WEB_PORT", 8080)
|
||||||
updater_enabled = _env_bool("UPDATER_ENABLED", True)
|
updater_enabled = _env_bool("UPDATER_ENABLED", True)
|
||||||
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
|
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
|
||||||
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
|
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
|
||||||
cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
|
cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
|
||||||
|
perf_path = _env_str("PERF_PATH", "/config/perf_stats.json") or None
|
||||||
|
volume_perf_path = _env_str("VOLUME_PERF_PATH",
|
||||||
|
"/config/volume_perf_stats.json") or None
|
||||||
|
person_perf_path = _env_str("PERSON_PERF_PATH",
|
||||||
|
"/config/person_perf_stats.json") or None
|
||||||
|
|
||||||
print(f"[main] suwayomi = {suwayomi_path}", flush=True)
|
print(f"[main] suwayomi = {suwayomi_path}", flush=True)
|
||||||
print(f"[main] kavita = {kavita_path}", flush=True)
|
print(f"[main] kavita = {kavita_path}", flush=True)
|
||||||
@@ -118,40 +132,62 @@ def main() -> int:
|
|||||||
print(f"[main] web = {web_host}:{web_port}", flush=True)
|
print(f"[main] web = {web_host}:{web_port}", flush=True)
|
||||||
|
|
||||||
matches_cache = MatchesCache(match_path)
|
matches_cache = MatchesCache(match_path)
|
||||||
|
perf_move = PerfStats(perf_path)
|
||||||
|
perf_volume = PerfStats(volume_perf_path)
|
||||||
|
perf_person = PerfStats(person_perf_path)
|
||||||
|
|
||||||
mover = SuwayomiMover(
|
mover = SuwayomiMover(
|
||||||
suwayomi_path, kavita_path,
|
suwayomi_path, kavita_path,
|
||||||
kavita_base_url=kavita_url,
|
|
||||||
kavita_api_key=kavita_api_key,
|
|
||||||
language=language,
|
language=language,
|
||||||
request_timeout=request_timeout,
|
request_timeout=request_timeout,
|
||||||
delete_source=delete_source,
|
delete_source=delete_source,
|
||||||
matches_cache=matches_cache,
|
matches_cache=matches_cache,
|
||||||
cover_cache_dir=cover_cache_path,
|
cover_cache_dir=cover_cache_path,
|
||||||
|
perf_stats=perf_move,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Standalone, global, id-based person updater (manga + LN libraries).
|
||||||
|
person_updater = None
|
||||||
|
if kavita_api_key:
|
||||||
|
kavita_client = KavitaClient(kavita_url, kavita_api_key,
|
||||||
|
request_timeout=request_timeout)
|
||||||
|
person_updater = KavitaPersonUpdater(kavita_client)
|
||||||
|
|
||||||
# watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
|
# watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
|
||||||
|
|
||||||
web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port)
|
web_app = MatchesWebApp(
|
||||||
|
matches_cache, mover=mover,
|
||||||
|
person_updater=person_updater, person_trigger="web",
|
||||||
|
perf_stats={"move": perf_move, "volume": perf_volume,
|
||||||
|
"person": perf_person},
|
||||||
|
host=web_host, port=web_port)
|
||||||
web_app.start()
|
web_app.start()
|
||||||
|
|
||||||
if updater_enabled:
|
if updater_enabled:
|
||||||
|
updater = KavitaVolumeCoverUpdater(
|
||||||
|
kavita_path,
|
||||||
|
matches_cache=matches_cache,
|
||||||
|
language=language,
|
||||||
|
request_timeout=request_timeout,
|
||||||
|
log_path=updater_log,
|
||||||
|
cover_cache_dir=cover_cache_path,
|
||||||
|
perf_stats=perf_volume,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _scheduled_job():
|
||||||
|
updater.update_all()
|
||||||
|
if person_updater is not None:
|
||||||
|
person_updater.update_all_persons(trigger="cron",
|
||||||
|
perf=perf_person)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
updater = KavitaVolumeCoverUpdater(
|
CronRunner(updater_schedule, _scheduled_job,
|
||||||
kavita_path,
|
name="updaters").start()
|
||||||
matches_cache=matches_cache,
|
|
||||||
language=language,
|
|
||||||
request_timeout=request_timeout,
|
|
||||||
log_path=updater_log,
|
|
||||||
schedule=updater_schedule,
|
|
||||||
cover_cache_dir=cover_cache_path,
|
|
||||||
)
|
|
||||||
updater.start()
|
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
# Invalid cron expression — keep the service up, just without
|
# Invalid cron expression — keep the service up, just without
|
||||||
# the updater, and make the config error obvious in the logs.
|
# the scheduled updaters, and surface the config error.
|
||||||
print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
|
print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
|
||||||
f"volume/cover updater DISABLED", flush=True)
|
f"scheduled updaters DISABLED", flush=True)
|
||||||
|
|
||||||
# watcher.start()
|
# watcher.start()
|
||||||
# watcher.wait() # blocks until stop() is called via a signal
|
# watcher.wait() # blocks until stop() is called via a signal
|
||||||
|
|||||||
@@ -0,0 +1,87 @@
|
|||||||
|
"""
|
||||||
|
cron_runner.py
|
||||||
|
==============
|
||||||
|
|
||||||
|
Runs a single callable on a cron schedule on a background thread.
|
||||||
|
|
||||||
|
Decouples *what* runs from *when*: both the manga container (volume/cover
|
||||||
|
updater + person updater) and the LN container (person updater) schedule
|
||||||
|
their work through this one helper, using a shared ``CronSchedule`` for the
|
||||||
|
``next_after`` arithmetic.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
runner = CronRunner("0 10 * * 0", job=my_callable) # Sundays 10:00
|
||||||
|
runner.start()
|
||||||
|
...
|
||||||
|
runner.stop()
|
||||||
|
|
||||||
|
When the schedule string is invalid, the CronSchedule constructor raises
|
||||||
|
ValueError — the caller decides whether to disable the runner or fall back.
|
||||||
|
The schedule is evaluated in local time (set TZ inside the container).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import threading
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from CronSchedule import CronSchedule
|
||||||
|
|
||||||
|
|
||||||
|
def _now() -> str:
    """Returns the current local time as an ISO-8601 string (second precision)."""
    return datetime.now().isoformat(timespec="seconds")


class CronRunner:
    """
    Fires ``job()`` whenever the cron ``schedule`` elapses.

    Parameters
    ----------
    schedule : 5-field cron expression (see CronSchedule). An invalid
               expression makes the CronSchedule constructor raise, which
               propagates out of ``__init__``.
    job      : Zero-arg callable invoked on each scheduled tick. Exceptions
               are caught and logged (message + traceback) so a failing run
               does not kill the loop.
    name     : Thread name (for logs).
    """

    # Upper bound for one uninterrupted sleep. The remaining time is
    # re-derived from the wall clock after every slice, so a clock jump
    # (DST switch, manual/NTP correction) while waiting for a run that is
    # days away cannot shift the moment the job actually fires.
    _MAX_SLEEP_SECONDS = 60.0

    def __init__(self, schedule: str, job, *, name: str = "CronRunner"):
        self._cron = CronSchedule(schedule)
        self._job = job
        self._name = name
        self._stop = threading.Event()
        self._thread: "threading.Thread | None" = None

    def start(self) -> None:
        """Starts the scheduling thread. Non-blocking; no-op when already running."""
        if self._thread is not None and self._thread.is_alive():
            return
        self._stop.clear()
        self._thread = threading.Thread(
            target=self._loop, name=self._name, daemon=True)
        self._thread.start()
        print(f"[{_now()}] [{self._name}] scheduled on "
              f"cron '{self._cron.expression}'", flush=True)

    def stop(self) -> None:
        """Signals the loop to stop (a job already running finishes first)."""
        self._stop.set()
        thread = self._thread
        # A job may call stop() on its own runner; joining the current
        # thread raises RuntimeError, so only join from other threads.
        if thread is not None and thread is not threading.current_thread():
            thread.join(timeout=10)

    def wait(self) -> None:
        """Blocks the calling thread until stop() is invoked."""
        self._stop.wait()

    def _sleep_until(self, target: datetime) -> bool:
        """
        Blocks until the local wall clock reaches ``target`` or stop() is
        called. Returns True when stop was requested, False otherwise.
        """
        while True:
            remaining = (target - datetime.now()).total_seconds()
            if remaining <= 0:
                return self._stop.is_set()
            if self._stop.wait(min(remaining, self._MAX_SLEEP_SECONDS)):
                return True

    def _loop(self) -> None:
        """Thread body: compute the next tick, sleep until it, run the job."""
        import traceback  # local import: only needed on the error path

        while not self._stop.is_set():
            next_run = self._cron.next_after(datetime.now())
            print(f"[{_now()}] [{self._name}] next run: "
                  f"{next_run.isoformat(timespec='minutes')}", flush=True)
            if self._sleep_until(next_run):
                break
            try:
                self._job()
            except Exception as exc:
                print(f"[{_now()}] [{self._name}] job ERROR: {exc}", flush=True)
                # The message alone is often not enough to locate the fault.
                print(traceback.format_exc(), flush=True)
|
||||||
@@ -204,6 +204,31 @@ class KavitaClient:
|
|||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
return r.json() or []
|
return r.json() or []
|
||||||
|
|
||||||
|
def list_all_persons(self, *, page_size: int = 200) -> list[dict]:
    """
    Returns every PersonDto in the instance.

    Pages through POST /api/Person/all (the browse endpoint) with an
    empty filter — same paging pattern as list_series_in_library.
    Paging starts at page 1 and stops at the first empty page or the
    first page that holds fewer than ``page_size`` entries.

    Parameters
    ----------
    page_size : Number of persons requested per page; must be >= 1.

    Raises
    ------
    ValueError : ``page_size`` is smaller than 1.
    Whatever ``raise_for_status()`` raises for a non-success response is
    propagated unchanged.
    """
    if page_size < 1:
        # A non-positive size can never satisfy the short-page stop
        # condition; fail loudly instead of sending it to the server.
        raise ValueError(f"page_size must be >= 1, got {page_size}")

    results: list[dict] = []
    page = 1
    while True:
        r = self._session.post(
            f"{self._base}/api/Person/all",
            params={"PageNumber": page, "PageSize": page_size},
            json={}, timeout=self._timeout)
        r.raise_for_status()
        chunk = r.json() or []
        if not chunk:
            break
        results.extend(chunk)
        if len(chunk) < page_size:
            # Short page = last page; saves one extra round trip.
            break
        page += 1
    return results
|
||||||
|
|
||||||
def update_person(self, payload: dict) -> None:
|
def update_person(self, payload: dict) -> None:
|
||||||
"""Writes a person record (malId, aniListId, description, …)."""
|
"""Writes a person record (malId, aniListId, description, …)."""
|
||||||
r = self._session.post(f"{self._base}/api/Person/update",
|
r = self._session.post(f"{self._base}/api/Person/update",
|
||||||
|
|||||||
+140
-338
@@ -2,407 +2,220 @@
|
|||||||
kavita_person_updater.py
|
kavita_person_updater.py
|
||||||
========================
|
========================
|
||||||
|
|
||||||
Synchronises Kavita person / character records with MyAnimeList data.
|
Synchronises Kavita character person-records with MyAnimeList / AniList data.
|
||||||
|
|
||||||
For every character and staff member that MAL knows about for a given manga
|
Global, id-based mode
|
||||||
the updater:
|
---------------------
|
||||||
1. Searches Kavita for a matching Person record (by name similarity /
|
Kavita person-records are created with a disambiguated name carrying the
|
||||||
alias match, configurable threshold).
|
tracker *character* id, e.g. ``Rem (MAL 118737)`` (manga: written into
|
||||||
2. Sets the MAL ID on the Kavita person if it is not yet linked.
|
ComicInfo <Characters>; light novels: written by the metadata builder).
|
||||||
3. Uploads the MAL profile image when the cover is not locked and has
|
``update_all_persons`` walks **every** person in the Kavita instance, reads
|
||||||
not been set in a previous sync run.
|
that id from the name, looks the character up on MAL / AniList by id, and
|
||||||
4. Populates the description field when Kavita has none and MAL provides
|
writes back:
|
||||||
an 'about' text (requires an extra Jikan request per character; only
|
|
||||||
performed when update_descriptions=True).
|
* the tracker id into the ``malId`` / ``aniListId`` field (when still empty),
|
||||||
|
* a description (when the record has none),
|
||||||
|
* the profile image (when not locked and not already set).
|
||||||
|
|
||||||
|
Persons whose name carries no id (authors / staff, which are not
|
||||||
|
disambiguated) are skipped. A record already linked to a *different*
|
||||||
|
tracker id than its name says is reported as a conflict and left untouched.
|
||||||
|
|
||||||
|
This mode is format-independent (it only does id lookups, never title
|
||||||
|
searches) so a single pass covers both the manga and light-novel libraries.
|
||||||
|
|
||||||
All HTTP traffic to Kavita goes through the shared :class:`KavitaClient`
|
All HTTP traffic to Kavita goes through the shared :class:`KavitaClient`
|
||||||
(`/api/Person/search`, `/api/Person/update`, `/api/Upload/person`).
|
(`/api/Person/all`, `/api/Person/update`, `/api/Upload/person`).
|
||||||
|
|
||||||
Tested against Kavita 0.9.0.2.
|
Tested against Kavita 0.9.0.2.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from KavitaClient import KavitaClient
|
from KavitaClient import KavitaClient
|
||||||
from MALResolver import MALResolver
|
from MALResolver import MALResolver
|
||||||
from AniListResolver import AniListResolver
|
from AniListResolver import AniListResolver
|
||||||
from TextUtils import best_similarity, paragraphs_to_html, person_name_with_id
|
from PerfStats import PerfStats
|
||||||
|
from TextUtils import paragraphs_to_html, parse_person_tracker_id
|
||||||
|
|
||||||
|
|
||||||
class KavitaPersonUpdater:
|
class KavitaPersonUpdater:
|
||||||
"""
|
"""
|
||||||
Syncs Kavita Person records with MyAnimeList data.
|
Syncs Kavita character person-records with MAL / AniList data, keyed by
|
||||||
|
the tracker id embedded in each person's name.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
client : Shared KavitaClient (session, auth, cover uploads)
|
client : Shared KavitaClient (session, auth, cover uploads).
|
||||||
mal_resolver : Shared MALResolver singleton (created automatically if omitted)
|
mal_resolver : Shared MALResolver singleton (created if omitted).
|
||||||
al_resolver : Shared AniListResolver singleton (created automatically if omitted)
|
al_resolver : Shared AniListResolver singleton (created if omitted).
|
||||||
min_name_score : Minimum difflib similarity ratio (0–1) required to accept a
|
|
||||||
Kavita person as a match for a MAL name. Default 0.80.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, client: KavitaClient, *,
|
def __init__(self, client: KavitaClient, *,
|
||||||
mal_resolver: "MALResolver | None" = None,
|
mal_resolver: "MALResolver | None" = None,
|
||||||
al_resolver: "AniListResolver | None" = None,
|
al_resolver: "AniListResolver | None" = None):
|
||||||
min_name_score: float = 0.80):
|
|
||||||
self._client = client
|
self._client = client
|
||||||
self._min_score = min_name_score
|
|
||||||
self._mal = mal_resolver or MALResolver()
|
self._mal = mal_resolver or MALResolver()
|
||||||
self._al = al_resolver or AniListResolver()
|
self._al = al_resolver or AniListResolver()
|
||||||
|
|
||||||
# Cache: normalised name -> list of PersonDto dicts (best matches first)
|
|
||||||
self._person_search_cache: dict[str, list[dict]] = {}
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Public: combined update
|
# Public: global person sync
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def update_for_manga(self, mal_manga_id: "int | None", *,
|
def update_all_persons(self, *,
|
||||||
al_manga_id: "int | None" = None,
|
trigger: str = "cron",
|
||||||
update_covers: bool = True,
|
perf: "PerfStats | None" = None,
|
||||||
update_descriptions: bool = True) -> dict:
|
update_covers: bool = True,
|
||||||
|
update_descriptions: bool = True) -> dict:
|
||||||
"""
|
"""
|
||||||
Runs a full update pass for both characters and staff of the manga.
|
Walks every Kavita person, syncing the ones whose name carries a
|
||||||
MAL is tried first; AniList is used as fallback when MAL returns nothing.
|
tracker character id.
|
||||||
|
|
||||||
Returns
|
Parameters
|
||||||
-------
|
----------
|
||||||
{
|
trigger : Source that started this run ("cron" | "web" | "ln") —
|
||||||
"characters": {"updated": n, "skipped": n, "not_found": n},
|
recorded in the perf-stats run meta.
|
||||||
"staff": {"updated": n, "skipped": n, "not_found": n},
|
perf : Optional PerfStats for per-person step timing.
|
||||||
}
|
|
||||||
|
Returns {"trigger", "updated", "skipped", "not_found",
|
||||||
|
"conflicts", "errors"}.
|
||||||
"""
|
"""
|
||||||
return {
|
perf = perf or PerfStats(None)
|
||||||
"characters": self.update_characters(
|
run = perf.begin_run(meta={"trigger": trigger})
|
||||||
mal_manga_id, al_manga_id=al_manga_id,
|
result: dict = {"trigger": trigger, "updated": 0, "skipped": 0,
|
||||||
update_covers=update_covers,
|
"not_found": 0, "conflicts": 0, "errors": []}
|
||||||
update_descriptions=update_descriptions),
|
|
||||||
"staff": self.update_staff(
|
|
||||||
mal_manga_id, al_manga_id=al_manga_id,
|
|
||||||
update_covers=update_covers,
|
|
||||||
update_descriptions=update_descriptions),
|
|
||||||
}
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
try:
|
||||||
# Public: character update
|
persons = self._client.list_all_persons()
|
||||||
# ------------------------------------------------------------------
|
except requests.RequestException as exc:
|
||||||
def update_characters(self, mal_manga_id: "int | None", *,
|
result["errors"].append(f"list persons failed: {exc}")
|
||||||
al_manga_id: "int | None" = None,
|
run.finish()
|
||||||
update_covers: bool = True,
|
return result
|
||||||
update_descriptions: bool = True) -> dict:
|
|
||||||
"""
|
|
||||||
Updates Kavita persons that match MAL/AniList characters for the manga.
|
|
||||||
MAL is tried first; AniList is the fallback when MAL returns nothing.
|
|
||||||
|
|
||||||
Returns {"updated": n, "skipped": n, "not_found": n}.
|
for person in persons:
|
||||||
"""
|
name = (person.get("name") or "").strip()
|
||||||
entries = self._mal.get_characters_detailed(mal_manga_id) if mal_manga_id else []
|
parsed = parse_person_tracker_id(name)
|
||||||
resolver = self._mal
|
if not parsed:
|
||||||
if not entries and al_manga_id:
|
result["skipped"] += 1 # author/staff or un-tagged
|
||||||
entries = self._al.get_characters_detailed(al_manga_id)
|
|
||||||
resolver = self._al
|
|
||||||
return self._sync_entries(entries, "character", resolver,
|
|
||||||
update_covers=update_covers,
|
|
||||||
update_descriptions=update_descriptions)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Public: staff update
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
def update_staff(self, mal_manga_id: "int | None", *,
|
|
||||||
al_manga_id: "int | None" = None,
|
|
||||||
update_covers: bool = True,
|
|
||||||
update_descriptions: bool = True) -> dict:
|
|
||||||
"""
|
|
||||||
Updates Kavita persons that match MAL/AniList staff for the manga.
|
|
||||||
MAL is tried first; AniList is the fallback when MAL returns nothing.
|
|
||||||
|
|
||||||
Returns {"updated": n, "skipped": n, "not_found": n}.
|
|
||||||
"""
|
|
||||||
entries = self._mal.get_staff_detailed(mal_manga_id) if mal_manga_id else []
|
|
||||||
resolver = self._mal
|
|
||||||
if not entries and al_manga_id:
|
|
||||||
entries = self._al.get_staff_detailed(al_manga_id)
|
|
||||||
resolver = self._al
|
|
||||||
return self._sync_entries(entries, "staff", resolver,
|
|
||||||
update_covers=update_covers,
|
|
||||||
update_descriptions=update_descriptions)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Public: cache management
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
def clear_cache(self) -> None:
|
|
||||||
"""Clears the Kavita person search cache."""
|
|
||||||
self._person_search_cache.clear()
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Internal: main sync loop
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
def _sync_entries(self, entries: list[dict], kind: str, resolver, *,
|
|
||||||
update_covers: bool,
|
|
||||||
update_descriptions: bool) -> dict:
|
|
||||||
result: dict = {"updated": 0, "skipped": 0, "not_found": 0,
|
|
||||||
"errors": []}
|
|
||||||
for entry in entries:
|
|
||||||
name = (entry.get("name") or "").strip()
|
|
||||||
raw_name = (entry.get("raw_name") or "").strip()
|
|
||||||
if not name and not raw_name:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if kind == "character":
|
source, tracker_id = parsed
|
||||||
# Characters are stored under their disambiguated name
|
item = run.begin_item(name)
|
||||||
# ("Rem (MAL 118737)") — see person_name_with_id. The
|
ok = True
|
||||||
# series metadata write creates the person under exactly
|
try:
|
||||||
# this name, so only that form is searched.
|
category = self._apply_to_person(
|
||||||
search_names = [person_name_with_id(
|
person, source, tracker_id, item,
|
||||||
name, mal_id=entry.get("mal_id"),
|
update_cover=update_covers,
|
||||||
al_id=entry.get("al_id"))]
|
update_desc=update_descriptions,
|
||||||
else:
|
errors=result["errors"])
|
||||||
# Staff: cleaned (XML-safe) name first; if Kavita stores
|
result[category] += 1
|
||||||
# the legacy comma form, retry with the raw MAL name.
|
ok = category != "conflicts"
|
||||||
search_names = [name]
|
except Exception as exc:
|
||||||
if raw_name and raw_name != name:
|
result["errors"].append(f"{name}: {exc}")
|
||||||
search_names.append(raw_name)
|
ok = False
|
||||||
|
finally:
|
||||||
matches: list[dict] = []
|
item.finish(ok=ok)
|
||||||
for search_name in search_names:
|
|
||||||
if not search_name:
|
|
||||||
continue
|
|
||||||
matches = self._find_kavita_person(search_name)
|
|
||||||
if matches:
|
|
||||||
break
|
|
||||||
|
|
||||||
if not matches:
|
|
||||||
result["not_found"] += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
changed = self._apply_mal_data(
|
|
||||||
matches[0], entry, kind, resolver,
|
|
||||||
update_cover=update_covers,
|
|
||||||
update_desc=update_descriptions,
|
|
||||||
errors=result["errors"])
|
|
||||||
result["updated" if changed else "skipped"] += 1
|
|
||||||
|
|
||||||
|
run.finish()
|
||||||
|
print(f"[persons] trigger={trigger} updated={result['updated']} "
|
||||||
|
f"skipped={result['skipped']} not_found={result['not_found']} "
|
||||||
|
f"conflicts={result['conflicts']} errors={len(result['errors'])}",
|
||||||
|
flush=True)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Internal: Kavita person search
|
# Internal: apply tracker data to one person
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def _find_kavita_person(self, name: str) -> list[dict]:
|
def _apply_to_person(self, person: dict, source: str, tracker_id: int,
|
||||||
|
item, *, update_cover: bool, update_desc: bool,
|
||||||
|
errors: list) -> str:
|
||||||
"""
|
"""
|
||||||
Searches Kavita for persons matching `name`.
|
Applies MAL/AniList character data to one Kavita person.
|
||||||
|
Returns the result category: "updated" | "skipped" | "not_found"
|
||||||
Checks both the main name and any stored aliases.
|
| "conflicts".
|
||||||
Returns persons sorted by similarity, filtered by min_name_score.
|
|
||||||
Results are cached per (normalised) query name.
|
|
||||||
"""
|
"""
|
||||||
key = name.lower().strip()
|
person_id = person.get("id")
|
||||||
if key in self._person_search_cache:
|
|
||||||
return self._person_search_cache[key]
|
|
||||||
|
|
||||||
try:
|
|
||||||
persons = self._client.search_persons(name)
|
|
||||||
except requests.RequestException:
|
|
||||||
self._person_search_cache[key] = []
|
|
||||||
return []
|
|
||||||
|
|
||||||
scored = []
|
|
||||||
for p in persons:
|
|
||||||
candidates = [p.get("name")] + list(p.get("aliases") or [])
|
|
||||||
scored.append((best_similarity(key, candidates), p))
|
|
||||||
scored.sort(key=lambda pair: pair[0], reverse=True)
|
|
||||||
filtered = [p for score, p in scored if score >= self._min_score]
|
|
||||||
self._person_search_cache[key] = filtered
|
|
||||||
return filtered
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Internal: apply MAL data to a single Kavita person
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
def _apply_mal_data(self, person: dict, mal_entry: dict, kind: str,
|
|
||||||
resolver, *,
|
|
||||||
update_cover: bool, update_desc: bool,
|
|
||||||
errors: "list | None" = None) -> bool:
|
|
||||||
"""
|
|
||||||
Applies tracker data (MAL or AniList) to one Kavita person record.
|
|
||||||
|
|
||||||
Fields updated
|
|
||||||
--------------
|
|
||||||
- malId : set when the entry carries a MAL ID and it differs
|
|
||||||
- aniListId : set when the entry carries an AniList ID and it differs
|
|
||||||
- description: set when empty and the tracker provides a description
|
|
||||||
- cover image: uploaded when not locked and no prior sync cover exists
|
|
||||||
|
|
||||||
Returns True if any change was made. Failures are appended to the
|
|
||||||
`errors` list (if provided) instead of being silently swallowed.
|
|
||||||
"""
|
|
||||||
person_id: "int | None" = person.get("id")
|
|
||||||
if not person_id:
|
if not person_id:
|
||||||
return False
|
return "skipped"
|
||||||
|
|
||||||
person_name = person.get("name") or ""
|
resolver = self._mal if source == "mal" else self._al
|
||||||
|
id_field = "malId" if source == "mal" else "aniListId"
|
||||||
|
current = person.get(id_field) or 0
|
||||||
|
|
||||||
# Tracker IDs — a MAL entry has mal_id set; an AniList entry has al_id.
|
# The name is authoritative; a record linked to a different id is a
|
||||||
mal_id: "int | None" = mal_entry.get("mal_id")
|
# data conflict — never overwrite it.
|
||||||
al_id: "int | None" = mal_entry.get("al_id")
|
if current and current != tracker_id:
|
||||||
entity_id = mal_id or al_id # used for resolver detail calls
|
errors.append(
|
||||||
|
f"conflict: '{person.get('name')}' (#{person_id}) has "
|
||||||
|
f"{id_field}={current} but name says {tracker_id} — skipped")
|
||||||
|
return "conflicts"
|
||||||
|
|
||||||
current_mal_id: int = person.get("malId") or 0
|
with item.measure("detail_fetch"):
|
||||||
current_al_id: int = person.get("aniListId") or 0
|
details = resolver.get_character_details(tracker_id)
|
||||||
|
if not details:
|
||||||
|
return "not_found"
|
||||||
|
|
||||||
# Collision guard: the Kavita person is already linked to a
|
need_id = not current # write id when still missing
|
||||||
# *different* tracker entity — same display name, different
|
description = None
|
||||||
# character/person. Never overwrite; first writer wins.
|
|
||||||
if ((mal_id and current_mal_id and current_mal_id != mal_id)
|
|
||||||
or (al_id and current_al_id and current_al_id != al_id)):
|
|
||||||
if errors is not None:
|
|
||||||
errors.append(
|
|
||||||
f"conflict: '{person_name}' (#{person_id}) is linked to "
|
|
||||||
f"malId={current_mal_id or '-'}/aniListId={current_al_id or '-'} "
|
|
||||||
f"but this entry has malId={mal_id or '-'}/aniListId={al_id or '-'} "
|
|
||||||
f"— skipped")
|
|
||||||
return False
|
|
||||||
|
|
||||||
needs_mal_id = bool(mal_id and current_mal_id != mal_id)
|
|
||||||
needs_al_id = bool(al_id and current_al_id != al_id)
|
|
||||||
|
|
||||||
# ------ Lazy description fetch -----------------------------------
|
|
||||||
description: "str | None" = None
|
|
||||||
if update_desc and not (person.get("description") or "").strip():
|
if update_desc and not (person.get("description") or "").strip():
|
||||||
if entity_id:
|
description = _build_character_description(details) or None
|
||||||
if kind == "character":
|
need_desc = bool(description)
|
||||||
details = resolver.get_character_details(entity_id)
|
|
||||||
if details:
|
|
||||||
description = _build_character_description(details) or None
|
|
||||||
else:
|
|
||||||
details = resolver.get_person_details(entity_id)
|
|
||||||
if details:
|
|
||||||
description = _build_person_description(details) or None
|
|
||||||
|
|
||||||
needs_desc = bool(description)
|
|
||||||
|
|
||||||
# ------ Metadata update ------------------------------------------
|
|
||||||
changed = False
|
changed = False
|
||||||
if needs_mal_id or needs_al_id or needs_desc:
|
if need_id or need_desc:
|
||||||
payload: dict = {
|
payload = {
|
||||||
"id": person_id,
|
"id": person_id,
|
||||||
"name": person_name,
|
"name": person.get("name") or "",
|
||||||
# MUST stay a boolean — the cover image itself is uploaded
|
# MUST stay a boolean — the cover is uploaded separately.
|
||||||
# separately via POST /api/Upload/person (below). Putting a
|
|
||||||
# URL here makes Kavita reject the whole payload with HTTP 400.
|
|
||||||
"coverImageLocked": bool(person.get("coverImageLocked", False)),
|
"coverImageLocked": bool(person.get("coverImageLocked", False)),
|
||||||
"aliases": person.get("aliases") or [],
|
"aliases": person.get("aliases") or [],
|
||||||
"description": description or person.get("description"),
|
"description": description or person.get("description"),
|
||||||
"malId": mal_id if needs_mal_id else (current_mal_id or None),
|
"malId": tracker_id if source == "mal"
|
||||||
"aniListId": al_id if needs_al_id else (current_al_id or None),
|
else (person.get("malId") or None),
|
||||||
|
"aniListId": tracker_id if source == "al"
|
||||||
|
else (person.get("aniListId") or None),
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
self._client.update_person(payload)
|
with item.measure("person_update"):
|
||||||
|
self._client.update_person(payload)
|
||||||
changed = True
|
changed = True
|
||||||
except requests.RequestException as e:
|
except requests.RequestException as exc:
|
||||||
if errors is not None:
|
errors.append(f"update failed #{person_id} "
|
||||||
errors.append(
|
f"'{person.get('name')}': {exc}")
|
||||||
f"Person/update failed for #{person_id} "
|
|
||||||
f"'{person_name}': {e}")
|
|
||||||
|
|
||||||
# ------ Cover image upload ----------------------------------------
|
# Cover: upload when not locked and not already set for this id.
|
||||||
# Upload whenever:
|
|
||||||
# - caller requested cover updates
|
|
||||||
# - cover is NOT locked (user did not manually pin it)
|
|
||||||
# - we have not already uploaded this exact tracker entity's image
|
|
||||||
# (i.e. the tracked ID differs OR there is no cover yet).
|
|
||||||
if update_cover and not person.get("coverImageLocked"):
|
if update_cover and not person.get("coverImageLocked"):
|
||||||
image_url = mal_entry.get("image_url")
|
image_url = details.get("image_url")
|
||||||
already_uploaded = (
|
already = bool(current) and bool(person.get("coverImage"))
|
||||||
entity_id is not None
|
if image_url and not already:
|
||||||
and (current_mal_id == mal_id or current_al_id == al_id)
|
|
||||||
and bool(person.get("coverImage"))
|
|
||||||
)
|
|
||||||
if image_url and not already_uploaded:
|
|
||||||
try:
|
try:
|
||||||
self._client.upload_person_cover(person_id, image_url)
|
with item.measure("cover_upload"):
|
||||||
|
self._client.upload_person_cover(person_id, image_url)
|
||||||
changed = True
|
changed = True
|
||||||
except requests.RequestException as e:
|
except requests.RequestException as exc:
|
||||||
if errors is not None:
|
errors.append(f"cover upload failed #{person_id} "
|
||||||
errors.append(
|
f"'{person.get('name')}': {exc}")
|
||||||
f"cover upload failed for #{person_id} "
|
|
||||||
f"'{person_name}' ({image_url}): {e}")
|
|
||||||
|
|
||||||
return changed
|
return "updated" if changed else "skipped"
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Module helpers: description builders
|
# Module helper: character description builder
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
def _format_birthday(birthday: str) -> str:
|
|
||||||
"""Converts an ISO 8601 birthday string to "D Month YYYY"."""
|
|
||||||
if not birthday:
|
|
||||||
return ""
|
|
||||||
try:
|
|
||||||
dt = datetime.date.fromisoformat(birthday.split("T")[0])
|
|
||||||
return f"{dt.day} {dt.strftime('%B %Y')}"
|
|
||||||
except (ValueError, AttributeError):
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
def _build_character_description(details: dict) -> str:
|
def _build_character_description(details: dict) -> str:
|
||||||
"""
|
"""
|
||||||
Builds a Kavita-safe HTML description for a MAL character.
|
Builds a Kavita-safe HTML description for a MAL / AniList character.
|
||||||
|
|
||||||
Top line: "Favorites: N" as a link to the character's MAL page.
|
Top line: "Favorites: N" linked to the character page (when available).
|
||||||
Remainder: the character's `about` text converted to HTML paragraphs.
|
Remainder: the character's `about` text converted to HTML paragraphs.
|
||||||
"""
|
"""
|
||||||
parts: list[str] = []
|
parts: list[str] = []
|
||||||
url = details.get("url") or ""
|
url = details.get("url") or ""
|
||||||
favorites = details.get("favorites")
|
favorites = details.get("favorites")
|
||||||
if url and favorites is not None:
|
if url and favorites is not None:
|
||||||
parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>')
|
parts.append(f'<p><a href="{url}" target="_blank">'
|
||||||
about = (details.get("about") or "").strip()
|
f'Favorites: {favorites:,}</a></p>')
|
||||||
if about:
|
|
||||||
parts.append(paragraphs_to_html(about))
|
|
||||||
return "<br>".join(parts)
|
|
||||||
|
|
||||||
|
|
||||||
def _build_person_description(details: dict) -> str:
|
|
||||||
"""
|
|
||||||
Builds a Kavita-safe HTML description for a MAL person (mangaka / staff).
|
|
||||||
|
|
||||||
Renders a summary table (given name, family name, birthday, website,
|
|
||||||
member favorites) followed by the `about` biography as HTML paragraphs.
|
|
||||||
"""
|
|
||||||
_TD = 'style="padding-right:1.5em"'
|
|
||||||
rows: list[str] = []
|
|
||||||
|
|
||||||
given = (details.get("given_name") or "").strip()
|
|
||||||
family = (details.get("family_name") or "").strip()
|
|
||||||
birthday = details.get("birthday") or ""
|
|
||||||
favorites = details.get("favorites")
|
|
||||||
website = (details.get("website_url") or "").strip()
|
|
||||||
url = (details.get("url") or "").strip()
|
|
||||||
|
|
||||||
if given:
|
|
||||||
rows.append(f"<tr><td {_TD}>Given name</td><td>{given}</td></tr>")
|
|
||||||
if family:
|
|
||||||
rows.append(f"<tr><td {_TD}>Family name</td><td>{family}</td></tr>")
|
|
||||||
bday_str = _format_birthday(birthday)
|
|
||||||
if bday_str:
|
|
||||||
rows.append(f"<tr><td {_TD}>Birthday</td><td>{bday_str}</td></tr>")
|
|
||||||
if website:
|
|
||||||
rows.append(
|
|
||||||
f'<tr><td {_TD}>Website</td>'
|
|
||||||
f'<td><a href="{website}">{website}</a></td></tr>'
|
|
||||||
)
|
|
||||||
if favorites is not None:
|
|
||||||
fav_cell = (f'<a href="{url}" target="_blank">{favorites:,}</a>' if url
|
|
||||||
else f"{favorites:,}")
|
|
||||||
rows.append(
|
|
||||||
f"<tr><td {_TD}>Member Favorites</td><td>{fav_cell}</td></tr>")
|
|
||||||
|
|
||||||
parts: list[str] = []
|
|
||||||
if rows:
|
|
||||||
parts.append(f'<table>{"".join(rows)}</table>')
|
|
||||||
about = (details.get("about") or "").strip()
|
about = (details.get("about") or "").strip()
|
||||||
if about:
|
if about:
|
||||||
parts.append(paragraphs_to_html(about))
|
parts.append(paragraphs_to_html(about))
|
||||||
@@ -418,18 +231,7 @@ if __name__ == "__main__":
|
|||||||
client = KavitaClient(os.environ["KAVITA_URL"],
|
client = KavitaClient(os.environ["KAVITA_URL"],
|
||||||
os.environ["KAVITA_API_KEY"])
|
os.environ["KAVITA_API_KEY"])
|
||||||
updater = KavitaPersonUpdater(client)
|
updater = KavitaPersonUpdater(client)
|
||||||
|
report = updater.update_all_persons(trigger="cron")
|
||||||
mal = MALResolver()
|
print(report)
|
||||||
mal_id = mal.find_mal_id("よふかしのうた")
|
for err in report["errors"]:
|
||||||
print("MAL ID:", mal_id)
|
print(" ", err)
|
||||||
|
|
||||||
if mal_id:
|
|
||||||
result = updater.update_for_manga(mal_id)
|
|
||||||
print("Characters:", {k: v for k, v in result["characters"].items()
|
|
||||||
if k != "errors"})
|
|
||||||
print("Staff :", {k: v for k, v in result["staff"].items()
|
|
||||||
if k != "errors"})
|
|
||||||
# Surface any non-fatal upload / API errors for debugging
|
|
||||||
for section in ("characters", "staff"):
|
|
||||||
for err in result[section].get("errors", []):
|
|
||||||
print(f"[{section}] {err}")
|
|
||||||
|
|||||||
@@ -151,9 +151,10 @@ class MangaBakaWorksResolver:
|
|||||||
Returns volume-level works for a series, filtered to those that have
|
Returns volume-level works for a series, filtered to those that have
|
||||||
a usable cover image.
|
a usable cover image.
|
||||||
|
|
||||||
Non-empty results are cached per series; empty results are not, so
|
Results are cached per series — including empty results, so a series
|
||||||
works added on MangaBaka later become visible without restarting
|
without works is not re-paginated for every chapter of a move run.
|
||||||
the (long-running) process.
|
The periodic cover updater calls clear_cache() before each scan, so
|
||||||
|
works added on MangaBaka later are still picked up there.
|
||||||
"""
|
"""
|
||||||
if not series_id:
|
if not series_id:
|
||||||
return []
|
return []
|
||||||
@@ -165,8 +166,7 @@ class MangaBakaWorksResolver:
|
|||||||
|
|
||||||
# Discard works that carry no usable cover
|
# Discard works that carry no usable cover
|
||||||
works_with_cover = [w for w in all_works if w.get("images")]
|
works_with_cover = [w for w in all_works if w.get("images")]
|
||||||
if works_with_cover:
|
self._cache[series_id] = works_with_cover
|
||||||
self._cache[series_id] = works_with_cover
|
|
||||||
return works_with_cover
|
return works_with_cover
|
||||||
|
|
||||||
def get_work_for_volume(self, series_id: str, volume) -> "dict | None":
|
def get_work_for_volume(self, series_id: str, volume) -> "dict | None":
|
||||||
@@ -228,10 +228,10 @@ class MangaBakaWorksResolver:
|
|||||||
if url:
|
if url:
|
||||||
result[norm] = url
|
result[norm] = url
|
||||||
|
|
||||||
# Empty results are not cached — covers added on MangaBaka later
|
# Cache even an empty result so a series without volume images is not
|
||||||
# become visible without restarting the long-running process.
|
# re-paginated for every chapter. The periodic cover updater clears
|
||||||
if result:
|
# this cache before each scan, so newly added images are still found.
|
||||||
self._images_cache[series_id] = result
|
self._images_cache[series_id] = result
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def get_cover_for_volume_from_images(self, series_id: str,
|
def get_cover_for_volume_from_images(self, series_id: str,
|
||||||
|
|||||||
@@ -0,0 +1,254 @@
|
|||||||
|
"""
|
||||||
|
perf_stats.py
|
||||||
|
=============
|
||||||
|
|
||||||
|
Generic run/step performance profiler with JSON persistence, shared by the
|
||||||
|
move pipeline and the periodic updaters (volume/cover, persons).
|
||||||
|
|
||||||
|
Each run is a tree of *items* (e.g. series -> chapter, or one person) and
|
||||||
|
every item carries named *step* timings. A run also carries free-form
|
||||||
|
``meta`` (e.g. the trigger source ``"cron" | "web" | "ln"`` for the person
|
||||||
|
updater).
|
||||||
|
|
||||||
|
Data model (one entry per run, newest first)::
|
||||||
|
|
||||||
|
{
|
||||||
|
"runs": [
|
||||||
|
{
|
||||||
|
"runId": "…",
|
||||||
|
"startedAt": 1700000000,
|
||||||
|
"finishedAt": 1700000123,
|
||||||
|
"totalSeconds": 123.4,
|
||||||
|
"meta": {"trigger": "cron"},
|
||||||
|
"itemCount": 2, # top-level items
|
||||||
|
"leafCount": 31, # items without children
|
||||||
|
"stepTotals": {"cover": 41.2, "image_dimensions": 55.8, ...},
|
||||||
|
"items": [
|
||||||
|
{"label": "Call of the Night", "totalSeconds": 60.2, "ok": true,
|
||||||
|
"steps": {"fetch_metadata": 1.2},
|
||||||
|
"items": [
|
||||||
|
{"label": "1", "totalSeconds": 11.5, "ok": true,
|
||||||
|
"steps": {"cover": 1.8, "pack_cbz": 2.9}, "items": []}
|
||||||
|
]}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
perf = PerfStats(path) # path=None -> disabled
|
||||||
|
run = perf.begin_run(meta={"trigger": "cron"})
|
||||||
|
item = run.begin_item("Call of the Night")
|
||||||
|
with item.measure("fetch_metadata"):
|
||||||
|
...
|
||||||
|
chap = item.begin_item("1")
|
||||||
|
with chap.measure("pack_cbz"):
|
||||||
|
...
|
||||||
|
chap.finish()
|
||||||
|
item.finish() # flushes the run to disk
|
||||||
|
run.finish()
|
||||||
|
|
||||||
|
When ``path`` is None every recorder is a no-op and nothing is written, so
|
||||||
|
the profiler can be left permanently wired in at negligible cost. The run
|
||||||
|
is flushed after every top-level item finishes, so a long run is observable
|
||||||
|
live and survives a crash mid-run.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# Keep the JSON small: only the most recent runs are retained on disk.
|
||||||
|
_MAX_RUNS = 30
|
||||||
|
|
||||||
|
|
||||||
|
class _StepTimer:
|
||||||
|
"""
|
||||||
|
Base recorder: accumulates ``{step_name: seconds}`` and tracks its own
|
||||||
|
wall-clock lifetime. ``enabled=False`` turns every method into a no-op.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, enabled: bool = True):
|
||||||
|
self.steps: dict[str, float] = {}
|
||||||
|
self._enabled = enabled
|
||||||
|
self._t0 = time.monotonic()
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def measure(self, name: str):
|
||||||
|
"""Context manager timing a named step (accumulates on repeat use)."""
|
||||||
|
if not self._enabled:
|
||||||
|
yield
|
||||||
|
return
|
||||||
|
start = time.monotonic()
|
||||||
|
try:
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
self.steps[name] = round(
|
||||||
|
self.steps.get(name, 0.0) + (time.monotonic() - start), 4)
|
||||||
|
|
||||||
|
def elapsed(self) -> float:
|
||||||
|
return round(time.monotonic() - self._t0, 4)
|
||||||
|
|
||||||
|
|
||||||
|
class ItemRecorder(_StepTimer):
    """
    A single node of a run's item tree. It carries its own step timings and
    collects the finished nodes of any child items created through
    ``begin_item`` (e.g. chapters below a series).
    """

    def __init__(self, run: "RunRecorder", label: str, *,
                 parent: "ItemRecorder | None" = None,
                 enabled: bool = True):
        super().__init__(enabled)
        self._run = run
        self._label = label
        self._parent = parent
        self._children: list[dict] = []

    def begin_item(self, label: str) -> "ItemRecorder":
        """Creates a child item that reports back to this item on finish."""
        child = ItemRecorder(self._run, label, parent=self,
                             enabled=self._enabled)
        return child

    def finish(self, *, ok: bool = True) -> None:
        """
        Closes the item: builds its result node and hands it to the parent
        item, or — for a top-level item — to the run, which is then flushed
        so progress is persisted. Does nothing when disabled.
        """
        if not self._enabled:
            return

        node = {
            "label": self._label,
            "totalSeconds": self.elapsed(),
            "ok": ok,
            "steps": self.steps,
            "items": self._children,
        }

        owner = self._parent
        if owner is not None:
            owner._children.append(node)
            return

        # No parent: this is a top-level item of the run.
        self._run._items.append(node)
        self._run.flush()
|
||||||
|
|
||||||
|
|
||||||
|
class RunRecorder:
    """
    Recorder for one complete run: owns the list of finished top-level item
    nodes and turns them into the persisted run dict.
    """

    def __init__(self, stats: "PerfStats", meta: "dict | None" = None,
                 enabled: bool = True):
        self._stats = stats
        self._enabled = enabled
        self._meta = meta or {}
        self._items: list[dict] = []
        self._started = time.time()      # wall clock, for display
        self._t0 = time.monotonic()      # monotonic, for the duration
        self._run_id = uuid.uuid4().hex

    def begin_item(self, label: str) -> ItemRecorder:
        """Creates a top-level item belonging to this run."""
        return ItemRecorder(self, label, parent=None, enabled=self._enabled)

    def _snapshot(self) -> dict:
        """
        Builds the run dict from the items finished so far, summing step
        timings over the whole tree and counting items without children.
        """
        totals: dict[str, float] = {}
        leaves = 0

        # Explicit stack, filled in reverse so nodes are visited in the same
        # parent-before-children, left-to-right order as a recursive walk.
        pending = list(reversed(self._items))
        while pending:
            node = pending.pop()
            for step_name, seconds in node["steps"].items():
                totals[step_name] = round(
                    totals.get(step_name, 0.0) + seconds, 4)
            children = node["items"]
            if children:
                pending.extend(reversed(children))
            else:
                leaves += 1

        return {
            "runId": self._run_id,
            "startedAt": round(self._started),
            "finishedAt": round(time.time()),
            "totalSeconds": round(time.monotonic() - self._t0, 4),
            "meta": self._meta,
            "itemCount": len(self._items),
            "leafCount": leaves,
            "stepTotals": totals,
            "items": self._items,
        }

    def flush(self) -> "dict | None":
        """
        Hands the run's current state to the stats store (upsert by runId)
        and returns it. Returns None without writing when disabled.
        """
        if self._enabled:
            snapshot = self._snapshot()
            self._stats._upsert_run(snapshot)
            return snapshot
        return None

    def finish(self) -> "dict | None":
        """Persists the final run state and returns the run dict."""
        return self.flush()
|
||||||
|
|
||||||
|
|
||||||
|
class PerfStats:
    """
    Profiler facade + JSON persistence.

    Parameters
    ----------
    path : Destination JSON file. None disables the profiler entirely
           (every recorder becomes a no-op and nothing is written).
    """

    def __init__(self, path=None):
        self._path = Path(path) if path else None
        # Serialises read-modify-write cycles between threads of this process.
        self._lock = threading.Lock()

    @property
    def enabled(self) -> bool:
        """True when a destination path was given."""
        return self._path is not None

    def begin_run(self, meta: "dict | None" = None) -> RunRecorder:
        """Starts a new run; ``meta`` is stored verbatim on the run."""
        return RunRecorder(self, meta=meta, enabled=self.enabled)

    # ------------------------------------------------------------------
    # Read / write
    # ------------------------------------------------------------------
    def all(self) -> dict:
        """
        Returns the persisted runs ({"runs": [...]}); newest first.

        A missing, unreadable or malformed file yields {"runs": []} instead
        of raising, so a damaged stats file never breaks the caller.
        """
        if not self._path or not self._path.is_file():
            return {"runs": []}
        try:
            with self._path.open("r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError as well as the
            # UnicodeDecodeError raised for a file that is not valid UTF-8.
            return {"runs": []}
        if not isinstance(data, dict) or not isinstance(data.get("runs"), list):
            return {"runs": []}
        return data

    def _upsert_run(self, run: dict) -> None:
        """
        Inserts a new run (newest first) or replaces the existing entry with
        the same runId — so incremental flushes during a run update one entry
        rather than appending a duplicate after every item.

        The file is written to a temporary sibling first and then moved over
        the destination. Errors while writing propagate to the caller.
        """
        if not self._path:
            return
        with self._lock:
            runs = self.all()["runs"]
            run_id = run.get("runId")
            for i, existing in enumerate(runs):
                # Entries that are not dicts (hand-edited file) are left
                # alone instead of crashing the lookup.
                if isinstance(existing, dict) and existing.get("runId") == run_id:
                    runs[i] = run
                    break
            else:
                runs.insert(0, run)          # newest first
            del runs[_MAX_RUNS:]             # cap history
            self._path.parent.mkdir(parents=True, exist_ok=True)
            tmp = self._path.with_suffix(self._path.suffix + ".tmp")
            with tmp.open("w", encoding="utf-8") as f:
                json.dump({"runs": runs}, f, ensure_ascii=False, indent=2)
            tmp.replace(self._path)
|
||||||
@@ -0,0 +1,160 @@
|
|||||||
|
"""
|
||||||
|
perf_web_page.py
|
||||||
|
================
|
||||||
|
|
||||||
|
Shared HTML page for browsing PerfStats output, used by both container web
|
||||||
|
UIs. ``render_perf_page(name, tabs)`` returns a standalone page that loads
|
||||||
|
``/api/perf/<name>`` and renders each run's step totals plus the nested item
|
||||||
|
tree (series -> chapter, or one person, …) and the run trigger from meta.
|
||||||
|
|
||||||
|
``tabs`` is a list of ``(label, name)`` pairs for cross-links between the
|
||||||
|
available perf datasets in that container.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
_PERF_PAGE = """<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>__PERF_NAME__ performance</title>
|
||||||
|
<style>
|
||||||
|
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
|
||||||
|
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
|
||||||
|
h2 { font-size: 1.05rem; margin: 1.4rem 0 .5rem; color:#cbd5e1; }
|
||||||
|
a { color:#60a5fa; text-decoration:none; }
|
||||||
|
a:hover { text-decoration:underline; }
|
||||||
|
.tabs { margin-bottom:1rem; }
|
||||||
|
.tabs a { margin-right:1rem; }
|
||||||
|
.tabs a.active { font-weight:bold; text-decoration:underline; }
|
||||||
|
.bar { display:flex; gap:.6rem; align-items:center; margin-bottom:1rem; flex-wrap:wrap; }
|
||||||
|
select, button { padding:.35rem .6rem; background:#222; color:#eee; border:1px solid #555; }
|
||||||
|
.summary { color:#9ca3af; margin:.3rem 0 1rem; }
|
||||||
|
table { border-collapse: collapse; width: 100%; margin-bottom:.5rem; }
|
||||||
|
th, td { border: 1px solid #333; padding: .35rem .6rem; text-align: left; }
|
||||||
|
th { background:#1d1d1d; }
|
||||||
|
td.num { text-align:right; font-variant-numeric: tabular-nums; white-space:nowrap; }
|
||||||
|
.barcell { position:relative; }
|
||||||
|
.barfill { position:absolute; left:0; top:0; bottom:0; background:#2563eb33; z-index:0; }
|
||||||
|
.barcell span { position:relative; z-index:1; }
|
||||||
|
details { margin:.2rem 0 .2rem 1rem; }
|
||||||
|
summary { cursor:pointer; padding:.2rem 0; }
|
||||||
|
.chip { color:#9ca3af; font-size:.85rem; }
|
||||||
|
.err { color:#f87171; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Performance: __PERF_NAME__ <a href="/" style="font-size:.9rem;">◂ back</a></h1>
|
||||||
|
<div class="tabs">__PERF_TABS__</div>
|
||||||
|
<div class="bar">
|
||||||
|
<label>Run: <select id="runSelect"></select></label>
|
||||||
|
<button id="reload">Reload</button>
|
||||||
|
<span class="summary" id="summary"></span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="content"></div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const PERF_NAME = "__PERF_NAME__";
|
||||||
|
let runs = [];
|
||||||
|
|
||||||
|
for (const a of document.querySelectorAll(".tabs a")) {
|
||||||
|
if (a.getAttribute("href") === "/perf/" + PERF_NAME) a.classList.add("active");
|
||||||
|
}
|
||||||
|
|
||||||
|
function fmtSecs(s) { return (s || 0).toFixed(2) + "s"; }
|
||||||
|
function fmtTime(unix) { return unix ? new Date(unix * 1000).toLocaleString() : ""; }
|
||||||
|
// Escapes a value for safe insertion into innerHTML, both as element
// content and inside a quoted attribute value. The value is stringified
// first, so numbers and undefined are accepted.
function esc(s) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  return String(s).replace(/[&<>"']/g, c => entities[c]);
}
|
||||||
|
|
||||||
|
// Builds the per-step summary table as an HTML string.
//   totals     : object mapping step name -> summed seconds (may be null)
//   grandTotal : total seconds of the run, used for the "% of run" column
// Rows are sorted by time, largest first. Each row carries a bar whose
// width is relative to the slowest step.
function stepTable(totals, grandTotal) {
  const entries = Object.entries(totals || {}).sort((a, b) => b[1] - a[1]);
  if (!entries.length) return "<p class=chip>(no steps recorded)</p>";
  // Scale bars against the slowest step; "|| 1" avoids dividing by zero.
  const max = entries[0][1] || 1;
  const rows = entries.map(([name, secs]) => {
    const pct = grandTotal ? (secs / grandTotal * 100) : 0;
    const w = secs / max * 100;
    return "<tr><td>" + esc(name) + "</td>"
      + "<td class='num'>" + fmtSecs(secs) + "</td>"
      + "<td class='num'>" + pct.toFixed(1) + "%</td>"
      + "<td class='barcell'><div class='barfill' style='width:" + w + "%'></div>"
      // An explicit entity keeps the placeholder cell non-empty.
      + "<span>&nbsp;</span></td></tr>";
  }).join("");
  return "<table><thead><tr><th>Step</th><th class=num>Total</th>"
    + "<th class=num>% of run</th><th>&nbsp;</th></tr></thead><tbody>"
    + rows + "</tbody></table>";
}
|
||||||
|
|
||||||
|
// Renders one item node as a <details> element and recurses into its
// children, so the result is a nested tree of collapsible blocks.
// The summary line shows the label, a "(failed)" marker when ok is
// exactly false, the item's total time and its steps (slowest first).
function itemNode(it) {
  const stepPairs = Object.entries(it.steps || {});
  stepPairs.sort((a, b) => b[1] - a[1]);
  const stepText = stepPairs
    .map(([stepName, stepSecs]) => esc(stepName) + " " + fmtSecs(stepSecs))
    .join(", ");
  const steps = stepText || "—";

  let failedMark = "";
  if (it.ok === false) {
    failedMark = " <span class=err>(failed)</span>";
  }

  const head = "<summary><b>" + esc(it.label) + "</b>"
    + failedMark
    + " <span class=chip>" + fmtSecs(it.totalSeconds) + " · " + steps + "</span></summary>";

  // Sort a copy so the caller's data keeps its original order.
  const children = (it.items || []).slice();
  children.sort((a, b) => b.totalSeconds - a.totalSeconds);
  const body = children.map(itemNode).join("");

  return "<details>" + head + body + "</details>";
}
|
||||||
|
|
||||||
|
// Renders one recorded run into the page: the summary line next to the run
// selector, the per-step table, and the nested per-item detail tree.
// A missing run (no runs recorded) shows a placeholder instead.
function renderRun(run) {
  const c = document.getElementById("content");
  const summary = document.getElementById("summary");
  if (!run) {
    // Clear the summary too; otherwise the line of a previously shown run
    // would stay on screen after a reload that returns no runs.
    summary.textContent = "";
    c.innerHTML = "<p class=chip>No runs recorded yet.</p>";
    return;
  }
  const trigger = (run.meta && run.meta.trigger) ? " · trigger: " + run.meta.trigger : "";
  // textContent, not innerHTML: the trigger is free text from the stats data.
  summary.textContent =
    fmtTime(run.startedAt) + " · " + fmtSecs(run.totalSeconds) + " · "
    + run.itemCount + " items · " + run.leafCount + " leaves" + trigger;

  // Slowest items first; sort a copy so the stored run is left untouched.
  const items = (run.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
  const detail = items.map(itemNode).join("") || "<p class=chip>(no items)</p>";
  c.innerHTML = "<h2>Steps (summed over all items)</h2>"
    + stepTable(run.stepTotals, run.totalSeconds)
    + "<h2>Detail</h2>"
    + detail;
}
|
||||||
|
|
||||||
|
// Rebuilds the run selector from the global "runs" array. Each option's
// value is the run's index in that array; its text shows the start time,
// the total duration and (when present) the trigger.
function renderSelect() {
  const select = document.getElementById("runSelect");
  select.innerHTML = "";
  for (const [index, run] of runs.entries()) {
    let triggerText = "";
    if (run.meta && run.meta.trigger) {
      triggerText = " " + run.meta.trigger;
    }
    const option = document.createElement("option");
    option.value = index;
    option.textContent = fmtTime(run.startedAt) + " (" + fmtSecs(run.totalSeconds) + ")" + triggerText;
    select.appendChild(option);
  }
}
|
||||||
|
|
||||||
|
// Fetches the recorded runs for the current dataset and renders the first
// entry of the returned list. Network errors, non-2xx responses and invalid
// JSON are reported in the content area instead of surfacing as an
// unhandled promise rejection.
async function load() {
  try {
    // Encode the dataset name so it cannot alter the request path.
    const r = await fetch("/api/perf/" + encodeURIComponent(PERF_NAME));
    if (!r.ok) throw new Error("HTTP " + r.status);
    const data = await r.json();
    runs = data.runs || [];
  } catch (err) {
    runs = [];
    renderSelect();
    document.getElementById("summary").textContent = "";
    const msg = document.createElement("p");
    msg.className = "err";
    // textContent keeps the error message from being parsed as HTML.
    msg.textContent = "Failed to load performance data: " + err.message;
    document.getElementById("content").replaceChildren(msg);
    return;
  }
  renderSelect();
  renderRun(runs[0]);
}
|
||||||
|
|
||||||
|
document.getElementById("runSelect").addEventListener("change", e => {
|
||||||
|
renderRun(runs[e.target.value]);
|
||||||
|
});
|
||||||
|
document.getElementById("reload").addEventListener("click", load);
|
||||||
|
load();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def render_perf_page(name: str, tabs: "list[tuple[str, str]]") -> str:
    """
    Returns the perf page HTML for dataset ``name``.

    name : dataset name. It is substituted into the page heading and into a
           double-quoted JavaScript string literal in the page script.
    tabs : list of (label, dataset_name) for the cross-link bar.

    All substituted values are escaped. The route handlers pass ``name``
    straight from the request URL, so inserting it verbatim would let a
    crafted URL inject markup or script into the page.
    """
    from html import escape
    from urllib.parse import quote

    def _safe(text) -> str:
        escaped = escape(str(text), quote=True)
        # Inside the JS string literal a backslash or a line break would
        # still corrupt the script, so encode those as well.
        for ch in ("\\", "\r", "\n"):
            escaped = escaped.replace(ch, f"&#{ord(ch)};")
        return escaped

    links = []
    for label, dataset in tabs:
        href = "/perf/" + quote(str(dataset), safe="")
        links.append(f'<a href="{href}">{_safe(label)}</a>')
    tab_html = " ".join(links)

    # Tabs are substituted first so that a dataset name can never be
    # mistaken for the tab placeholder.
    return (_PERF_PAGE
            .replace("__PERF_TABS__", tab_html)
            .replace("__PERF_NAME__", _safe(name)))
|
||||||
@@ -70,3 +70,30 @@ def person_name_with_id(name: str, *,
|
|||||||
if al_id:
|
if al_id:
|
||||||
return f"{name} (AL {al_id})"
|
return f"{name} (AL {al_id})"
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
# Recognises the trailing "(MAL <id>)" / "(AL <id>)" tag that
# person_name_with_id appends to a name (case-insensitive).
_TRACKER_ID_RE = re.compile(r"\s*\((MAL|AL)\s+(\d+)\)\s*$", re.IGNORECASE)


def parse_person_tracker_id(name: str) -> "tuple[str, int] | None":
    """
    Reads the tracker id back out of a disambiguated Kavita person name
    (the inverse of person_name_with_id).

        "Rem (MAL 118737)"  -> ("mal", 118737)
        "Subaru (AL 88311)" -> ("al", 88311)
        "Kotoyama"          -> None   (no id suffix)

    Returns a ("mal" | "al", id) tuple, or None when the name is empty or
    does not end in a tracker tag.
    """
    match = _TRACKER_ID_RE.search(name) if name else None
    if match is None:
        return None
    tracker, raw_id = match.groups()
    try:
        tracker_id = int(raw_id)
    except ValueError:
        return None
    return ("mal" if tracker.upper() == "MAL" else "al"), tracker_id
|
||||||
|
|||||||
@@ -192,14 +192,9 @@ class LightNovelOrchestrator:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return {"ok": False, "error": f"series update failed: {exc}"}
|
return {"ok": False, "error": f"series update failed: {exc}"}
|
||||||
|
|
||||||
# Persons
|
# Person sync no longer runs per series — it has its own global,
|
||||||
try:
|
# id-based updater (sync_persons / KavitaPersonUpdater.update_all_persons)
|
||||||
person_report = self._person_updater.update_for_manga(
|
# on a separate cron schedule.
|
||||||
built.get("malId"),
|
|
||||||
al_manga_id=built.get("anilistId"),
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
person_report = {"error": str(exc)}
|
|
||||||
|
|
||||||
# Relationships + collection
|
# Relationships + collection
|
||||||
try:
|
try:
|
||||||
@@ -221,10 +216,20 @@ class LightNovelOrchestrator:
|
|||||||
"title": cached_title,
|
"title": cached_title,
|
||||||
"mangabakaId": built.get("mangabakaId"),
|
"mangabakaId": built.get("mangabakaId"),
|
||||||
"series": series_report,
|
"series": series_report,
|
||||||
"persons": person_report,
|
|
||||||
"relationships": relation_report,
|
"relationships": relation_report,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Person sync (global, id-based — independent of series updates)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def sync_persons(self, *, trigger: str = "ln", perf=None) -> dict:
    """
    Runs the global, id-based person updater by delegating to the person
    updater's update_all_persons() and returns its report unchanged.

    trigger : passed through to the updater as ``trigger``.
    perf    : passed through to the updater as ``perf``.
    """
    updater = self._person_updater
    report = updater.update_all_persons(trigger=trigger, perf=perf)
    return report
|
||||||
|
|
||||||
def update_all(self, library_ids: "list[int] | None") -> dict:
|
def update_all(self, library_ids: "list[int] | None") -> dict:
|
||||||
"""Updates every cached series in the given libraries."""
|
"""Updates every cached series in the given libraries."""
|
||||||
if library_ids is None:
|
if library_ids is None:
|
||||||
|
|||||||
@@ -37,6 +37,10 @@ from flask import Flask, jsonify, request, Response
|
|||||||
|
|
||||||
from MatchesCache import MatchesCache
|
from MatchesCache import MatchesCache
|
||||||
from LightNovelMetadataBuilder import pick_thumbnail_url
|
from LightNovelMetadataBuilder import pick_thumbnail_url
|
||||||
|
from PerfWebPage import render_perf_page
|
||||||
|
|
||||||
|
# Only the person dataset exists in the LN container.
|
||||||
|
_PERF_TABS = [("persons", "person")]
|
||||||
|
|
||||||
|
|
||||||
def _int_list(values) -> list[int]:
|
def _int_list(values) -> list[int]:
|
||||||
@@ -97,7 +101,9 @@ _INDEX_HTML = r"""<!doctype html>
|
|||||||
<button id="reload">Reload</button>
|
<button id="reload">Reload</button>
|
||||||
<button id="build">Match all in libraries</button>
|
<button id="build">Match all in libraries</button>
|
||||||
<button id="updateAll" class="success">Update all in libraries</button>
|
<button id="updateAll" class="success">Update all in libraries</button>
|
||||||
|
<button id="syncPersons">Sync persons</button>
|
||||||
<button id="batchSave" class="primary">Save dirty (0)</button>
|
<button id="batchSave" class="primary">Save dirty (0)</button>
|
||||||
|
<a href="/perf/person" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
|
||||||
<span class="status" id="status"></span>
|
<span class="status" id="status"></span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -497,12 +503,25 @@ async function startUpdateAll() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Asks for confirmation, then POSTs to the person-sync endpoint. On
// success it starts status polling; any failure is shown in the status
// line.
async function startSyncPersons() {
  const confirmed = confirm("Sync all Kavita persons against MAL/AniList? May take a while.");
  if (!confirmed) {
    return;
  }
  setStatus("Person sync started");
  try {
    const response = await fetch("/api/persons/sync", { method: "POST" });
    if (response.ok) {
      startPolling();
    } else {
      // The server answers with a plain-text reason.
      throw new Error(await response.text());
    }
  } catch (err) {
    setStatus("Person sync failed: " + err.message);
  }
}
|
||||||
|
|
||||||
document.getElementById("filter").addEventListener("input", applyFilter);
|
document.getElementById("filter").addEventListener("input", applyFilter);
|
||||||
document.getElementById("libraries").addEventListener("change", applyFilter);
|
document.getElementById("libraries").addEventListener("change", applyFilter);
|
||||||
document.getElementById("reload").addEventListener("click", load);
|
document.getElementById("reload").addEventListener("click", load);
|
||||||
document.getElementById("batchSave").addEventListener("click", batchSave);
|
document.getElementById("batchSave").addEventListener("click", batchSave);
|
||||||
document.getElementById("build").addEventListener("click", startBuild);
|
document.getElementById("build").addEventListener("click", startBuild);
|
||||||
document.getElementById("updateAll").addEventListener("click", startUpdateAll);
|
document.getElementById("updateAll").addEventListener("click", startUpdateAll);
|
||||||
|
document.getElementById("syncPersons").addEventListener("click", startSyncPersons);
|
||||||
for (const th of document.querySelectorAll("th.sortable")) {
|
for (const th of document.querySelectorAll("th.sortable")) {
|
||||||
th.addEventListener("click", () => {
|
th.addEventListener("click", () => {
|
||||||
const col = th.dataset.col;
|
const col = th.dataset.col;
|
||||||
@@ -581,11 +600,13 @@ class MatchesWebApp:
|
|||||||
def __init__(self, cache: MatchesCache, *,
|
def __init__(self, cache: MatchesCache, *,
|
||||||
orchestrator=None,
|
orchestrator=None,
|
||||||
default_library_ids: "list[int] | None" = None,
|
default_library_ids: "list[int] | None" = None,
|
||||||
|
person_perf=None,
|
||||||
host: str = "0.0.0.0",
|
host: str = "0.0.0.0",
|
||||||
port: int = 8080):
|
port: int = 8080):
|
||||||
self._cache = cache
|
self._cache = cache
|
||||||
self._orchestrator = orchestrator
|
self._orchestrator = orchestrator
|
||||||
self._defaults = list(default_library_ids or [])
|
self._defaults = list(default_library_ids or [])
|
||||||
|
self._person_perf = person_perf
|
||||||
self._host = host
|
self._host = host
|
||||||
self._port = port
|
self._port = port
|
||||||
self._job = _JobState()
|
self._job = _JobState()
|
||||||
@@ -757,8 +778,38 @@ class MatchesWebApp:
|
|||||||
return Response("a job is already running", status=409)
|
return Response("a job is already running", status=409)
|
||||||
return jsonify({"started": label})
|
return jsonify({"started": label})
|
||||||
|
|
||||||
|
@app.post("/api/persons/sync")
def api_persons_sync():
    """
    Starts the person sync as a background job.

    Responds 503 when no orchestrator is configured, 409 when the job
    state refuses to start a new job, otherwise JSON naming the started job.
    """
    label = "person sync"
    if self._orchestrator is None:
        return Response("no orchestrator configured", status=503)

    def task(job: _JobState):
        # Run the sync, then log one summary line plus one line per error.
        report = self._orchestrator.sync_persons(
            trigger="ln", perf=self._person_perf)
        counters = ("updated", "skipped", "not_found", "conflicts")
        job.append(" ".join(f"{key}={report[key]}" for key in counters))
        for err in report.get("errors", []):
            job.append(f" {err}")

    started = self._job.start(label, task)
    if not started:
        return Response("a job is already running", status=409)
    return jsonify({"started": label})
|
||||||
|
|
||||||
@app.get("/api/status")
|
@app.get("/api/status")
|
||||||
def api_status():
|
def api_status():
|
||||||
snap = self._job.snapshot()
|
snap = self._job.snapshot()
|
||||||
snap["defaults"] = self._defaults
|
snap["defaults"] = self._defaults
|
||||||
return jsonify(snap)
|
return jsonify(snap)
|
||||||
|
|
||||||
|
@app.get("/perf")
@app.get("/perf/<name>")
def perf_page(name: str = "person") -> Response:
    """
    Serves the performance page for dataset ``name``.

    Plain /perf shows the "person" dataset.
    """
    html = render_perf_page(name, _PERF_TABS)
    # Use content_type, not mimetype: the value already carries a charset
    # parameter, and Werkzeug appends its own "; charset=utf-8" to text/*
    # mimetypes, which would produce a doubled charset in the header.
    return Response(html, content_type="text/html; charset=utf-8")
|
||||||
|
|
||||||
|
@app.get("/api/perf/<name>")
def api_perf(name: str):
    """
    Returns the recorded runs for dataset ``name`` as JSON.

    Only the "person" dataset is served here. Any other name, or a missing
    recorder, yields an empty run list.
    """
    recorder = self._person_perf
    if name != "person" or recorder is None:
        return jsonify({"runs": []})
    return jsonify(recorder.all())
|
||||||
|
|||||||
@@ -40,6 +40,7 @@ from __future__ import annotations
|
|||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
from contextlib import contextmanager
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@@ -65,6 +66,16 @@ except ImportError:
|
|||||||
_HAS_PIL = False
|
_HAS_PIL = False
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _no_measure():
|
||||||
|
"""No-op stand-in for a perf recorder's measure() context manager."""
|
||||||
|
yield
|
||||||
|
|
||||||
|
|
||||||
|
# Sentinel marking a per-chapter memo slot as "not computed yet".
|
||||||
|
_UNSET = object()
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Constants
|
# Constants
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
@@ -218,10 +229,22 @@ class ComicInfoBuilder:
|
|||||||
self._matches_cache = matches_cache
|
self._matches_cache = matches_cache
|
||||||
self._cover_cache = cover_cache or _default_cover_cache()
|
self._cover_cache = cover_cache or _default_cover_cache()
|
||||||
|
|
||||||
|
# Optional performance recorder (duck-typed: any object with a
|
||||||
|
# .measure(name) context manager). The mover sets this per chapter;
|
||||||
|
# when None, _measure() is a no-op so the builder stays decoupled
|
||||||
|
# from PerfStats and works standalone (e.g. the cover updater).
|
||||||
|
self.perf = None
|
||||||
|
|
||||||
self._metadata: "dict | None" = None
|
self._metadata: "dict | None" = None
|
||||||
self._pages: list[dict] = []
|
self._pages: list[dict] = []
|
||||||
self._cover_path: "Path | None" = None
|
self._cover_path: "Path | None" = None
|
||||||
self._suwayomi_data: dict = {}
|
self._suwayomi_data: dict = {}
|
||||||
|
# Per-chapter memo for _determine_volume (resolved up to 3x/chapter
|
||||||
|
# otherwise: cover download, explicit volume step, XML build).
|
||||||
|
self._volume_memo = _UNSET
|
||||||
|
# Per-series cache for full series fetches by id (parent series for
|
||||||
|
# SeriesGroup, merged-series redirects) — reused across all chapters.
|
||||||
|
self._series_by_id_cache: dict[str, dict] = {}
|
||||||
|
|
||||||
# ----- Repr -----------------------------------------------------------
|
# ----- Repr -----------------------------------------------------------
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
@@ -261,6 +284,13 @@ class ComicInfoBuilder:
|
|||||||
self._pages = []
|
self._pages = []
|
||||||
self._cover_path = None
|
self._cover_path = None
|
||||||
self._suwayomi_data = {}
|
self._suwayomi_data = {}
|
||||||
|
self._volume_memo = _UNSET
|
||||||
|
|
||||||
|
def _measure(self, name: str):
|
||||||
|
"""Times a named step on the attached recorder; no-op when unset."""
|
||||||
|
if self.perf is not None:
|
||||||
|
return self.perf.measure(name)
|
||||||
|
return _no_measure()
|
||||||
|
|
||||||
# ======================================================================
|
# ======================================================================
|
||||||
# Public XML functions
|
# Public XML functions
|
||||||
@@ -305,11 +335,13 @@ class ComicInfoBuilder:
|
|||||||
if not folder.is_dir():
|
if not folder.is_dir():
|
||||||
raise NotADirectoryError(f"Folder not found: {folder}")
|
raise NotADirectoryError(f"Folder not found: {folder}")
|
||||||
|
|
||||||
self._suwayomi_data = self._read_existing_comicinfo(folder)
|
with self._measure("read_comicinfo"):
|
||||||
|
self._suwayomi_data = self._read_existing_comicinfo(folder)
|
||||||
|
|
||||||
self._cover_path = None
|
self._cover_path = None
|
||||||
if download_cover:
|
if download_cover:
|
||||||
self._cover_path = self._download_cover(folder, cover_filename)
|
with self._measure("cover"):
|
||||||
|
self._cover_path = self._download_cover(folder, cover_filename)
|
||||||
|
|
||||||
cover_resolved = self._cover_path.resolve() if self._cover_path else None
|
cover_resolved = self._cover_path.resolve() if self._cover_path else None
|
||||||
story_images: list[Path] = []
|
story_images: list[Path] = []
|
||||||
@@ -329,20 +361,23 @@ class ComicInfoBuilder:
|
|||||||
ordered.extend((img, "Story") for img in story_images)
|
ordered.extend((img, "Story") for img in story_images)
|
||||||
|
|
||||||
self._pages = []
|
self._pages = []
|
||||||
for index, (img_path, page_type) in enumerate(ordered):
|
# Probing every page for its pixel dimensions reads each file — on a
|
||||||
width, height = self._image_dimensions(img_path)
|
# network share this is often the dominant per-chapter cost.
|
||||||
try:
|
with self._measure("image_dimensions"):
|
||||||
size = img_path.stat().st_size
|
for index, (img_path, page_type) in enumerate(ordered):
|
||||||
except OSError:
|
width, height = self._image_dimensions(img_path)
|
||||||
size = None
|
try:
|
||||||
self._pages.append({
|
size = img_path.stat().st_size
|
||||||
"image": index,
|
except OSError:
|
||||||
"type": page_type,
|
size = None
|
||||||
"width": width,
|
self._pages.append({
|
||||||
"height": height,
|
"image": index,
|
||||||
"size": size,
|
"type": page_type,
|
||||||
"double": bool(width and height and width > height),
|
"width": width,
|
||||||
})
|
"height": height,
|
||||||
|
"size": size,
|
||||||
|
"double": bool(width and height and width > height),
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"page_count": len(self._pages),
|
"page_count": len(self._pages),
|
||||||
@@ -413,12 +448,20 @@ class ComicInfoBuilder:
|
|||||||
return series
|
return series
|
||||||
|
|
||||||
def _fetch_series_by_id(self, series_id) -> dict:
|
def _fetch_series_by_id(self, series_id) -> dict:
|
||||||
|
# Cached per builder (i.e. per series): SeriesGroup resolution calls
|
||||||
|
# this for the parent on every chapter — without the cache that is
|
||||||
|
# one MangaBaka request per chapter for the same parent id.
|
||||||
|
key = str(series_id)
|
||||||
|
cached = self._series_by_id_cache.get(key)
|
||||||
|
if cached is not None:
|
||||||
|
return cached
|
||||||
url = f"{self.api_base_url}/series/{series_id}"
|
url = f"{self.api_base_url}/series/{series_id}"
|
||||||
resp = self._session.get(url, timeout=self.request_timeout)
|
resp = self._session.get(url, timeout=self.request_timeout)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json().get("data")
|
data = resp.json().get("data")
|
||||||
if not data:
|
if not data:
|
||||||
raise RuntimeError(f"Series with ID {series_id} not found.")
|
raise RuntimeError(f"Series with ID {series_id} not found.")
|
||||||
|
self._series_by_id_cache[key] = data
|
||||||
return data
|
return data
|
||||||
|
|
||||||
# ======================================================================
|
# ======================================================================
|
||||||
@@ -554,6 +597,18 @@ class ComicInfoBuilder:
|
|||||||
# Volume determination
|
# Volume determination
|
||||||
# ======================================================================
|
# ======================================================================
|
||||||
def _determine_volume(self) -> "str | None":
|
def _determine_volume(self) -> "str | None":
|
||||||
|
"""
|
||||||
|
Resolves the volume for the current chapter, memoized per chapter.
|
||||||
|
|
||||||
|
The result is reused across the three call sites per chapter (cover
|
||||||
|
download, explicit volume step, XML build); the memo is cleared
|
||||||
|
whenever the chapter or manga title changes (see _clear_results).
|
||||||
|
"""
|
||||||
|
if self._volume_memo is _UNSET:
|
||||||
|
self._volume_memo = self._resolve_volume()
|
||||||
|
return self._volume_memo
|
||||||
|
|
||||||
|
def _resolve_volume(self) -> "str | None":
|
||||||
"""
|
"""
|
||||||
Resolves the volume for the current chapter via MangaDex.
|
Resolves the volume for the current chapter via MangaDex.
|
||||||
Falls back to estimation when the chapter is absent from MangaDex.
|
Falls back to estimation when the chapter is absent from MangaDex.
|
||||||
|
|||||||
@@ -45,7 +45,6 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import io
|
import io
|
||||||
import sys
|
import sys
|
||||||
import threading
|
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
import zipfile
|
import zipfile
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@@ -67,7 +66,7 @@ from MatchesCache import MatchesCache
|
|||||||
from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
|
from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
|
||||||
_sanitize_dirname, _normalise_volume_value)
|
_sanitize_dirname, _normalise_volume_value)
|
||||||
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||||
from CronSchedule import CronSchedule
|
from PerfStats import PerfStats
|
||||||
from CoverCache import CoverCache, _IMAGE_EXTS
|
from CoverCache import CoverCache, _IMAGE_EXTS
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -136,12 +135,12 @@ class KavitaVolumeCoverUpdater:
|
|||||||
request_timeout : HTTP timeout in seconds.
|
request_timeout : HTTP timeout in seconds.
|
||||||
log_path : File that receives one line per updated chapter.
|
log_path : File that receives one line per updated chapter.
|
||||||
Default: <kavita_path>/volume_updater.log
|
Default: <kavita_path>/volume_updater.log
|
||||||
schedule : Cron expression (5 fields) defining when scans run,
|
|
||||||
e.g. "0 19 * * 1,4" = 19:00 every Monday and
|
|
||||||
Thursday. Evaluated in local time — set the TZ env
|
|
||||||
var inside Docker. Default: "0 19 * * 1,4".
|
|
||||||
cover_cache_dir : Directory for the persistent cover cache. None ->
|
cover_cache_dir : Directory for the persistent cover cache. None ->
|
||||||
temporary cache, deleted at process exit.
|
temporary cache, deleted at process exit.
|
||||||
|
perf_stats : Optional PerfStats instance for per-step timing.
|
||||||
|
|
||||||
|
Scheduling lives outside this class (see CronRunner); call update_all()
|
||||||
|
on whatever cadence you like.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@@ -152,8 +151,8 @@ class KavitaVolumeCoverUpdater:
|
|||||||
request_timeout: int = 30,
|
request_timeout: int = 30,
|
||||||
api_base_url: str = "https://api.mangabaka.dev/v1",
|
api_base_url: str = "https://api.mangabaka.dev/v1",
|
||||||
log_path=None,
|
log_path=None,
|
||||||
schedule: str = "0 19 * * 1,4",
|
cover_cache_dir=None,
|
||||||
cover_cache_dir=None):
|
perf_stats: "PerfStats | None" = None):
|
||||||
self._dst = Path(kavita_path)
|
self._dst = Path(kavita_path)
|
||||||
self._matches_cache = matches_cache
|
self._matches_cache = matches_cache
|
||||||
self._language = language
|
self._language = language
|
||||||
@@ -161,7 +160,7 @@ class KavitaVolumeCoverUpdater:
|
|||||||
self._api_base_url = api_base_url.rstrip("/")
|
self._api_base_url = api_base_url.rstrip("/")
|
||||||
self._log_path = (Path(log_path) if log_path
|
self._log_path = (Path(log_path) if log_path
|
||||||
else self._dst / "volume_updater.log")
|
else self._dst / "volume_updater.log")
|
||||||
self._cron = CronSchedule(schedule)
|
self._perf = perf_stats or PerfStats(None)
|
||||||
|
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
session.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0")
|
session.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0")
|
||||||
@@ -178,51 +177,6 @@ class KavitaVolumeCoverUpdater:
|
|||||||
self._cover_cache = CoverCache(
|
self._cover_cache = CoverCache(
|
||||||
cover_cache_dir, session=session, request_timeout=request_timeout)
|
cover_cache_dir, session=session, request_timeout=request_timeout)
|
||||||
|
|
||||||
self._stop = threading.Event()
|
|
||||||
self._thread: "threading.Thread | None" = None
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Cron API (mirrors SuwayomiFolderWatcher)
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
def start(self) -> None:
|
|
||||||
"""Starts the periodic scan thread. Non-blocking."""
|
|
||||||
if self._thread is not None and self._thread.is_alive():
|
|
||||||
return
|
|
||||||
self._stop.clear()
|
|
||||||
self._thread = threading.Thread(
|
|
||||||
target=self._loop, name="KavitaVolumeCoverUpdater", daemon=True)
|
|
||||||
self._thread.start()
|
|
||||||
print(f"[{_now()}] [updater] scanning {self._dst} "
|
|
||||||
f"on cron '{self._cron.expression}'", flush=True)
|
|
||||||
|
|
||||||
def stop(self) -> None:
|
|
||||||
"""Stops the scan thread (current scan finishes its series first)."""
|
|
||||||
self._stop.set()
|
|
||||||
if self._thread is not None:
|
|
||||||
self._thread.join(timeout=10)
|
|
||||||
|
|
||||||
def wait(self) -> None:
|
|
||||||
"""Blocks the calling thread until stop() is invoked."""
|
|
||||||
self._stop.wait()
|
|
||||||
|
|
||||||
def _loop(self) -> None:
|
|
||||||
while not self._stop.is_set():
|
|
||||||
next_run = self._cron.next_after(datetime.now())
|
|
||||||
wait = max(0.0, (next_run - datetime.now()).total_seconds())
|
|
||||||
print(f"[{_now()}] [updater] next scheduled scan: "
|
|
||||||
f"{next_run.isoformat(timespec='minutes')}", flush=True)
|
|
||||||
if self._stop.wait(wait):
|
|
||||||
break
|
|
||||||
|
|
||||||
try:
|
|
||||||
summary = self.update_all()
|
|
||||||
print(f"[{_now()}] [updater] scan done: "
|
|
||||||
f"{summary['series_updated']} series / "
|
|
||||||
f"{summary['chapters_updated']} chapters updated",
|
|
||||||
flush=True)
|
|
||||||
except Exception as exc:
|
|
||||||
print(f"[{_now()}] [updater] scan ERROR: {exc}", flush=True)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Public scan API
|
# Public scan API
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -243,23 +197,25 @@ class KavitaVolumeCoverUpdater:
|
|||||||
self._vol_resolver.clear_cache()
|
self._vol_resolver.clear_cache()
|
||||||
self._works_resolver.clear_cache()
|
self._works_resolver.clear_cache()
|
||||||
|
|
||||||
for series_dir in sorted(self._dst.iterdir()):
|
run = self._perf.begin_run()
|
||||||
if self._stop.is_set():
|
try:
|
||||||
break
|
for series_dir in sorted(self._dst.iterdir()):
|
||||||
if not series_dir.is_dir():
|
if not series_dir.is_dir():
|
||||||
continue
|
continue
|
||||||
summary["series_scanned"] += 1
|
summary["series_scanned"] += 1
|
||||||
try:
|
try:
|
||||||
updated = self.update_series(series_dir)
|
updated = self.update_series(series_dir, run)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
|
print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
|
||||||
continue
|
continue
|
||||||
if updated:
|
if updated:
|
||||||
summary["series_updated"] += 1
|
summary["series_updated"] += 1
|
||||||
summary["chapters_updated"] += updated
|
summary["chapters_updated"] += updated
|
||||||
|
finally:
|
||||||
|
run.finish()
|
||||||
return summary
|
return summary
|
||||||
|
|
||||||
def update_series(self, series_dir: Path) -> int:
|
def update_series(self, series_dir: Path, run=None) -> int:
|
||||||
"""
|
"""
|
||||||
Updates one series folder. Returns the number of updated chapters.
|
Updates one series folder. Returns the number of updated chapters.
|
||||||
|
|
||||||
@@ -300,20 +256,24 @@ class KavitaVolumeCoverUpdater:
|
|||||||
md = builder.fetch_metadata()
|
md = builder.fetch_metadata()
|
||||||
series_id = str(md.get("id") or "")
|
series_id = str(md.get("id") or "")
|
||||||
|
|
||||||
|
series_rec = (run or self._perf.begin_run()).begin_item(series_dir.name)
|
||||||
|
|
||||||
# Resolve volumes for all null-volume chapters first (API only).
|
# Resolve volumes for all null-volume chapters first (API only).
|
||||||
updates: dict[str, dict] = {} # num -> {"volume": str, "cover": tuple|None}
|
updates: dict[str, dict] = {} # num -> {"volume": str, "cover": tuple|None}
|
||||||
for num in sorted(missing, key=_chapter_sort_value):
|
with series_rec.measure("resolve_volumes"):
|
||||||
builder.chapter = num
|
for num in sorted(missing, key=_chapter_sort_value):
|
||||||
try:
|
builder.chapter = num
|
||||||
volume = builder._determine_volume()
|
try:
|
||||||
except Exception:
|
volume = builder._determine_volume()
|
||||||
volume = None
|
except Exception:
|
||||||
if not volume:
|
volume = None
|
||||||
continue
|
if not volume:
|
||||||
updates[num] = {"volume": volume,
|
continue
|
||||||
"cover": self._fetch_cover(series_id, volume)}
|
updates[num] = {"volume": volume,
|
||||||
|
"cover": self._fetch_cover(series_id, volume)}
|
||||||
|
|
||||||
if not updates:
|
if not updates:
|
||||||
|
series_rec.finish(ok=True)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
first = min(chapters, key=_chapter_sort_value)
|
first = min(chapters, key=_chapter_sort_value)
|
||||||
@@ -328,10 +288,13 @@ class KavitaVolumeCoverUpdater:
|
|||||||
continue
|
continue
|
||||||
# The first chapter gets a full metadata rebuild (Kavita reads
|
# The first chapter gets a full metadata rebuild (Kavita reads
|
||||||
# series metadata from it); other chapters only a volume edit.
|
# series metadata from it); other chapters only a volume edit.
|
||||||
ok, cover_swapped = self._apply_update(
|
chap_rec = series_rec.begin_item(num)
|
||||||
cbz, builder, num,
|
with chap_rec.measure("archive_rewrite"):
|
||||||
volume=up["volume"], cover=up["cover"],
|
ok, cover_swapped = self._apply_update(
|
||||||
full_rebuild=(num == first))
|
cbz, builder, num,
|
||||||
|
volume=up["volume"], cover=up["cover"],
|
||||||
|
full_rebuild=(num == first))
|
||||||
|
chap_rec.finish(ok=ok)
|
||||||
if not ok:
|
if not ok:
|
||||||
continue
|
continue
|
||||||
entry["volume"] = _normalise_volume_value(up["volume"])
|
entry["volume"] = _normalise_volume_value(up["volume"])
|
||||||
@@ -346,15 +309,19 @@ class KavitaVolumeCoverUpdater:
|
|||||||
first_entry = chapters.get(first) or {}
|
first_entry = chapters.get(first) or {}
|
||||||
cbz = series_dir / (first_entry.get("archiveName") or "")
|
cbz = series_dir / (first_entry.get("archiveName") or "")
|
||||||
if first_entry.get("archiveName") and cbz.is_file():
|
if first_entry.get("archiveName") and cbz.is_file():
|
||||||
ok, _ = self._apply_update(
|
chap_rec = series_rec.begin_item(f"{first} (refresh)")
|
||||||
cbz, builder, first,
|
with chap_rec.measure("archive_rewrite"):
|
||||||
volume=None, cover=None, full_rebuild=True)
|
ok, _ = self._apply_update(
|
||||||
|
cbz, builder, first,
|
||||||
|
volume=None, cover=None, full_rebuild=True)
|
||||||
|
chap_rec.finish(ok=ok)
|
||||||
if ok:
|
if ok:
|
||||||
self._log(f"{series_dir.name} | chapter {first} | "
|
self._log(f"{series_dir.name} | chapter {first} | "
|
||||||
f"first-chapter metadata refreshed | {cbz.name}")
|
f"first-chapter metadata refreshed | {cbz.name}")
|
||||||
|
|
||||||
if updated:
|
if updated:
|
||||||
_save_chapter_index(series_dir, index)
|
_save_chapter_index(series_dir, index)
|
||||||
|
series_rec.finish(ok=True)
|
||||||
return updated
|
return updated
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -545,10 +512,7 @@ if __name__ == "__main__":
|
|||||||
matches_cache=MatchesCache(MATCHES_PATH),
|
matches_cache=MatchesCache(MATCHES_PATH),
|
||||||
)
|
)
|
||||||
|
|
||||||
# One-shot scan (no cron thread):
|
# One-shot scan. Scheduling is handled externally via CronRunner
|
||||||
|
# (see main_manga.py).
|
||||||
summary = updater.update_all()
|
summary = updater.update_all()
|
||||||
print(f"\n[updater] {summary}")
|
print(f"\n[updater] {summary}")
|
||||||
|
|
||||||
# Or run on the cron schedule (default: 19:00 every Mon + Thu):
|
|
||||||
# updater.start()
|
|
||||||
# updater.wait()
|
|
||||||
|
|||||||
@@ -93,6 +93,9 @@ class MangaDexVolumeResolver:
|
|||||||
self._cache: dict[str, dict] = {}
|
self._cache: dict[str, dict] = {}
|
||||||
# Cache: manga_id -> {relation_type: [title, ...]}
|
# Cache: manga_id -> {relation_type: [title, ...]}
|
||||||
self._relations_cache: dict[str, dict] = {}
|
self._relations_cache: dict[str, dict] = {}
|
||||||
|
# Cache: title_lower -> manga_id (or None) — avoids repeating the
|
||||||
|
# MangaDex search for every chapter of the same series.
|
||||||
|
self._id_cache: dict[str, "str | None"] = {}
|
||||||
|
|
||||||
# ----------------------------------------------------------------------
|
# ----------------------------------------------------------------------
|
||||||
# Locate the manga ID
|
# Locate the manga ID
|
||||||
@@ -105,15 +108,25 @@ class MangaDexVolumeResolver:
|
|||||||
if not title or not title.strip():
|
if not title or not title.strip():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
resp = self._session.get(
|
key = title.strip().lower()
|
||||||
f"{self.base_url}/manga",
|
if key in self._id_cache:
|
||||||
params={"title": title, "limit": 5,
|
return self._id_cache[key]
|
||||||
"contentRating[]": ["safe", "suggestive",
|
|
||||||
"erotica", "pornographic"]},
|
try:
|
||||||
timeout=self.request_timeout)
|
resp = self._session.get(
|
||||||
resp.raise_for_status()
|
f"{self.base_url}/manga",
|
||||||
results = resp.json().get("data") or []
|
params={"title": title, "limit": 5,
|
||||||
|
"contentRating[]": ["safe", "suggestive",
|
||||||
|
"erotica", "pornographic"]},
|
||||||
|
timeout=self.request_timeout)
|
||||||
|
resp.raise_for_status()
|
||||||
|
results = resp.json().get("data") or []
|
||||||
|
except requests.RequestException:
|
||||||
|
# Don't cache transient failures — allow a retry next time.
|
||||||
|
return None
|
||||||
|
|
||||||
if not results:
|
if not results:
|
||||||
|
self._id_cache[key] = None
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def score(entry) -> float:
|
def score(entry) -> float:
|
||||||
@@ -130,7 +143,9 @@ class MangaDexVolumeResolver:
|
|||||||
return best
|
return best
|
||||||
|
|
||||||
results.sort(key=score, reverse=True)
|
results.sort(key=score, reverse=True)
|
||||||
return results[0].get("id")
|
manga_id = results[0].get("id")
|
||||||
|
self._id_cache[key] = manga_id
|
||||||
|
return manga_id
|
||||||
|
|
||||||
# ----------------------------------------------------------------------
|
# ----------------------------------------------------------------------
|
||||||
# Main function: retrieve and return volume / chapter data
|
# Main function: retrieve and return volume / chapter data
|
||||||
|
|||||||
@@ -30,6 +30,10 @@ from flask import Flask, jsonify, request, Response
|
|||||||
|
|
||||||
from MatchesCache import MatchesCache
|
from MatchesCache import MatchesCache
|
||||||
from ComicInfoBuilder import _pick_thumbnail_url
|
from ComicInfoBuilder import _pick_thumbnail_url
|
||||||
|
from PerfWebPage import render_perf_page
|
||||||
|
|
||||||
|
# Cross-link tabs shown on every perf page in the manga container.
|
||||||
|
_PERF_TABS = [("move", "move"), ("volume/cover", "volume"), ("persons", "person")]
|
||||||
|
|
||||||
|
|
||||||
_INDEX_HTML = """<!doctype html>
|
_INDEX_HTML = """<!doctype html>
|
||||||
@@ -71,6 +75,8 @@ _INDEX_HTML = """<!doctype html>
|
|||||||
<button id="batchSave" class="primary">Save dirty (0)</button>
|
<button id="batchSave" class="primary">Save dirty (0)</button>
|
||||||
<button id="build">Build all (rescan)</button>
|
<button id="build">Build all (rescan)</button>
|
||||||
<button id="move">Start move</button>
|
<button id="move">Start move</button>
|
||||||
|
<button id="syncPersons">Sync persons</button>
|
||||||
|
<a href="/perf/move" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
|
||||||
<span class="status" id="status"></span>
|
<span class="status" id="status"></span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -341,6 +347,23 @@ document.getElementById("move").addEventListener("click", async () => {
|
|||||||
btn.disabled = false;
|
btn.disabled = false;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
// "Sync persons" button: after the user confirms, POST to /api/persons/sync
// and show the returned counters (or the failure text) in the status line.
document.getElementById("syncPersons").addEventListener("click", async () => {
  const confirmed = confirm("Sync all Kavita persons against MAL/AniList? May take a while.");
  if (!confirmed) return;

  const syncButton = document.getElementById("syncPersons");
  // Keep the button disabled while the request is in flight.
  syncButton.disabled = true;
  setStatus("Syncing persons… (running on the server)");
  try {
    const response = await fetch("/api/persons/sync", { method: "POST" });
    if (!response.ok) {
      // The body text of a non-OK response becomes the error message.
      throw new Error(await response.text());
    }
    const report = await response.json();
    const parts = [
      report.updated + " updated",
      report.skipped + " skipped",
      report.not_found + " not found",
      report.conflicts + " conflicts",
    ];
    setStatus("Persons: " + parts.join(", "));
  } catch (err) {
    setStatus("Person sync failed: " + err.message);
  } finally {
    // Re-enable the button on success and on failure alike.
    syncButton.disabled = false;
  }
});
|
||||||
for (const th of document.querySelectorAll("th.sortable")) {
|
for (const th of document.querySelectorAll("th.sortable")) {
|
||||||
th.addEventListener("click", () => {
|
th.addEventListener("click", () => {
|
||||||
const col = th.dataset.col;
|
const col = th.dataset.col;
|
||||||
@@ -357,6 +380,8 @@ load();
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class MatchesWebApp:
|
class MatchesWebApp:
|
||||||
"""
|
"""
|
||||||
Flask app exposing the MatchesCache. `mover` is required when you want
|
Flask app exposing the MatchesCache. `mover` is required when you want
|
||||||
@@ -367,14 +392,22 @@ class MatchesWebApp:
|
|||||||
|
|
||||||
def __init__(self, cache: MatchesCache, *,
|
def __init__(self, cache: MatchesCache, *,
|
||||||
mover=None,
|
mover=None,
|
||||||
|
person_updater=None,
|
||||||
|
person_trigger: str = "web",
|
||||||
|
perf_stats=None,
|
||||||
host: str = "0.0.0.0",
|
host: str = "0.0.0.0",
|
||||||
port: int = 8080):
|
port: int = 8080):
|
||||||
self._cache = cache
|
self._cache = cache
|
||||||
self._mover = mover
|
self._mover = mover
|
||||||
|
self._person_updater = person_updater
|
||||||
|
self._person_trigger = person_trigger
|
||||||
|
# perf_stats: dict {name -> PerfStats}, e.g. {"move", "volume", "person"}.
|
||||||
|
self._perf = perf_stats or {}
|
||||||
self._host = host
|
self._host = host
|
||||||
self._port = port
|
self._port = port
|
||||||
self._build_lock = threading.Lock()
|
self._build_lock = threading.Lock()
|
||||||
self._move_lock = threading.Lock()
|
self._move_lock = threading.Lock()
|
||||||
|
self._person_lock = threading.Lock()
|
||||||
self._app = Flask(__name__)
|
self._app = Flask(__name__)
|
||||||
self._thread: "threading.Thread | None" = None
|
self._thread: "threading.Thread | None" = None
|
||||||
self._register_routes()
|
self._register_routes()
|
||||||
@@ -498,3 +531,31 @@ class MatchesWebApp:
|
|||||||
finally:
|
finally:
|
||||||
self._move_lock.release()
|
self._move_lock.release()
|
||||||
return jsonify({"results": results})
|
return jsonify({"results": results})
|
||||||
|
|
||||||
|
@app.post("/api/persons/sync")
def api_persons_sync():
    """Run a person sync on demand and return its report as JSON.

    Responses:
        503 - no person updater was passed to the web app.
        409 - another person sync currently holds the lock.
        500 - the updater raised; the body carries the exception text.
        200 - JSON-encoded report returned by the updater.
    """
    updater = self._person_updater
    if updater is None:
        return Response("no person updater configured", status=503)

    lock = self._person_lock
    # Non-blocking acquire: a second request is rejected, not queued.
    if not lock.acquire(blocking=False):
        return Response("person sync already running", status=409)

    failure = None
    sync_report = None
    try:
        sync_report = updater.update_all_persons(
            trigger=self._person_trigger,
            perf=self._perf.get("person"))
    except Exception as exc:
        failure = exc
    finally:
        # Always free the lock, whether the sync succeeded or raised.
        lock.release()

    if failure is not None:
        return Response(f"person sync failed: {failure}", status=500)
    return jsonify(sync_report)
|
||||||
|
|
||||||
|
# Perf pages: /perf (move) + /perf/<name> for the updaters.
|
||||||
|
@app.get("/perf")
@app.get("/perf/<name>")
def perf_page(name: str = "move") -> Response:
    """Serve the HTML performance page for one stats category.

    Registered on both ``/perf`` and ``/perf/<name>``; when the URL has
    no ``<name>`` segment the default ``"move"`` is used.
    """
    html = render_perf_page(name, _PERF_TABS)
    # Hand over the complete header via ``content_type``. Werkzeug appends
    # "; charset=utf-8" to every text/* ``mimetype`` on its own, so a
    # mimetype that already carries a charset would end up with the
    # parameter duplicated in the Content-Type header.
    return Response(html, content_type="text/html; charset=utf-8")
|
||||||
|
|
||||||
|
@app.get("/api/perf/<name>")
def api_perf(name: str):
    """Return the recorded perf data for ``name`` as JSON.

    A category with no stats object configured yields ``{"runs": []}``.
    """
    recorder = self._perf.get(name)
    if recorder is None:
        payload = {"runs": []}
    else:
        payload = recorder.all()
    return jsonify(payload)
|
||||||
|
|||||||
+56
-61
@@ -64,11 +64,10 @@ from MangadexVolumeResolver import MangaDexVolumeResolver
|
|||||||
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
||||||
from MALResolver import MALResolver
|
from MALResolver import MALResolver
|
||||||
from AniListResolver import AniListResolver
|
from AniListResolver import AniListResolver
|
||||||
from KavitaClient import KavitaClient
|
|
||||||
from KavitaPersonUpdater import KavitaPersonUpdater
|
|
||||||
from MatchesCache import MatchesCache
|
from MatchesCache import MatchesCache
|
||||||
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||||
from CoverCache import CoverCache, _IMAGE_EXTS
|
from CoverCache import CoverCache, _IMAGE_EXTS
|
||||||
|
from PerfStats import PerfStats
|
||||||
|
|
||||||
|
|
||||||
_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
|
_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
|
||||||
@@ -305,28 +304,30 @@ class SuwayomiMover:
|
|||||||
Expected layout: <root>/<Source>/<Title>/<Chapter N>/
|
Expected layout: <root>/<Source>/<Title>/<Chapter N>/
|
||||||
kavita_path : Root of the Kavita library.
|
kavita_path : Root of the Kavita library.
|
||||||
Series sub-directories are created automatically.
|
Series sub-directories are created automatically.
|
||||||
kavita_base_url : Kavita server URL — required only for person sync,
|
|
||||||
e.g. "http://192.168.2.2:5000".
|
|
||||||
kavita_api_key : Kavita API key — required only for person sync.
|
|
||||||
language : ComicInfo LanguageISO and SeriesSort language ("en").
|
language : ComicInfo LanguageISO and SeriesSort language ("en").
|
||||||
request_timeout : HTTP timeout in seconds for all API / image requests.
|
request_timeout : HTTP timeout in seconds for all API / image requests.
|
||||||
delete_source : Remove the source chapter folder after successful pack.
|
delete_source : Remove the source chapter folder after successful pack.
|
||||||
cover_cache_dir : Directory for the persistent cover cache. None ->
|
cover_cache_dir : Directory for the persistent cover cache. None ->
|
||||||
temporary cache, deleted at process exit.
|
temporary cache, deleted at process exit.
|
||||||
|
perf_stats : Optional PerfStats instance for per-step timing. None
|
||||||
|
(default) disables profiling.
|
||||||
|
|
||||||
|
Note: Kavita person sync is no longer done here — it runs as a separate,
|
||||||
|
global, id-based updater on its own cron schedule (KavitaPersonUpdater).
|
||||||
|
The mover only touches MangaBaka / MangaDex / MAL / AniList.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
suwayomi_path,
|
suwayomi_path,
|
||||||
kavita_path,
|
kavita_path,
|
||||||
*,
|
*,
|
||||||
kavita_base_url: "str | None" = None,
|
|
||||||
kavita_api_key: "str | None" = None,
|
|
||||||
language: str = "en",
|
language: str = "en",
|
||||||
request_timeout: int = 30,
|
request_timeout: int = 30,
|
||||||
delete_source: bool = True,
|
delete_source: bool = True,
|
||||||
matches_cache: "MatchesCache | None" = None,
|
matches_cache: "MatchesCache | None" = None,
|
||||||
api_base_url: str = "https://api.mangabaka.dev/v1",
|
api_base_url: str = "https://api.mangabaka.dev/v1",
|
||||||
cover_cache_dir=None):
|
cover_cache_dir=None,
|
||||||
|
perf_stats: "PerfStats | None" = None):
|
||||||
self._src = Path(suwayomi_path)
|
self._src = Path(suwayomi_path)
|
||||||
self._dst = Path(kavita_path)
|
self._dst = Path(kavita_path)
|
||||||
self._language = language
|
self._language = language
|
||||||
@@ -334,6 +335,7 @@ class SuwayomiMover:
|
|||||||
self._delete_source = delete_source
|
self._delete_source = delete_source
|
||||||
self._matches_cache = matches_cache
|
self._matches_cache = matches_cache
|
||||||
self._api_base_url = api_base_url.rstrip("/")
|
self._api_base_url = api_base_url.rstrip("/")
|
||||||
|
self._perf = perf_stats or PerfStats(None)
|
||||||
|
|
||||||
# Shared HTTP session and resolvers — reused across all series/chapters
|
# Shared HTTP session and resolvers — reused across all series/chapters
|
||||||
# to maximise cache hits and minimise API round-trips.
|
# to maximise cache hits and minimise API round-trips.
|
||||||
@@ -352,16 +354,6 @@ class SuwayomiMover:
|
|||||||
self._cover_cache = CoverCache(
|
self._cover_cache = CoverCache(
|
||||||
cover_cache_dir, session=session, request_timeout=request_timeout)
|
cover_cache_dir, session=session, request_timeout=request_timeout)
|
||||||
|
|
||||||
self._person_updater: "KavitaPersonUpdater | None" = None
|
|
||||||
if kavita_base_url and kavita_api_key:
|
|
||||||
kavita_client = KavitaClient(
|
|
||||||
kavita_base_url, kavita_api_key,
|
|
||||||
request_timeout=request_timeout)
|
|
||||||
self._person_updater = KavitaPersonUpdater(
|
|
||||||
kavita_client,
|
|
||||||
mal_resolver=self._mal,
|
|
||||||
al_resolver=self._al)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Public API
|
# Public API
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -376,15 +368,19 @@ class SuwayomiMover:
|
|||||||
dict from _process_series_dir.
|
dict from _process_series_dir.
|
||||||
"""
|
"""
|
||||||
results: dict = {}
|
results: dict = {}
|
||||||
for source_dir in sorted(self._src.iterdir()):
|
run = self._perf.begin_run()
|
||||||
if not source_dir.is_dir():
|
try:
|
||||||
continue
|
for source_dir in sorted(self._src.iterdir()):
|
||||||
for manga_dir in sorted(source_dir.iterdir()):
|
if not source_dir.is_dir():
|
||||||
if not manga_dir.is_dir():
|
|
||||||
continue
|
continue
|
||||||
title = manga_dir.name
|
for manga_dir in sorted(source_dir.iterdir()):
|
||||||
print(f"[SuwayomiMover] {title}")
|
if not manga_dir.is_dir():
|
||||||
results[title] = self._process_series_dir(manga_dir)
|
continue
|
||||||
|
title = manga_dir.name
|
||||||
|
print(f"[SuwayomiMover] {title}")
|
||||||
|
results[title] = self._process_series_dir(manga_dir, run)
|
||||||
|
finally:
|
||||||
|
run.finish()
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def process_series(self, manga_title: str) -> dict:
|
def process_series(self, manga_title: str) -> dict:
|
||||||
@@ -400,7 +396,11 @@ class SuwayomiMover:
|
|||||||
continue
|
continue
|
||||||
candidate = source_dir / manga_title
|
candidate = source_dir / manga_title
|
||||||
if candidate.is_dir():
|
if candidate.is_dir():
|
||||||
return self._process_series_dir(candidate)
|
run = self._perf.begin_run()
|
||||||
|
try:
|
||||||
|
return self._process_series_dir(candidate, run)
|
||||||
|
finally:
|
||||||
|
run.finish()
|
||||||
raise FileNotFoundError(
|
raise FileNotFoundError(
|
||||||
f"No Suwayomi directory found for '{manga_title}' under {self._src}")
|
f"No Suwayomi directory found for '{manga_title}' under {self._src}")
|
||||||
|
|
||||||
@@ -487,8 +487,9 @@ class SuwayomiMover:
|
|||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Internal: series
|
# Internal: series
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def _process_series_dir(self, manga_dir: Path) -> dict:
|
def _process_series_dir(self, manga_dir: Path, run=None) -> dict:
|
||||||
manga_title = manga_dir.name
|
manga_title = manga_dir.name
|
||||||
|
series_rec = (run or self._perf.begin_run()).begin_item(manga_title)
|
||||||
|
|
||||||
chapter_dirs = sorted(
|
chapter_dirs = sorted(
|
||||||
(d for d in manga_dir.iterdir() if d.is_dir()),
|
(d for d in manga_dir.iterdir() if d.is_dir()),
|
||||||
@@ -539,7 +540,8 @@ class SuwayomiMover:
|
|||||||
md: "dict | None" = None
|
md: "dict | None" = None
|
||||||
mangabaka_title = manga_title
|
mangabaka_title = manga_title
|
||||||
try:
|
try:
|
||||||
md = builder.fetch_metadata()
|
with series_rec.measure("fetch_metadata"):
|
||||||
|
md = builder.fetch_metadata()
|
||||||
mangabaka_title = md.get("title") or manga_title
|
mangabaka_title = md.get("title") or manga_title
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
print(f" [warn] metadata fetch failed: {exc}")
|
print(f" [warn] metadata fetch failed: {exc}")
|
||||||
@@ -571,7 +573,7 @@ class SuwayomiMover:
|
|||||||
chapter_results: list[dict] = []
|
chapter_results: list[dict] = []
|
||||||
for chapter_dir, _fields, chapter_num in pending:
|
for chapter_dir, _fields, chapter_num in pending:
|
||||||
result = self._process_chapter(
|
result = self._process_chapter(
|
||||||
builder, chapter_num, chapter_dir, dest_series)
|
builder, chapter_num, chapter_dir, dest_series, series_rec)
|
||||||
chapter_results.append(result)
|
chapter_results.append(result)
|
||||||
status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
|
status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
|
||||||
print(f" Chapter {chapter_num}: {status}")
|
print(f" Chapter {chapter_num}: {status}")
|
||||||
@@ -582,25 +584,11 @@ class SuwayomiMover:
|
|||||||
}
|
}
|
||||||
_save_chapter_index(dest_series, chapter_index)
|
_save_chapter_index(dest_series, chapter_index)
|
||||||
|
|
||||||
# Sync Kavita persons once per series.
|
# Person sync no longer runs here — it has its own global,
|
||||||
# Both MAL and AniList IDs come from MangaBaka's source map;
|
# id-based updater on a separate cron schedule (see
|
||||||
# AniList is used as fallback when MAL returns no characters/staff.
|
# KavitaPersonUpdater.update_all_persons).
|
||||||
person_result: "dict | None" = None
|
series_rec.finish()
|
||||||
if self._person_updater:
|
return {"chapters": chapter_results}
|
||||||
mal_id = ((ComicInfoBuilder._mal_id_from_source(md) if md else None)
|
|
||||||
or self._mal.find_mal_id(builder_title))
|
|
||||||
al_id = ComicInfoBuilder._al_id_from_source(md) if md else None
|
|
||||||
if mal_id or al_id:
|
|
||||||
try:
|
|
||||||
person_result = self._person_updater.update_for_manga(
|
|
||||||
mal_id, al_manga_id=al_id)
|
|
||||||
print(f" Persons: chars={person_result['characters'].get('updated')} "
|
|
||||||
f"staff={person_result['staff'].get('updated')}")
|
|
||||||
except Exception as exc:
|
|
||||||
person_result = {"error": str(exc)}
|
|
||||||
print(f" Persons: ERROR {exc}")
|
|
||||||
|
|
||||||
return {"chapters": chapter_results, "persons": person_result}
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Internal: chapter
|
# Internal: chapter
|
||||||
@@ -609,7 +597,8 @@ class SuwayomiMover:
|
|||||||
builder: ComicInfoBuilder,
|
builder: ComicInfoBuilder,
|
||||||
chapter_num: str,
|
chapter_num: str,
|
||||||
chapter_dir: Path,
|
chapter_dir: Path,
|
||||||
dest_series: Path) -> dict:
|
dest_series: Path,
|
||||||
|
series_rec=None) -> dict:
|
||||||
"""
|
"""
|
||||||
Generates ComicInfo.xml for one chapter, packs it to CBZ, and
|
Generates ComicInfo.xml for one chapter, packs it to CBZ, and
|
||||||
optionally removes the source folder.
|
optionally removes the source folder.
|
||||||
@@ -619,6 +608,11 @@ class SuwayomiMover:
|
|||||||
<Pages> element correctly points to the front cover).
|
<Pages> element correctly points to the front cover).
|
||||||
"""
|
"""
|
||||||
cbz_path = dest_series / f"{chapter_dir.name}.cbz"
|
cbz_path = dest_series / f"{chapter_dir.name}.cbz"
|
||||||
|
chap_rec = (series_rec or self._perf.begin_run().begin_item("")
|
||||||
|
).begin_item(chapter_num)
|
||||||
|
# add_pages_from_folder records its own sub-steps on this recorder.
|
||||||
|
builder.perf = chap_rec
|
||||||
|
ok = False
|
||||||
try:
|
try:
|
||||||
builder.chapter = chapter_num
|
builder.chapter = chapter_num
|
||||||
builder.add_pages_from_folder(chapter_dir, cover_filename="000")
|
builder.add_pages_from_folder(chapter_dir, cover_filename="000")
|
||||||
@@ -626,32 +620,35 @@ class SuwayomiMover:
|
|||||||
# by add_pages_from_folder, so it's effectively free. Used by
|
# by add_pages_from_folder, so it's effectively free. Used by
|
||||||
# the chapter index in the Kavita destination folder.
|
# the chapter index in the Kavita destination folder.
|
||||||
try:
|
try:
|
||||||
volume = builder._determine_volume()
|
with chap_rec.measure("volume"):
|
||||||
|
volume = builder._determine_volume()
|
||||||
except Exception:
|
except Exception:
|
||||||
volume = None
|
volume = None
|
||||||
builder.save_xml(chapter_dir)
|
with chap_rec.measure("save_xml"):
|
||||||
_pack_to_cbz(chapter_dir, cbz_path)
|
builder.save_xml(chapter_dir)
|
||||||
|
with chap_rec.measure("pack_cbz"):
|
||||||
|
_pack_to_cbz(chapter_dir, cbz_path)
|
||||||
if self._delete_source:
|
if self._delete_source:
|
||||||
shutil.rmtree(chapter_dir)
|
with chap_rec.measure("delete_source"):
|
||||||
|
shutil.rmtree(chapter_dir)
|
||||||
|
ok = True
|
||||||
return {"chapter": chapter_num, "cbz": str(cbz_path),
|
return {"chapter": chapter_num, "cbz": str(cbz_path),
|
||||||
"ok": True, "volume": volume}
|
"ok": True, "volume": volume}
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return {"chapter": chapter_num, "cbz": str(cbz_path),
|
return {"chapter": chapter_num, "cbz": str(cbz_path),
|
||||||
"ok": False, "error": str(exc)}
|
"ok": False, "error": str(exc)}
|
||||||
|
finally:
|
||||||
|
builder.perf = None
|
||||||
|
chap_rec.finish(ok=ok)
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Usage example
|
# Usage example
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import os
|
|
||||||
|
|
||||||
# Local (no-Docker) smoke test. Adjust paths to your environment.
|
# Local (no-Docker) smoke test. Adjust paths to your environment.
|
||||||
# Set the KAVITA_API_KEY env var — never commit API keys to the repo.
|
|
||||||
SUWAYOMI_PATH = r"M:\config\downloads\mangas"
|
SUWAYOMI_PATH = r"M:\config\downloads\mangas"
|
||||||
KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
|
KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
|
||||||
KAVITA_URL = "http://192.168.2.2:5000"
|
|
||||||
KAVITA_KEY = os.environ.get("KAVITA_API_KEY", "")
|
|
||||||
|
|
||||||
# matches.json lives next to this script during local testing.
|
# matches.json lives next to this script during local testing.
|
||||||
MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"
|
MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"
|
||||||
@@ -660,8 +657,6 @@ if __name__ == "__main__":
|
|||||||
mover = SuwayomiMover(
|
mover = SuwayomiMover(
|
||||||
SUWAYOMI_PATH,
|
SUWAYOMI_PATH,
|
||||||
KAVITA_PATH,
|
KAVITA_PATH,
|
||||||
kavita_base_url=KAVITA_URL,
|
|
||||||
kavita_api_key=KAVITA_KEY,
|
|
||||||
delete_source=False,
|
delete_source=False,
|
||||||
matches_cache=matches_cache,
|
matches_cache=matches_cache,
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user