diff --git a/.env.example b/.env.example index e3b464f..13eb8fb 100644 --- a/.env.example +++ b/.env.example @@ -14,6 +14,7 @@ DELETE_SOURCE=true UPDATER_ENABLED=true UPDATER_SCHEDULE=0 19 * * 1,4 COVER_CACHE_PATH=/config/covers +PERF_PATH=/config/perf_stats.json # Light-novel container (kavita-lightnovel-metadata-fetcher) HOST_LN_CONFIG_PATH=/path/to/ln-config diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index bde59bb..4d03f31 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -21,6 +21,8 @@ services: UPDATER_LOG: "/config/volume_updater.log" # Persistent cover cache (empty = temp dir, deleted on container stop) COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}" + # Per-step move timing stats (viewable at /perf); empty disables it + PERF_PATH: "${PERF_PATH:-/config/perf_stats.json}" # Timezone for the cron schedule — without this 19:00 means 19:00 UTC TZ: "${TZ:-Europe/Berlin}" ports: diff --git a/main_manga.py b/main_manga.py index a055700..6210494 100644 --- a/main_manga.py +++ b/main_manga.py @@ -35,6 +35,8 @@ Environment variables UPDATER_LOG default /config/volume_updater.log COVER_CACHE_PATH directory for the persistent cover cache; empty (default) = temporary cache, deleted on exit + PERF_PATH JSON file for per-step move timing stats; + empty disables profiling. Default /config/perf_stats.json """ from __future__ import annotations @@ -61,6 +63,7 @@ from SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402,F401 from MatchesCache import MatchesCache # noqa: E402 from MatchesWebApp import MatchesWebApp # noqa: E402 from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402 +from PerfStats import PerfStats # noqa: E402 def _env_str(name: str, default: "str | None" = None, @@ -107,6 +110,7 @@ def main() -> int: updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4") updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log") cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None + perf_path = _env_str("PERF_PATH", "/config/perf_stats.json") or None print(f"[main] suwayomi = {suwayomi_path}", flush=True) print(f"[main] kavita = {kavita_path}", flush=True) @@ -118,6 +122,7 @@ def main() -> int: print(f"[main] web = {web_host}:{web_port}", flush=True) matches_cache = MatchesCache(match_path) + perf_stats = PerfStats(perf_path) mover = SuwayomiMover( suwayomi_path, kavita_path, @@ -128,11 +133,13 @@ def main() -> int: delete_source=delete_source, matches_cache=matches_cache, cover_cache_dir=cover_cache_path, + perf_stats=perf_stats, ) # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds) - web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port) + web_app = MatchesWebApp(matches_cache, mover=mover, perf_stats=perf_stats, + host=web_host, port=web_port) web_app.start() if updater_enabled: diff --git a/src/manga/ComicInfoBuilder.py b/src/manga/ComicInfoBuilder.py index d337f36..c6f9abf 100644 --- a/src/manga/ComicInfoBuilder.py +++ b/src/manga/ComicInfoBuilder.py @@ -40,6 +40,7 @@ from __future__ import annotations import re import sys import xml.etree.ElementTree as ET +from contextlib import contextmanager from pathlib import Path import requests @@ -65,6 +66,12 @@ except ImportError: _HAS_PIL = False +@contextmanager +def _no_measure(): + """No-op stand-in for a perf recorder's measure() context manager.""" + yield + + # -------------------------------------------------------------------------- # Constants # -------------------------------------------------------------------------- @@ -218,6 +225,12 @@ class ComicInfoBuilder: self._matches_cache = matches_cache self._cover_cache = cover_cache or _default_cover_cache() + # Optional performance recorder (duck-typed: any object with a + # .measure(name) context manager). The mover sets this per chapter; + # when None, _measure() is a no-op so the builder stays decoupled + # from PerfStats and works standalone (e.g. the cover updater). + self.perf = None + self._metadata: "dict | None" = None self._pages: list[dict] = [] self._cover_path: "Path | None" = None @@ -262,6 +275,12 @@ class ComicInfoBuilder: self._cover_path = None self._suwayomi_data = {} + def _measure(self, name: str): + """Times a named step on the attached recorder; no-op when unset.""" + if self.perf is not None: + return self.perf.measure(name) + return _no_measure() + # ====================================================================== # Public XML functions # ====================================================================== @@ -305,11 +324,13 @@ class ComicInfoBuilder: if not folder.is_dir(): raise NotADirectoryError(f"Folder not found: {folder}") - self._suwayomi_data = self._read_existing_comicinfo(folder) + with self._measure("read_comicinfo"): + self._suwayomi_data = self._read_existing_comicinfo(folder) self._cover_path = None if download_cover: - self._cover_path = self._download_cover(folder, cover_filename) + with self._measure("cover"): + self._cover_path = self._download_cover(folder, cover_filename) cover_resolved = self._cover_path.resolve() if self._cover_path else None story_images: list[Path] = [] @@ -329,20 +350,23 @@ class ComicInfoBuilder: ordered.extend((img, "Story") for img in story_images) self._pages = [] - for index, (img_path, page_type) in enumerate(ordered): - width, height = self._image_dimensions(img_path) - try: - size = img_path.stat().st_size - except OSError: - size = None - self._pages.append({ - "image": index, - "type": page_type, - "width": width, - "height": height, - "size": size, - "double": bool(width and height and width > height), - }) + # Probing every page for its pixel dimensions reads each file — on a + # network share this is often the dominant per-chapter cost. + with self._measure("image_dimensions"): + for index, (img_path, page_type) in enumerate(ordered): + width, height = self._image_dimensions(img_path) + try: + size = img_path.stat().st_size + except OSError: + size = None + self._pages.append({ + "image": index, + "type": page_type, + "width": width, + "height": height, + "size": size, + "double": bool(width and height and width > height), + }) return { "page_count": len(self._pages), diff --git a/src/manga/MatchesWebApp.py b/src/manga/MatchesWebApp.py index b4734c3..a90639c 100644 --- a/src/manga/MatchesWebApp.py +++ b/src/manga/MatchesWebApp.py @@ -71,6 +71,7 @@ _INDEX_HTML = """ + Performance ▸ @@ -357,6 +358,135 @@ load(); """ +_PERF_HTML = """ + + + + Move performance + + + +

Move performance ◂ back to matches

+
+ + + +
+ +
+ + + + +""" + + class MatchesWebApp: """ Flask app exposing the MatchesCache. `mover` is required when you want @@ -367,10 +497,12 @@ class MatchesWebApp: def __init__(self, cache: MatchesCache, *, mover=None, + perf_stats=None, host: str = "0.0.0.0", port: int = 8080): self._cache = cache self._mover = mover + self._perf = perf_stats self._host = host self._port = port self._build_lock = threading.Lock() @@ -498,3 +630,13 @@ class MatchesWebApp: finally: self._move_lock.release() return jsonify({"results": results}) + + @app.get("/perf") + def perf_page() -> Response: + return Response(_PERF_HTML, mimetype="text/html; charset=utf-8") + + @app.get("/api/perf") + def api_perf(): + if self._perf is None: + return jsonify({"runs": []}) + return jsonify(self._perf.all()) diff --git a/src/manga/SuwayomiMover.py b/src/manga/SuwayomiMover.py index 51ae101..1c35f73 100644 --- a/src/manga/SuwayomiMover.py +++ b/src/manga/SuwayomiMover.py @@ -69,6 +69,7 @@ from KavitaPersonUpdater import KavitaPersonUpdater from MatchesCache import MatchesCache from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit from CoverCache import CoverCache, _IMAGE_EXTS +from PerfStats import PerfStats _CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)') @@ -313,6 +314,8 @@ class SuwayomiMover: delete_source : Remove the source chapter folder after successful pack. cover_cache_dir : Directory for the persistent cover cache. None -> temporary cache, deleted at process exit. + perf_stats : Optional PerfStats instance for per-step timing. None + (default) disables profiling. """ def __init__(self, @@ -326,7 +329,8 @@ class SuwayomiMover: delete_source: bool = True, matches_cache: "MatchesCache | None" = None, api_base_url: str = "https://api.mangabaka.dev/v1", - cover_cache_dir=None): + cover_cache_dir=None, + perf_stats: "PerfStats | None" = None): self._src = Path(suwayomi_path) self._dst = Path(kavita_path) self._language = language @@ -334,6 +338,7 @@ class SuwayomiMover: self._delete_source = delete_source self._matches_cache = matches_cache self._api_base_url = api_base_url.rstrip("/") + self._perf = perf_stats or PerfStats(None) # Shared HTTP session and resolvers — reused across all series/chapters # to maximise cache hits and minimise API round-trips. @@ -376,15 +381,19 @@ class SuwayomiMover: dict from _process_series_dir. """ results: dict = {} - for source_dir in sorted(self._src.iterdir()): - if not source_dir.is_dir(): - continue - for manga_dir in sorted(source_dir.iterdir()): - if not manga_dir.is_dir(): + run = self._perf.begin_run() + try: + for source_dir in sorted(self._src.iterdir()): + if not source_dir.is_dir(): continue - title = manga_dir.name - print(f"[SuwayomiMover] {title}") - results[title] = self._process_series_dir(manga_dir) + for manga_dir in sorted(source_dir.iterdir()): + if not manga_dir.is_dir(): + continue + title = manga_dir.name + print(f"[SuwayomiMover] {title}") + results[title] = self._process_series_dir(manga_dir, run) + finally: + run.finish() return results def process_series(self, manga_title: str) -> dict: @@ -400,7 +409,11 @@ class SuwayomiMover: continue candidate = source_dir / manga_title if candidate.is_dir(): - return self._process_series_dir(candidate) + run = self._perf.begin_run() + try: + return self._process_series_dir(candidate, run) + finally: + run.finish() raise FileNotFoundError( f"No Suwayomi directory found for '{manga_title}' under {self._src}") @@ -487,8 +500,9 @@ class SuwayomiMover: # ------------------------------------------------------------------ # Internal: series # ------------------------------------------------------------------ - def _process_series_dir(self, manga_dir: Path) -> dict: + def _process_series_dir(self, manga_dir: Path, run=None) -> dict: manga_title = manga_dir.name + series_rec = (run or self._perf.begin_run()).begin_series(manga_title) chapter_dirs = sorted( (d for d in manga_dir.iterdir() if d.is_dir()), @@ -539,7 +553,8 @@ class SuwayomiMover: md: "dict | None" = None mangabaka_title = manga_title try: - md = builder.fetch_metadata() + with series_rec.measure("fetch_metadata"): + md = builder.fetch_metadata() mangabaka_title = md.get("title") or manga_title except Exception as exc: print(f" [warn] metadata fetch failed: {exc}") @@ -571,7 +586,7 @@ class SuwayomiMover: chapter_results: list[dict] = [] for chapter_dir, _fields, chapter_num in pending: result = self._process_chapter( - builder, chapter_num, chapter_dir, dest_series) + builder, chapter_num, chapter_dir, dest_series, series_rec) chapter_results.append(result) status = "ok" if result["ok"] else f"ERROR: {result.get('error')}" print(f" Chapter {chapter_num}: {status}") @@ -592,14 +607,16 @@ class SuwayomiMover: al_id = ComicInfoBuilder._al_id_from_source(md) if md else None if mal_id or al_id: try: - person_result = self._person_updater.update_for_manga( - mal_id, al_manga_id=al_id) + with series_rec.measure("person_sync"): + person_result = self._person_updater.update_for_manga( + mal_id, al_manga_id=al_id) print(f" Persons: chars={person_result['characters'].get('updated')} " f"staff={person_result['staff'].get('updated')}") except Exception as exc: person_result = {"error": str(exc)} print(f" Persons: ERROR {exc}") + series_rec.finish() return {"chapters": chapter_results, "persons": person_result} # ------------------------------------------------------------------ @@ -609,7 +626,8 @@ class SuwayomiMover: builder: ComicInfoBuilder, chapter_num: str, chapter_dir: Path, - dest_series: Path) -> dict: + dest_series: Path, + series_rec=None) -> dict: """ Generates ComicInfo.xml for one chapter, packs it to CBZ, and optionally removes the source folder. @@ -619,6 +637,11 @@ class SuwayomiMover: element correctly points to the front cover). """ cbz_path = dest_series / f"{chapter_dir.name}.cbz" + chap_rec = (series_rec or self._perf.begin_run().begin_series("") + ).begin_chapter(chapter_num) + # add_pages_from_folder records its own sub-steps on this recorder. + builder.perf = chap_rec + ok = False try: builder.chapter = chapter_num builder.add_pages_from_folder(chapter_dir, cover_filename="000") @@ -626,18 +649,26 @@ class SuwayomiMover: # by add_pages_from_folder, so it's effectively free. Used by # the chapter index in the Kavita destination folder. try: - volume = builder._determine_volume() + with chap_rec.measure("volume"): + volume = builder._determine_volume() except Exception: volume = None - builder.save_xml(chapter_dir) - _pack_to_cbz(chapter_dir, cbz_path) + with chap_rec.measure("save_xml"): + builder.save_xml(chapter_dir) + with chap_rec.measure("pack_cbz"): + _pack_to_cbz(chapter_dir, cbz_path) if self._delete_source: - shutil.rmtree(chapter_dir) + with chap_rec.measure("delete_source"): + shutil.rmtree(chapter_dir) + ok = True return {"chapter": chapter_num, "cbz": str(cbz_path), "ok": True, "volume": volume} except Exception as exc: return {"chapter": chapter_num, "cbz": str(cbz_path), "ok": False, "error": str(exc)} + finally: + builder.perf = None + chap_rec.finish(ok=ok) # --------------------------------------------------------------------------