diff --git a/main_manga.py b/main_manga.py index 6210494..606accf 100644 --- a/main_manga.py +++ b/main_manga.py @@ -44,7 +44,6 @@ from __future__ import annotations import os import sys from pathlib import Path - try: from dotenv import load_dotenv load_dotenv() @@ -107,7 +106,7 @@ def main() -> int: web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0" web_port = _env_int("WEB_PORT", 8080) updater_enabled = _env_bool("UPDATER_ENABLED", True) - updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4") + updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 1,4") updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log") cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None perf_path = _env_str("PERF_PATH", "/config/perf_stats.json") or None diff --git a/src/MangaBakaWorksResolver.py b/src/MangaBakaWorksResolver.py index 5415071..fa3fbc5 100644 --- a/src/MangaBakaWorksResolver.py +++ b/src/MangaBakaWorksResolver.py @@ -151,9 +151,10 @@ class MangaBakaWorksResolver: Returns volume-level works for a series, filtered to those that have a usable cover image. - Non-empty results are cached per series; empty results are not, so - works added on MangaBaka later become visible without restarting - the (long-running) process. + Results are cached per series — including empty results, so a series + without works is not re-paginated for every chapter of a move run. + The periodic cover updater calls clear_cache() before each scan, so + works added on MangaBaka later are still picked up there. """ if not series_id: return [] @@ -165,8 +166,7 @@ class MangaBakaWorksResolver: # Discard works that carry no usable cover works_with_cover = [w for w in all_works if w.get("images")] - if works_with_cover: - self._cache[series_id] = works_with_cover + self._cache[series_id] = works_with_cover return works_with_cover def get_work_for_volume(self, series_id: str, volume) -> "dict | None": @@ -228,10 +228,10 @@ class MangaBakaWorksResolver: if url: result[norm] = url - # Empty results are not cached — covers added on MangaBaka later - # become visible without restarting the long-running process. - if result: - self._images_cache[series_id] = result + # Cache even an empty result so a series without volume images is not + # re-paginated for every chapter. The periodic cover updater clears + # this cache before each scan, so newly added images are still found. + self._images_cache[series_id] = result return result def get_cover_for_volume_from_images(self, series_id: str, diff --git a/src/manga/ComicInfoBuilder.py b/src/manga/ComicInfoBuilder.py index c6f9abf..50d09a0 100644 --- a/src/manga/ComicInfoBuilder.py +++ b/src/manga/ComicInfoBuilder.py @@ -72,6 +72,10 @@ def _no_measure(): yield +# Sentinel marking a per-chapter memo slot as "not computed yet". +_UNSET = object() + + # -------------------------------------------------------------------------- # Constants # -------------------------------------------------------------------------- @@ -235,6 +239,12 @@ class ComicInfoBuilder: self._pages: list[dict] = [] self._cover_path: "Path | None" = None self._suwayomi_data: dict = {} + # Per-chapter memo for _determine_volume (resolved up to 3x/chapter + # otherwise: cover download, explicit volume step, XML build). + self._volume_memo = _UNSET + # Per-series cache for full series fetches by id (parent series for + # SeriesGroup, merged-series redirects) — reused across all chapters. + self._series_by_id_cache: dict[str, dict] = {} # ----- Repr ----------------------------------------------------------- def __repr__(self) -> str: @@ -274,6 +284,7 @@ class ComicInfoBuilder: self._pages = [] self._cover_path = None self._suwayomi_data = {} + self._volume_memo = _UNSET def _measure(self, name: str): """Times a named step on the attached recorder; no-op when unset.""" @@ -437,12 +448,20 @@ class ComicInfoBuilder: return series def _fetch_series_by_id(self, series_id) -> dict: + # Cached per builder (i.e. per series): SeriesGroup resolution calls + # this for the parent on every chapter — without the cache that is + # one MangaBaka request per chapter for the same parent id. + key = str(series_id) + cached = self._series_by_id_cache.get(key) + if cached is not None: + return cached url = f"{self.api_base_url}/series/{series_id}" resp = self._session.get(url, timeout=self.request_timeout) resp.raise_for_status() data = resp.json().get("data") if not data: raise RuntimeError(f"Series with ID {series_id} not found.") + self._series_by_id_cache[key] = data return data # ====================================================================== @@ -578,6 +597,18 @@ class ComicInfoBuilder: # Volume determination # ====================================================================== def _determine_volume(self) -> "str | None": + """ + Resolves the volume for the current chapter, memoized per chapter. + + The result is reused across the three call sites per chapter (cover + download, explicit volume step, XML build); the memo is cleared + whenever the chapter or manga title changes (see _clear_results). + """ + if self._volume_memo is _UNSET: + self._volume_memo = self._resolve_volume() + return self._volume_memo + + def _resolve_volume(self) -> "str | None": """ Resolves the volume for the current chapter via MangaDex. Falls back to estimation when the chapter is absent from MangaDex. diff --git a/src/manga/MangadexVolumeResolver.py b/src/manga/MangadexVolumeResolver.py index 1c7c544..b121233 100644 --- a/src/manga/MangadexVolumeResolver.py +++ b/src/manga/MangadexVolumeResolver.py @@ -93,6 +93,9 @@ class MangaDexVolumeResolver: self._cache: dict[str, dict] = {} # Cache: manga_id -> {relation_type: [title, ...]} self._relations_cache: dict[str, dict] = {} + # Cache: title_lower -> manga_id (or None) — avoids repeating the + # MangaDex search for every chapter of the same series. + self._id_cache: dict[str, "str | None"] = {} # ---------------------------------------------------------------------- # Locate the manga ID @@ -105,15 +108,25 @@ class MangaDexVolumeResolver: if not title or not title.strip(): return None - resp = self._session.get( - f"{self.base_url}/manga", - params={"title": title, "limit": 5, - "contentRating[]": ["safe", "suggestive", - "erotica", "pornographic"]}, - timeout=self.request_timeout) - resp.raise_for_status() - results = resp.json().get("data") or [] + key = title.strip().lower() + if key in self._id_cache: + return self._id_cache[key] + + try: + resp = self._session.get( + f"{self.base_url}/manga", + params={"title": title, "limit": 5, + "contentRating[]": ["safe", "suggestive", + "erotica", "pornographic"]}, + timeout=self.request_timeout) + resp.raise_for_status() + results = resp.json().get("data") or [] + except requests.RequestException: + # Don't cache transient failures — allow a retry next time. + return None + if not results: + self._id_cache[key] = None return None def score(entry) -> float: @@ -130,7 +143,9 @@ class MangaDexVolumeResolver: return best results.sort(key=score, reverse=True) - return results[0].get("id") + manga_id = results[0].get("id") + self._id_cache[key] = manga_id + return manga_id # ---------------------------------------------------------------------- # Main function: retrieve and return volume / chapter data diff --git a/src/manga/PerfStats.py b/src/manga/PerfStats.py index 714a46f..8b70d33 100644 --- a/src/manga/PerfStats.py +++ b/src/manga/PerfStats.py @@ -63,6 +63,7 @@ from __future__ import annotations import json import threading import time +import uuid from contextlib import contextmanager from pathlib import Path @@ -143,6 +144,9 @@ class SeriesRecorder(_StepTimer): "steps": self.steps, "chapters": self._chapters, }) + # Persist the run's progress after every series so a long run is + # observable live and survives a crash mid-run. + self._run.flush() class RunRecorder: @@ -154,15 +158,15 @@ class RunRecorder: self._series: list[dict] = [] self._started = time.time() self._t0 = time.monotonic() + # Stable identity so incremental flushes update the same run entry + # instead of inserting a duplicate on every series. + self._run_id = uuid.uuid4().hex def begin_series(self, title: str) -> SeriesRecorder: return SeriesRecorder(self, title, enabled=self._enabled) - def finish(self) -> dict | None: - """Aggregates the run and persists it. Returns the run dict.""" - if not self._enabled: - return None - + def _snapshot(self) -> dict: + """Aggregates the run's current state into a serialisable dict.""" step_totals: dict[str, float] = {} series_step_totals: dict[str, float] = {} chapter_count = 0 @@ -176,7 +180,8 @@ class RunRecorder: step_totals[step] = round( step_totals.get(step, 0.0) + secs, 4) - run = { + return { + "runId": self._run_id, "startedAt": round(self._started), "finishedAt": round(time.time()), "totalSeconds": round(time.monotonic() - self._t0, 4), @@ -186,9 +191,19 @@ class RunRecorder: "seriesStepTotals": series_step_totals, "series": self._series, } - self._stats._append_run(run) + + def flush(self) -> dict | None: + """Writes the run's current state to disk (upsert by runId).""" + if not self._enabled: + return None + run = self._snapshot() + self._stats._upsert_run(run) return run + def finish(self) -> dict | None: + """Persists the final run state. Returns the run dict.""" + return self.flush() + class PerfStats: """ @@ -227,14 +242,24 @@ class PerfStats: return {"runs": []} return data - def _append_run(self, run: dict) -> None: + def _upsert_run(self, run: dict) -> None: + """ + Inserts a new run (newest first) or replaces the existing entry with + the same runId — so incremental flushes during a run update one entry + rather than appending a duplicate after every series. + """ if not self._path: return with self._lock: - data = self.all() - runs = data["runs"] - runs.insert(0, run) # newest first - del runs[_MAX_RUNS:] # cap history + runs = self.all()["runs"] + run_id = run.get("runId") + for i, existing in enumerate(runs): + if existing.get("runId") == run_id: + runs[i] = run + break + else: + runs.insert(0, run) # newest first + del runs[_MAX_RUNS:] # cap history self._path.parent.mkdir(parents=True, exist_ok=True) tmp = self._path.with_suffix(self._path.suffix + ".tmp") with tmp.open("w", encoding="utf-8") as f: