Performance and Person Updater Improvements #7
+1
-2
@@ -44,7 +44,6 @@ from __future__ import annotations
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
@@ -107,7 +106,7 @@ def main() -> int:
|
|||||||
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
|
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
|
||||||
web_port = _env_int("WEB_PORT", 8080)
|
web_port = _env_int("WEB_PORT", 8080)
|
||||||
updater_enabled = _env_bool("UPDATER_ENABLED", True)
|
updater_enabled = _env_bool("UPDATER_ENABLED", True)
|
||||||
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
|
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 1,4")
|
||||||
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
|
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
|
||||||
cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
|
cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
|
||||||
perf_path = _env_str("PERF_PATH", "/config/perf_stats.json") or None
|
perf_path = _env_str("PERF_PATH", "/config/perf_stats.json") or None
|
||||||
|
|||||||
@@ -151,9 +151,10 @@ class MangaBakaWorksResolver:
|
|||||||
Returns volume-level works for a series, filtered to those that have
|
Returns volume-level works for a series, filtered to those that have
|
||||||
a usable cover image.
|
a usable cover image.
|
||||||
|
|
||||||
Non-empty results are cached per series; empty results are not, so
|
Results are cached per series — including empty results, so a series
|
||||||
works added on MangaBaka later become visible without restarting
|
without works is not re-paginated for every chapter of a move run.
|
||||||
the (long-running) process.
|
The periodic cover updater calls clear_cache() before each scan, so
|
||||||
|
works added on MangaBaka later are still picked up there.
|
||||||
"""
|
"""
|
||||||
if not series_id:
|
if not series_id:
|
||||||
return []
|
return []
|
||||||
@@ -165,8 +166,7 @@ class MangaBakaWorksResolver:
|
|||||||
|
|
||||||
# Discard works that carry no usable cover
|
# Discard works that carry no usable cover
|
||||||
works_with_cover = [w for w in all_works if w.get("images")]
|
works_with_cover = [w for w in all_works if w.get("images")]
|
||||||
if works_with_cover:
|
self._cache[series_id] = works_with_cover
|
||||||
self._cache[series_id] = works_with_cover
|
|
||||||
return works_with_cover
|
return works_with_cover
|
||||||
|
|
||||||
def get_work_for_volume(self, series_id: str, volume) -> "dict | None":
|
def get_work_for_volume(self, series_id: str, volume) -> "dict | None":
|
||||||
@@ -228,10 +228,10 @@ class MangaBakaWorksResolver:
|
|||||||
if url:
|
if url:
|
||||||
result[norm] = url
|
result[norm] = url
|
||||||
|
|
||||||
# Empty results are not cached — covers added on MangaBaka later
|
# Cache even an empty result so a series without volume images is not
|
||||||
# become visible without restarting the long-running process.
|
# re-paginated for every chapter. The periodic cover updater clears
|
||||||
if result:
|
# this cache before each scan, so newly added images are still found.
|
||||||
self._images_cache[series_id] = result
|
self._images_cache[series_id] = result
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def get_cover_for_volume_from_images(self, series_id: str,
|
def get_cover_for_volume_from_images(self, series_id: str,
|
||||||
|
|||||||
@@ -72,6 +72,10 @@ def _no_measure():
|
|||||||
yield
|
yield
|
||||||
|
|
||||||
|
|
||||||
|
# Sentinel marking a per-chapter memo slot as "not computed yet".
|
||||||
|
_UNSET = object()
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Constants
|
# Constants
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
@@ -235,6 +239,12 @@ class ComicInfoBuilder:
|
|||||||
self._pages: list[dict] = []
|
self._pages: list[dict] = []
|
||||||
self._cover_path: "Path | None" = None
|
self._cover_path: "Path | None" = None
|
||||||
self._suwayomi_data: dict = {}
|
self._suwayomi_data: dict = {}
|
||||||
|
# Per-chapter memo for _determine_volume (resolved up to 3x/chapter
|
||||||
|
# otherwise: cover download, explicit volume step, XML build).
|
||||||
|
self._volume_memo = _UNSET
|
||||||
|
# Per-series cache for full series fetches by id (parent series for
|
||||||
|
# SeriesGroup, merged-series redirects) — reused across all chapters.
|
||||||
|
self._series_by_id_cache: dict[str, dict] = {}
|
||||||
|
|
||||||
# ----- Repr -----------------------------------------------------------
|
# ----- Repr -----------------------------------------------------------
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
@@ -274,6 +284,7 @@ class ComicInfoBuilder:
|
|||||||
self._pages = []
|
self._pages = []
|
||||||
self._cover_path = None
|
self._cover_path = None
|
||||||
self._suwayomi_data = {}
|
self._suwayomi_data = {}
|
||||||
|
self._volume_memo = _UNSET
|
||||||
|
|
||||||
def _measure(self, name: str):
|
def _measure(self, name: str):
|
||||||
"""Times a named step on the attached recorder; no-op when unset."""
|
"""Times a named step on the attached recorder; no-op when unset."""
|
||||||
@@ -437,12 +448,20 @@ class ComicInfoBuilder:
|
|||||||
return series
|
return series
|
||||||
|
|
||||||
def _fetch_series_by_id(self, series_id) -> dict:
|
def _fetch_series_by_id(self, series_id) -> dict:
|
||||||
|
# Cached per builder (i.e. per series): SeriesGroup resolution calls
|
||||||
|
# this for the parent on every chapter — without the cache that is
|
||||||
|
# one MangaBaka request per chapter for the same parent id.
|
||||||
|
key = str(series_id)
|
||||||
|
cached = self._series_by_id_cache.get(key)
|
||||||
|
if cached is not None:
|
||||||
|
return cached
|
||||||
url = f"{self.api_base_url}/series/{series_id}"
|
url = f"{self.api_base_url}/series/{series_id}"
|
||||||
resp = self._session.get(url, timeout=self.request_timeout)
|
resp = self._session.get(url, timeout=self.request_timeout)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json().get("data")
|
data = resp.json().get("data")
|
||||||
if not data:
|
if not data:
|
||||||
raise RuntimeError(f"Series with ID {series_id} not found.")
|
raise RuntimeError(f"Series with ID {series_id} not found.")
|
||||||
|
self._series_by_id_cache[key] = data
|
||||||
return data
|
return data
|
||||||
|
|
||||||
# ======================================================================
|
# ======================================================================
|
||||||
@@ -578,6 +597,18 @@ class ComicInfoBuilder:
|
|||||||
# Volume determination
|
# Volume determination
|
||||||
# ======================================================================
|
# ======================================================================
|
||||||
def _determine_volume(self) -> "str | None":
|
def _determine_volume(self) -> "str | None":
|
||||||
|
"""
|
||||||
|
Resolves the volume for the current chapter, memoized per chapter.
|
||||||
|
|
||||||
|
The result is reused across the three call sites per chapter (cover
|
||||||
|
download, explicit volume step, XML build); the memo is cleared
|
||||||
|
whenever the chapter or manga title changes (see _clear_results).
|
||||||
|
"""
|
||||||
|
if self._volume_memo is _UNSET:
|
||||||
|
self._volume_memo = self._resolve_volume()
|
||||||
|
return self._volume_memo
|
||||||
|
|
||||||
|
def _resolve_volume(self) -> "str | None":
|
||||||
"""
|
"""
|
||||||
Resolves the volume for the current chapter via MangaDex.
|
Resolves the volume for the current chapter via MangaDex.
|
||||||
Falls back to estimation when the chapter is absent from MangaDex.
|
Falls back to estimation when the chapter is absent from MangaDex.
|
||||||
|
|||||||
@@ -93,6 +93,9 @@ class MangaDexVolumeResolver:
|
|||||||
self._cache: dict[str, dict] = {}
|
self._cache: dict[str, dict] = {}
|
||||||
# Cache: manga_id -> {relation_type: [title, ...]}
|
# Cache: manga_id -> {relation_type: [title, ...]}
|
||||||
self._relations_cache: dict[str, dict] = {}
|
self._relations_cache: dict[str, dict] = {}
|
||||||
|
# Cache: normalized title (stripped, lower-cased) -> manga_id (or None) — avoids repeating the
|
||||||
|
# MangaDex search for every chapter of the same series.
|
||||||
|
self._id_cache: dict[str, "str | None"] = {}
|
||||||
|
|
||||||
# ----------------------------------------------------------------------
|
# ----------------------------------------------------------------------
|
||||||
# Locate the manga ID
|
# Locate the manga ID
|
||||||
@@ -105,15 +108,25 @@ class MangaDexVolumeResolver:
|
|||||||
if not title or not title.strip():
|
if not title or not title.strip():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
resp = self._session.get(
|
key = title.strip().lower()
|
||||||
f"{self.base_url}/manga",
|
if key in self._id_cache:
|
||||||
params={"title": title, "limit": 5,
|
return self._id_cache[key]
|
||||||
"contentRating[]": ["safe", "suggestive",
|
|
||||||
"erotica", "pornographic"]},
|
try:
|
||||||
timeout=self.request_timeout)
|
resp = self._session.get(
|
||||||
resp.raise_for_status()
|
f"{self.base_url}/manga",
|
||||||
results = resp.json().get("data") or []
|
params={"title": title, "limit": 5,
|
||||||
|
"contentRating[]": ["safe", "suggestive",
|
||||||
|
"erotica", "pornographic"]},
|
||||||
|
timeout=self.request_timeout)
|
||||||
|
resp.raise_for_status()
|
||||||
|
results = resp.json().get("data") or []
|
||||||
|
except requests.RequestException:
|
||||||
|
# Don't cache transient failures — allow a retry next time.
|
||||||
|
return None
|
||||||
|
|
||||||
if not results:
|
if not results:
|
||||||
|
self._id_cache[key] = None
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def score(entry) -> float:
|
def score(entry) -> float:
|
||||||
@@ -130,7 +143,9 @@ class MangaDexVolumeResolver:
|
|||||||
return best
|
return best
|
||||||
|
|
||||||
results.sort(key=score, reverse=True)
|
results.sort(key=score, reverse=True)
|
||||||
return results[0].get("id")
|
manga_id = results[0].get("id")
|
||||||
|
self._id_cache[key] = manga_id
|
||||||
|
return manga_id
|
||||||
|
|
||||||
# ----------------------------------------------------------------------
|
# ----------------------------------------------------------------------
|
||||||
# Main function: retrieve and return volume / chapter data
|
# Main function: retrieve and return volume / chapter data
|
||||||
|
|||||||
+37
-12
@@ -63,6 +63,7 @@ from __future__ import annotations
|
|||||||
import json
|
import json
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
import uuid
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -143,6 +144,9 @@ class SeriesRecorder(_StepTimer):
|
|||||||
"steps": self.steps,
|
"steps": self.steps,
|
||||||
"chapters": self._chapters,
|
"chapters": self._chapters,
|
||||||
})
|
})
|
||||||
|
# Persist the run's progress after every series so a long run is
|
||||||
|
# observable live and survives a crash mid-run.
|
||||||
|
self._run.flush()
|
||||||
|
|
||||||
|
|
||||||
class RunRecorder:
|
class RunRecorder:
|
||||||
@@ -154,15 +158,15 @@ class RunRecorder:
|
|||||||
self._series: list[dict] = []
|
self._series: list[dict] = []
|
||||||
self._started = time.time()
|
self._started = time.time()
|
||||||
self._t0 = time.monotonic()
|
self._t0 = time.monotonic()
|
||||||
|
# Stable identity so incremental flushes update the same run entry
|
||||||
|
# instead of inserting a duplicate on every series.
|
||||||
|
self._run_id = uuid.uuid4().hex
|
||||||
|
|
||||||
def begin_series(self, title: str) -> SeriesRecorder:
|
def begin_series(self, title: str) -> SeriesRecorder:
|
||||||
return SeriesRecorder(self, title, enabled=self._enabled)
|
return SeriesRecorder(self, title, enabled=self._enabled)
|
||||||
|
|
||||||
def finish(self) -> dict | None:
|
def _snapshot(self) -> dict:
|
||||||
"""Aggregates the run and persists it. Returns the run dict."""
|
"""Aggregates the run's current state into a serialisable dict."""
|
||||||
if not self._enabled:
|
|
||||||
return None
|
|
||||||
|
|
||||||
step_totals: dict[str, float] = {}
|
step_totals: dict[str, float] = {}
|
||||||
series_step_totals: dict[str, float] = {}
|
series_step_totals: dict[str, float] = {}
|
||||||
chapter_count = 0
|
chapter_count = 0
|
||||||
@@ -176,7 +180,8 @@ class RunRecorder:
|
|||||||
step_totals[step] = round(
|
step_totals[step] = round(
|
||||||
step_totals.get(step, 0.0) + secs, 4)
|
step_totals.get(step, 0.0) + secs, 4)
|
||||||
|
|
||||||
run = {
|
return {
|
||||||
|
"runId": self._run_id,
|
||||||
"startedAt": round(self._started),
|
"startedAt": round(self._started),
|
||||||
"finishedAt": round(time.time()),
|
"finishedAt": round(time.time()),
|
||||||
"totalSeconds": round(time.monotonic() - self._t0, 4),
|
"totalSeconds": round(time.monotonic() - self._t0, 4),
|
||||||
@@ -186,9 +191,19 @@ class RunRecorder:
|
|||||||
"seriesStepTotals": series_step_totals,
|
"seriesStepTotals": series_step_totals,
|
||||||
"series": self._series,
|
"series": self._series,
|
||||||
}
|
}
|
||||||
self._stats._append_run(run)
|
|
||||||
|
def flush(self) -> dict | None:
|
||||||
|
"""Writes the run's current state to disk (upsert by runId). Returns the run dict, or None when the recorder is disabled."""
|
||||||
|
if not self._enabled:
|
||||||
|
return None
|
||||||
|
run = self._snapshot()
|
||||||
|
self._stats._upsert_run(run)
|
||||||
return run
|
return run
|
||||||
|
|
||||||
|
def finish(self) -> dict | None:
|
||||||
|
"""Persists the final run state. Returns the run dict, or None when the recorder is disabled."""
|
||||||
|
return self.flush()
|
||||||
|
|
||||||
|
|
||||||
class PerfStats:
|
class PerfStats:
|
||||||
"""
|
"""
|
||||||
@@ -227,14 +242,24 @@ class PerfStats:
|
|||||||
return {"runs": []}
|
return {"runs": []}
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _append_run(self, run: dict) -> None:
|
def _upsert_run(self, run: dict) -> None:
|
||||||
|
"""
|
||||||
|
Inserts a new run (newest first) or replaces the existing entry with
|
||||||
|
the same runId — so incremental flushes during a run update one entry
|
||||||
|
rather than appending a duplicate after every series.
|
||||||
|
"""
|
||||||
if not self._path:
|
if not self._path:
|
||||||
return
|
return
|
||||||
with self._lock:
|
with self._lock:
|
||||||
data = self.all()
|
runs = self.all()["runs"]
|
||||||
runs = data["runs"]
|
run_id = run.get("runId")
|
||||||
runs.insert(0, run) # newest first
|
for i, existing in enumerate(runs):
|
||||||
del runs[_MAX_RUNS:] # cap history
|
if existing.get("runId") == run_id:
|
||||||
|
runs[i] = run
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
runs.insert(0, run) # newest first
|
||||||
|
del runs[_MAX_RUNS:] # cap history
|
||||||
self._path.parent.mkdir(parents=True, exist_ok=True)
|
self._path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
tmp = self._path.with_suffix(self._path.suffix + ".tmp")
|
tmp = self._path.with_suffix(self._path.suffix + ".tmp")
|
||||||
with tmp.open("w", encoding="utf-8") as f:
|
with tmp.open("w", encoding="utf-8") as f:
|
||||||
|
|||||||
Reference in New Issue
Block a user