feat(updater): add KavitaVolumeCoverUpdater for back-filling null volumes
Introduce a new background service that periodically re-checks chapters whose volume could not be resolved at move time.

- Add KavitaVolumeCoverUpdater.py to resolve null volumes via MangaDex, update ComicInfo.xml in-archive, and swap in MangaBaka volume covers
- Wire the updater into the main.py entry point behind the UPDATER_ENABLED env flag
- Add the UPDATER_ENABLED env var to docker-compose.prod.yml
- Add CronSchedule.py to schedule updater runs
This commit is contained in:
@@ -11,6 +11,14 @@ services:
|
|||||||
DELETE_SOURCE: "${DELETE_SOURCE:-true}"
|
DELETE_SOURCE: "${DELETE_SOURCE:-true}"
|
||||||
MATCH_PATH: "${MATCH_PATH:-/config/matches.json}"
|
MATCH_PATH: "${MATCH_PATH:-/config/matches.json}"
|
||||||
WEB_PORT: "${WEB_PORT:-8080}"
|
WEB_PORT: "${WEB_PORT:-8080}"
|
||||||
|
# Volume/cover back-fill updater
|
||||||
|
UPDATER_ENABLED: "${UPDATER_ENABLED:-true}"
|
||||||
|
# Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday
|
||||||
|
# (local time, see TZ)
|
||||||
|
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
|
||||||
|
UPDATER_LOG: "${UPDATER_LOG:-/config/volume_updater.log}"
|
||||||
|
# Timezone for the cron schedule — without this 19:00 means 19:00 UTC
|
||||||
|
TZ: "${TZ:-Europe/Berlin}"
|
||||||
ports:
|
ports:
|
||||||
- "${WEB_PORT:-8080}:${WEB_PORT:-8080}"
|
- "${WEB_PORT:-8080}:${WEB_PORT:-8080}"
|
||||||
volumes:
|
volumes:
|
||||||
|
|||||||
@@ -27,6 +27,11 @@ Environment variables
|
|||||||
MATCH_PATH default /config/matches.json
|
MATCH_PATH default /config/matches.json
|
||||||
WEB_PORT default 8080 (Flask web UI for matches.json)
|
WEB_PORT default 8080 (Flask web UI for matches.json)
|
||||||
WEB_HOST default 0.0.0.0
|
WEB_HOST default 0.0.0.0
|
||||||
|
UPDATER_ENABLED default true (volume/cover back-fill cron)
|
||||||
|
UPDATER_SCHEDULE cron expression for the updater scans,
|
||||||
|
default "0 19 * * 1,4" = 19:00 every Mon + Thu
|
||||||
|
(local time — set TZ inside the container!)
|
||||||
|
UPDATER_LOG default /config/volume_updater.log
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -43,6 +48,7 @@ from src.SuwayomiMover import SuwayomiMover # noqa: E402
|
|||||||
from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402
|
from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402
|
||||||
from src.MatchesCache import MatchesCache # noqa: E402
|
from src.MatchesCache import MatchesCache # noqa: E402
|
||||||
from src.MatchesWebApp import MatchesWebApp # noqa: E402
|
from src.MatchesWebApp import MatchesWebApp # noqa: E402
|
||||||
|
from src.KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
def _env_str(name: str, default: "str | None" = None,
|
def _env_str(name: str, default: "str | None" = None,
|
||||||
@@ -85,6 +91,9 @@ def main() -> int:
|
|||||||
match_path = _env_str("MATCH_PATH", "/config/matches.json")
|
match_path = _env_str("MATCH_PATH", "/config/matches.json")
|
||||||
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
|
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
|
||||||
web_port = _env_int("WEB_PORT", 8080)
|
web_port = _env_int("WEB_PORT", 8080)
|
||||||
|
updater_enabled = _env_bool("UPDATER_ENABLED", True)
|
||||||
|
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
|
||||||
|
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
|
||||||
|
|
||||||
print(f"[main] suwayomi = {suwayomi_path}", flush=True)
|
print(f"[main] suwayomi = {suwayomi_path}", flush=True)
|
||||||
print(f"[main] kavita = {kavita_path}", flush=True)
|
print(f"[main] kavita = {kavita_path}", flush=True)
|
||||||
@@ -112,6 +121,23 @@ def main() -> int:
|
|||||||
web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port)
|
web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port)
|
||||||
web_app.start()
|
web_app.start()
|
||||||
|
|
||||||
|
if updater_enabled:
|
||||||
|
try:
|
||||||
|
updater = KavitaVolumeCoverUpdater(
|
||||||
|
kavita_path,
|
||||||
|
matches_cache=matches_cache,
|
||||||
|
language=language,
|
||||||
|
request_timeout=request_timeout,
|
||||||
|
log_path=updater_log,
|
||||||
|
schedule=updater_schedule,
|
||||||
|
)
|
||||||
|
updater.start()
|
||||||
|
except ValueError as exc:
|
||||||
|
# Invalid cron expression — keep the service up, just without
|
||||||
|
# the updater, and make the config error obvious in the logs.
|
||||||
|
print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
|
||||||
|
f"volume/cover updater DISABLED", flush=True)
|
||||||
|
|
||||||
# def shutdown(signum, _frame):
|
# def shutdown(signum, _frame):
|
||||||
# print(f"[main] received signal {signum}", flush=True)
|
# print(f"[main] received signal {signum}", flush=True)
|
||||||
# watcher.stop()
|
# watcher.stop()
|
||||||
|
|||||||
+17
-5
@@ -1030,12 +1030,14 @@ class ComicInfoBuilder:
|
|||||||
return unique
|
return unique
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _read_existing_comicinfo(folder: Path) -> dict:
|
def read_comicinfo_fields(xml_source) -> dict:
|
||||||
xml_path = folder / "ComicInfo.xml"
|
"""
|
||||||
if not xml_path.is_file():
|
Parses ComicInfo.xml content (bytes or str) and returns the fields
|
||||||
return {}
|
relevant as supplementary Suwayomi data. Returns {} on parse errors.
|
||||||
|
Reusable for XML read directly from a CBZ archive (no extraction).
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
root = ET.parse(xml_path).getroot()
|
root = ET.fromstring(xml_source)
|
||||||
except ET.ParseError:
|
except ET.ParseError:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@@ -1049,6 +1051,16 @@ class ComicInfoBuilder:
|
|||||||
data[tag] = child.text.strip()
|
data[tag] = child.text.strip()
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _read_existing_comicinfo(folder: Path) -> dict:
|
||||||
|
xml_path = folder / "ComicInfo.xml"
|
||||||
|
if not xml_path.is_file():
|
||||||
|
return {}
|
||||||
|
try:
|
||||||
|
return ComicInfoBuilder.read_comicinfo_fields(xml_path.read_bytes())
|
||||||
|
except OSError:
|
||||||
|
return {}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _image_dimensions(path: Path):
|
def _image_dimensions(path: Path):
|
||||||
if not _HAS_PIL:
|
if not _HAS_PIL:
|
||||||
|
|||||||
@@ -0,0 +1,159 @@
|
|||||||
|
"""
|
||||||
|
CronSchedule.py
|
||||||
|
================
|
||||||
|
|
||||||
|
Minimal cron-expression parser — no external dependency.
|
||||||
|
|
||||||
|
Supports the classic 5-field syntax::
|
||||||
|
|
||||||
|
┌──────── minute (0-59)
|
||||||
|
│ ┌────── hour (0-23)
|
||||||
|
│ │ ┌──── day of month (1-31)
|
||||||
|
│ │ │ ┌── month (1-12 or jan-dec)
|
||||||
|
│ │ │ │ ┌ day of week (0-7 or sun-sat; 0 and 7 = Sunday)
|
||||||
|
│ │ │ │ │
|
||||||
|
0 19 * * 1,4 -> 19:00 every Monday and Thursday
|
||||||
|
|
||||||
|
Field syntax: ``*``, single values, ranges (``a-b``), steps (``*/n``,
|
||||||
|
``a-b/n``) and comma lists. Month / weekday names (``jan``, ``mon``, …)
|
||||||
|
are accepted case-insensitively.
|
||||||
|
|
||||||
|
As in Vixie cron, when *both* day-of-month and day-of-week are restricted
|
||||||
|
the job runs when **either** matches.
|
||||||
|
|
||||||
|
Times are evaluated against the local system clock (``datetime.now()``) —
|
||||||
|
in Docker set the ``TZ`` environment variable so "19:00" means local time.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
|
||||||
|
# Three-letter month abbreviations accepted in the month field, mapped to
# their cron number (1-12). Lookups are done on the lower-cased token.
_MONTH_NAMES = {"jan": 1, "feb": 2, "mar": 3, "apr": 4, "may": 5, "jun": 6,
                "jul": 7, "aug": 8, "sep": 9, "oct": 10, "nov": 11, "dec": 12}
# Three-letter weekday abbreviations accepted in the day-of-week field, in
# cron numbering (Sunday = 0 ... Saturday = 6).
_DAY_NAMES = {"sun": 0, "mon": 1, "tue": 2, "wed": 3, "thu": 4,
              "fri": 5, "sat": 6}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_value(token: str, lo: int, hi: int,
|
||||||
|
names: "dict[str, int] | None") -> int:
|
||||||
|
token = token.strip().lower()
|
||||||
|
if names and token in names:
|
||||||
|
return names[token]
|
||||||
|
try:
|
||||||
|
value = int(token)
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError(f"invalid cron value {token!r}") from None
|
||||||
|
if not (lo <= value <= hi):
|
||||||
|
raise ValueError(f"cron value {value} out of range {lo}-{hi}")
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_field(field: str, lo: int, hi: int,
|
||||||
|
names: "dict[str, int] | None" = None) -> "set[int]":
|
||||||
|
"""Parses one cron field into the set of matching integer values."""
|
||||||
|
result: set[int] = set()
|
||||||
|
for part in field.split(","):
|
||||||
|
part = part.strip()
|
||||||
|
if not part:
|
||||||
|
raise ValueError(f"empty element in cron field {field!r}")
|
||||||
|
|
||||||
|
step = 1
|
||||||
|
if "/" in part:
|
||||||
|
part, step_text = part.split("/", 1)
|
||||||
|
try:
|
||||||
|
step = int(step_text)
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError(f"invalid cron step {step_text!r}") from None
|
||||||
|
if step < 1:
|
||||||
|
raise ValueError(f"cron step must be >= 1, got {step}")
|
||||||
|
|
||||||
|
if part == "*":
|
||||||
|
start, end = lo, hi
|
||||||
|
elif "-" in part:
|
||||||
|
a, b = part.split("-", 1)
|
||||||
|
start = _parse_value(a, lo, hi, names)
|
||||||
|
end = _parse_value(b, lo, hi, names)
|
||||||
|
if end < start:
|
||||||
|
raise ValueError(f"inverted cron range {part!r}")
|
||||||
|
else:
|
||||||
|
start = end = _parse_value(part, lo, hi, names)
|
||||||
|
|
||||||
|
result.update(range(start, end + 1, step))
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
class CronSchedule:
    """
    Parsed 5-field cron expression with ``next_after()`` evaluation.

    Usage::

        cron = CronSchedule("0 19 * * mon,thu")
        run_at = cron.next_after(datetime.now())

    The constructor raises ValueError for malformed expressions and for
    expressions that can never match a calendar date (e.g. ``0 0 31 2 *``),
    so a bad schedule is reported when it is configured rather than later
    from whichever thread first calls ``next_after()``.
    """

    # Longest possible length of each month, January first (February is
    # counted with its leap day so "29 2" stays valid).
    _MAX_DAY_OF_MONTH = (31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)

    def __init__(self, expression: str):
        """
        Parses ``expression`` ("minute hour dom month dow").

        Raises ValueError when the field count is not 5, when a field is
        invalid, or when no calendar date can ever satisfy the expression.
        """
        self.expression = expression.strip()
        fields = self.expression.split()
        if len(fields) != 5:
            raise ValueError(
                f"cron expression needs 5 fields "
                f"(minute hour dom month dow), got {len(fields)}: "
                f"{expression!r}")

        minute, hour, dom, month, dow = fields
        self._minutes = _parse_field(minute, 0, 59)
        self._hours = _parse_field(hour, 0, 23)
        self._dom = _parse_field(dom, 1, 31)
        self._months = _parse_field(month, 1, 12, _MONTH_NAMES)
        dow_values = _parse_field(dow, 0, 7, _DAY_NAMES)
        # 7 is an alias for Sunday (= 0)
        self._dow = {0 if v == 7 else v for v in dow_values}

        # Vixie-cron rule: dom/dow are OR-combined when both are restricted.
        self._dom_restricted = dom != "*"
        self._dow_restricted = dow != "*"

        # With a weekday restriction every selected month contains a
        # matching day. Without one, the day-of-month must exist in at
        # least one selected month, otherwise the schedule never fires.
        if not self._dow_restricted and not any(
                day <= self._MAX_DAY_OF_MONTH[mon - 1]
                for mon in self._months for day in self._dom):
            raise ValueError(
                f"cron expression {expression!r} never matches a "
                f"calendar date")

    def __repr__(self) -> str:
        return f"CronSchedule({self.expression!r})"

    # ------------------------------------------------------------------
    def _day_matches(self, day: "datetime.date") -> bool:
        """Returns True when the calendar day satisfies month/dom/dow."""
        if day.month not in self._months:
            return False
        dom_ok = day.day in self._dom
        # Python: Monday=0 … Sunday=6  ->  cron: Sunday=0 … Saturday=6
        dow_ok = ((day.weekday() + 1) % 7) in self._dow
        if self._dom_restricted and self._dow_restricted:
            return dom_ok or dow_ok
        if self._dom_restricted:
            return dom_ok
        if self._dow_restricted:
            return dow_ok
        return True

    def next_after(self, dt: datetime) -> datetime:
        """
        Returns the first matching time strictly after ``dt``
        (second/microsecond precision is dropped).

        Raises ValueError when no match is found within the search window
        of 366 * 5 days.
        """
        # Start one minute later and truncate, so a ``dt`` that sits exactly
        # on a matching minute is not returned again.
        cand = (dt + timedelta(minutes=1)).replace(second=0, microsecond=0)
        hours = sorted(self._hours)
        minutes = sorted(self._minutes)

        # Walk day by day (covers rare dom/month combos like Feb 29).
        for _ in range(366 * 5):
            if self._day_matches(cand.date()):
                for h in hours:
                    if h < cand.hour:
                        continue
                    for m in minutes:
                        if h == cand.hour and m < cand.minute:
                            continue
                        return cand.replace(hour=h, minute=m)
            # Nothing left today: continue at midnight of the next day, from
            # where every hour/minute of the schedule is eligible.
            cand = (cand + timedelta(days=1)).replace(hour=0, minute=0)

        raise ValueError(
            f"cron {self.expression!r}: no occurrence within 5 years")
|
||||||
@@ -0,0 +1,536 @@
|
|||||||
|
"""
|
||||||
|
KavitaVolumeCoverUpdater.py
|
||||||
|
==============================
|
||||||
|
|
||||||
|
Periodically re-checks chapters already moved to the Kavita library whose
|
||||||
|
volume could not be resolved at move time (``"volume": null`` in the
|
||||||
|
series' ``chapter_index.json``).
|
||||||
|
|
||||||
|
When MangaDex has since assigned the chapter to a volume, the updater:
|
||||||
|
|
||||||
|
1. writes the volume into ``chapter_index.json``,
|
||||||
|
2. updates ``<Volume>`` inside the chapter's ComicInfo.xml (in-archive),
|
||||||
|
3. downloads the MangaBaka volume cover and swaps it in for the
|
||||||
|
placeholder ``000.<ext>`` series cover, and
|
||||||
|
4. refreshes the *first* chapter's ComicInfo.xml with full metadata —
|
||||||
|
Kavita can be configured to take series metadata from the lowest
|
||||||
|
chapter, so it must reflect the latest state.
|
||||||
|
|
||||||
|
Host-IO policy
|
||||||
|
--------------
|
||||||
|
* Per series only ``chapter_index.json`` is read (no archive is opened to
|
||||||
|
discover its contents).
|
||||||
|
* Series without null-volume chapters are skipped before any API call.
|
||||||
|
* An archive is read+rewritten exactly once per update (single pass,
|
||||||
|
written to a ``.tmp`` file, then atomically replaced).
|
||||||
|
|
||||||
|
Every updated chapter is appended to a log file (one line per update).
|
||||||
|
|
||||||
|
Reused components
|
||||||
|
-----------------
|
||||||
|
* ``SuwayomiMover`` — chapter index helpers, dirname sanitizer
|
||||||
|
* ``ComicInfoBuilder`` — metadata fetch (matches-cache ID lookup),
|
||||||
|
chapter→volume resolution, XML build
|
||||||
|
* ``MangaBakaWorksResolver`` — volume covers (/images with /works fallback)
|
||||||
|
* ``MangaDexVolumeResolver`` — chapter→volume aggregate (shared cache)
|
||||||
|
* ``MangaBakaRateLimit`` — process-wide API throttle
|
||||||
|
|
||||||
|
Dependencies
|
||||||
|
------------
|
||||||
|
requests -> pip install requests
|
||||||
|
Pillow -> pip install pillow (optional, page-0 dimensions)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import io
|
||||||
|
import threading
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
import zipfile
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from ComicInfoBuilder import (ComicInfoBuilder, _guess_extension, _IMAGE_EXTS)
|
||||||
|
from MangadexVolumeResolver import MangaDexVolumeResolver
|
||||||
|
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
||||||
|
from MALResolver import MALResolver
|
||||||
|
from AniListResolver import AniListResolver
|
||||||
|
from MatchesCache import MatchesCache
|
||||||
|
from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
|
||||||
|
_sanitize_dirname, _normalise_volume_value)
|
||||||
|
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||||
|
from CronSchedule import CronSchedule
|
||||||
|
|
||||||
|
try:
|
||||||
|
from PIL import Image
|
||||||
|
_HAS_PIL = True
|
||||||
|
except ImportError:
|
||||||
|
_HAS_PIL = False
|
||||||
|
|
||||||
|
|
||||||
|
def _now() -> str:
|
||||||
|
return datetime.now().isoformat(timespec="seconds")
|
||||||
|
|
||||||
|
|
||||||
|
def _image_dims_from_bytes(data: bytes) -> tuple:
    """
    Measures an in-memory image.

    Returns ``(width, height)`` as reported by Pillow, or ``(None, None)``
    when Pillow is not installed or the blob cannot be opened as an image.
    """
    unknown = (None, None)
    if _HAS_PIL:
        try:
            with Image.open(io.BytesIO(data)) as picture:
                return picture.size
        except Exception:
            # Any decode problem simply means "dimensions unknown".
            return unknown
    return unknown
|
||||||
|
|
||||||
|
|
||||||
|
def _chapter_sort_value(num: str) -> float:
|
||||||
|
try:
|
||||||
|
return float(num)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return float("inf")
|
||||||
|
|
||||||
|
|
||||||
|
def _update_page0_attrs(pages_el: "ET.Element", cover_bytes: bytes) -> None:
|
||||||
|
"""Refreshes size/dimension attributes of the FrontCover page entry."""
|
||||||
|
for page in pages_el:
|
||||||
|
if page.get("Image") == "0":
|
||||||
|
page.set("ImageSize", str(len(cover_bytes)))
|
||||||
|
width, height = _image_dims_from_bytes(cover_bytes)
|
||||||
|
if width and height:
|
||||||
|
page.set("ImageWidth", str(width))
|
||||||
|
page.set("ImageHeight", str(height))
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
def _serialize_tree(root: "ET.Element") -> str:
|
||||||
|
tree = ET.ElementTree(root)
|
||||||
|
try:
|
||||||
|
ET.indent(tree, space=" ")
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
return ('<?xml version="1.0" encoding="UTF-8"?>\n'
|
||||||
|
+ ET.tostring(root, encoding="unicode"))
|
||||||
|
|
||||||
|
|
||||||
|
class KavitaVolumeCoverUpdater:
    """
    Scans the Kavita library for chapters whose volume was unknown at move
    time and back-fills volume + volume cover once MangaDex / MangaBaka
    provide the data. Runs periodically on a background thread.

    Parameters
    ----------
    kavita_path     : Root of the Kavita library (series folders inside).
    matches_cache   : MatchesCache — provides the MangaBaka series ID per
                      series (mandatory; folders without a match are skipped).
    language        : ComicInfo language (passed to ComicInfoBuilder).
    request_timeout : HTTP timeout in seconds.
    api_base_url    : Base URL of the MangaBaka API.
    log_path        : File that receives one line per updated chapter.
                      Default: <kavita_path>/volume_updater.log
    schedule        : Cron expression (5 fields) defining when scans run,
                      e.g. "0 19 * * 1,4" = 19:00 every Monday and
                      Thursday. Evaluated in local time — set the TZ env
                      var inside Docker. Default: "0 19 * * 1,4".

    Raises
    ------
    ValueError when ``schedule`` is rejected by CronSchedule.
    """

    def __init__(self,
                 kavita_path,
                 *,
                 matches_cache: "MatchesCache",
                 language: str = "en",
                 request_timeout: int = 30,
                 api_base_url: str = "https://api.mangabaka.dev/v1",
                 log_path=None,
                 schedule: str = "0 19 * * 1,4"):
        self._dst = Path(kavita_path)
        self._matches_cache = matches_cache
        self._language = language
        self._timeout = request_timeout
        self._api_base_url = api_base_url.rstrip("/")
        self._log_path = (Path(log_path) if log_path
                          else self._dst / "volume_updater.log")
        self._cron = CronSchedule(schedule)

        # One HTTP session is shared by every resolver so they all go
        # through the same rate limiter.
        session = requests.Session()
        session.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0")
        _apply_mangabaka_rate_limit(session)
        self._session = session

        self._mal = MALResolver(request_timeout=request_timeout)
        self._al = AniListResolver(request_timeout=request_timeout)
        self._vol_resolver = MangaDexVolumeResolver(
            request_timeout=request_timeout, session=session)
        self._works_resolver = MangaBakaWorksResolver(
            api_base_url=api_base_url,
            request_timeout=request_timeout, session=session)

        self._stop = threading.Event()
        self._thread: "threading.Thread | None" = None

    # ------------------------------------------------------------------
    # Cron API (mirrors SuwayomiFolderWatcher)
    # ------------------------------------------------------------------
    def start(self) -> None:
        """Starts the periodic scan thread. Non-blocking."""
        if self._thread is not None and self._thread.is_alive():
            return
        self._stop.clear()
        self._thread = threading.Thread(
            target=self._loop, name="KavitaVolumeCoverUpdater", daemon=True)
        self._thread.start()
        print(f"[{_now()}] [updater] scanning {self._dst} "
              f"on cron '{self._cron.expression}'", flush=True)

    def stop(self) -> None:
        """Stops the scan thread (current scan finishes its series first)."""
        self._stop.set()
        if self._thread is not None:
            self._thread.join(timeout=10)

    def wait(self) -> None:
        """Blocks the calling thread until stop() is invoked."""
        self._stop.wait()

    def _loop(self) -> None:
        """Thread body: sleep until the next cron slot, scan, repeat."""
        while not self._stop.is_set():
            try:
                next_run = self._cron.next_after(datetime.now())
            except ValueError as exc:
                # The schedule has no upcoming occurrence. End the thread
                # with a readable message instead of an unhandled traceback.
                print(f"[{_now()}] [updater] schedule ERROR: {exc}; "
                      f"updater stopped", flush=True)
                return
            wait = max(0.0, (next_run - datetime.now()).total_seconds())
            print(f"[{_now()}] [updater] next scheduled scan: "
                  f"{next_run.isoformat(timespec='minutes')}", flush=True)
            # Event.wait() returns True when stop() was called meanwhile.
            if self._stop.wait(wait):
                break

            try:
                summary = self.update_all()
                print(f"[{_now()}] [updater] scan done: "
                      f"{summary['series_updated']} series / "
                      f"{summary['chapters_updated']} chapters updated",
                      flush=True)
            except Exception as exc:
                # Keep the scheduler alive; the next slot gets a fresh try.
                print(f"[{_now()}] [updater] scan ERROR: {exc}", flush=True)

    # ------------------------------------------------------------------
    # Public scan API
    # ------------------------------------------------------------------
    def update_all(self) -> dict:
        """
        Scans every series folder under the Kavita root once.
        Returns {"series_scanned": n, "series_updated": n, "chapters_updated": n}.

        A failure inside one series is logged and does not abort the scan.
        The scan ends early when stop() has been requested.
        """
        summary = {"series_scanned": 0, "series_updated": 0,
                   "chapters_updated": 0}
        if not self._dst.is_dir():
            print(f"[updater] kavita path missing: {self._dst}", flush=True)
            return summary

        for series_dir in sorted(self._dst.iterdir()):
            if self._stop.is_set():
                break
            if not series_dir.is_dir():
                continue
            summary["series_scanned"] += 1
            try:
                updated = self.update_series(series_dir)
            except Exception as exc:
                print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
                continue
            if updated:
                summary["series_updated"] += 1
                summary["chapters_updated"] += updated
        return summary

    def update_series(self, series_dir: Path) -> int:
        """
        Updates one series folder. Returns the number of updated chapters.

        Only chapters listed in chapter_index.json with ``"volume": null``
        are candidates; everything else costs no further host reads.
        """
        index = _load_chapter_index(series_dir)
        chapters: dict = index["chapter"]
        if not chapters:
            return 0

        missing = [num for num, e in chapters.items()
                   if isinstance(e, dict) and e.get("volume") is None]
        if not missing:
            return 0

        match_key, match = self._find_match_for_folder(series_dir.name)
        if not match or not match.get("mangabakaId"):
            print(f"[updater] {series_dir.name}: no matches.json entry — skip",
                  flush=True)
            return 0

        # Builder resolves metadata via the cached MangaBaka ID and gives us
        # the exact same chapter→volume logic the mover uses.
        builder = ComicInfoBuilder(
            match_key, chapter=missing[0],
            api_base_url=self._api_base_url,
            language=self._language,
            request_timeout=self._timeout,
            session=self._session,
            volume_resolver=self._vol_resolver,
            works_resolver=self._works_resolver,
            mal_resolver=self._mal,
            al_resolver=self._al,
            matches_cache=self._matches_cache,
        )
        md = builder.fetch_metadata()
        series_id = str(md.get("id") or "")

        # Resolve volumes for all null-volume chapters first (API only).
        updates: dict[str, dict] = {}   # num -> {"volume": str, "cover": tuple|None}
        # Chapters of the same volume share one cover download. Only
        # successful downloads are remembered so a failed attempt is retried
        # for the next chapter of that volume.
        covers: dict = {}
        for num in sorted(missing, key=_chapter_sort_value):
            builder.chapter = num
            try:
                volume = builder._determine_volume()
            except Exception:
                volume = None
            if not volume:
                continue
            cover_key = str(volume)
            cover = covers.get(cover_key)
            if cover is None:
                cover = self._fetch_cover(series_id, volume)
                if cover is not None:
                    covers[cover_key] = cover
            updates[num] = {"volume": volume, "cover": cover}

        if not updates:
            return 0

        first = min(chapters, key=_chapter_sort_value)
        updated = 0

        for num, up in updates.items():
            entry = chapters[num]
            cbz = series_dir / (entry.get("archiveName") or "")
            if not entry.get("archiveName") or not cbz.is_file():
                print(f"[updater] {series_dir.name} ch.{num}: archive missing "
                      f"({entry.get('archiveName')!r}) — skip", flush=True)
                continue
            # The first chapter gets a full metadata rebuild (Kavita reads
            # series metadata from it); other chapters only a volume edit.
            ok, cover_swapped = self._apply_update(
                cbz, builder, num,
                volume=up["volume"], cover=up["cover"],
                full_rebuild=(num == first))
            if not ok:
                continue
            entry["volume"] = _normalise_volume_value(up["volume"])
            updated += 1
            self._log(f"{series_dir.name} | chapter {num} -> volume "
                      f"{up['volume']} | cover "
                      f"{'replaced' if cover_swapped else 'kept'} | {cbz.name}")

        # Refresh the first chapter's metadata when any other chapter changed
        # (skip when it was already fully rebuilt in the loop above).
        if updated and first not in updates:
            first_entry = chapters.get(first) or {}
            cbz = series_dir / (first_entry.get("archiveName") or "")
            if first_entry.get("archiveName") and cbz.is_file():
                ok, _ = self._apply_update(
                    cbz, builder, first,
                    volume=None, cover=None, full_rebuild=True)
                if ok:
                    self._log(f"{series_dir.name} | chapter {first} | "
                              f"first-chapter metadata refreshed | {cbz.name}")

        if updated:
            _save_chapter_index(series_dir, index)
        return updated

    # ------------------------------------------------------------------
    # Matching Kavita folder -> matches.json entry
    # ------------------------------------------------------------------
    def _find_match_for_folder(self, folder_name: str) -> tuple:
        """
        Maps a Kavita series folder back to its matches.json entry.

        The folder was created as ``_sanitize_dirname(mangabaka_title)``, so
        the comparison sanitizes each entry's mangabakaName the same way.
        Falls back to the folderTitle (Suwayomi name) for robustness.
        Returns (match_key, entry) or (None, None).
        """
        target = folder_name.strip().casefold()
        matches = self._matches_cache.all()["matches"]
        # First pass: the MangaBaka title takes precedence over every
        # folderTitle match.
        for key, entry in matches.items():
            name = entry.get("mangabakaName") or ""
            if name and _sanitize_dirname(name).strip().casefold() == target:
                return key, entry
        for key, entry in matches.items():
            folder = entry.get("folderTitle") or key
            if _sanitize_dirname(folder).strip().casefold() == target:
                return key, entry
        return None, None

    # ------------------------------------------------------------------
    # Cover download
    # ------------------------------------------------------------------
    def _fetch_cover(self, series_id: str, volume) -> "tuple[str, bytes] | None":
        """
        Downloads the MangaBaka volume cover.
        Returns ("000<ext>", bytes) or None when no cover is available
        (unknown series id, no cover URL, or a failed download).
        """
        if not series_id:
            # Without a series id there is nothing to look up.
            return None
        try:
            url = self._works_resolver.get_cover_for_volume(series_id, volume)
        except Exception:
            url = None
        if not url:
            return None
        try:
            resp = self._session.get(url, timeout=self._timeout)
            resp.raise_for_status()
        except requests.RequestException:
            return None
        ext = _guess_extension(url, resp.headers.get("Content-Type", ""))
        return (f"000{ext}", resp.content)

    # ------------------------------------------------------------------
    # Archive update (single read + single write per archive)
    # ------------------------------------------------------------------
    @staticmethod
    def _is_cover_placeholder(member_name: str) -> bool:
        """True for an archive member named ``000.<image extension>``."""
        member = Path(member_name)
        return member.stem == "000" and member.suffix.lower() in _IMAGE_EXTS

    def _apply_update(self, cbz_path: Path, builder: "ComicInfoBuilder",
                      chapter_num: str, *,
                      volume, cover, full_rebuild: bool) -> tuple:
        """
        Rewrites one CBZ archive with an updated ComicInfo.xml and (when
        provided and a placeholder exists) a new cover image.

        The new archive is written next to the original as ``<name>.tmp``
        and then moved over it; on any failure the temporary file is
        removed and the original archive is left untouched.

        Returns (ok, cover_swapped).
        """
        tmp = cbz_path.with_suffix(cbz_path.suffix + ".tmp")
        try:
            with zipfile.ZipFile(cbz_path, "r") as zin:
                infos = zin.infolist()
                # Cover is only ever *replaced*: inserting one would shift
                # every <Pages> image index in the existing XML.
                placeholder = next(
                    (i.filename for i in infos
                     if self._is_cover_placeholder(i.filename)), None)
                swap_cover = cover is not None and placeholder is not None
                # The XML must describe what ends up in the archive, so the
                # page-0 attributes only follow the new cover when it is
                # really swapped in.
                xml_cover = cover if swap_cover else None

                try:
                    old_xml = zin.read("ComicInfo.xml")
                except KeyError:
                    old_xml = None

                if full_rebuild or old_xml is None:
                    new_xml = self._build_full_xml(
                        builder, chapter_num, old_xml, xml_cover)
                else:
                    new_xml = self._edit_volume_xml(old_xml, volume, xml_cover)
                    if new_xml is None:        # parse error -> full rebuild
                        new_xml = self._build_full_xml(
                            builder, chapter_num, None, xml_cover)

                wrote_xml = False
                cover_written = False
                with zipfile.ZipFile(tmp, "w", zipfile.ZIP_STORED) as zout:
                    for info in infos:
                        if (swap_cover and not cover_written
                                and info.filename == placeholder):
                            # Replace the first placeholder only, so the
                            # cover is never written twice.
                            zout.writestr(cover[0], cover[1])
                            cover_written = True
                        elif info.filename == "ComicInfo.xml":
                            zout.writestr("ComicInfo.xml", new_xml)
                            wrote_xml = True
                        else:
                            zout.writestr(info, zin.read(info.filename))
                    if not wrote_xml:
                        zout.writestr("ComicInfo.xml", new_xml)
            # Both archives are closed here, so the swap also works on
            # platforms that refuse to replace an open file.
            tmp.replace(cbz_path)
            return True, swap_cover
        except Exception as exc:
            print(f"[updater] {cbz_path.name}: update failed: {exc}",
                  flush=True)
            # Best-effort cleanup: do not leave a half-written archive
            # behind in the library folder.
            try:
                tmp.unlink()
            except OSError:
                pass
            return False, False

    # ------------------------------------------------------------------
    # XML builders
    # ------------------------------------------------------------------
    def _edit_volume_xml(self, old_xml: bytes, volume,
                         cover) -> "str | None":
        """
        Sets <Volume> in an existing ComicInfo.xml and refreshes the
        FrontCover page attributes when the cover gets replaced.
        Returns None when the XML is unparseable.
        """
        try:
            root = ET.fromstring(old_xml)
        except ET.ParseError:
            return None

        el = root.find("Volume")
        if el is None:
            el = ET.SubElement(root, "Volume")
        el.text = str(volume)

        if cover is not None:
            pages = root.find("Pages")
            if pages is not None:
                _update_page0_attrs(pages, cover[1])

        return _serialize_tree(root)

    def _build_full_xml(self, builder: "ComicInfoBuilder", chapter_num: str,
                        old_xml: "bytes | None", cover) -> str:
        """
        Rebuilds the complete ComicInfo.xml via ComicInfoBuilder (fresh
        MangaBaka/MAL metadata). Suwayomi-derived fields and the <Pages>
        section are carried over from the previous XML.
        """
        builder.chapter = chapter_num          # also clears builder page state
        builder._suwayomi_data = (
            ComicInfoBuilder.read_comicinfo_fields(old_xml) if old_xml else {})
        root = builder._build_tree().getroot()

        if old_xml:
            try:
                old_root = ET.fromstring(old_xml)
            except ET.ParseError:
                old_root = None
            if old_root is not None:
                pages = old_root.find("Pages")
                if pages is not None and cover is not None:
                    _update_page0_attrs(pages, cover[1])
                page_count = old_root.find("PageCount")
                if page_count is not None:
                    root.append(page_count)
                if pages is not None:
                    root.append(pages)

        return _serialize_tree(root)

    # ------------------------------------------------------------------
    # Logging
    # ------------------------------------------------------------------
    def _log(self, msg: str) -> None:
        """Prints ``msg`` and appends it, timestamped, to the log file."""
        line = f"[{_now()}] {msg}"
        print(f"[updater] {msg}", flush=True)
        try:
            self._log_path.parent.mkdir(parents=True, exist_ok=True)
            with self._log_path.open("a", encoding="utf-8") as f:
                f.write(line + "\n")
        except OSError as exc:
            print(f"[updater] cannot write log file {self._log_path}: {exc}",
                  flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
# Usage example
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
if __name__ == "__main__":
    # Local (no-Docker) smoke test. Adjust paths to your environment.
    # KAVITA_PATH is a UNC path to a network share; MATCHES_PATH is resolved
    # relative to this file (two directory levels up).
    KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
    MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"

    # All other constructor arguments keep their defaults.
    updater = KavitaVolumeCoverUpdater(
        KAVITA_PATH,
        matches_cache=MatchesCache(MATCHES_PATH),
    )

    # One-shot scan (no cron thread):
    summary = updater.update_all()
    print(f"\n[updater] {summary}")

    # Or run on the cron schedule (default: 19:00 every Mon + Thu):
    # updater.start()
    # updater.wait()
|
||||||
Reference in New Issue
Block a user