cleanup
This commit is contained in:
@@ -17,6 +17,8 @@ services:
|
|||||||
# (local time, see TZ)
|
# (local time, see TZ)
|
||||||
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
|
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
|
||||||
UPDATER_LOG: "${UPDATER_LOG:-/config/volume_updater.log}"
|
UPDATER_LOG: "${UPDATER_LOG:-/config/volume_updater.log}"
|
||||||
|
# Persistent cover cache (empty = temp dir, deleted on container stop)
|
||||||
|
COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}"
|
||||||
# Timezone for the cron schedule — without this 19:00 means 19:00 UTC
|
# Timezone for the cron schedule — without this 19:00 means 19:00 UTC
|
||||||
TZ: "${TZ:-Europe/Berlin}"
|
TZ: "${TZ:-Europe/Berlin}"
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
@@ -32,12 +32,13 @@ Environment variables
|
|||||||
default "0 19 * * 1,4" = 19:00 every Mon + Thu
|
default "0 19 * * 1,4" = 19:00 every Mon + Thu
|
||||||
(local time — set TZ inside the container!)
|
(local time — set TZ inside the container!)
|
||||||
UPDATER_LOG default /config/volume_updater.log
|
UPDATER_LOG default /config/volume_updater.log
|
||||||
|
COVER_CACHE_PATH directory for the persistent cover cache;
|
||||||
|
empty (default) = temporary cache, deleted on exit
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import signal
|
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -94,6 +95,7 @@ def main() -> int:
|
|||||||
updater_enabled = _env_bool("UPDATER_ENABLED", True)
|
updater_enabled = _env_bool("UPDATER_ENABLED", True)
|
||||||
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
|
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
|
||||||
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
|
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
|
||||||
|
cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
|
||||||
|
|
||||||
print(f"[main] suwayomi = {suwayomi_path}", flush=True)
|
print(f"[main] suwayomi = {suwayomi_path}", flush=True)
|
||||||
print(f"[main] kavita = {kavita_path}", flush=True)
|
print(f"[main] kavita = {kavita_path}", flush=True)
|
||||||
@@ -114,6 +116,7 @@ def main() -> int:
|
|||||||
request_timeout=request_timeout,
|
request_timeout=request_timeout,
|
||||||
delete_source=delete_source,
|
delete_source=delete_source,
|
||||||
matches_cache=matches_cache,
|
matches_cache=matches_cache,
|
||||||
|
cover_cache_dir=cover_cache_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
# watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
|
# watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
|
||||||
@@ -130,6 +133,7 @@ def main() -> int:
|
|||||||
request_timeout=request_timeout,
|
request_timeout=request_timeout,
|
||||||
log_path=updater_log,
|
log_path=updater_log,
|
||||||
schedule=updater_schedule,
|
schedule=updater_schedule,
|
||||||
|
cover_cache_dir=cover_cache_path,
|
||||||
)
|
)
|
||||||
updater.start()
|
updater.start()
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
|
|||||||
+63
-64
@@ -37,7 +37,6 @@ Data source notes
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import difflib
|
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -50,6 +49,7 @@ from MALResolver import MALResolver
|
|||||||
from AniListResolver import AniListResolver
|
from AniListResolver import AniListResolver
|
||||||
from MatchesCache import MatchesCache
|
from MatchesCache import MatchesCache
|
||||||
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||||
|
from CoverCache import CoverCache
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
@@ -179,7 +179,8 @@ class ComicInfoBuilder:
|
|||||||
works_resolver: "MangaBakaWorksResolver | None" = None,
|
works_resolver: "MangaBakaWorksResolver | None" = None,
|
||||||
mal_resolver: "MALResolver | None" = None,
|
mal_resolver: "MALResolver | None" = None,
|
||||||
al_resolver: "AniListResolver | None" = None,
|
al_resolver: "AniListResolver | None" = None,
|
||||||
matches_cache: "MatchesCache | None" = None):
|
matches_cache: "MatchesCache | None" = None,
|
||||||
|
cover_cache: "CoverCache | None" = None):
|
||||||
if not manga_title or not str(manga_title).strip():
|
if not manga_title or not str(manga_title).strip():
|
||||||
raise ValueError("manga_title must not be empty.")
|
raise ValueError("manga_title must not be empty.")
|
||||||
|
|
||||||
@@ -210,6 +211,7 @@ class ComicInfoBuilder:
|
|||||||
self._al_resolver = al_resolver or AniListResolver(
|
self._al_resolver = al_resolver or AniListResolver(
|
||||||
request_timeout=request_timeout)
|
request_timeout=request_timeout)
|
||||||
self._matches_cache = matches_cache
|
self._matches_cache = matches_cache
|
||||||
|
self._cover_cache = cover_cache or _default_cover_cache()
|
||||||
|
|
||||||
self._metadata: "dict | None" = None
|
self._metadata: "dict | None" = None
|
||||||
self._pages: list[dict] = []
|
self._pages: list[dict] = []
|
||||||
@@ -580,11 +582,13 @@ class ComicInfoBuilder:
|
|||||||
# ======================================================================
|
# ======================================================================
|
||||||
def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None":
|
def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None":
|
||||||
"""
|
"""
|
||||||
Downloads the cover for the current chapter/volume.
|
Fetches the cover for the current chapter/volume and writes it into
|
||||||
|
`folder`.
|
||||||
|
|
||||||
If a volume is known and a volume-specific cover exists in MangaBaka
|
If a volume is known and a volume-specific cover exists in MangaBaka,
|
||||||
works, that cover is used. Otherwise the series default cover is
|
that cover is used; otherwise the series default cover. The image
|
||||||
downloaded (raw variant preferred).
|
itself comes from the CoverCache, so a cover shared by many chapters
|
||||||
|
is downloaded only once.
|
||||||
"""
|
"""
|
||||||
md = self._get_metadata()
|
md = self._get_metadata()
|
||||||
volume = self._determine_volume()
|
volume = self._determine_volume()
|
||||||
@@ -602,18 +606,13 @@ class ComicInfoBuilder:
|
|||||||
if not cover_url:
|
if not cover_url:
|
||||||
cover_url = _pick_cover_url(md.get("cover"))
|
cover_url = _pick_cover_url(md.get("cover"))
|
||||||
|
|
||||||
if not cover_url:
|
fetched = self._cover_cache.get(cover_url) if cover_url else None
|
||||||
|
if not fetched:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
data, ext = fetched
|
||||||
resp = self._session.get(cover_url, timeout=self.request_timeout)
|
|
||||||
resp.raise_for_status()
|
|
||||||
except requests.RequestException:
|
|
||||||
return None
|
|
||||||
|
|
||||||
ext = _guess_extension(cover_url, resp.headers.get("Content-Type", ""))
|
|
||||||
target = folder / f"{cover_filename}{ext}"
|
target = folder / f"{cover_filename}{ext}"
|
||||||
target.write_bytes(resp.content)
|
target.write_bytes(data)
|
||||||
return target
|
return target
|
||||||
|
|
||||||
# ======================================================================
|
# ======================================================================
|
||||||
@@ -656,6 +655,41 @@ class ComicInfoBuilder:
|
|||||||
"manhua": ("zh-latn",),
|
"manhua": ("zh-latn",),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@staticmethod
def _pick_best_title(titles, language_codes: tuple,
                     prefer_trait: "str | None" = None) -> "str | None":
    """
    Selects the best-ranked title from a MangaBaka `titles` list.

    Only dict entries whose language (key "language", falling back to
    "lang", compared lower-cased) is contained in `language_codes` and
    that carry a non-empty "title" are considered.

    Ranking: `prefer_trait` present in the entry's traits adds 4, the
    "official" trait adds 2, a truthy "is_primary" adds 1.  The first
    entry reaching the highest rank wins.  Returns None when `titles`
    is not a list or no entry qualifies.
    """
    if not isinstance(titles, list):
        return None

    winner: "str | None" = None
    top_rank = -1
    for item in titles:
        if not isinstance(item, dict):
            continue

        code = (item.get("language") or item.get("lang") or "").lower()
        candidate = item.get("title") if code in language_codes else None
        if not candidate:
            continue

        tags = item.get("traits") or []
        rank = 4 if (prefer_trait and prefer_trait in tags) else 0
        rank += 2 if "official" in tags else 0
        rank += 1 if item.get("is_primary") else 0

        # Strict comparison keeps the earliest entry on equal rank.
        if rank > top_rank:
            top_rank, winner = rank, candidate
    return winner
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _romanized_for_native(cls, md: dict) -> "str | None":
|
def _romanized_for_native(cls, md: dict) -> "str | None":
|
||||||
"""
|
"""
|
||||||
@@ -686,30 +720,7 @@ class ComicInfoBuilder:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
titles = md.get("titles") or md.get("alt_titles") or []
|
titles = md.get("titles") or md.get("alt_titles") or []
|
||||||
if not isinstance(titles, list):
|
return cls._pick_best_title(titles, langs)
|
||||||
return None
|
|
||||||
|
|
||||||
best_score = -1
|
|
||||||
best_title: "str | None" = None
|
|
||||||
for entry in titles:
|
|
||||||
if not isinstance(entry, dict):
|
|
||||||
continue
|
|
||||||
lang = (entry.get("language") or entry.get("lang") or "").lower()
|
|
||||||
if lang not in langs:
|
|
||||||
continue
|
|
||||||
title = entry.get("title")
|
|
||||||
if not title:
|
|
||||||
continue
|
|
||||||
traits = entry.get("traits") or []
|
|
||||||
score = 0
|
|
||||||
if "official" in traits:
|
|
||||||
score += 2
|
|
||||||
if entry.get("is_primary"):
|
|
||||||
score += 1
|
|
||||||
if score > best_score:
|
|
||||||
best_score = score
|
|
||||||
best_title = title
|
|
||||||
return best_title
|
|
||||||
|
|
||||||
def _get_sort_title(self, md: dict) -> "str | None":
|
def _get_sort_title(self, md: dict) -> "str | None":
|
||||||
"""
|
"""
|
||||||
@@ -745,31 +756,7 @@ class ComicInfoBuilder:
|
|||||||
|
|
||||||
def pick(language_codes: tuple, prefer_trait: "str | None" = None
|
def pick(language_codes: tuple, prefer_trait: "str | None" = None
|
||||||
) -> "str | None":
|
) -> "str | None":
|
||||||
"""Picks the best title entry for any of the given language codes."""
|
return self._pick_best_title(titles, language_codes, prefer_trait)
|
||||||
if not isinstance(titles, list):
|
|
||||||
return None
|
|
||||||
best_score = -1
|
|
||||||
best_title: "str | None" = None
|
|
||||||
for entry in titles:
|
|
||||||
if not isinstance(entry, dict):
|
|
||||||
continue
|
|
||||||
lang = (entry.get("language") or entry.get("lang") or "").lower()
|
|
||||||
if lang not in language_codes:
|
|
||||||
continue
|
|
||||||
title = entry.get("title")
|
|
||||||
if not title:
|
|
||||||
continue
|
|
||||||
traits = entry.get("traits") or []
|
|
||||||
score = 0
|
|
||||||
if prefer_trait and prefer_trait in traits:
|
|
||||||
score += 4
|
|
||||||
if "official" in traits:
|
|
||||||
score += 2
|
|
||||||
if entry.get("is_primary"):
|
|
||||||
score += 1
|
|
||||||
if score > best_score:
|
|
||||||
best_score, best_title = score, title
|
|
||||||
return best_title
|
|
||||||
|
|
||||||
result: dict[str, str] = {}
|
result: dict[str, str] = {}
|
||||||
|
|
||||||
@@ -1080,6 +1067,18 @@ class ComicInfoBuilder:
|
|||||||
# generic image-block picker; _pick_cover_url is kept for backward compat.
|
# generic image-block picker; _pick_cover_url is kept for backward compat.
|
||||||
_pick_cover_url = _pick_image_url
|
_pick_cover_url = _pick_image_url
|
||||||
|
|
||||||
|
# Shared fallback CoverCache for builders constructed without an explicit
|
||||||
|
# one (temporary directory, removed at process exit). Created lazily so
|
||||||
|
# importing this module never touches the filesystem.
|
||||||
|
_shared_cover_cache: "CoverCache | None" = None
|
||||||
|
|
||||||
|
|
||||||
|
def _default_cover_cache() -> CoverCache:
    """
    Returns the module-wide fallback CoverCache.

    The instance is built on the first call (CoverCache() without a
    cache directory) and the same object is handed out on every later
    call.
    """
    global _shared_cover_cache
    cache = _shared_cover_cache
    if cache is None:
        cache = _shared_cover_cache = CoverCache()
    return cache
|
||||||
|
|
||||||
|
|
||||||
def _pick_thumbnail_url(cover) -> "str | None":
|
def _pick_thumbnail_url(cover) -> "str | None":
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -0,0 +1,136 @@
|
|||||||
|
"""
|
||||||
|
cover_cache.py
|
||||||
|
==============
|
||||||
|
|
||||||
|
Disk-backed cache for downloaded cover images, keyed by URL.
|
||||||
|
|
||||||
|
Why
|
||||||
|
---
|
||||||
|
The mover packs every chapter of a series individually, and each chapter
|
||||||
|
needs a cover image. Without caching, the same multi-megabyte cover is
|
||||||
|
downloaded once per chapter (20-chapter volume = 20 identical downloads).
|
||||||
|
This cache turns that into a single download per unique URL.
|
||||||
|
|
||||||
|
Persistence
|
||||||
|
-----------
|
||||||
|
* ``cache_dir`` given -> covers persist across runs in that directory.
|
||||||
|
* ``cache_dir`` omitted -> a temporary directory is used and removed
|
||||||
|
automatically when the process exits.
|
||||||
|
|
||||||
|
Files are stored as ``<sha256(url)[:32]><ext>``; the extension is derived
|
||||||
|
from the URL / Content-Type at download time so it can be reused when
|
||||||
|
writing the cover into a chapter folder.
|
||||||
|
|
||||||
|
Thread safety: downloads are serialised per cache instance, so concurrent
|
||||||
|
mover / updater threads never fetch the same URL twice.
|
||||||
|
|
||||||
|
Dependencies
|
||||||
|
------------
|
||||||
|
requests -> pip install requests
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import atexit
|
||||||
|
import hashlib
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class CoverCache:
    """
    URL-keyed image cache on disk.

    Parameters
    ----------
    cache_dir       : Directory for cached covers.  None -> temporary
                      directory, deleted automatically at process exit.
    session         : Optional shared requests.Session for downloads.
    request_timeout : HTTP timeout in seconds.

    Cache lookup and download both run while holding one per-instance
    lock, so two threads asking for the same URL on the same instance
    trigger a single download.
    """

    # Suffix of the scratch file a download is written to before it is
    # renamed onto its final name.  Such files are never valid cache hits.
    _TMP_SUFFIX = ".tmp"

    def __init__(self, cache_dir=None, *,
                 session: "requests.Session | None" = None,
                 request_timeout: int = 30):
        self._persistent = cache_dir is not None
        if self._persistent:
            self._dir = Path(cache_dir)
            self._dir.mkdir(parents=True, exist_ok=True)
        else:
            self._dir = Path(tempfile.mkdtemp(prefix="cover_cache_"))
            # Temporary caches clean up after themselves at interpreter exit.
            atexit.register(self.close)

        self._session = session or requests.Session()
        self._session.headers.setdefault("User-Agent", "CoverCache/1.0")
        self._timeout = request_timeout
        self._lock = threading.Lock()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get(self, url: str) -> "tuple[bytes, str] | None":
        """
        Returns ``(image_bytes, extension)`` for the URL — from cache when
        present, downloading (and caching) otherwise.  Returns None when
        the URL is empty or the download fails.
        """
        if not url:
            return None

        with self._lock:
            cached = self._find_cached(url)
            if cached is not None:
                try:
                    return cached.read_bytes(), cached.suffix
                except OSError:
                    pass  # unreadable cache file -> re-download

            # Still inside the lock: a second thread waiting for the same
            # URL finds the cached file instead of downloading again.
            return self._download(url)

    def clear(self) -> None:
        """Removes all cached covers (the directory itself is kept)."""
        with self._lock:
            for f in self._dir.glob("*"):
                if f.is_file():
                    f.unlink(missing_ok=True)

    def close(self) -> None:
        """Deletes the cache directory when it is non-persistent."""
        if not self._persistent:
            shutil.rmtree(self._dir, ignore_errors=True)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------
    @staticmethod
    def _key(url: str) -> str:
        """Returns the cache file stem: first 32 hex chars of sha256(url)."""
        return hashlib.sha256(url.encode("utf-8")).hexdigest()[:32]

    def _find_cached(self, url: str) -> "Path | None":
        """
        Returns the cached file for the URL, or None when there is none.

        Scratch files (``*.tmp``) left behind by an interrupted write are
        ignored so partial data is never served as a cover.  A file stored
        without any extension (bare key) is recognised as well.
        """
        key = self._key(url)
        # Sorted for a deterministic pick should several extensions exist.
        candidates = sorted(self._dir.glob(key + ".*"))
        candidates.append(self._dir / key)
        for path in candidates:
            if path.suffix != self._TMP_SUFFIX and path.is_file():
                return path
        return None

    def _download(self, url: str) -> "tuple[bytes, str] | None":
        """
        Downloads the URL, stores the payload in the cache directory and
        returns ``(image_bytes, extension)``.  Returns None on any
        requests-level failure (including non-2xx responses).
        """
        try:
            resp = self._session.get(url, timeout=self._timeout)
            resp.raise_for_status()
        except requests.RequestException:
            return None

        # Local import avoids a circular module dependency:
        # ComicInfoBuilder imports CoverCache at module level.
        from ComicInfoBuilder import _guess_extension
        ext = _guess_extension(url, resp.headers.get("Content-Type", ""))

        target = self._dir / f"{self._key(url)}{ext}"
        tmp = target.with_name(target.name + self._TMP_SUFFIX)
        try:
            # Write to a scratch file first, then rename, so a reader never
            # sees a half-written cover under the final name.
            tmp.write_bytes(resp.content)
            tmp.replace(target)
        except OSError:
            # cache write failure is non-fatal — still return the bytes,
            # but do not leave a partial scratch file behind.
            try:
                tmp.unlink(missing_ok=True)
            except OSError:
                pass
        return resp.content, ext
|
||||||
@@ -52,7 +52,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from ComicInfoBuilder import (ComicInfoBuilder, _guess_extension, _IMAGE_EXTS)
|
from ComicInfoBuilder import ComicInfoBuilder, _IMAGE_EXTS
|
||||||
from MangadexVolumeResolver import MangaDexVolumeResolver
|
from MangadexVolumeResolver import MangaDexVolumeResolver
|
||||||
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
||||||
from MALResolver import MALResolver
|
from MALResolver import MALResolver
|
||||||
@@ -62,6 +62,7 @@ from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
|
|||||||
_sanitize_dirname, _normalise_volume_value)
|
_sanitize_dirname, _normalise_volume_value)
|
||||||
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||||
from CronSchedule import CronSchedule
|
from CronSchedule import CronSchedule
|
||||||
|
from CoverCache import CoverCache
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
@@ -133,6 +134,8 @@ class KavitaVolumeCoverUpdater:
|
|||||||
e.g. "0 19 * * 1,4" = 19:00 every Monday and
|
e.g. "0 19 * * 1,4" = 19:00 every Monday and
|
||||||
Thursday. Evaluated in local time — set the TZ env
|
Thursday. Evaluated in local time — set the TZ env
|
||||||
var inside Docker. Default: "0 19 * * 1,4".
|
var inside Docker. Default: "0 19 * * 1,4".
|
||||||
|
cover_cache_dir : Directory for the persistent cover cache. None ->
|
||||||
|
temporary cache, deleted at process exit.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@@ -143,7 +146,8 @@ class KavitaVolumeCoverUpdater:
|
|||||||
request_timeout: int = 30,
|
request_timeout: int = 30,
|
||||||
api_base_url: str = "https://api.mangabaka.dev/v1",
|
api_base_url: str = "https://api.mangabaka.dev/v1",
|
||||||
log_path=None,
|
log_path=None,
|
||||||
schedule: str = "0 19 * * 1,4"):
|
schedule: str = "0 19 * * 1,4",
|
||||||
|
cover_cache_dir=None):
|
||||||
self._dst = Path(kavita_path)
|
self._dst = Path(kavita_path)
|
||||||
self._matches_cache = matches_cache
|
self._matches_cache = matches_cache
|
||||||
self._language = language
|
self._language = language
|
||||||
@@ -165,6 +169,8 @@ class KavitaVolumeCoverUpdater:
|
|||||||
self._works_resolver = MangaBakaWorksResolver(
|
self._works_resolver = MangaBakaWorksResolver(
|
||||||
api_base_url=api_base_url,
|
api_base_url=api_base_url,
|
||||||
request_timeout=request_timeout, session=session)
|
request_timeout=request_timeout, session=session)
|
||||||
|
self._cover_cache = CoverCache(
|
||||||
|
cover_cache_dir, session=session, request_timeout=request_timeout)
|
||||||
|
|
||||||
self._stop = threading.Event()
|
self._stop = threading.Event()
|
||||||
self._thread: "threading.Thread | None" = None
|
self._thread: "threading.Thread | None" = None
|
||||||
@@ -225,6 +231,12 @@ class KavitaVolumeCoverUpdater:
|
|||||||
print(f"[updater] kavita path missing: {self._dst}", flush=True)
|
print(f"[updater] kavita path missing: {self._dst}", flush=True)
|
||||||
return summary
|
return summary
|
||||||
|
|
||||||
|
# The whole point of a scan is detecting volume assignments added
|
||||||
|
# since the previous run — start from fresh API data, not the
|
||||||
|
# process-lifetime resolver caches.
|
||||||
|
self._vol_resolver.clear_cache()
|
||||||
|
self._works_resolver.clear_cache()
|
||||||
|
|
||||||
for series_dir in sorted(self._dst.iterdir()):
|
for series_dir in sorted(self._dst.iterdir()):
|
||||||
if self._stop.is_set():
|
if self._stop.is_set():
|
||||||
break
|
break
|
||||||
@@ -277,6 +289,7 @@ class KavitaVolumeCoverUpdater:
|
|||||||
mal_resolver=self._mal,
|
mal_resolver=self._mal,
|
||||||
al_resolver=self._al,
|
al_resolver=self._al,
|
||||||
matches_cache=self._matches_cache,
|
matches_cache=self._matches_cache,
|
||||||
|
cover_cache=self._cover_cache,
|
||||||
)
|
)
|
||||||
md = builder.fetch_metadata()
|
md = builder.fetch_metadata()
|
||||||
series_id = str(md.get("id") or "")
|
series_id = str(md.get("id") or "")
|
||||||
@@ -367,7 +380,8 @@ class KavitaVolumeCoverUpdater:
|
|||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def _fetch_cover(self, series_id: str, volume) -> "tuple[str, bytes] | None":
|
def _fetch_cover(self, series_id: str, volume) -> "tuple[str, bytes] | None":
|
||||||
"""
|
"""
|
||||||
Downloads the MangaBaka volume cover.
|
Fetches the MangaBaka volume cover via the CoverCache (one download
|
||||||
|
per unique URL, even across chapters sharing a volume).
|
||||||
Returns ("000<ext>", bytes) or None when no cover is available.
|
Returns ("000<ext>", bytes) or None when no cover is available.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
@@ -376,13 +390,11 @@ class KavitaVolumeCoverUpdater:
|
|||||||
url = None
|
url = None
|
||||||
if not url:
|
if not url:
|
||||||
return None
|
return None
|
||||||
try:
|
fetched = self._cover_cache.get(url)
|
||||||
resp = self._session.get(url, timeout=self._timeout)
|
if not fetched:
|
||||||
resp.raise_for_status()
|
|
||||||
except requests.RequestException:
|
|
||||||
return None
|
return None
|
||||||
ext = _guess_extension(url, resp.headers.get("Content-Type", ""))
|
data, ext = fetched
|
||||||
return (f"000{ext}", resp.content)
|
return (f"000{ext}", data)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Archive update (single read + single write per archive)
|
# Archive update (single read + single write per archive)
|
||||||
|
|||||||
@@ -119,26 +119,18 @@ class MangaBakaWorksResolver:
|
|||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Public API
|
# Public API
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def get_works(self, series_id: str) -> list[dict]:
|
def _fetch_all_pages(self, endpoint: str) -> list[dict]:
|
||||||
"""
|
"""
|
||||||
Returns volume-level works for a series, filtered to those that have
|
Pages through a MangaBaka list endpoint (limit=50 per page) and
|
||||||
a usable cover image. Results are cached per series.
|
returns all collected `data` items. Network errors end the
|
||||||
|
pagination early; items fetched so far are returned.
|
||||||
Pages through the API (limit=50) until the response returns an empty
|
|
||||||
page, collecting all works before applying the cover filter.
|
|
||||||
"""
|
"""
|
||||||
if not series_id:
|
items: list[dict] = []
|
||||||
return []
|
|
||||||
|
|
||||||
if series_id in self._cache:
|
|
||||||
return self._cache[series_id]
|
|
||||||
|
|
||||||
all_works: list[dict] = []
|
|
||||||
page = 1
|
page = 1
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
resp = self._session.get(
|
resp = self._session.get(
|
||||||
f"{self.api_base_url}/series/{series_id}/works",
|
f"{self.api_base_url}/series/{endpoint}",
|
||||||
params={"limit": 50, "page": page},
|
params={"limit": 50, "page": page},
|
||||||
timeout=self.request_timeout,
|
timeout=self.request_timeout,
|
||||||
)
|
)
|
||||||
@@ -146,16 +138,34 @@ class MangaBakaWorksResolver:
|
|||||||
page_data = resp.json().get("data") or []
|
page_data = resp.json().get("data") or []
|
||||||
if not page_data:
|
if not page_data:
|
||||||
break
|
break
|
||||||
all_works.extend(page_data)
|
items.extend(page_data)
|
||||||
if len(page_data) < 50:
|
if len(page_data) < 50:
|
||||||
break
|
break
|
||||||
page += 1
|
page += 1
|
||||||
except requests.RequestException:
|
except requests.RequestException:
|
||||||
if not all_works:
|
pass
|
||||||
|
return items
|
||||||
|
|
||||||
|
def get_works(self, series_id: str) -> list[dict]:
|
||||||
|
"""
|
||||||
|
Returns volume-level works for a series, filtered to those that have
|
||||||
|
a usable cover image.
|
||||||
|
|
||||||
|
Non-empty results are cached per series; empty results are not, so
|
||||||
|
works added on MangaBaka later become visible without restarting
|
||||||
|
the (long-running) process.
|
||||||
|
"""
|
||||||
|
if not series_id:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
if series_id in self._cache:
|
||||||
|
return self._cache[series_id]
|
||||||
|
|
||||||
|
all_works = self._fetch_all_pages(f"{series_id}/works")
|
||||||
|
|
||||||
# Discard works that carry no usable cover
|
# Discard works that carry no usable cover
|
||||||
works_with_cover = [w for w in all_works if w.get("images")]
|
works_with_cover = [w for w in all_works if w.get("images")]
|
||||||
|
if works_with_cover:
|
||||||
self._cache[series_id] = works_with_cover
|
self._cache[series_id] = works_with_cover
|
||||||
return works_with_cover
|
return works_with_cover
|
||||||
|
|
||||||
@@ -190,25 +200,7 @@ class MangaBakaWorksResolver:
|
|||||||
if series_id in self._images_cache:
|
if series_id in self._images_cache:
|
||||||
return self._images_cache[series_id]
|
return self._images_cache[series_id]
|
||||||
|
|
||||||
raw_items: list[dict] = []
|
raw_items = self._fetch_all_pages(f"{series_id}/images")
|
||||||
page = 1
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
resp = self._session.get(
|
|
||||||
f"{self.api_base_url}/series/{series_id}/images",
|
|
||||||
params={"limit": 50, "page": page},
|
|
||||||
timeout=self.request_timeout,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
page_data = resp.json().get("data") or []
|
|
||||||
if not page_data:
|
|
||||||
break
|
|
||||||
raw_items.extend(page_data)
|
|
||||||
if len(page_data) < 50:
|
|
||||||
break
|
|
||||||
page += 1
|
|
||||||
except requests.RequestException:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Group by normalised volume index; collect all languages per volume.
|
# Group by normalised volume index; collect all languages per volume.
|
||||||
by_volume: dict[str, dict[str, str]] = {} # norm_vol -> {lang: url}
|
by_volume: dict[str, dict[str, str]] = {} # norm_vol -> {lang: url}
|
||||||
@@ -236,6 +228,9 @@ class MangaBakaWorksResolver:
|
|||||||
if url:
|
if url:
|
||||||
result[norm] = url
|
result[norm] = url
|
||||||
|
|
||||||
|
# Empty results are not cached — covers added on MangaBaka later
|
||||||
|
# become visible without restarting the long-running process.
|
||||||
|
if result:
|
||||||
self._images_cache[series_id] = result
|
self._images_cache[series_id] = result
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|||||||
@@ -43,7 +43,6 @@ Dependencies
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import difflib
|
import difflib
|
||||||
import re
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|||||||
@@ -29,7 +29,6 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import queue
|
import queue
|
||||||
import threading
|
import threading
|
||||||
import time
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|||||||
+16
-39
@@ -52,7 +52,8 @@ from pathlib import Path
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from ComicInfoBuilder import (ComicInfoBuilder, _pick_cover_url, _pick_thumbnail_url, _SEARCH_TYPES)
|
from ComicInfoBuilder import (ComicInfoBuilder, _pick_thumbnail_url,
|
||||||
|
_SEARCH_TYPES, _IMAGE_EXTS, _natural_key)
|
||||||
from MangadexVolumeResolver import MangaDexVolumeResolver
|
from MangadexVolumeResolver import MangaDexVolumeResolver
|
||||||
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
||||||
from MALResolver import MALResolver
|
from MALResolver import MALResolver
|
||||||
@@ -60,9 +61,9 @@ from AniListResolver import AniListResolver
|
|||||||
from KavitaPersonUpdater import KavitaPersonUpdater
|
from KavitaPersonUpdater import KavitaPersonUpdater
|
||||||
from MatchesCache import MatchesCache
|
from MatchesCache import MatchesCache
|
||||||
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||||
|
from CoverCache import CoverCache
|
||||||
|
|
||||||
|
|
||||||
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
|
|
||||||
_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
|
_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
|
||||||
|
|
||||||
# JSON file written into each Kavita series folder, listing every chapter
|
# JSON file written into each Kavita series folder, listing every chapter
|
||||||
@@ -133,11 +134,6 @@ _SOURCE_LABEL_RE = re.compile(
|
|||||||
_WIN_ILLEGAL_RE = re.compile(r'[\\/*?"<>|]')
|
_WIN_ILLEGAL_RE = re.compile(r'[\\/*?"<>|]')
|
||||||
|
|
||||||
|
|
||||||
def _natural_key(name: str) -> list:
|
|
||||||
return [int(p) if p.isdigit() else p.lower()
|
|
||||||
for p in re.split(r"(\d+)", name)]
|
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_dirname(name: str) -> str:
|
def _sanitize_dirname(name: str) -> str:
|
||||||
"""
|
"""
|
||||||
Makes a string safe to use as a Windows (or SMB) directory name.
|
Makes a string safe to use as a Windows (or SMB) directory name.
|
||||||
@@ -192,34 +188,6 @@ def _clean_suwayomi_title(title: str) -> str:
|
|||||||
return _SOURCE_LABEL_RE.sub("", title).strip()
|
return _SOURCE_LABEL_RE.sub("", title).strip()
|
||||||
|
|
||||||
|
|
||||||
def _mal_id_from_metadata(md: dict) -> "int | None":
|
|
||||||
"""Extracts the MAL ID from a MangaBaka series dict's source map."""
|
|
||||||
for raw_key, info in (md.get("source") or {}).items():
|
|
||||||
if re.sub(r"[^a-z0-9]", "", raw_key.lower()) in ("myanimelist", "mal"):
|
|
||||||
if isinstance(info, dict):
|
|
||||||
mal_id = info.get("id")
|
|
||||||
if mal_id is not None:
|
|
||||||
try:
|
|
||||||
return int(mal_id)
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
pass
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _al_id_from_metadata(md: dict) -> "int | None":
|
|
||||||
"""Extracts the AniList ID from a MangaBaka series dict's source map."""
|
|
||||||
for raw_key, info in (md.get("source") or {}).items():
|
|
||||||
if re.sub(r"[^a-z0-9]", "", raw_key.lower()) == "anilist":
|
|
||||||
if isinstance(info, dict):
|
|
||||||
al_id = info.get("id")
|
|
||||||
if al_id is not None:
|
|
||||||
try:
|
|
||||||
return int(al_id)
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
pass
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _chapter_image_size(chapter_dir: Path) -> int:
|
def _chapter_image_size(chapter_dir: Path) -> int:
|
||||||
"""Returns the total file size of all images in a chapter folder."""
|
"""Returns the total file size of all images in a chapter folder."""
|
||||||
return sum(
|
return sum(
|
||||||
@@ -336,6 +304,8 @@ class SuwayomiMover:
|
|||||||
language : ComicInfo LanguageISO and SeriesSort language ("en").
|
language : ComicInfo LanguageISO and SeriesSort language ("en").
|
||||||
request_timeout : HTTP timeout in seconds for all API / image requests.
|
request_timeout : HTTP timeout in seconds for all API / image requests.
|
||||||
delete_source : Remove the source chapter folder after successful pack.
|
delete_source : Remove the source chapter folder after successful pack.
|
||||||
|
cover_cache_dir : Directory for the persistent cover cache. None ->
|
||||||
|
temporary cache, deleted at process exit.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@@ -348,7 +318,8 @@ class SuwayomiMover:
|
|||||||
request_timeout: int = 30,
|
request_timeout: int = 30,
|
||||||
delete_source: bool = True,
|
delete_source: bool = True,
|
||||||
matches_cache: "MatchesCache | None" = None,
|
matches_cache: "MatchesCache | None" = None,
|
||||||
api_base_url: str = "https://api.mangabaka.dev/v1"):
|
api_base_url: str = "https://api.mangabaka.dev/v1",
|
||||||
|
cover_cache_dir=None):
|
||||||
self._src = Path(suwayomi_path)
|
self._src = Path(suwayomi_path)
|
||||||
self._dst = Path(kavita_path)
|
self._dst = Path(kavita_path)
|
||||||
self._language = language
|
self._language = language
|
||||||
@@ -371,6 +342,8 @@ class SuwayomiMover:
|
|||||||
request_timeout=request_timeout, session=session)
|
request_timeout=request_timeout, session=session)
|
||||||
self._works_resolver = MangaBakaWorksResolver(
|
self._works_resolver = MangaBakaWorksResolver(
|
||||||
request_timeout=request_timeout, session=session)
|
request_timeout=request_timeout, session=session)
|
||||||
|
self._cover_cache = CoverCache(
|
||||||
|
cover_cache_dir, session=session, request_timeout=request_timeout)
|
||||||
|
|
||||||
self._person_updater: "KavitaPersonUpdater | None" = None
|
self._person_updater: "KavitaPersonUpdater | None" = None
|
||||||
if kavita_base_url and kavita_api_key:
|
if kavita_base_url and kavita_api_key:
|
||||||
@@ -550,6 +523,7 @@ class SuwayomiMover:
|
|||||||
mal_resolver=self._mal,
|
mal_resolver=self._mal,
|
||||||
al_resolver=self._al,
|
al_resolver=self._al,
|
||||||
matches_cache=self._matches_cache,
|
matches_cache=self._matches_cache,
|
||||||
|
cover_cache=self._cover_cache,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Fetch MangaBaka metadata now to get the canonical title and MAL ID.
|
# Fetch MangaBaka metadata now to get the canonical title and MAL ID.
|
||||||
@@ -604,9 +578,9 @@ class SuwayomiMover:
|
|||||||
# AniList is used as fallback when MAL returns no characters/staff.
|
# AniList is used as fallback when MAL returns no characters/staff.
|
||||||
person_result: "dict | None" = None
|
person_result: "dict | None" = None
|
||||||
if self._person_updater:
|
if self._person_updater:
|
||||||
mal_id = (_mal_id_from_metadata(md) if md else None
|
mal_id = ((ComicInfoBuilder._mal_id_from_source(md) if md else None)
|
||||||
or self._mal.find_mal_id(builder_title))
|
or self._mal.find_mal_id(builder_title))
|
||||||
al_id = _al_id_from_metadata(md) if md else None
|
al_id = ComicInfoBuilder._al_id_from_source(md) if md else None
|
||||||
if mal_id or al_id:
|
if mal_id or al_id:
|
||||||
try:
|
try:
|
||||||
person_result = self._person_updater.update_for_manga(
|
person_result = self._person_updater.update_for_manga(
|
||||||
@@ -661,11 +635,14 @@ class SuwayomiMover:
|
|||||||
# Usage example
|
# Usage example
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
import os
|
||||||
|
|
||||||
# Local (no-Docker) smoke test. Adjust paths to your environment.
|
# Local (no-Docker) smoke test. Adjust paths to your environment.
|
||||||
|
# Set the KAVITA_API_KEY env var — never commit API keys to the repo.
|
||||||
SUWAYOMI_PATH = r"M:\config\downloads\mangas"
|
SUWAYOMI_PATH = r"M:\config\downloads\mangas"
|
||||||
KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
|
KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
|
||||||
KAVITA_URL = "http://192.168.2.2:5000"
|
KAVITA_URL = "http://192.168.2.2:5000"
|
||||||
KAVITA_KEY = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA"
|
KAVITA_KEY = os.environ.get("KAVITA_API_KEY", "")
|
||||||
|
|
||||||
# matches.json lives next to this script during local testing.
|
# matches.json lives next to this script during local testing.
|
||||||
MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"
|
MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"
|
||||||
|
|||||||
Reference in New Issue
Block a user