1 Commits

Author SHA1 Message Date
johannesbot 8a44b85a48 cleanup
Build and Deploy / build (push) Successful in 23s
Build and Deploy / deploy (push) Successful in 41s
Build Release / build (push) Successful in 16s
2026-06-11 21:31:20 +02:00
9 changed files with 276 additions and 153 deletions
+2
View File
@@ -17,6 +17,8 @@ services:
# (local time, see TZ)
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
UPDATER_LOG: "${UPDATER_LOG:-/config/volume_updater.log}"
# Persistent cover cache (empty = temp dir, deleted on container stop)
COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}"
# Timezone for the cron schedule — without this 19:00 means 19:00 UTC
TZ: "${TZ:-Europe/Berlin}"
ports:
+5 -1
View File
@@ -32,12 +32,13 @@ Environment variables
default "0 19 * * 1,4" = 19:00 every Mon + Thu
(local time — set TZ inside the container!)
UPDATER_LOG default /config/volume_updater.log
COVER_CACHE_PATH directory for the persistent cover cache;
empty (default) = temporary cache, deleted on exit
"""
from __future__ import annotations
import os
import signal
import sys
from pathlib import Path
@@ -94,6 +95,7 @@ def main() -> int:
updater_enabled = _env_bool("UPDATER_ENABLED", True)
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
print(f"[main] suwayomi = {suwayomi_path}", flush=True)
print(f"[main] kavita = {kavita_path}", flush=True)
@@ -114,6 +116,7 @@ def main() -> int:
request_timeout=request_timeout,
delete_source=delete_source,
matches_cache=matches_cache,
cover_cache_dir=cover_cache_path,
)
# watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
@@ -130,6 +133,7 @@ def main() -> int:
request_timeout=request_timeout,
log_path=updater_log,
schedule=updater_schedule,
cover_cache_dir=cover_cache_path,
)
updater.start()
except ValueError as exc:
+63 -64
View File
@@ -37,7 +37,6 @@ Data source notes
from __future__ import annotations
import difflib
import re
import xml.etree.ElementTree as ET
from pathlib import Path
@@ -50,6 +49,7 @@ from MALResolver import MALResolver
from AniListResolver import AniListResolver
from MatchesCache import MatchesCache
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CoverCache import CoverCache
try:
from PIL import Image
@@ -179,7 +179,8 @@ class ComicInfoBuilder:
works_resolver: "MangaBakaWorksResolver | None" = None,
mal_resolver: "MALResolver | None" = None,
al_resolver: "AniListResolver | None" = None,
matches_cache: "MatchesCache | None" = None):
matches_cache: "MatchesCache | None" = None,
cover_cache: "CoverCache | None" = None):
if not manga_title or not str(manga_title).strip():
raise ValueError("manga_title must not be empty.")
@@ -210,6 +211,7 @@ class ComicInfoBuilder:
self._al_resolver = al_resolver or AniListResolver(
request_timeout=request_timeout)
self._matches_cache = matches_cache
self._cover_cache = cover_cache or _default_cover_cache()
self._metadata: "dict | None" = None
self._pages: list[dict] = []
@@ -580,11 +582,13 @@ class ComicInfoBuilder:
# ======================================================================
def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None":
"""
Downloads the cover for the current chapter/volume.
Fetches the cover for the current chapter/volume and writes it into
`folder`.
If a volume is known and a volume-specific cover exists in MangaBaka
works, that cover is used. Otherwise the series default cover is
downloaded (raw variant preferred).
If a volume is known and a volume-specific cover exists in MangaBaka,
that cover is used; otherwise the series default cover. The image
itself comes from the CoverCache, so a cover shared by many chapters
is downloaded only once.
"""
md = self._get_metadata()
volume = self._determine_volume()
@@ -602,18 +606,13 @@ class ComicInfoBuilder:
if not cover_url:
cover_url = _pick_cover_url(md.get("cover"))
if not cover_url:
fetched = self._cover_cache.get(cover_url) if cover_url else None
if not fetched:
return None
try:
resp = self._session.get(cover_url, timeout=self.request_timeout)
resp.raise_for_status()
except requests.RequestException:
return None
ext = _guess_extension(cover_url, resp.headers.get("Content-Type", ""))
data, ext = fetched
target = folder / f"{cover_filename}{ext}"
target.write_bytes(resp.content)
target.write_bytes(data)
return target
# ======================================================================
@@ -656,6 +655,41 @@ class ComicInfoBuilder:
"manhua": ("zh-latn",),
}
@staticmethod
def _pick_best_title(titles, language_codes: tuple,
                     prefer_trait: "str | None" = None) -> "str | None":
    """
    Select the best-ranked title from a MangaBaka `titles` list.

    Only dict entries whose language (``language`` or ``lang`` key,
    compared lower-cased) is contained in `language_codes` and whose
    ``title`` is non-empty are considered.

    Ranking is additive: +4 when `prefer_trait` is among the entry's
    traits, +2 for the "official" trait, +1 when ``is_primary`` is
    truthy. On equal rank the earliest entry wins.

    Returns None when `titles` is not a list or nothing qualifies.
    """
    if not isinstance(titles, list):
        return None

    winner: "str | None" = None
    top_rank = -1
    for item in titles:
        if not isinstance(item, dict):
            continue
        code = (item.get("language") or item.get("lang") or "").lower()
        name = item.get("title")
        if code not in language_codes or not name:
            continue
        tags = item.get("traits") or []
        rank = (
            (4 if prefer_trait and prefer_trait in tags else 0)
            + (2 if "official" in tags else 0)
            + (1 if item.get("is_primary") else 0)
        )
        # Strict comparison keeps the first entry on ties.
        if rank > top_rank:
            top_rank, winner = rank, name
    return winner
@classmethod
def _romanized_for_native(cls, md: dict) -> "str | None":
"""
@@ -686,30 +720,7 @@ class ComicInfoBuilder:
return None
titles = md.get("titles") or md.get("alt_titles") or []
if not isinstance(titles, list):
return None
best_score = -1
best_title: "str | None" = None
for entry in titles:
if not isinstance(entry, dict):
continue
lang = (entry.get("language") or entry.get("lang") or "").lower()
if lang not in langs:
continue
title = entry.get("title")
if not title:
continue
traits = entry.get("traits") or []
score = 0
if "official" in traits:
score += 2
if entry.get("is_primary"):
score += 1
if score > best_score:
best_score = score
best_title = title
return best_title
return cls._pick_best_title(titles, langs)
def _get_sort_title(self, md: dict) -> "str | None":
"""
@@ -745,31 +756,7 @@ class ComicInfoBuilder:
def pick(language_codes: tuple, prefer_trait: "str | None" = None
) -> "str | None":
"""Picks the best title entry for any of the given language codes."""
if not isinstance(titles, list):
return None
best_score = -1
best_title: "str | None" = None
for entry in titles:
if not isinstance(entry, dict):
continue
lang = (entry.get("language") or entry.get("lang") or "").lower()
if lang not in language_codes:
continue
title = entry.get("title")
if not title:
continue
traits = entry.get("traits") or []
score = 0
if prefer_trait and prefer_trait in traits:
score += 4
if "official" in traits:
score += 2
if entry.get("is_primary"):
score += 1
if score > best_score:
best_score, best_title = score, title
return best_title
return self._pick_best_title(titles, language_codes, prefer_trait)
result: dict[str, str] = {}
@@ -1080,6 +1067,18 @@ class ComicInfoBuilder:
# generic image-block picker; _pick_cover_url is kept for backward compat.
_pick_cover_url = _pick_image_url
# Shared fallback CoverCache for builders constructed without an explicit
# one (temporary directory, removed at process exit). Created lazily so
# importing this module never touches the filesystem.
_shared_cover_cache: "CoverCache | None" = None
def _default_cover_cache() -> CoverCache:
    """
    Return the module-wide fallback CoverCache, creating it on first use.

    The instance is built without a cache directory, so it lives in a
    temporary directory, and is reused by every later call.
    """
    global _shared_cover_cache
    cache = _shared_cover_cache
    if cache is None:
        cache = _shared_cover_cache = CoverCache()
    return cache
def _pick_thumbnail_url(cover) -> "str | None":
"""
+136
View File
@@ -0,0 +1,136 @@
"""
CoverCache.py
=============
Disk-backed cache for downloaded cover images, keyed by URL.
Why
---
The mover packs every chapter of a series individually, and each chapter
needs a cover image. Without caching, the same multi-megabyte cover is
downloaded once per chapter (20-chapter volume = 20 identical downloads).
This cache turns that into a single download per unique URL.
Persistence
-----------
* ``cache_dir`` given -> covers persist across runs in that directory.
* ``cache_dir`` omitted -> a temporary directory is used and removed
automatically when the process exits.
Files are stored as ``<sha256(url)[:32]><ext>``; the extension is derived
from the URL / Content-Type at download time so it can be reused when
writing the cover into a chapter folder.
Thread safety: downloads are serialised per cache instance, so threads
sharing one instance never fetch the same URL twice. Separate instances
pointed at the same directory share the cached files but not the lock.
Dependencies
------------
requests -> pip install requests
"""
from __future__ import annotations
import atexit
import hashlib
import shutil
import tempfile
import threading
from pathlib import Path
import requests
class CoverCache:
    """
    URL-keyed image cache on disk.

    Files are stored as ``<sha256(url)[:32]><ext>``. While a download is
    being written it lives next to its final name with an extra ``.tmp``
    suffix; such files are never returned as cache hits.

    Parameters
    ----------
    cache_dir       : Directory for cached covers. None -> temporary
                      directory, deleted automatically at process exit.
    session         : Optional shared requests.Session for downloads.
    request_timeout : HTTP timeout in seconds.
    """

    # Suffix appended to in-progress cache writes.
    _TMP_SUFFIX = ".tmp"

    def __init__(self, cache_dir=None, *,
                 session: "requests.Session | None" = None,
                 request_timeout: int = 30):
        self._persistent = cache_dir is not None
        if self._persistent:
            self._dir = Path(cache_dir)
            self._dir.mkdir(parents=True, exist_ok=True)
        else:
            self._dir = Path(tempfile.mkdtemp(prefix="cover_cache_"))
            # Temporary caches clean up after themselves at interpreter exit.
            atexit.register(self.close)
        self._session = session or requests.Session()
        self._session.headers.setdefault("User-Agent", "CoverCache/1.0")
        self._timeout = request_timeout
        # Guards lookups, downloads and clear() for this instance only.
        self._lock = threading.Lock()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get(self, url: str) -> "tuple[bytes, str] | None":
        """
        Returns ``(image_bytes, extension)`` for the URL — from cache when
        present, downloading (and caching) otherwise. Returns None when
        the URL is empty or the download fails.
        """
        if not url:
            return None
        with self._lock:
            cached = self._find_cached(url)
            if cached is not None:
                try:
                    return cached.read_bytes(), cached.suffix
                except OSError:
                    pass  # unreadable cache file -> re-download
            return self._download(url)

    def clear(self) -> None:
        """Removes all cached covers (the directory itself is kept)."""
        with self._lock:
            for f in self._dir.glob("*"):
                if f.is_file():
                    f.unlink(missing_ok=True)

    def close(self) -> None:
        """Deletes the cache directory when it is non-persistent."""
        if not self._persistent:
            shutil.rmtree(self._dir, ignore_errors=True)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    @staticmethod
    def _key(url: str) -> str:
        """Returns the file-name stem for a URL (first 32 hex chars of its SHA-256)."""
        return hashlib.sha256(url.encode("utf-8")).hexdigest()[:32]

    def _find_cached(self, url: str) -> "Path | None":
        """
        Returns the cached file for the URL, or None when there is none.

        Leftover ``*.tmp`` files from an interrupted write also match the
        ``<key>.*`` pattern; they are skipped so a partial download is
        never served (and ``.tmp`` is never reported as the extension).
        """
        matches = sorted(
            p for p in self._dir.glob(self._key(url) + ".*")
            if p.suffix != self._TMP_SUFFIX and p.is_file()
        )
        return matches[0] if matches else None

    def _download(self, url: str) -> "tuple[bytes, str] | None":
        """
        Downloads the URL, stores it in the cache directory and returns
        ``(image_bytes, extension)``. Returns None on any HTTP error.
        A failed cache write is non-fatal: the bytes are still returned.
        """
        try:
            resp = self._session.get(url, timeout=self._timeout)
            resp.raise_for_status()
        except requests.RequestException:
            return None

        # Local import avoids a circular module dependency:
        # ComicInfoBuilder imports CoverCache at module level.
        from ComicInfoBuilder import _guess_extension
        ext = _guess_extension(url, resp.headers.get("Content-Type", ""))

        target = self._dir / f"{self._key(url)}{ext}"
        tmp = target.with_name(target.name + self._TMP_SUFFIX)
        try:
            # Write to a side file first, then rename, so readers never
            # see a half-written cover under the final name.
            tmp.write_bytes(resp.content)
            tmp.replace(target)
        except OSError:
            # Cache write failure is non-fatal — still return the bytes,
            # but do not leave a partial file behind.
            try:
                tmp.unlink(missing_ok=True)
            except OSError:
                pass
        return resp.content, ext
+21 -9
View File
@@ -52,7 +52,7 @@ from pathlib import Path
import requests
from ComicInfoBuilder import (ComicInfoBuilder, _guess_extension, _IMAGE_EXTS)
from ComicInfoBuilder import ComicInfoBuilder, _IMAGE_EXTS
from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver
@@ -62,6 +62,7 @@ from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
_sanitize_dirname, _normalise_volume_value)
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CronSchedule import CronSchedule
from CoverCache import CoverCache
try:
from PIL import Image
@@ -133,6 +134,8 @@ class KavitaVolumeCoverUpdater:
e.g. "0 19 * * 1,4" = 19:00 every Monday and
Thursday. Evaluated in local time — set the TZ env
var inside Docker. Default: "0 19 * * 1,4".
cover_cache_dir : Directory for the persistent cover cache. None ->
temporary cache, deleted at process exit.
"""
def __init__(self,
@@ -143,7 +146,8 @@ class KavitaVolumeCoverUpdater:
request_timeout: int = 30,
api_base_url: str = "https://api.mangabaka.dev/v1",
log_path=None,
schedule: str = "0 19 * * 1,4"):
schedule: str = "0 19 * * 1,4",
cover_cache_dir=None):
self._dst = Path(kavita_path)
self._matches_cache = matches_cache
self._language = language
@@ -165,6 +169,8 @@ class KavitaVolumeCoverUpdater:
self._works_resolver = MangaBakaWorksResolver(
api_base_url=api_base_url,
request_timeout=request_timeout, session=session)
self._cover_cache = CoverCache(
cover_cache_dir, session=session, request_timeout=request_timeout)
self._stop = threading.Event()
self._thread: "threading.Thread | None" = None
@@ -225,6 +231,12 @@ class KavitaVolumeCoverUpdater:
print(f"[updater] kavita path missing: {self._dst}", flush=True)
return summary
# The whole point of a scan is detecting volume assignments added
# since the previous run — start from fresh API data, not the
# process-lifetime resolver caches.
self._vol_resolver.clear_cache()
self._works_resolver.clear_cache()
for series_dir in sorted(self._dst.iterdir()):
if self._stop.is_set():
break
@@ -277,6 +289,7 @@ class KavitaVolumeCoverUpdater:
mal_resolver=self._mal,
al_resolver=self._al,
matches_cache=self._matches_cache,
cover_cache=self._cover_cache,
)
md = builder.fetch_metadata()
series_id = str(md.get("id") or "")
@@ -367,7 +380,8 @@ class KavitaVolumeCoverUpdater:
# ------------------------------------------------------------------
def _fetch_cover(self, series_id: str, volume) -> "tuple[str, bytes] | None":
"""
Downloads the MangaBaka volume cover.
Fetches the MangaBaka volume cover via the CoverCache (one download
per unique URL, even across chapters sharing a volume).
Returns ("000<ext>", bytes) or None when no cover is available.
"""
try:
@@ -376,13 +390,11 @@ class KavitaVolumeCoverUpdater:
url = None
if not url:
return None
try:
resp = self._session.get(url, timeout=self._timeout)
resp.raise_for_status()
except requests.RequestException:
fetched = self._cover_cache.get(url)
if not fetched:
return None
ext = _guess_extension(url, resp.headers.get("Content-Type", ""))
return (f"000{ext}", resp.content)
data, ext = fetched
return (f"000{ext}", data)
# ------------------------------------------------------------------
# Archive update (single read + single write per archive)
+33 -38
View File
@@ -119,26 +119,18 @@ class MangaBakaWorksResolver:
# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------
def get_works(self, series_id: str) -> list[dict]:
def _fetch_all_pages(self, endpoint: str) -> list[dict]:
"""
Returns volume-level works for a series, filtered to those that have
a usable cover image. Results are cached per series.
Pages through the API (limit=50) until the response returns an empty
page, collecting all works before applying the cover filter.
Pages through a MangaBaka list endpoint (limit=50 per page) and
returns all collected `data` items. Network errors end the
pagination early; items fetched so far are returned.
"""
if not series_id:
return []
if series_id in self._cache:
return self._cache[series_id]
all_works: list[dict] = []
items: list[dict] = []
page = 1
try:
while True:
resp = self._session.get(
f"{self.api_base_url}/series/{series_id}/works",
f"{self.api_base_url}/series/{endpoint}",
params={"limit": 50, "page": page},
timeout=self.request_timeout,
)
@@ -146,17 +138,35 @@ class MangaBakaWorksResolver:
page_data = resp.json().get("data") or []
if not page_data:
break
all_works.extend(page_data)
items.extend(page_data)
if len(page_data) < 50:
break
page += 1
except requests.RequestException:
if not all_works:
return []
pass
return items
def get_works(self, series_id: str) -> list[dict]:
"""
Returns volume-level works for a series, filtered to those that have
a usable cover image.
Non-empty results are cached per series; empty results are not, so
works added on MangaBaka later become visible without restarting
the (long-running) process.
"""
if not series_id:
return []
if series_id in self._cache:
return self._cache[series_id]
all_works = self._fetch_all_pages(f"{series_id}/works")
# Discard works that carry no usable cover
works_with_cover = [w for w in all_works if w.get("images")]
self._cache[series_id] = works_with_cover
if works_with_cover:
self._cache[series_id] = works_with_cover
return works_with_cover
def get_work_for_volume(self, series_id: str, volume) -> "dict | None":
@@ -190,25 +200,7 @@ class MangaBakaWorksResolver:
if series_id in self._images_cache:
return self._images_cache[series_id]
raw_items: list[dict] = []
page = 1
try:
while True:
resp = self._session.get(
f"{self.api_base_url}/series/{series_id}/images",
params={"limit": 50, "page": page},
timeout=self.request_timeout,
)
resp.raise_for_status()
page_data = resp.json().get("data") or []
if not page_data:
break
raw_items.extend(page_data)
if len(page_data) < 50:
break
page += 1
except requests.RequestException:
pass
raw_items = self._fetch_all_pages(f"{series_id}/images")
# Group by normalised volume index; collect all languages per volume.
by_volume: dict[str, dict[str, str]] = {} # norm_vol -> {lang: url}
@@ -236,7 +228,10 @@ class MangaBakaWorksResolver:
if url:
result[norm] = url
self._images_cache[series_id] = result
# Empty results are not cached — covers added on MangaBaka later
# become visible without restarting the long-running process.
if result:
self._images_cache[series_id] = result
return result
def get_cover_for_volume_from_images(self, series_id: str,
-1
View File
@@ -43,7 +43,6 @@ Dependencies
from __future__ import annotations
import difflib
import re
import requests
-1
View File
@@ -29,7 +29,6 @@ from __future__ import annotations
import queue
import threading
import time
from datetime import datetime
from pathlib import Path
+16 -39
View File
@@ -52,7 +52,8 @@ from pathlib import Path
import requests
from ComicInfoBuilder import (ComicInfoBuilder, _pick_cover_url, _pick_thumbnail_url, _SEARCH_TYPES)
from ComicInfoBuilder import (ComicInfoBuilder, _pick_thumbnail_url,
_SEARCH_TYPES, _IMAGE_EXTS, _natural_key)
from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver
@@ -60,9 +61,9 @@ from AniListResolver import AniListResolver
from KavitaPersonUpdater import KavitaPersonUpdater
from MatchesCache import MatchesCache
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CoverCache import CoverCache
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
# JSON file written into each Kavita series folder, listing every chapter
@@ -133,11 +134,6 @@ _SOURCE_LABEL_RE = re.compile(
_WIN_ILLEGAL_RE = re.compile(r'[\\/*?"<>|]')
def _natural_key(name: str) -> list:
return [int(p) if p.isdigit() else p.lower()
for p in re.split(r"(\d+)", name)]
def _sanitize_dirname(name: str) -> str:
"""
Makes a string safe to use as a Windows (or SMB) directory name.
@@ -192,34 +188,6 @@ def _clean_suwayomi_title(title: str) -> str:
return _SOURCE_LABEL_RE.sub("", title).strip()
def _mal_id_from_metadata(md: dict) -> "int | None":
"""Extracts the MAL ID from a MangaBaka series dict's source map."""
for raw_key, info in (md.get("source") or {}).items():
if re.sub(r"[^a-z0-9]", "", raw_key.lower()) in ("myanimelist", "mal"):
if isinstance(info, dict):
mal_id = info.get("id")
if mal_id is not None:
try:
return int(mal_id)
except (TypeError, ValueError):
pass
return None
def _al_id_from_metadata(md: dict) -> "int | None":
"""Extracts the AniList ID from a MangaBaka series dict's source map."""
for raw_key, info in (md.get("source") or {}).items():
if re.sub(r"[^a-z0-9]", "", raw_key.lower()) == "anilist":
if isinstance(info, dict):
al_id = info.get("id")
if al_id is not None:
try:
return int(al_id)
except (TypeError, ValueError):
pass
return None
def _chapter_image_size(chapter_dir: Path) -> int:
"""Returns the total file size of all images in a chapter folder."""
return sum(
@@ -336,6 +304,8 @@ class SuwayomiMover:
language : ComicInfo LanguageISO and SeriesSort language ("en").
request_timeout : HTTP timeout in seconds for all API / image requests.
delete_source : Remove the source chapter folder after successful pack.
cover_cache_dir : Directory for the persistent cover cache. None ->
temporary cache, deleted at process exit.
"""
def __init__(self,
@@ -348,7 +318,8 @@ class SuwayomiMover:
request_timeout: int = 30,
delete_source: bool = True,
matches_cache: "MatchesCache | None" = None,
api_base_url: str = "https://api.mangabaka.dev/v1"):
api_base_url: str = "https://api.mangabaka.dev/v1",
cover_cache_dir=None):
self._src = Path(suwayomi_path)
self._dst = Path(kavita_path)
self._language = language
@@ -371,6 +342,8 @@ class SuwayomiMover:
request_timeout=request_timeout, session=session)
self._works_resolver = MangaBakaWorksResolver(
request_timeout=request_timeout, session=session)
self._cover_cache = CoverCache(
cover_cache_dir, session=session, request_timeout=request_timeout)
self._person_updater: "KavitaPersonUpdater | None" = None
if kavita_base_url and kavita_api_key:
@@ -550,6 +523,7 @@ class SuwayomiMover:
mal_resolver=self._mal,
al_resolver=self._al,
matches_cache=self._matches_cache,
cover_cache=self._cover_cache,
)
# Fetch MangaBaka metadata now to get the canonical title and MAL ID.
@@ -604,9 +578,9 @@ class SuwayomiMover:
# AniList is used as fallback when MAL returns no characters/staff.
person_result: "dict | None" = None
if self._person_updater:
mal_id = (_mal_id_from_metadata(md) if md else None
mal_id = ((ComicInfoBuilder._mal_id_from_source(md) if md else None)
or self._mal.find_mal_id(builder_title))
al_id = _al_id_from_metadata(md) if md else None
al_id = ComicInfoBuilder._al_id_from_source(md) if md else None
if mal_id or al_id:
try:
person_result = self._person_updater.update_for_manga(
@@ -661,11 +635,14 @@ class SuwayomiMover:
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
import os
# Local (no-Docker) smoke test. Adjust paths to your environment.
# Set the KAVITA_API_KEY env var — never commit API keys to the repo.
SUWAYOMI_PATH = r"M:\config\downloads\mangas"
KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
KAVITA_URL = "http://192.168.2.2:5000"
KAVITA_KEY = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA"
KAVITA_KEY = os.environ.get("KAVITA_API_KEY", "")
# matches.json lives next to this script during local testing.
MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"