11 Commits

Author SHA1 Message Date
johannesbot 4996026b91 release CI/CD
Build and Deploy / build (push) Successful in 17s
Build and Deploy / deploy (push) Successful in 23s
Build Release / build (push) Successful in 15s
2026-06-11 20:02:06 +02:00
johannesbot 7fbe5f94a5 release CI/CD
Build and Deploy / build (push) Successful in 52s
Build and Deploy / deploy (push) Successful in 36s
Build Release / build (push) Failing after 8s
2026-06-11 19:57:11 +02:00
johannesbot 4557137ad0 feat(updater): add KavitaVolumeCoverUpdater for back-filling null volumes
Build and Deploy / build (push) Successful in 22s
Build and Deploy / deploy (push) Successful in 36s
Introduce a new background service that periodically re-checks chapters
whose volume could not be resolved at move time.

- Add KavitaVolumeCoverUpdater.py to resolve null volumes via MangaDex,
  update ComicInfo.xml in-archive, and swap in MangaBaka volume covers
- Wire updater into main.py entry point with UPDATER_ENABLED env flag
- Add UPDATER_ENABLED env var to docker-compose.prod.yml
- Update CronSchedule.py to schedule updater runs
2026-06-10 13:09:01 +02:00
johannesbot 59ea1f8c8f added chapter index json
Build and Deploy / deploy (push) Successful in 36s
Build and Deploy / build (push) Successful in 23s
2026-06-10 12:30:24 +02:00
johannesbot d724e9ffcd LocalizedSeries from kanji to romanji
Build and Deploy / build (push) Successful in 26s
Build and Deploy / deploy (push) Successful in 42s
2026-06-10 10:41:04 +02:00
johannesbot 2f30ac4e05 matches double key fix
Build and Deploy / build (push) Successful in 26s
Build and Deploy / deploy (push) Successful in 41s
2026-06-06 20:18:11 +02:00
johannesbot 97e4b10ac8 missing cover fix
Build and Deploy / build (push) Successful in 23s
Build and Deploy / deploy (push) Successful in 39s
2026-05-30 09:23:58 +02:00
johannesbot 054f974ddc update btn for webui
Build and Deploy / build (push) Successful in 23s
Build and Deploy / deploy (push) Successful in 38s
2026-05-29 08:22:03 +02:00
johannesbot 3288ab9de7 err response
Build and Deploy / build (push) Successful in 22s
Build and Deploy / deploy (push) Successful in 36s
2026-05-26 21:18:57 +02:00
johannesbot 12ef254424 ffs
Build and Deploy / build (push) Successful in 20s
Build and Deploy / deploy (push) Successful in 35s
2026-05-26 21:13:44 +02:00
johannesbot 76050eeda9 fix
Build and Deploy / build (push) Successful in 22s
Build and Deploy / deploy (push) Successful in 35s
2026-05-26 21:12:02 +02:00
10 changed files with 1266 additions and 160 deletions
+28
View File
@@ -0,0 +1,28 @@
name: Build Release
on:
push:
tags:
- 'v*'
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Login to Gitea Registry
run: |
echo "${{ secrets.REGISTRY_PASSWORD }}" | \
docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
- name: Extract Tag
id: tag
run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT"
- name: Build Image
run: docker build -t gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:${{ steps.tag.outputs.VERSION }} .
- name: Push Image
run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:${{ steps.tag.outputs.VERSION }}
+8
View File
@@ -11,6 +11,14 @@ services:
DELETE_SOURCE: "${DELETE_SOURCE:-true}" DELETE_SOURCE: "${DELETE_SOURCE:-true}"
MATCH_PATH: "${MATCH_PATH:-/config/matches.json}" MATCH_PATH: "${MATCH_PATH:-/config/matches.json}"
WEB_PORT: "${WEB_PORT:-8080}" WEB_PORT: "${WEB_PORT:-8080}"
# Volume/cover back-fill updater
UPDATER_ENABLED: "${UPDATER_ENABLED:-true}"
# Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday
# (local time, see TZ)
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
UPDATER_LOG: "${UPDATER_LOG:-/config/volume_updater.log}"
# Timezone for the cron schedule — without this 19:00 means 19:00 UTC
TZ: "${TZ:-Europe/Berlin}"
ports: ports:
- "${WEB_PORT:-8080}:${WEB_PORT:-8080}" - "${WEB_PORT:-8080}:${WEB_PORT:-8080}"
volumes: volumes:
+28 -2
View File
@@ -27,6 +27,11 @@ Environment variables
MATCH_PATH default /config/matches.json MATCH_PATH default /config/matches.json
WEB_PORT default 8080 (Flask web UI for matches.json) WEB_PORT default 8080 (Flask web UI for matches.json)
WEB_HOST default 0.0.0.0 WEB_HOST default 0.0.0.0
UPDATER_ENABLED default true (volume/cover back-fill cron)
UPDATER_SCHEDULE cron expression for the updater scans,
default "0 19 * * 1,4" = 19:00 every Mon + Thu
(local time — set TZ inside the container!)
UPDATER_LOG default /config/volume_updater.log
""" """
from __future__ import annotations from __future__ import annotations
@@ -43,6 +48,7 @@ from src.SuwayomiMover import SuwayomiMover # noqa: E402
from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402 from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402
from src.MatchesCache import MatchesCache # noqa: E402 from src.MatchesCache import MatchesCache # noqa: E402
from src.MatchesWebApp import MatchesWebApp # noqa: E402 from src.MatchesWebApp import MatchesWebApp # noqa: E402
from src.KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402
def _env_str(name: str, default: "str | None" = None, def _env_str(name: str, default: "str | None" = None,
@@ -74,7 +80,7 @@ def _env_bool(name: str, default: bool) -> bool:
def main() -> int: def main() -> int:
suwayomi_path = _env_str("SUWAYOMI_PATH", r"M:\config\downloads\mangas") suwayomi_path = _env_str("SUWAYOMI_PATH", r"/mnt/suwayomi")
kavita_path = _env_str("KAVITA_PATH", "/mnt/kavita") kavita_path = _env_str("KAVITA_PATH", "/mnt/kavita")
kavita_url = _env_str("KAVITA_URL", "http://kavita:5000") kavita_url = _env_str("KAVITA_URL", "http://kavita:5000")
kavita_api_key = _env_str("KAVITA_API_KEY", "") kavita_api_key = _env_str("KAVITA_API_KEY", "")
@@ -82,9 +88,12 @@ def main() -> int:
settle_seconds = _env_int("SETTLE_SECONDS", 600) settle_seconds = _env_int("SETTLE_SECONDS", 600)
request_timeout = _env_int("REQUEST_TIMEOUT", 30) request_timeout = _env_int("REQUEST_TIMEOUT", 30)
delete_source = _env_bool("DELETE_SOURCE", True) delete_source = _env_bool("DELETE_SOURCE", True)
match_path = _env_str("MATCH_PATH", "matches.json") match_path = _env_str("MATCH_PATH", "/config/matches.json")
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0" web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
web_port = _env_int("WEB_PORT", 8080) web_port = _env_int("WEB_PORT", 8080)
updater_enabled = _env_bool("UPDATER_ENABLED", True)
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
print(f"[main] suwayomi = {suwayomi_path}", flush=True) print(f"[main] suwayomi = {suwayomi_path}", flush=True)
print(f"[main] kavita = {kavita_path}", flush=True) print(f"[main] kavita = {kavita_path}", flush=True)
@@ -112,6 +121,23 @@ def main() -> int:
web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port) web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port)
web_app.start() web_app.start()
if updater_enabled:
try:
updater = KavitaVolumeCoverUpdater(
kavita_path,
matches_cache=matches_cache,
language=language,
request_timeout=request_timeout,
log_path=updater_log,
schedule=updater_schedule,
)
updater.start()
except ValueError as exc:
# Invalid cron expression — keep the service up, just without
# the updater, and make the config error obvious in the logs.
print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
f"volume/cover updater DISABLED", flush=True)
# def shutdown(signum, _frame): # def shutdown(signum, _frame):
# print(f"[main] received signal {signum}", flush=True) # print(f"[main] received signal {signum}", flush=True)
# watcher.stop() # watcher.stop()
+87 -59
View File
@@ -45,7 +45,7 @@ from pathlib import Path
import requests import requests
from MangadexVolumeResolver import MangaDexVolumeResolver from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver from MangaBakaWorksResolver import MangaBakaWorksResolver, _pick_image_url
from MALResolver import MALResolver from MALResolver import MALResolver
from AniListResolver import AniListResolver from AniListResolver import AniListResolver
from MatchesCache import MatchesCache from MatchesCache import MatchesCache
@@ -439,8 +439,7 @@ class ComicInfoBuilder:
# ----- Title / Series ----------------------------------------------- # ----- Title / Series -----------------------------------------------
add("Title", sd.get("Title") or f"Chapter {self._chapter}") add("Title", sd.get("Title") or f"Chapter {self._chapter}")
add("Series", md.get("title") or self._manga_title) add("Series", md.get("title") or self._manga_title)
add("LocalizedSeries", add("LocalizedSeries", self._romanized_for_native(md))
md.get("native_title") or md.get("romanized_title"))
add("SeriesSort", self._get_sort_title(md)) add("SeriesSort", self._get_sort_title(md))
add("Number", sd.get("Number") or self._chapter) add("Number", sd.get("Number") or self._chapter)
add("Count", md.get("total_chapters")) add("Count", md.get("total_chapters"))
@@ -648,6 +647,70 @@ class ComicInfoBuilder:
# ====================================================================== # ======================================================================
# Title helpers # Title helpers
# ====================================================================== # ======================================================================
# Mapping from series type to the matching romanized language code(s)
# in the MangaBaka titles array. Used to pick the correct romaji /
# romaja / pinyin for LocalizedSeries.
_ROMANIZED_LANG_BY_TYPE = {
"manga": ("ja-latn", "ja-romaji"),
"manhwa": ("ko-latn", "ko-romaji"),
"manhua": ("zh-latn",),
}
@classmethod
def _romanized_for_native(cls, md: dict) -> "str | None":
"""
Picks the romanized title in the manga's original language from the
``titles`` array.
The series' original language is inferred from ``type``::
manga -> ja-Latn (Japanese romaji)
manhwa -> ko-Latn (Korean romaja)
manhua -> zh-Latn (Chinese pinyin)
Among multiple entries for the matching language, the one with the
highest "quality score" wins (``official`` trait > ``is_primary`` >
first seen).
The root-level ``romanized_title`` field is **deliberately not used
as a fallback** — MangaBaka frequently stores a different language's
romanization there (e.g. Korean romaja on a Japanese manga), which
is exactly what this function is meant to avoid.
Returns ``None`` when no romanized title is available for the
inferred language.
"""
mtype = (md.get("type") or "").lower()
langs = cls._ROMANIZED_LANG_BY_TYPE.get(mtype)
if not langs:
return None
titles = md.get("titles") or md.get("alt_titles") or []
if not isinstance(titles, list):
return None
best_score = -1
best_title: "str | None" = None
for entry in titles:
if not isinstance(entry, dict):
continue
lang = (entry.get("language") or entry.get("lang") or "").lower()
if lang not in langs:
continue
title = entry.get("title")
if not title:
continue
traits = entry.get("traits") or []
score = 0
if "official" in traits:
score += 2
if entry.get("is_primary"):
score += 1
if score > best_score:
best_score = score
best_title = title
return best_title
def _get_sort_title(self, md: dict) -> "str | None": def _get_sort_title(self, md: dict) -> "str | None":
""" """
Returns the SeriesSort title in the configured language. Returns the SeriesSort title in the configured language.
@@ -967,12 +1030,14 @@ class ComicInfoBuilder:
return unique return unique
@staticmethod @staticmethod
def _read_existing_comicinfo(folder: Path) -> dict: def read_comicinfo_fields(xml_source) -> dict:
xml_path = folder / "ComicInfo.xml" """
if not xml_path.is_file(): Parses ComicInfo.xml content (bytes or str) and returns the fields
return {} relevant as supplementary Suwayomi data. Returns {} on parse errors.
Reusable for XML read directly from a CBZ archive (no extraction).
"""
try: try:
root = ET.parse(xml_path).getroot() root = ET.fromstring(xml_source)
except ET.ParseError: except ET.ParseError:
return {} return {}
@@ -986,6 +1051,16 @@ class ComicInfoBuilder:
data[tag] = child.text.strip() data[tag] = child.text.strip()
return data return data
@staticmethod
def _read_existing_comicinfo(folder: Path) -> dict:
xml_path = folder / "ComicInfo.xml"
if not xml_path.is_file():
return {}
try:
return ComicInfoBuilder.read_comicinfo_fields(xml_path.read_bytes())
except OSError:
return {}
@staticmethod @staticmethod
def _image_dimensions(path: Path): def _image_dimensions(path: Path):
if not _HAS_PIL: if not _HAS_PIL:
@@ -998,59 +1073,12 @@ class ComicInfoBuilder:
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
# Module-level helpers (shared with MangaBakaWorksResolver logic) # Module-level helpers
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
def _pick_cover_url(cover) -> "str | None":
"""
Selects the best cover URL from a MangaBaka cover object.
Real API shape (from `GET /v1/series/{id}` and `/works`): # Alias: _pick_image_url (from MangaBakaWorksResolver) is the canonical
{ # generic image-block picker; _pick_cover_url is kept for backward compat.
"raw": {"url": "...", "size": ..., "height": ..., "width": ...}, _pick_cover_url = _pick_image_url
"x150": {"x1": "...", "x2": "...", "x3": "..."},
"x250": {"x1": "...", "x2": "...", "x3": "..."},
"x350": {"x1": "...", "x2": "...", "x3": "..."}
}
Order of preference: raw original > x350@x3 > x250@x3 > x150@x3
(falling through to lower densities and sizes as needed).
"""
if not cover:
return None
if isinstance(cover, str):
return cover
if not isinstance(cover, dict):
return None
# 1) Preferred: the unscaled "raw" image
raw = cover.get("raw")
if isinstance(raw, dict):
url = raw.get("url")
if isinstance(url, str) and url:
return url
elif isinstance(raw, str) and raw:
return raw
# 2) Fallback: size-keyed variants, largest first, highest density first
for size_key in ("x350", "x250", "x150"):
variant = cover.get(size_key)
if isinstance(variant, dict):
for density in ("x3", "x2", "x1"):
url = variant.get(density)
if isinstance(url, str) and url:
return url
elif isinstance(variant, str) and variant:
return variant
# 3) Last-ditch fallback: any http URL anywhere in the structure
for val in cover.values():
if isinstance(val, str) and val.startswith("http"):
return val
if isinstance(val, dict):
for sub in val.values():
if isinstance(sub, str) and sub.startswith("http"):
return sub
return None
def _pick_thumbnail_url(cover) -> "str | None": def _pick_thumbnail_url(cover) -> "str | None":
+159
View File
@@ -0,0 +1,159 @@
"""
cron_schedule.py
================
Minimal cron-expression parser — no external dependency.
Supports the classic 5-field syntax::
┌──────── minute (0-59)
│ ┌────── hour (0-23)
│ │ ┌──── day of month (1-31)
│ │ │ ┌── month (1-12 or jan-dec)
│ │ │ │ ┌ day of week (0-7 or sun-sat; 0 and 7 = Sunday)
│ │ │ │ │
0 19 * * 1,4 -> 19:00 every Monday and Thursday
Field syntax: ``*``, single values, ranges (``a-b``), steps (``*/n``,
``a-b/n``) and comma lists. Month / weekday names (``jan``, ``mon``, …)
are accepted case-insensitively.
As in Vixie cron, when *both* day-of-month and day-of-week are restricted
the job runs when **either** matches.
Times are evaluated against the local system clock (``datetime.now()``) —
in Docker set the ``TZ`` environment variable so "19:00" means local time.
"""
from __future__ import annotations
from datetime import datetime, timedelta
_MONTH_NAMES = {"jan": 1, "feb": 2, "mar": 3, "apr": 4, "may": 5, "jun": 6,
"jul": 7, "aug": 8, "sep": 9, "oct": 10, "nov": 11, "dec": 12}
_DAY_NAMES = {"sun": 0, "mon": 1, "tue": 2, "wed": 3, "thu": 4,
"fri": 5, "sat": 6}
def _parse_value(token: str, lo: int, hi: int,
names: "dict[str, int] | None") -> int:
token = token.strip().lower()
if names and token in names:
return names[token]
try:
value = int(token)
except ValueError:
raise ValueError(f"invalid cron value {token!r}") from None
if not (lo <= value <= hi):
raise ValueError(f"cron value {value} out of range {lo}-{hi}")
return value
def _parse_field(field: str, lo: int, hi: int,
names: "dict[str, int] | None" = None) -> "set[int]":
"""Parses one cron field into the set of matching integer values."""
result: set[int] = set()
for part in field.split(","):
part = part.strip()
if not part:
raise ValueError(f"empty element in cron field {field!r}")
step = 1
if "/" in part:
part, step_text = part.split("/", 1)
try:
step = int(step_text)
except ValueError:
raise ValueError(f"invalid cron step {step_text!r}") from None
if step < 1:
raise ValueError(f"cron step must be >= 1, got {step}")
if part == "*":
start, end = lo, hi
elif "-" in part:
a, b = part.split("-", 1)
start = _parse_value(a, lo, hi, names)
end = _parse_value(b, lo, hi, names)
if end < start:
raise ValueError(f"inverted cron range {part!r}")
else:
start = end = _parse_value(part, lo, hi, names)
result.update(range(start, end + 1, step))
return result
class CronSchedule:
    """
    A parsed 5-field cron expression that can compute its next run time.

    Usage::

        cron = CronSchedule("0 19 * * mon,thu")
        run_at = cron.next_after(datetime.now())

    Raises ``ValueError`` from the constructor when the expression does
    not have exactly five fields or a field cannot be parsed.
    """

    def __init__(self, expression: str):
        self.expression = expression.strip()
        parts = self.expression.split()
        if len(parts) != 5:
            raise ValueError(
                f"cron expression needs 5 fields "
                f"(minute hour dom month dow), got {len(parts)}: "
                f"{expression!r}")
        minute, hour, dom, month, dow = parts

        self._minutes = _parse_field(minute, 0, 59)
        self._hours = _parse_field(hour, 0, 23)
        self._dom = _parse_field(dom, 1, 31)
        self._months = _parse_field(month, 1, 12, _MONTH_NAMES)
        # Weekday 7 is folded onto 0 so Sunday has a single representation.
        self._dow = {v % 7 for v in _parse_field(dow, 0, 7, _DAY_NAMES)}

        # A field counts as "restricted" unless it is exactly "*"; when
        # both day fields are restricted they are OR-combined.
        self._dom_restricted = dom != "*"
        self._dow_restricted = dow != "*"

    def __repr__(self) -> str:
        return f"CronSchedule({self.expression!r})"

    # ------------------------------------------------------------------
    def _day_matches(self, day: "datetime.date") -> bool:
        """Tell whether *day* (a date object) satisfies month/dom/dow."""
        if day.month not in self._months:
            return False
        dom_ok = day.day in self._dom
        # isoweekday(): Monday=1 … Sunday=7 -> cron: Sunday=0 … Saturday=6
        dow_ok = (day.isoweekday() % 7) in self._dow
        if not (self._dom_restricted or self._dow_restricted):
            return True
        if self._dom_restricted and self._dow_restricted:
            return dom_ok or dow_ok
        return dom_ok if self._dom_restricted else dow_ok

    def next_after(self, dt: datetime) -> datetime:
        """
        Return the first scheduled time strictly later than ``dt``.

        Seconds and microseconds are dropped.  Raises ``ValueError`` when
        no matching time exists within the searched window (366 * 5 days).
        """
        cand = (dt + timedelta(minutes=1)).replace(second=0, microsecond=0)
        # All (hour, minute) slots of a day in chronological order.
        slots = [(h, m)
                 for h in sorted(self._hours)
                 for m in sorted(self._minutes)]

        # Day-by-day walk; only the first day starts mid-day, every later
        # candidate is reset to 00:00.
        for _ in range(366 * 5):
            if self._day_matches(cand.date()):
                earliest = (cand.hour, cand.minute)
                for h, m in slots:
                    if (h, m) >= earliest:
                        return cand.replace(hour=h, minute=m)
            cand = (cand + timedelta(days=1)).replace(hour=0, minute=0)
        raise ValueError(
            f"cron {self.expression!r}: no occurrence within 5 years")
+536
View File
@@ -0,0 +1,536 @@
"""
kavita_volume_cover_updater.py
==============================
Periodically re-checks chapters already moved to the Kavita library whose
volume could not be resolved at move time (``"volume": null`` in the
series' ``chapter_index.json``).
When MangaDex has since assigned the chapter to a volume, the updater:
1. writes the volume into ``chapter_index.json``,
2. updates ``<Volume>`` inside the chapter's ComicInfo.xml (in-archive),
3. downloads the MangaBaka volume cover and swaps it in for the
placeholder ``000.<ext>`` series cover, and
4. refreshes the *first* chapter's ComicInfo.xml with full metadata —
Kavita can be configured to take series metadata from the lowest
chapter, so it must reflect the latest state.
Host-IO policy
--------------
* Per series only ``chapter_index.json`` is read (no archive is opened to
discover its contents).
* Series without null-volume chapters are skipped before any API call.
* An archive is read+rewritten exactly once per update (single pass,
written to a ``.tmp`` file, then atomically replaced).
Every updated chapter is appended to a log file (one line per update).
Reused components
-----------------
* ``SuwayomiMover`` — chapter index helpers, dirname sanitizer
* ``ComicInfoBuilder`` — metadata fetch (matches-cache ID lookup),
chapter→volume resolution, XML build
* ``MangaBakaWorksResolver`` — volume covers (/images with /works fallback)
* ``MangaDexVolumeResolver`` — chapter→volume aggregate (shared cache)
* ``MangaBakaRateLimit`` — process-wide API throttle
Dependencies
------------
requests -> pip install requests
Pillow -> pip install pillow (optional, page-0 dimensions)
"""
from __future__ import annotations
import io
import threading
import xml.etree.ElementTree as ET
import zipfile
from datetime import datetime
from pathlib import Path
import requests
from ComicInfoBuilder import (ComicInfoBuilder, _guess_extension, _IMAGE_EXTS)
from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver
from AniListResolver import AniListResolver
from MatchesCache import MatchesCache
from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
_sanitize_dirname, _normalise_volume_value)
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CronSchedule import CronSchedule
try:
from PIL import Image
_HAS_PIL = True
except ImportError:
_HAS_PIL = False
def _now() -> str:
return datetime.now().isoformat(timespec="seconds")
def _image_dims_from_bytes(data: bytes) -> tuple:
    """
    Return ``(width, height)`` for an in-memory image.

    Yields ``(None, None)`` when Pillow is not installed or the bytes
    cannot be opened as an image.
    """
    unknown = (None, None)
    if not _HAS_PIL:
        return unknown
    try:
        with Image.open(io.BytesIO(data)) as img:
            dims = img.size
    except Exception:
        # Best effort only: any decoding problem means "dimensions unknown".
        return unknown
    return dims
def _chapter_sort_value(num: str) -> float:
try:
return float(num)
except (TypeError, ValueError):
return float("inf")
def _update_page0_attrs(pages_el: "ET.Element", cover_bytes: bytes) -> None:
    """
    Refresh the size attributes of the page entry with ``Image="0"``.

    ``ImageSize`` is always set to the byte length of *cover_bytes*;
    ``ImageWidth`` / ``ImageHeight`` are only set when both dimensions
    could be determined.  Only the first matching entry is touched; when
    none exists the element is left unchanged.
    """
    front = next((p for p in pages_el if p.get("Image") == "0"), None)
    if front is None:
        return
    front.set("ImageSize", str(len(cover_bytes)))
    width, height = _image_dims_from_bytes(cover_bytes)
    if width and height:
        front.set("ImageWidth", str(width))
        front.set("ImageHeight", str(height))
def _serialize_tree(root: "ET.Element") -> str:
tree = ET.ElementTree(root)
try:
ET.indent(tree, space=" ")
except AttributeError:
pass
return ('<?xml version="1.0" encoding="UTF-8"?>\n'
+ ET.tostring(root, encoding="unicode"))
class KavitaVolumeCoverUpdater:
"""
Scans the Kavita library for chapters whose volume was unknown at move
time and back-fills volume + volume cover once MangaDex / MangaBaka
provide the data. Runs periodically on a background thread.
Parameters
----------
kavita_path : Root of the Kavita library (series folders inside).
matches_cache : MatchesCache — provides the MangaBaka series ID per
series (mandatory; folders without a match are skipped).
language : ComicInfo language (passed to ComicInfoBuilder).
request_timeout : HTTP timeout in seconds.
log_path : File that receives one line per updated chapter.
Default: <kavita_path>/volume_updater.log
schedule : Cron expression (5 fields) defining when scans run,
e.g. "0 19 * * 1,4" = 19:00 every Monday and
Thursday. Evaluated in local time — set the TZ env
var inside Docker. Default: "0 19 * * 1,4".
"""
def __init__(self,
             kavita_path,
             *,
             matches_cache: MatchesCache,
             language: str = "en",
             request_timeout: int = 30,
             api_base_url: str = "https://api.mangabaka.dev/v1",
             log_path=None,
             schedule: str = "0 19 * * 1,4"):
    """
    Set up paths, the shared HTTP session and all resolvers.

    Parameters
    ----------
    kavita_path     : Root of the Kavita library.
    matches_cache   : MatchesCache used to map folders to series IDs.
    language        : ComicInfo language handed to ComicInfoBuilder.
    request_timeout : HTTP timeout in seconds.
    api_base_url    : MangaBaka API base URL (trailing "/" is stripped
                      for the copy kept on the instance).
    log_path        : Update log file; defaults to
                      ``<kavita_path>/volume_updater.log``.
    schedule        : 5-field cron expression.

    Raises
    ------
    ValueError : *schedule* is not a valid cron expression
                 (raised by ``CronSchedule``).
    """
    self._dst = Path(kavita_path)
    self._matches_cache = matches_cache
    self._language = language
    self._timeout = request_timeout
    self._api_base_url = api_base_url.rstrip("/")
    self._log_path = (Path(log_path) if log_path
                      else self._dst / "volume_updater.log")
    self._cron = CronSchedule(schedule)

    session = requests.Session()
    # A fresh requests.Session already carries a default "User-Agent"
    # ("python-requests/x.y"), so setdefault() would never apply ours.
    # Assign explicitly so the updater identifies itself.
    session.headers["User-Agent"] = "KavitaVolumeCoverUpdater/1.0"
    _apply_mangabaka_rate_limit(session)
    self._session = session

    # One session is shared by the resolvers that accept it, so they all
    # go through the rate limit applied above.
    self._mal = MALResolver(request_timeout=request_timeout)
    self._al = AniListResolver(request_timeout=request_timeout)
    self._vol_resolver = MangaDexVolumeResolver(
        request_timeout=request_timeout, session=session)
    self._works_resolver = MangaBakaWorksResolver(
        api_base_url=api_base_url,
        request_timeout=request_timeout, session=session)

    self._stop = threading.Event()
    self._thread: "threading.Thread | None" = None
# ------------------------------------------------------------------
# Cron API (mirrors SuwayomiFolderWatcher)
# ------------------------------------------------------------------
def start(self) -> None:
    """
    Launch the background scan thread and return immediately.

    Calling it while the thread is already running is a no-op.
    """
    worker = self._thread
    if worker is not None and worker.is_alive():
        return
    self._stop.clear()
    worker = threading.Thread(
        target=self._loop, name="KavitaVolumeCoverUpdater", daemon=True)
    self._thread = worker
    worker.start()
    print(f"[{_now()}] [updater] scanning {self._dst} "
          f"on cron '{self._cron.expression}'", flush=True)
def stop(self) -> None:
    """
    Signal the scan thread to stop and wait up to 10 seconds for it.

    The stop flag is checked between series, so a running scan ends
    after the series it is currently processing.
    """
    self._stop.set()
    worker = self._thread
    if worker is None:
        return
    worker.join(timeout=10)
def wait(self) -> None:
    """Block the caller until the stop flag is set (see ``stop()``)."""
    stop_flag = self._stop
    stop_flag.wait()
def _loop(self) -> None:
    """
    Thread body: sleep until the next cron occurrence, run one scan,
    repeat until the stop flag is set.

    A scan failure is logged and does not end the loop.  If the schedule
    has no upcoming occurrence the thread logs that and exits.
    """
    while not self._stop.is_set():
        now = datetime.now()
        try:
            next_run = self._cron.next_after(now)
        except ValueError as exc:
            # A syntactically valid expression can still never fire
            # (e.g. 31 February).  Without this handler the thread would
            # die with a bare traceback instead of a clear log line.
            print(f"[{_now()}] [updater] schedule has no upcoming run "
                  f"({exc}); updater thread exiting", flush=True)
            return
        wait = max(0.0, (next_run - now).total_seconds())
        print(f"[{_now()}] [updater] next scheduled scan: "
              f"{next_run.isoformat(timespec='minutes')}", flush=True)
        # Event.wait returns True when stop() was called during the sleep.
        if self._stop.wait(wait):
            break
        try:
            summary = self.update_all()
            print(f"[{_now()}] [updater] scan done: "
                  f"{summary['series_updated']} series / "
                  f"{summary['chapters_updated']} chapters updated",
                  flush=True)
        except Exception as exc:
            print(f"[{_now()}] [updater] scan ERROR: {exc}", flush=True)
# ------------------------------------------------------------------
# Public scan API
# ------------------------------------------------------------------
def update_all(self) -> dict:
    """
    Run one pass over every series folder below the Kavita root.

    Folders are visited in sorted order; non-directories are ignored.
    A failure in one series is logged and does not abort the pass.  The
    pass ends early once the stop flag is set.

    Returns ``{"series_scanned": n, "series_updated": n,
    "chapters_updated": n}``.
    """
    totals = {"series_scanned": 0, "series_updated": 0,
              "chapters_updated": 0}
    root = self._dst
    if not root.is_dir():
        print(f"[updater] kavita path missing: {root}", flush=True)
        return totals

    for candidate in sorted(root.iterdir()):
        if self._stop.is_set():
            break
        if not candidate.is_dir():
            continue
        totals["series_scanned"] += 1
        try:
            changed = self.update_series(candidate)
        except Exception as exc:
            print(f"[updater] {candidate.name}: ERROR {exc}", flush=True)
            continue
        if changed:
            totals["series_updated"] += 1
            totals["chapters_updated"] += changed
    return totals
def update_series(self, series_dir: Path) -> int:
    """
    Back-fill volumes for one series folder.

    Candidates are the chapters in ``chapter_index.json`` whose entry is
    a dict with ``"volume": null``.  For each candidate whose volume can
    now be resolved, the archive is rewritten (volume + cover) and the
    index entry is updated; the index file is saved once at the end.
    The lowest chapter of the series receives a full metadata rebuild
    whenever at least one chapter changed.

    Returns the number of chapters whose volume was filled in.
    """
    index = _load_chapter_index(series_dir)
    chapters: dict = index["chapter"]
    if not chapters:
        return 0

    missing = [num for num, e in chapters.items()
               if isinstance(e, dict) and e.get("volume") is None]
    if not missing:
        return 0

    match_key, match = self._find_match_for_folder(series_dir.name)
    if not match or not match.get("mangabakaId"):
        print(f"[updater] {series_dir.name}: no matches.json entry — skip",
              flush=True)
        return 0

    # Builder resolves metadata via the cached MangaBaka ID and gives us
    # the exact same chapter→volume logic the mover uses.
    builder = ComicInfoBuilder(
        match_key, chapter=missing[0],
        api_base_url=self._api_base_url,
        language=self._language,
        request_timeout=self._timeout,
        session=self._session,
        volume_resolver=self._vol_resolver,
        works_resolver=self._works_resolver,
        mal_resolver=self._mal,
        al_resolver=self._al,
        matches_cache=self._matches_cache,
    )
    md = builder.fetch_metadata()
    series_id = str(md.get("id") or "")

    # Resolve volumes for all null-volume chapters first (API only).
    # Covers are cached per volume: several chapters usually share one
    # volume, and the image only needs to be downloaded once per scan.
    cover_by_volume: dict = {}
    updates: "dict[str, dict]" = {}   # num -> {"volume": ..., "cover": tuple|None}
    for num in sorted(missing, key=_chapter_sort_value):
        # NOTE(review): assumes ComicInfoBuilder exposes a writable
        # ``chapter`` attribute/property that _determine_volume() honours
        # — confirm against ComicInfoBuilder.
        builder.chapter = num
        try:
            volume = builder._determine_volume()
        except Exception:
            volume = None
        if not volume:
            continue
        cache_key = str(volume)
        if cache_key not in cover_by_volume:
            cover_by_volume[cache_key] = self._fetch_cover(series_id, volume)
        updates[num] = {"volume": volume,
                        "cover": cover_by_volume[cache_key]}

    if not updates:
        return 0

    first = min(chapters, key=_chapter_sort_value)
    updated = 0

    for num, up in updates.items():
        entry = chapters[num]
        cbz = series_dir / (entry.get("archiveName") or "")
        if not entry.get("archiveName") or not cbz.is_file():
            print(f"[updater] {series_dir.name} ch.{num}: archive missing "
                  f"({entry.get('archiveName')!r}) — skip", flush=True)
            continue

        # The first chapter gets a full metadata rebuild (Kavita reads
        # series metadata from it); other chapters only a volume edit.
        ok, cover_swapped = self._apply_update(
            cbz, builder, num,
            volume=up["volume"], cover=up["cover"],
            full_rebuild=(num == first))
        if not ok:
            continue

        entry["volume"] = _normalise_volume_value(up["volume"])
        updated += 1
        self._log(f"{series_dir.name} | chapter {num} -> volume "
                  f"{up['volume']} | cover "
                  f"{'replaced' if cover_swapped else 'kept'} | {cbz.name}")

    # Refresh the first chapter's metadata when any other chapter changed
    # (skip when it was already fully rebuilt in the loop above).
    if updated and first not in updates:
        first_entry = chapters.get(first) or {}
        cbz = series_dir / (first_entry.get("archiveName") or "")
        if first_entry.get("archiveName") and cbz.is_file():
            ok, _ = self._apply_update(
                cbz, builder, first,
                volume=None, cover=None, full_rebuild=True)
            if ok:
                self._log(f"{series_dir.name} | chapter {first} | "
                          f"first-chapter metadata refreshed | {cbz.name}")

    if updated:
        _save_chapter_index(series_dir, index)
    return updated
# ------------------------------------------------------------------
# Matching Kavita folder -> matches.json entry
# ------------------------------------------------------------------
def _find_match_for_folder(self, folder_name: str) -> tuple:
"""
Maps a Kavita series folder back to its matches.json entry.
The folder was created as ``_sanitize_dirname(mangabaka_title)``, so
the comparison sanitizes each entry's mangabakaName the same way.
Falls back to the folderTitle (Suwayomi name) for robustness.
Returns (match_key, entry) or (None, None).
"""
target = folder_name.strip().casefold()
matches = self._matches_cache.all()["matches"]
for key, entry in matches.items():
name = entry.get("mangabakaName") or ""
if name and _sanitize_dirname(name).strip().casefold() == target:
return key, entry
for key, entry in matches.items():
folder = entry.get("folderTitle") or key
if _sanitize_dirname(folder).strip().casefold() == target:
return key, entry
return None, None
# ------------------------------------------------------------------
# Cover download
# ------------------------------------------------------------------
def _fetch_cover(self, series_id: str, volume) -> "tuple[str, bytes] | None":
"""
Downloads the MangaBaka volume cover.
Returns ("000<ext>", bytes) or None when no cover is available.
"""
try:
url = self._works_resolver.get_cover_for_volume(series_id, volume)
except Exception:
url = None
if not url:
return None
try:
resp = self._session.get(url, timeout=self._timeout)
resp.raise_for_status()
except requests.RequestException:
return None
ext = _guess_extension(url, resp.headers.get("Content-Type", ""))
return (f"000{ext}", resp.content)
# ------------------------------------------------------------------
# Archive update (single read + single write per archive)
# ------------------------------------------------------------------
def _apply_update(self, cbz_path: Path, builder: ComicInfoBuilder,
                  chapter_num: str, *,
                  volume, cover, full_rebuild: bool) -> tuple:
    """
    Rewrites one CBZ archive with an updated ComicInfo.xml and (when
    provided and a placeholder exists) a new cover image.

    The archive is read once and written once: every member is copied
    into a sibling ``<name>.tmp`` archive, which then replaces the
    original. ComicInfo.xml is appended when the archive had none.

    Parameters
    ----------
    cbz_path     : archive to rewrite in place.
    builder      : ComicInfoBuilder used for full XML rebuilds.
    chapter_num  : chapter identifier passed to the full rebuild.
    volume       : value written to <Volume> on the edit-only path.
    cover        : ``(filename, bytes)`` or None.
    full_rebuild : force a complete XML rebuild instead of editing
                   <Volume> in the existing XML.

    Returns (ok, cover_swapped). Any exception is reported on stdout and
    yields (False, False); the partial ``.tmp`` file is removed.
    """
    def _is_placeholder(name: str) -> bool:
        member = Path(name)
        return (member.stem == "000"
                and member.suffix.lower() in _IMAGE_EXTS)

    tmp = None
    try:
        tmp = cbz_path.with_suffix(cbz_path.suffix + ".tmp")
        with zipfile.ZipFile(cbz_path, "r") as zin:
            infos = zin.infolist()

            # Cover is only ever *replaced*: inserting one would shift
            # every <Pages> image index in the existing XML.
            swap_cover = cover is not None and any(
                _is_placeholder(i.filename) for i in infos)
            # The page-0 attributes must describe the image that really
            # ends up in the archive, so the XML builders only see the
            # cover when it is going to be swapped in.
            xml_cover = cover if swap_cover else None

            try:
                old_xml = zin.read("ComicInfo.xml")
            except KeyError:
                old_xml = None

            if full_rebuild or old_xml is None:
                new_xml = self._build_full_xml(
                    builder, chapter_num, old_xml, xml_cover)
            else:
                new_xml = self._edit_volume_xml(old_xml, volume, xml_cover)
                if new_xml is None:  # parse error -> full rebuild
                    new_xml = self._build_full_xml(
                        builder, chapter_num, None, xml_cover)

            wrote_xml = False
            with zipfile.ZipFile(tmp, "w", zipfile.ZIP_STORED) as zout:
                for info in infos:
                    if swap_cover and _is_placeholder(info.filename):
                        zout.writestr(cover[0], cover[1])
                    elif info.filename == "ComicInfo.xml":
                        zout.writestr("ComicInfo.xml", new_xml)
                        wrote_xml = True
                    else:
                        zout.writestr(info, zin.read(info.filename))
                if not wrote_xml:
                    zout.writestr("ComicInfo.xml", new_xml)

        # Replace only after the source archive has been closed; an open
        # handle blocks the rename on Windows / SMB shares.
        tmp.replace(cbz_path)
        return True, swap_cover
    except Exception as exc:
        print(f"[updater] {cbz_path.name}: update failed: {exc}",
              flush=True)
        # Do not leave a half-written temp archive next to the CBZ.
        if tmp is not None:
            try:
                tmp.unlink()
            except OSError:
                pass
        return False, False
# ------------------------------------------------------------------
# XML builders
# ------------------------------------------------------------------
def _edit_volume_xml(self, old_xml: bytes, volume,
                     cover) -> "str | None":
    """
    Patches an existing ComicInfo.xml in place of a full rebuild.

    <Volume> is created when absent and set to ``str(volume)``. When a
    cover is supplied and a <Pages> element exists, the page-0 attributes
    are refreshed from the cover bytes.

    Returns the serialized XML, or None when ``old_xml`` cannot be parsed.
    """
    try:
        tree_root = ET.fromstring(old_xml)
    except ET.ParseError:
        return None

    volume_el = tree_root.find("Volume")
    if volume_el is None:
        volume_el = ET.SubElement(tree_root, "Volume")
    volume_el.text = str(volume)

    # <Pages> is only looked up when there is a cover to describe.
    pages_el = tree_root.find("Pages") if cover is not None else None
    if pages_el is not None:
        _update_page0_attrs(pages_el, cover[1])

    return _serialize_tree(tree_root)
def _build_full_xml(self, builder: ComicInfoBuilder, chapter_num: str,
                    old_xml: "bytes | None", cover) -> str:
    """
    Regenerates the whole ComicInfo.xml through ComicInfoBuilder.

    The builder's Suwayomi-derived fields are seeded from the previous
    XML (empty when there is none). If the previous XML parses, its
    <PageCount> and <Pages> elements are appended to the new tree; with a
    cover supplied, the page-0 attributes of <Pages> are refreshed first.

    Returns the serialized XML.
    """
    builder.chapter = chapter_num  # also clears builder page state
    if old_xml:
        builder._suwayomi_data = ComicInfoBuilder.read_comicinfo_fields(
            old_xml)
    else:
        builder._suwayomi_data = {}
    new_root = builder._build_tree().getroot()

    previous_root = None
    if old_xml:
        try:
            previous_root = ET.fromstring(old_xml)
        except ET.ParseError:
            previous_root = None

    # Nothing to carry over without a parseable previous document.
    if previous_root is None:
        return _serialize_tree(new_root)

    old_pages = previous_root.find("Pages")
    if old_pages is not None and cover is not None:
        _update_page0_attrs(old_pages, cover[1])

    old_page_count = previous_root.find("PageCount")
    if old_page_count is not None:
        new_root.append(old_page_count)
    if old_pages is not None:
        new_root.append(old_pages)

    return _serialize_tree(new_root)
# ------------------------------------------------------------------
# Logging
# ------------------------------------------------------------------
def _log(self, msg: str) -> None:
    """
    Prints ``msg`` to stdout with an ``[updater]`` prefix and appends a
    timestamped copy to the log file.

    The log directory is created on demand. A failure to write the file
    is reported on stdout and otherwise ignored.
    """
    stamped = f"[{_now()}] {msg}"
    print(f"[updater] {msg}", flush=True)

    log_file = self._log_path
    try:
        log_file.parent.mkdir(parents=True, exist_ok=True)
        with log_file.open("a", encoding="utf-8") as handle:
            handle.write(stamped + "\n")
    except OSError as exc:
        print(f"[updater] cannot write log file {self._log_path}: {exc}",
              flush=True)
# --------------------------------------------------------------------------
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
    # Manual smoke test for a local (non-Docker) setup. Edit the two paths
    # below to match your environment before running.
    KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
    MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"

    _cache = MatchesCache(MATCHES_PATH)
    updater = KavitaVolumeCoverUpdater(KAVITA_PATH, matches_cache=_cache)

    # Single scan, no cron thread involved.
    summary = updater.update_all()
    print(f"\n[updater] {summary}")

    # To run on the cron schedule instead (default: 19:00 every Mon + Thu):
    #     updater.start()
    #     updater.wait()
+175 -64
View File
@@ -2,7 +2,7 @@
mangabaka_works_resolver.py mangabaka_works_resolver.py
=========================== ===========================
Fetches volume-level (work) data from the MangaBaka API. Fetches volume-level (work) data and volume cover images from the MangaBaka API.
Each "work" is a physical tankobon volume and may carry: Each "work" is a physical tankobon volume and may carry:
- volume number - volume number
@@ -11,10 +11,16 @@ Each "work" is a physical tankobon volume and may carry:
- release date - release date
- cover image (raw / default / small variants) - cover image (raw / default / small variants)
Only works that have a usable cover are kept in the cache. Cover resolution order (per volume)
Works without a cover are discarded at fetch time. ------------------------------------
If no volume is assigned for a chapter, callers fall back to the 1. GET /v1/series/{id}/images — covers that exist independently of a work
default series cover from the series object itself. (some series have covers but no works). English edition preferred;
original language used when no English cover is available.
2. GET /v1/series/{id}/works — physical tankobon data including covers.
Fallback when /images returns nothing for the requested volume.
If no volume cover is found at all, callers fall back to the series-level
default cover from the series object itself.
Dependencies Dependencies
------------ ------------
@@ -26,10 +32,75 @@ from __future__ import annotations
import requests import requests
# --------------------------------------------------------------------------
# Generic image-block URL picker (shared by /images and /works responses)
# --------------------------------------------------------------------------
def _pick_image_url(image) -> "str | None":
"""
Returns the best URL from a MangaBaka image block.
Handles the common ``{raw, x150, x250, x350}`` structure used by both
the ``cover`` field on series/work objects and the ``image`` field on
``/images`` endpoint items::
{
"raw": {"url": "...", "size": ..., "height": ..., "width": ...},
"x150": {"x1": "...", "x2": "...", "x3": "..."},
"x250": {...},
"x350": {...}
}
Preference: raw original > x350@x3 > x250@x3 > x150@x3 > … (falling
through to lower densities and sizes as needed).
"""
if not image:
return None
if isinstance(image, str):
return image
if not isinstance(image, dict):
return None
# 1) Raw / unscaled image
raw = image.get("raw")
if isinstance(raw, dict):
url = raw.get("url")
if isinstance(url, str) and url:
return url
elif isinstance(raw, str) and raw:
return raw
# 2) Size-keyed CDN variants, largest first, highest density first
for size_key in ("x350", "x250", "x150"):
variant = image.get(size_key)
if isinstance(variant, dict):
for density in ("x3", "x2", "x1"):
url = variant.get(density)
if isinstance(url, str) and url:
return url
elif isinstance(variant, str) and variant:
return variant
# 3) Last-ditch: any HTTP URL anywhere in the structure
for val in image.values():
if isinstance(val, str) and val.startswith("http"):
return val
if isinstance(val, dict):
for sub_val in val.values():
if isinstance(sub_val, str) and sub_val.startswith("http"):
return sub_val
return None
class MangaBakaWorksResolver: class MangaBakaWorksResolver:
""" """
Fetches and caches MangaBaka volume (work) data for a series. Fetches and caches MangaBaka volume (work) data and cover images.
Only works that have a cover image are retained in the cache.
Cover lookup order per volume
------------------------------
1. ``/v1/series/{id}/images`` — edition covers (English > original).
2. ``/v1/series/{id}/works`` — physical tankobon covers.
Only works that carry a cover image are retained in the works cache.
""" """
def __init__(self, api_base_url: str = "https://api.mangabaka.dev/v1", def __init__(self, api_base_url: str = "https://api.mangabaka.dev/v1",
@@ -42,6 +113,8 @@ class MangaBakaWorksResolver:
# Cache: series_id (str) -> list of work dicts (only those with covers) # Cache: series_id (str) -> list of work dicts (only those with covers)
self._cache: dict[str, list[dict]] = {} self._cache: dict[str, list[dict]] = {}
# Cache: series_id (str) -> {norm_vol (str): url (str)}
self._images_cache: dict[str, dict[str, str]] = {}
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Public API # Public API
@@ -101,12 +174,100 @@ class MangaBakaWorksResolver:
return work return work
return None return None
def get_cover_for_volume(self, series_id: str, volume) -> "str | None": def get_volume_covers(self, series_id: str) -> "dict[str, str]":
"""Returns the cover URL for a specific volume, or None if not found.""" """
work = self.get_work_for_volume(series_id, volume) Fetches all volume-type cover images for a series from
if not work: ``/v1/series/{id}/images`` and returns a
``{normalised_volume_str: url}`` mapping.
English-edition covers are preferred; the first available language
is used as fallback when no English cover exists for a volume.
Results are cached per series.
"""
if not series_id:
return {}
if series_id in self._images_cache:
return self._images_cache[series_id]
raw_items: list[dict] = []
page = 1
try:
while True:
resp = self._session.get(
f"{self.api_base_url}/series/{series_id}/images",
params={"limit": 50, "page": page},
timeout=self.request_timeout,
)
resp.raise_for_status()
page_data = resp.json().get("data") or []
if not page_data:
break
raw_items.extend(page_data)
if len(page_data) < 50:
break
page += 1
except requests.RequestException:
pass
# Group by normalised volume index; collect all languages per volume.
by_volume: dict[str, dict[str, str]] = {} # norm_vol -> {lang: url}
for item in raw_items:
if item.get("type") != "volume":
continue
idx = item.get("index_numeric")
if idx is None:
continue
norm = _norm_vol(idx)
lang = (item.get("language") or "").lower() or "unknown"
url = _pick_image_url(item.get("image"))
if not url:
continue
if norm not in by_volume:
by_volume[norm] = {}
# First entry per language wins (API order reflects quality/rank).
if lang not in by_volume[norm]:
by_volume[norm][lang] = url
# Pick best language per volume: English first, then first available.
result: dict[str, str] = {}
for norm, lang_map in by_volume.items():
url = lang_map.get("en") or next(iter(lang_map.values()), None)
if url:
result[norm] = url
self._images_cache[series_id] = result
return result
def get_cover_for_volume_from_images(self, series_id: str,
volume) -> "str | None":
"""
Returns the cover URL for a specific volume from the /images endpoint,
or None if not available.
"""
covers = self.get_volume_covers(series_id)
if not covers:
return None return None
return self._pick_cover_url(work.get("images")[0].get("image")) return covers.get(_norm_vol(volume))
def get_cover_for_volume(self, series_id: str, volume) -> "str | None":
    """
    Resolves the cover URL for one volume of a series.

    Lookup order:

    1. ``get_cover_for_volume_from_images`` (the ``/images`` endpoint);
    2. the first image of the matching work from
       ``get_work_for_volume`` (the ``/works`` endpoint).

    Returns None when neither source yields a cover for the volume.
    """
    from_images = self.get_cover_for_volume_from_images(series_id, volume)
    if from_images:
        return from_images

    # Fallback: the catalogued work for this volume, if it has images.
    work = self.get_work_for_volume(series_id, volume)
    work_images = work.get("images") if work else None
    if not work_images:
        return None
    return _pick_image_url(work_images[0].get("image"))
def get_page_counts(self, series_id: str) -> "dict[str, int]": def get_page_counts(self, series_id: str) -> "dict[str, int]":
""" """
@@ -125,59 +286,9 @@ class MangaBakaWorksResolver:
return result return result
def clear_cache(self) -> None: def clear_cache(self) -> None:
"""Clears the internal works cache.""" """Clears both the works cache and the images cover cache."""
self._cache.clear() self._cache.clear()
self._images_cache.clear()
# ------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------
@staticmethod
def _pick_cover_url(cover) -> "str | None":
"""
Selects the best cover URL from a MangaBaka cover object.
Real API shape:
"raw": {"url": "...", "size": ..., "height": ..., "width": ...}
"x150": {"x1": "...", "x2": "...", "x3": "..."}
"x250": {...}
"x350": {...}
Order: raw original > x350@x3 > x250@x3 > x150@x3 ...
"""
if not cover:
return None
if isinstance(cover, str):
return cover
if not isinstance(cover, dict):
return None
raw = cover.get("raw")
if isinstance(raw, dict):
url = raw.get("url")
if isinstance(url, str) and url:
return url
elif isinstance(raw, str) and raw:
return raw
for size_key in ("x350", "x250", "x150"):
variant = cover.get(size_key)
if isinstance(variant, dict):
for density in ("x3", "x2", "x1"):
url = variant.get(density)
if isinstance(url, str) and url:
return url
elif isinstance(variant, str) and variant:
return variant
# Last-ditch: any HTTP URL anywhere in the structure
for val in cover.values():
if isinstance(val, str) and val.startswith("http"):
return val
if isinstance(val, dict):
for sub_val in val.values():
if isinstance(sub_val, str) and sub_val.startswith("http"):
return sub_val
return None
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
+92 -13
View File
@@ -2,14 +2,15 @@
matches_cache.py matches_cache.py
================ ================
Persistent JSON cache that maps a Suwayomi/series search title to the Persistent JSON cache that maps a normalised (lowercase) search title to the
MangaBaka series it was matched against. MangaBaka series it was matched against.
Structure on disk:: Structure on disk::
{ {
"matches": { "matches": {
"<search title>": { "<normalised lowercase key>": {
"folderTitle": "Original Folder Name",
"mangabakaId": "12345", "mangabakaId": "12345",
"mangabakaName": "One-Punch Man", "mangabakaName": "One-Punch Man",
"imageUrl": "https://.../cover.jpg", "imageUrl": "https://.../cover.jpg",
@@ -19,6 +20,11 @@ Structure on disk::
} }
} }
Keys are always stored lowercase so that folder names differing only in
capitalisation (e.g. "[Oshi No Ko]" vs "[oshi no ko]") are treated as
identical entries. The original casing is preserved in the ``folderTitle``
field and is used for display purposes (e.g. the web UI title link).
The cache is consulted by ComicInfoBuilder before issuing a MangaBaka The cache is consulted by ComicInfoBuilder before issuing a MangaBaka
search request, and is written back to disk on every mutation so a crash search request, and is written back to disk on every mutation so a crash
does not lose matches that were resolved in the current run. does not lose matches that were resolved in the current run.
@@ -32,6 +38,11 @@ import time
from pathlib import Path from pathlib import Path
def _norm_key(title: str) -> str:
"""Normalises a cache key to lowercase for case-insensitive deduplication."""
return title.lower()
class MatchesCache: class MatchesCache:
def __init__(self, path): def __init__(self, path):
self._path = Path(path) self._path = Path(path)
@@ -44,7 +55,7 @@ class MatchesCache:
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def get(self, title: str) -> "dict | None": def get(self, title: str) -> "dict | None":
with self._lock: with self._lock:
entry = self._data["matches"].get(title) entry = self._data["matches"].get(_norm_key(title))
return dict(entry) if entry else None return dict(entry) if entry else None
def add(self, title: str, *, def add(self, title: str, *,
@@ -52,13 +63,14 @@ class MatchesCache:
mangabaka_name: str, mangabaka_name: str,
image_url: "str | None") -> dict: image_url: "str | None") -> dict:
entry = { entry = {
"folderTitle": title,
"mangabakaId": str(mangabaka_id) if mangabaka_id is not None else "", "mangabakaId": str(mangabaka_id) if mangabaka_id is not None else "",
"mangabakaName": mangabaka_name or "", "mangabakaName": mangabaka_name or "",
"imageUrl": image_url or "", "imageUrl": image_url or "",
"firstMatchTime": int(time.time()), "firstMatchTime": int(time.time()),
} }
with self._lock: with self._lock:
self._data["matches"][title] = entry self._data["matches"][_norm_key(title)] = entry
self._save_unlocked() self._save_unlocked()
return dict(entry) return dict(entry)
@@ -67,16 +79,19 @@ class MatchesCache:
mangabaka_name=None, mangabaka_name=None,
image_url=None, image_url=None,
first_match_time=None) -> dict: first_match_time=None) -> dict:
norm = _norm_key(title)
with self._lock: with self._lock:
entry = self._data["matches"].get(title) entry = self._data["matches"].get(norm)
if entry is None: if entry is None:
entry = { entry = {
"folderTitle": title,
"mangabakaId": "", "mangabakaId": "",
"mangabakaName": "", "mangabakaName": "",
"imageUrl": "", "imageUrl": "",
"firstMatchTime": int(time.time()), "firstMatchTime": int(time.time()),
} }
self._data["matches"][title] = entry self._data["matches"][norm] = entry
# folderTitle is only set on creation; preserve original casing on updates.
if mangabaka_id is not None: if mangabaka_id is not None:
entry["mangabakaId"] = str(mangabaka_id) entry["mangabakaId"] = str(mangabaka_id)
if mangabaka_name is not None: if mangabaka_name is not None:
@@ -92,21 +107,25 @@ class MatchesCache:
return dict(entry) return dict(entry)
def rename(self, old_title: str, new_title: str) -> bool: def rename(self, old_title: str, new_title: str) -> bool:
if not new_title or old_title == new_title: old_norm = _norm_key(old_title)
new_norm = _norm_key(new_title)
if not new_title or old_norm == new_norm:
return False return False
with self._lock: with self._lock:
entry = self._data["matches"].pop(old_title, None) entry = self._data["matches"].pop(old_norm, None)
if entry is None: if entry is None:
return False return False
self._data["matches"][new_title] = entry entry["folderTitle"] = new_title
self._data["matches"][new_norm] = entry
self._save_unlocked() self._save_unlocked()
return True return True
def remove(self, title: str) -> bool: def remove(self, title: str) -> bool:
norm = _norm_key(title)
with self._lock: with self._lock:
existed = title in self._data["matches"] existed = norm in self._data["matches"]
if existed: if existed:
del self._data["matches"][title] del self._data["matches"][norm]
self._save_unlocked() self._save_unlocked()
return existed return existed
@@ -128,8 +147,68 @@ class MatchesCache:
print(f"[MatchesCache] failed to load {self._path}: {exc}", print(f"[MatchesCache] failed to load {self._path}: {exc}",
flush=True) flush=True)
return return
if isinstance(loaded, dict) and isinstance(loaded.get("matches"), dict): if not isinstance(loaded, dict) or not isinstance(loaded.get("matches"), dict):
self._data = loaded return
normalized, changed = self._normalize_on_load(loaded["matches"])
loaded["matches"] = normalized
self._data = loaded
if changed:
print(f"[MatchesCache] migrated {changed} entr{'y' if changed == 1 else 'ies'} "
f"(lowercase keys / folderTitle), saving", flush=True)
self._save_unlocked()
@staticmethod
def _normalize_on_load(raw: dict) -> "tuple[dict, int]":
    """
    Normalises the raw matches dict loaded from disk.

    - Keys are lowercased via ``_norm_key``.
    - ``folderTitle`` is set to the original key when it is missing,
      empty or not a string.
    - Entries that are not dicts are dropped.
    - Entries whose keys collapse to the same normalised form are merged:
      the one with the higher ``firstMatchTime`` supplies the data, but a
      ``folderTitle`` containing uppercase letters is preferred over an
      all-lowercase one regardless of which entry is newer.

    Returns (normalised_dict, number_of_changed_entries). Each entry is
    counted at most once, even if both its key and its ``folderTitle``
    had to be fixed.
    """
    def _match_time(item: dict):
        # A hand-edited file may hold null / text here; treat as oldest.
        stamp = item.get("firstMatchTime", 0)
        return stamp if isinstance(stamp, (int, float)) else 0

    result: dict = {}
    changed = 0

    for orig_key, entry in raw.items():
        if not isinstance(entry, dict):
            continue

        norm = _norm_key(orig_key)
        entry = dict(entry)  # never mutate the caller's data

        touched = norm != orig_key
        folder_title = entry.get("folderTitle")
        if not isinstance(folder_title, str) or not folder_title:
            entry["folderTitle"] = orig_key
            touched = True
        if touched:
            changed += 1

        existing = result.get(norm)
        if existing is None:
            result[norm] = entry
            continue

        # Both titles are non-empty strings at this point.
        existing_ft = existing["folderTitle"]
        new_ft = entry["folderTitle"]
        if _match_time(entry) > _match_time(existing):
            # Newer entry wins for data; preserve better-cased folderTitle
            if existing_ft != existing_ft.lower() and new_ft == new_ft.lower():
                entry["folderTitle"] = existing_ft
            result[norm] = entry
        elif new_ft != new_ft.lower() and existing_ft == existing_ft.lower():
            # Existing entry stays; but adopt new folderTitle if it has casing
            existing["folderTitle"] = new_ft

    return result, changed
def _save_unlocked(self) -> None: def _save_unlocked(self) -> None:
self._path.parent.mkdir(parents=True, exist_ok=True) self._path.parent.mkdir(parents=True, exist_ok=True)
+48 -7
View File
@@ -70,6 +70,7 @@ _INDEX_HTML = """<!doctype html>
<button id="reload">Reload</button> <button id="reload">Reload</button>
<button id="batchSave" class="primary">Save dirty (0)</button> <button id="batchSave" class="primary">Save dirty (0)</button>
<button id="build">Build all (rescan)</button> <button id="build">Build all (rescan)</button>
<button id="move">Start move</button>
<span class="status" id="status"></span> <span class="status" id="status"></span>
</div> </div>
@@ -114,15 +115,17 @@ function updateDirtyCount() {
function makeRow(title, e) { function makeRow(title, e) {
const tr = document.createElement("tr"); const tr = document.createElement("tr");
tr.dataset.title = title; tr.dataset.title = title;
const displayTitle = e.folderTitle || title;
tr.dataset.folderTitle = displayTitle;
// Title — link only, not editable // Title — link only, not editable; shows folderTitle (original casing)
const titleTd = document.createElement("td"); const titleTd = document.createElement("td");
titleTd.className = "title"; titleTd.className = "title";
const titleLink = document.createElement("a"); const titleLink = document.createElement("a");
titleLink.href = searchUrl(title); titleLink.href = searchUrl(displayTitle);
titleLink.target = "_blank"; titleLink.target = "_blank";
titleLink.rel = "noopener"; titleLink.rel = "noopener";
titleLink.textContent = title; titleLink.textContent = displayTitle;
titleTd.appendChild(titleLink); titleTd.appendChild(titleLink);
tr.appendChild(titleTd); tr.appendChild(titleTd);
@@ -185,7 +188,7 @@ function makeRow(title, e) {
async function saveRow(tr) { async function saveRow(tr) {
const title = tr.dataset.title; const title = tr.dataset.title;
const newId = tr._idInp.value.trim(); const newId = tr._idInp.value.trim();
setStatus("Saving " + title + ""); setStatus("Saving " + (tr.dataset.folderTitle || title) + "");
try { try {
const r = await fetch("/api/matches", { const r = await fetch("/api/matches", {
method: "POST", method: "POST",
@@ -202,7 +205,7 @@ async function saveRow(tr) {
tr._img.src = entry.imageUrl || ""; tr._img.src = entry.imageUrl || "";
tr.classList.remove("dirty"); tr.classList.remove("dirty");
updateDirtyCount(); updateDirtyCount();
setStatus("Saved " + title); setStatus("Saved " + (tr.dataset.folderTitle || title));
return true; return true;
} catch (err) { } catch (err) {
setStatus("Save failed (" + title + "): " + err.message); setStatus("Save failed (" + title + "): " + err.message);
@@ -249,7 +252,11 @@ function sortedTitles() {
const titles = Object.keys(matchesData); const titles = Object.keys(matchesData);
const dir = currentSort.asc ? 1 : -1; const dir = currentSort.asc ? 1 : -1;
if (currentSort.col === "title") { if (currentSort.col === "title") {
return titles.sort((a, b) => a.localeCompare(b) * dir); return titles.sort((a, b) => {
const fa = (matchesData[a].folderTitle || a).toLowerCase();
const fb = (matchesData[b].folderTitle || b).toLowerCase();
return fa.localeCompare(fb) * dir;
});
} }
if (currentSort.col === "firstMatchTime") { if (currentSort.col === "firstMatchTime") {
return titles.sort((a, b) => { return titles.sort((a, b) => {
@@ -297,7 +304,7 @@ async function load() {
function applyFilter() { function applyFilter() {
const q = document.getElementById("filter").value.toLowerCase(); const q = document.getElementById("filter").value.toLowerCase();
for (const tr of document.querySelectorAll("#rows tr")) { for (const tr of document.querySelectorAll("#rows tr")) {
const t = tr.dataset.title.toLowerCase(); const t = (tr.dataset.folderTitle || tr.dataset.title).toLowerCase();
tr.style.display = t.includes(q) ? "" : "none"; tr.style.display = t.includes(q) ? "" : "none";
} }
} }
@@ -317,6 +324,23 @@ document.getElementById("build").addEventListener("click", async () => {
setStatus("Build failed: " + err.message); setStatus("Build failed: " + err.message);
} }
}); });
document.getElementById("move").addEventListener("click", async () => {
if (!confirm("Start move operation? This will process all series and may take a long time.")) return;
const btn = document.getElementById("move");
btn.disabled = true;
setStatus("Moving… (running on the server)");
try {
const r = await fetch("/api/move", { method: "POST" });
if (!r.ok) throw new Error(await r.text());
const data = await r.json();
const total = Object.keys(data.results || {}).length;
setStatus("Move finished — " + total + " series processed");
} catch (err) {
setStatus("Move failed: " + err.message);
} finally {
btn.disabled = false;
}
});
for (const th of document.querySelectorAll("th.sortable")) { for (const th of document.querySelectorAll("th.sortable")) {
th.addEventListener("click", () => { th.addEventListener("click", () => {
const col = th.dataset.col; const col = th.dataset.col;
@@ -350,6 +374,7 @@ class MatchesWebApp:
self._host = host self._host = host
self._port = port self._port = port
self._build_lock = threading.Lock() self._build_lock = threading.Lock()
self._move_lock = threading.Lock()
self._app = Flask(__name__) self._app = Flask(__name__)
self._thread: "threading.Thread | None" = None self._thread: "threading.Thread | None" = None
self._register_routes() self._register_routes()
@@ -454,6 +479,22 @@ class MatchesWebApp:
return Response("build already running", status=409) return Response("build already running", status=409)
try: try:
result = self._mover.build_matches_only() result = self._mover.build_matches_only()
except Exception as exc:
return Response(f"build failed: {exc}", status=500)
finally: finally:
self._build_lock.release() self._build_lock.release()
return jsonify(result) return jsonify(result)
@app.post("/api/move")
def api_move():
    """
    Runs ``process_all()`` on the configured mover and returns its result
    as JSON under the ``results`` key.

    Responses: 503 when no mover is configured, 409 when another move
    already holds the move lock, 500 with the error text when the move
    raises. The lock is released in every case once it was acquired.
    """
    mover = self._mover
    if mover is None:
        return Response("no mover configured", status=503)

    move_lock = self._move_lock
    if not move_lock.acquire(blocking=False):
        return Response("move already running", status=409)

    try:
        results = mover.process_all()
    except Exception as exc:
        return Response(f"move failed: {exc}", status=500)
    finally:
        move_lock.release()

    return jsonify({"results": results})
+105 -15
View File
@@ -43,6 +43,7 @@ Dependencies
from __future__ import annotations from __future__ import annotations
import json
import re import re
import shutil import shutil
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
@@ -64,6 +65,62 @@ from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"} _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)') _CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
# JSON file written into each Kavita series folder, listing every chapter
# already moved. Avoids opening CBZ archives to determine what is present.
# Absence is interpreted as "folder empty" (per spec), not "scan the folder".
_CHAPTER_INDEX_FILENAME = "chapter_index.json"
def _normalise_volume_value(value):
"""
Normalises a volume identifier for storage in chapter_index.json.
Returns int when the value is a whole number, float for fractional
volumes, None when missing. Mirrors how the user wants volumes
rendered (``"volume": 1`` rather than ``"volume": "1"``).
"""
if value is None:
return None
text = str(value).strip()
if not text:
return None
try:
f = float(text)
return int(f) if f.is_integer() else f
except (TypeError, ValueError):
return text
def _load_chapter_index(dest_series: Path) -> dict:
    """
    Reads chapter_index.json from a Kavita series folder.

    Returns ``{"chapter": {}}`` when the file is missing, cannot be read
    or decoded, or does not contain a dict with a dict-valued ``chapter``
    key. Per the project spec, absence means "no chapters are present
    yet". Unreadable files are reported with a warning on stdout.
    """
    path = dest_series / _CHAPTER_INDEX_FILENAME
    if not path.is_file():
        return {"chapter": {}}
    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError as well as the
        # UnicodeDecodeError raised for files that are not valid UTF-8.
        print(f" [warn] chapter_index unreadable ({path.name}): {exc} — "
              f"treating folder as empty")
        return {"chapter": {}}
    if not isinstance(data, dict) or not isinstance(data.get("chapter"), dict):
        return {"chapter": {}}
    return data
def _save_chapter_index(dest_series: Path, index: dict) -> None:
    """
    Writes chapter_index.json atomically into a Kavita series folder.

    The JSON is written to ``chapter_index.json.tmp`` first and then moved
    over the real file, so readers never see a half-written index. If
    writing or the final replace fails, the temp file is removed and the
    original exception propagates unchanged.
    """
    path = dest_series / _CHAPTER_INDEX_FILENAME
    tmp = path.with_suffix(path.suffix + ".tmp")
    try:
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(index, f, ensure_ascii=False, indent=2)
        tmp.replace(path)
    except BaseException:
        # Do not leave a stray temp file inside the library folder.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
# Parenthetical source labels that Suwayomi appends to series names. # Parenthetical source labels that Suwayomi appends to series names.
# These are not part of the actual title and confuse MangaBaka searches. # These are not part of the actual title and confuse MangaBaka searches.
_SOURCE_LABEL_RE = re.compile( _SOURCE_LABEL_RE = re.compile(
@@ -509,13 +566,38 @@ class SuwayomiMover:
dest_series = self._dst / _sanitize_dirname(mangabaka_title) dest_series = self._dst / _sanitize_dirname(mangabaka_title)
dest_series.mkdir(parents=True, exist_ok=True) dest_series.mkdir(parents=True, exist_ok=True)
# Skip chapters that have already been moved to Kavita. The index
# file in the destination folder is the authoritative source — we
# never open CBZ archives or stat them individually.
chapter_index = _load_chapter_index(dest_series)
already_moved = chapter_index["chapter"]
skipped: list[tuple[Path, str]] = []
pending: list[tuple[Path, dict, str]] = []
for item in chapter_items:
chapter_dir, _fields, chapter_num = item
if chapter_num in already_moved:
skipped.append((chapter_dir, chapter_num))
else:
pending.append(item)
for chapter_dir, chapter_num in skipped:
print(f" Chapter {chapter_num}: skip (already in Kavita)")
if self._delete_source:
shutil.rmtree(chapter_dir, ignore_errors=True)
chapter_results: list[dict] = [] chapter_results: list[dict] = []
for chapter_dir, _fields, chapter_num in chapter_items: for chapter_dir, _fields, chapter_num in pending:
result = self._process_chapter( result = self._process_chapter(
builder, chapter_num, chapter_dir, dest_series) builder, chapter_num, chapter_dir, dest_series)
chapter_results.append(result) chapter_results.append(result)
status = "ok" if result["ok"] else f"ERROR: {result.get('error')}" status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
print(f" Chapter {chapter_num}: {status}") print(f" Chapter {chapter_num}: {status}")
if result["ok"]:
already_moved[chapter_num] = {
"volume": _normalise_volume_value(result.get("volume")),
"archiveName": Path(result["cbz"]).name,
}
_save_chapter_index(dest_series, chapter_index)
# Sync Kavita persons once per series. # Sync Kavita persons once per series.
# Both MAL and AniList IDs come from MangaBaka's source map; # Both MAL and AniList IDs come from MangaBaka's source map;
@@ -557,11 +639,19 @@ class SuwayomiMover:
try: try:
builder.chapter = chapter_num builder.chapter = chapter_num
builder.add_pages_from_folder(chapter_dir, cover_filename="000") builder.add_pages_from_folder(chapter_dir, cover_filename="000")
# Resolving the volume here piggy-backs on caches already warmed
# by add_pages_from_folder, so it's effectively free. The result is
# recorded in the chapter index in the Kavita destination folder.
try:
volume = builder._determine_volume()
except Exception:
volume = None
builder.save_xml(chapter_dir) builder.save_xml(chapter_dir)
_pack_to_cbz(chapter_dir, cbz_path) _pack_to_cbz(chapter_dir, cbz_path)
if self._delete_source: if self._delete_source:
shutil.rmtree(chapter_dir) shutil.rmtree(chapter_dir)
return {"chapter": chapter_num, "cbz": str(cbz_path), "ok": True} return {"chapter": chapter_num, "cbz": str(cbz_path),
"ok": True, "volume": volume}
except Exception as exc: except Exception as exc:
return {"chapter": chapter_num, "cbz": str(cbz_path), return {"chapter": chapter_num, "cbz": str(cbz_path),
"ok": False, "error": str(exc)} "ok": False, "error": str(exc)}
@@ -591,21 +681,21 @@ if __name__ == "__main__":
) )
# ---- Option A: build matches.json only (no moves / no Kavita sync) ---- # ---- Option A: build matches.json only (no moves / no Kavita sync) ----
data = mover.build_matches_only() # data = mover.build_matches_only()
matches = data.get("matches", {}) # matches = data.get("matches", {})
print(f"\n[matches] {len(matches)} entries total — file: {MATCHES_PATH}") # print(f"\n[matches] {len(matches)} entries total — file: {MATCHES_PATH}")
for title, entry in list(matches.items())[:10]: # for title, entry in list(matches.items())[:10]:
print(f" {title!r:50s} id={entry.get('mangabakaId')} " # print(f" {title!r:50s} id={entry.get('mangabakaId')} "
f"name={entry.get('mangabakaName')!r}") # f"name={entry.get('mangabakaName')!r}")
# ---- Option B: full pipeline for one series (uses the cache too) ---- # ---- Option B: full pipeline for one series (uses the cache too) ----
# result = mover.process_series("Yofukashi no Uta") result = mover.process_series("Wistoria - Wand and Sword")
# ok = sum(1 for c in result["chapters"] if c["ok"]) ok = sum(1 for c in result["chapters"] if c["ok"])
# failed = sum(1 for c in result["chapters"] if not c["ok"]) failed = sum(1 for c in result["chapters"] if not c["ok"])
# print(f"\nDone: {ok} ok, {failed} failed") print(f"\nDone: {ok} ok, {failed} failed")
# for c in result["chapters"]: for c in result["chapters"]:
# if not c["ok"]: if not c["ok"]:
# print(f" Chapter {c['chapter']}: {c['error']}") print(f" Chapter {c['chapter']}: {c['error']}")
# Or process everything at once: # Or process everything at once:
# results = mover.process_all() # results = mover.process_all()