Added Mover and other improvements

This commit is contained in:
2026-05-23 16:09:13 +02:00
parent 3dab98cb41
commit 377aff34d0
4 changed files with 440 additions and 5 deletions
+20 -3
View File
@@ -365,8 +365,11 @@ class ComicInfoBuilder:
add("Volume", volume) add("Volume", volume)
# ----- Description with MAL stats ----------------------------------- # ----- Description with MAL stats -----------------------------------
mal_id = self._mal_resolver.find_mal_id( # Prefer the MAL ID from MangaBaka's source map — avoids an extra
md.get("title") or self._manga_title) # Jikan title-search request and is more reliable than fuzzy matching.
mal_id = (self._mal_id_from_source(md)
or self._mal_resolver.find_mal_id(
md.get("title") or self._manga_title))
mal_stats = self._mal_resolver.get_stats(mal_id) mal_stats = self._mal_resolver.get_stats(mal_id)
add("Summary", self._build_summary(md, sd, mal_stats)) add("Summary", self._build_summary(md, sd, mal_stats))
@@ -417,7 +420,8 @@ class ComicInfoBuilder:
pass pass
# ----- ISBN (GTIN) from volume work --------------------------------- # ----- ISBN (GTIN) from volume work ---------------------------------
isbn = (work or {}).get('identifiers')[0].get("id") identifiers = (work or {}).get("identifiers") or []
isbn = identifiers[0].get("id") if identifiers else None
add("GTIN", isbn) add("GTIN", isbn)
# ----- SeriesGroup from related works ------------------------------- # ----- SeriesGroup from related works -------------------------------
@@ -741,6 +745,19 @@ class ComicInfoBuilder:
return str(info["id"]) return str(info["id"])
return None return None
@staticmethod
def _mal_id_from_source(md: dict) -> "int | None":
    """
    Looks up the MyAnimeList ID in the "source" mapping of a metadata dict.

    A key counts as the MAL entry when its normalised form (via
    _normalise_key) is "myanimelist" or "mal".  The entry's "id" value is
    converted with int(); entries that are not dicts, have no id, or whose
    id cannot be converted are ignored.

    Returns the integer ID, or None when no usable entry is found.
    """
    sources = md.get("source") or {}
    for source_name, entry in sources.items():
        if _normalise_key(source_name) not in ("myanimelist", "mal"):
            continue
        if not isinstance(entry, dict):
            continue
        candidate = entry.get("id")
        if candidate is None:
            continue
        try:
            return int(candidate)
        except (TypeError, ValueError):
            # Unusable id — keep scanning the remaining source entries.
            continue
    return None
@staticmethod @staticmethod
def _publishers_by_type(md: dict, ptype: str) -> "str | None": def _publishers_by_type(md: dict, ptype: str) -> "str | None":
names = [p.get("name") for p in (md.get("publishers") or []) names = [p.get("name") for p in (md.get("publishers") or [])
+1 -1
View File
@@ -416,7 +416,7 @@ if __name__ == "__main__":
updater = KavitaPersonUpdater(KAVITA_URL, KAVITA_KEY) updater = KavitaPersonUpdater(KAVITA_URL, KAVITA_KEY)
mal = MALResolver() mal = MALResolver()
mal_id = mal.find_mal_id("One Punch-Man") mal_id = mal.find_mal_id("よふかしのうた")
print("MAL ID:", mal_id) print("MAL ID:", mal_id)
if mal_id: if mal_id:
+4 -1
View File
@@ -171,7 +171,10 @@ class MALResolver:
detailed = self.get_characters_detailed(mal_id) detailed = self.get_characters_detailed(mal_id)
names = [e["name"] for e in detailed if e.get("name")] names = [e["name"] for e in detailed if e.get("name")]
self._char_names_cache[mal_id] = names if names:
# Only cache a successful result — empty could be a transient
# API failure and we want the next call to retry.
self._char_names_cache[mal_id] = names
return names return names
def get_characters_for_manga(self, title: str) -> list[str]: def get_characters_for_manga(self, title: str) -> list[str]:
+415
View File
@@ -0,0 +1,415 @@
"""
suwayomi_mover.py
=================
Moves Suwayomi-downloaded manga chapters to a Kavita library path,
generating enriched ComicInfo.xml metadata and packing each chapter
folder into a CBZ archive. Optionally syncs Kavita person / character
records with MyAnimeList data after each series is processed.
Suwayomi folder structure (input)
----------------------------------
<suwayomi_path>/
<Source (lang)>/ e.g. "ComicK Fanmade (EN)"
<Manga Title>/ e.g. "Yofukashi no Uta"
Official_Chapter 1/ chapter folder — any prefix is fine
001.webp
...
ComicInfo.xml Suwayomi's own basic XML (read + replaced)
Kavita folder structure (output)
---------------------------------
<kavita_path>/
<Manga Title>/
Official_Chapter 1.cbz CBZ archive: images + enriched ComicInfo.xml
Official_Chapter 2.cbz
...
Cover naming convention
-----------------------
The cover image is saved as "000.<ext>" inside each chapter folder so that
it sorts before "001.webp", "002.webp", … in alphabetical order. This
ensures the <Pages Image="0" Type="FrontCover"> assignment in ComicInfo.xml
matches the actual file order inside the CBZ archive.
Dependencies
------------
requests -> pip install requests
Pillow -> pip install pillow (optional, for image dimensions)
ComicInfoBuilder, MangadexVolumeResolver, MangaBakaWorksResolver,
MALResolver, KavitaPersonUpdater must reside in the same directory.
"""
from __future__ import annotations
import re
import shutil
import xml.etree.ElementTree as ET
import zipfile
from pathlib import Path
import requests
from ComicInfoBuilder import ComicInfoBuilder
from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver
from KavitaPersonUpdater import KavitaPersonUpdater
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
# Parenthetical source labels that Suwayomi appends to series names.
# These are not part of the actual title and confuse MangaBaka searches.
_SOURCE_LABEL_RE = re.compile(
r'\s*\(\s*(?:official|unofficial|fan(?:\s*made)?|scanlation|'
r'bato(?:to)?|mangadex|manga\s*plus|viz|yen\s*press|webtoon)\s*\)\s*$',
re.IGNORECASE,
)
def _natural_key(name: str) -> list:
return [int(p) if p.isdigit() else p.lower()
for p in re.split(r"(\d+)", name)]
_SUWAYOMI_WANTED = {"Title", "Series", "Number", "Summary",
"Writer", "Penciller", "Genre", "Web",
"Year", "Month", "Day"}
def _read_suwayomi_fields(chapter_dir: Path) -> dict:
"""
Reads metadata from Suwayomi's ComicInfo.xml inside a chapter folder.
Returns a dict of whichever fields are present, e.g.:
{"Number": "3", "Series": "Dungeon Odyssey", "Title": "Chapter 3", ...}
Returns an empty dict if the file is missing or unparseable.
"""
xml_path = chapter_dir / "ComicInfo.xml"
if not xml_path.is_file():
return {}
try:
root = ET.parse(xml_path).getroot()
except ET.ParseError:
return {}
result = {}
for child in root:
tag = child.tag.split("}")[-1]
if tag in _SUWAYOMI_WANTED and child.text and child.text.strip():
result[tag] = child.text.strip()
return result
def _clean_suwayomi_title(title: str) -> str:
    """
    Strips a trailing source annotation from a series title.

    A parenthesised label at the end of the title that matches
    _SOURCE_LABEL_RE (e.g. the "(Official)" in
    "Wistoria: Wand and Sword (Official)") is removed, then surrounding
    whitespace is trimmed.  Such labels are not part of the canonical
    title and get in the way of MangaBaka / MAL lookups.
    """
    without_label = _SOURCE_LABEL_RE.sub("", title)
    return without_label.strip()
def _mal_id_from_metadata(md: dict) -> "int | None":
"""
Extracts the MAL ID directly from a MangaBaka series dict.
MangaBaka stores tracker IDs in md["source"], e.g.:
{"myanimelist": {"id": 121480}, "mangadex": {"id": "..."}, ...}
Returns the integer MAL ID, or None if not present.
"""
for raw_key, info in (md.get("source") or {}).items():
if re.sub(r"[^a-z0-9]", "", raw_key.lower()) in ("myanimelist", "mal"):
if isinstance(info, dict):
mal_id = info.get("id")
if mal_id is not None:
try:
return int(mal_id)
except (TypeError, ValueError):
pass
return None
def _extract_chapter_num(folder_name: str) -> "str | None":
    """
    Reads the chapter number out of a chapter folder name.

    Used as a fallback when the chapter's XML carries no number.  The first
    match of _CHAPTER_RE decides the result:
        "Chapter 10"            -> "10"
        "Official_Chapter 10.5" -> "10.5"

    Returns the number as a string, or None when the name has no match.
    """
    match = _CHAPTER_RE.search(folder_name)
    if match is None:
        return None
    return match.group(1)
def _chapter_sort_key(folder_name: str) -> tuple:
    """
    Sort key that orders chapter folders by their numeric chapter number.

    Folders without a recognisable chapter number get an infinite position
    and therefore sort after all numbered chapters; the folder name breaks
    ties.
    """
    chapter = _extract_chapter_num(folder_name)
    position = float("inf") if chapter is None else float(chapter)
    return (position, folder_name)
def _pack_to_cbz(folder: Path, dest: Path) -> None:
    """
    Packs all files in `folder` into a CBZ archive at `dest`.

    Images (by extension, see _IMAGE_EXTS) are stored first, in natural-sort
    order (so "000.jpg" < "001.webp").  All other files, such as
    ComicInfo.xml, are appended after the images so that image indices in
    the archive are not shifted by non-image entries.  Sub-directories are
    ignored.

    Files are stored without compression (ZIP_STORED) since the source
    images are already compressed (webp / jpg / png / …).

    The archive is first written to "<dest name>.part" next to `dest` and
    renamed into place only after it has been written completely, so a
    failure part-way never leaves a truncated .cbz at `dest`.  Any exception
    from reading the folder or writing the archive is re-raised after the
    partial file has been removed.
    """
    # Single directory listing, split into images and everything else.
    images: list = []
    extras: list = []
    for entry in folder.iterdir():
        if not entry.is_file():
            continue
        bucket = images if entry.suffix.lower() in _IMAGE_EXTS else extras
        bucket.append(entry)
    images.sort(key=lambda p: _natural_key(p.name))
    # Directory listing order is arbitrary; sort so archives are reproducible.
    extras.sort(key=lambda p: _natural_key(p.name))

    partial = dest.with_name(dest.name + ".part")
    try:
        with zipfile.ZipFile(partial, "w", zipfile.ZIP_STORED) as zf:
            for entry in images + extras:
                zf.write(entry, entry.name)
        # Rename within the same directory; replaces an existing archive.
        partial.replace(dest)
    except BaseException:
        # Best-effort cleanup: a failure to remove the partial file must not
        # mask the original error.
        try:
            partial.unlink()
        except OSError:
            pass
        raise
class SuwayomiMover:
    """
    Scans a Suwayomi download directory, generates enriched ComicInfo.xml
    for each chapter, packs each chapter folder into a CBZ archive, and
    writes the result to a Kavita library path.

    Parameters
    ----------
    suwayomi_path   : Root of Suwayomi downloads.
                      Expected layout: <root>/<Source>/<Title>/<Chapter N>/
    kavita_path     : Root of the Kavita library.
                      Series sub-directories are created automatically.
    kavita_base_url : Kavita server URL — required only for person sync,
                      e.g. "http://192.168.2.2:5000".
    kavita_api_key  : Kavita API key — required only for person sync.
                      Person sync is enabled only when both the URL and
                      the key are given.
    language        : Passed to ComicInfoBuilder as its language ("en").
    request_timeout : HTTP timeout in seconds handed to all resolvers.
    delete_source   : Remove the source chapter folder after successful pack.
    """

    # Path separators plus the characters that Windows / SMB shares reject
    # in file names (and ASCII control characters).
    _UNSAFE_PATH_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')

    def __init__(self,
                 suwayomi_path,
                 kavita_path,
                 *,
                 kavita_base_url: "str | None" = None,
                 kavita_api_key: "str | None" = None,
                 language: str = "en",
                 request_timeout: int = 30,
                 delete_source: bool = True):
        self._src = Path(suwayomi_path)
        self._dst = Path(kavita_path)
        self._language = language
        self._timeout = request_timeout
        self._delete_source = delete_source

        # Shared HTTP session and resolvers — reused across all series/chapters
        # to maximise cache hits and minimise API round-trips.
        session = requests.Session()
        session.headers.setdefault("User-Agent", "SuwayomiMover/1.0")
        self._session = session
        self._mal = MALResolver(request_timeout=request_timeout)
        self._vol_resolver = MangaDexVolumeResolver(
            request_timeout=request_timeout, session=session)
        self._works_resolver = MangaBakaWorksResolver(
            request_timeout=request_timeout, session=session)

        # Person sync is optional: only set up when both credentials exist.
        self._person_updater: "KavitaPersonUpdater | None" = None
        if kavita_base_url and kavita_api_key:
            self._person_updater = KavitaPersonUpdater(
                kavita_base_url, kavita_api_key,
                mal_resolver=self._mal,
                request_timeout=request_timeout)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def process_all(self) -> dict:
        """
        Processes every manga series found under the Suwayomi root.

        Walks two directory levels deep:
            <suwayomi_path>/<Source dir>/<Manga Title>/

        Returns a dict keyed by manga title (the folder name), each value
        being the result dict from _process_series_dir.
        """
        results: dict = {}
        for source_dir in sorted(self._src.iterdir()):
            if not source_dir.is_dir():
                continue
            for manga_dir in sorted(source_dir.iterdir()):
                if not manga_dir.is_dir():
                    continue
                title = manga_dir.name
                print(f"[SuwayomiMover] {title}")
                # NOTE: if the same title exists under several sources, the
                # result of the later source replaces the earlier one here.
                results[title] = self._process_series_dir(manga_dir)
        return results

    def process_series(self, manga_title: str) -> dict:
        """
        Processes all chapters for a single series, located by title.

        Searches every source sub-directory under the Suwayomi root (in
        sorted order) for a directory whose name matches `manga_title`
        exactly and processes the first one found.

        Raises FileNotFoundError if no matching directory is found.
        """
        for source_dir in sorted(self._src.iterdir()):
            if not source_dir.is_dir():
                continue
            candidate = source_dir / manga_title
            if candidate.is_dir():
                return self._process_series_dir(candidate)
        raise FileNotFoundError(
            f"No Suwayomi directory found for '{manga_title}' under {self._src}")

    # ------------------------------------------------------------------
    # Internal: series
    # ------------------------------------------------------------------
    @staticmethod
    def _safe_dir_name(title: str, fallback: str) -> str:
        """
        Turns a series title into a name usable as a single directory.

        Path separators and characters that are invalid in Windows / SMB
        file names are replaced by "_"; surrounding whitespace and trailing
        dots are removed.  If nothing is left (e.g. the title was ".."),
        `fallback` is returned instead.
        """
        cleaned = SuwayomiMover._UNSAFE_PATH_CHARS.sub("_", title)
        cleaned = cleaned.strip().rstrip(". ")
        return cleaned or fallback

    def _process_series_dir(self, manga_dir: Path) -> dict:
        """
        Processes every chapter folder of one series directory.

        Returns {"chapters": [<result of _process_chapter>, ...],
                 "persons": <person-sync result, {"error": ...}, or None>}.
        """
        manga_title = manga_dir.name
        chapter_dirs = sorted(
            (d for d in manga_dir.iterdir() if d.is_dir()),
            key=lambda p: _chapter_sort_key(p.name),
        )

        # Read all chapter XMLs upfront to resolve chapter numbers and series name.
        chapter_items: list[tuple[Path, dict, str]] = []
        for chapter_dir in chapter_dirs:
            fields = _read_suwayomi_fields(chapter_dir)
            chapter_num = (fields.get("Number")
                           or _extract_chapter_num(chapter_dir.name))
            if chapter_num is None:
                print(f"  [skip] {chapter_dir.name} — no chapter number")
                continue
            chapter_items.append((chapter_dir, fields, chapter_num))

        # <Series> from the first chapter's XML → strip source labels → clean title
        # for the MangaBaka search.  Folder name is the last resort.
        raw_series = manga_title
        if chapter_items:
            xml_series = chapter_items[0][1].get("Series")
            if xml_series:
                raw_series = xml_series
        builder_title = _clean_suwayomi_title(raw_series)

        # One builder per series — metadata fetched once, reused for all chapters.
        builder = ComicInfoBuilder(
            builder_title, chapter=1,
            language=self._language,
            request_timeout=self._timeout,
            session=self._session,
            volume_resolver=self._vol_resolver,
            works_resolver=self._works_resolver,
            mal_resolver=self._mal,
        )

        # Fetch MangaBaka metadata now to get the canonical title and MAL ID.
        md: "dict | None" = None
        mangabaka_title = manga_title
        try:
            md = builder.fetch_metadata()
            mangabaka_title = md.get("title") or manga_title
        except Exception as exc:
            # Best effort: fall back to the folder name as the series title.
            print(f"  [warn] metadata fetch failed: {exc}")

        # Destination folder uses the MangaBaka canonical title.  The title is
        # sanitised first: a "/" would otherwise create nested folders and
        # characters such as ":" or "?" are rejected by Windows / SMB targets.
        dest_series = self._dst / self._safe_dir_name(mangabaka_title,
                                                      manga_title)
        dest_series.mkdir(parents=True, exist_ok=True)

        chapter_results: list[dict] = []
        for chapter_dir, _fields, chapter_num in chapter_items:
            result = self._process_chapter(
                builder, chapter_num, chapter_dir, dest_series)
            chapter_results.append(result)
            status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
            print(f"  Chapter {chapter_num}: {status}")

        # Sync Kavita persons once per series.
        person_result: "dict | None" = None
        if self._person_updater:
            # Prefer the MAL ID carried by the MangaBaka metadata.  Fall back
            # to a title search whenever that yields nothing — including the
            # case where metadata exists but has no MAL entry.
            mal_id = _mal_id_from_metadata(md) if md else None
            if not mal_id:
                mal_id = self._mal.find_mal_id(builder_title)
            if mal_id:
                try:
                    person_result = self._person_updater.update_for_manga(mal_id)
                    print(f"  Persons: chars={person_result['characters'].get('updated')} "
                          f"staff={person_result['staff'].get('updated')}")
                except Exception as exc:
                    person_result = {"error": str(exc)}
                    print(f"  Persons: ERROR {exc}")

        return {"chapters": chapter_results, "persons": person_result}

    # ------------------------------------------------------------------
    # Internal: chapter
    # ------------------------------------------------------------------
    def _process_chapter(self,
                         builder: "ComicInfoBuilder",
                         chapter_num: str,
                         chapter_dir: Path,
                         dest_series: Path) -> dict:
        """
        Generates ComicInfo.xml for one chapter, packs it to CBZ, and
        optionally removes the source folder.

        The builder is asked to use "000" as the cover file name so that the
        cover sorts before the numbered story pages in the archive.

        Never raises: any failure is reported in the returned dict, which
        has the keys "chapter", "cbz", "ok" and, on failure, "error".
        """
        cbz_path = dest_series / f"{chapter_dir.name}.cbz"
        try:
            builder.chapter = chapter_num
            builder.add_pages_from_folder(chapter_dir, cover_filename="000")
            builder.save_xml(chapter_dir)
            _pack_to_cbz(chapter_dir, cbz_path)
            # Only reached when packing succeeded, so a failed chapter keeps
            # its source folder.
            if self._delete_source:
                shutil.rmtree(chapter_dir)
            return {"chapter": chapter_num, "cbz": str(cbz_path), "ok": True}
        except Exception as exc:
            return {"chapter": chapter_num, "cbz": str(cbz_path),
                    "ok": False, "error": str(exc)}
# --------------------------------------------------------------------------
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
    import os

    # Connection settings are read from the environment so that credentials
    # are never committed with the source.  The path / URL defaults are the
    # values of the original example setup.
    SUWAYOMI_PATH = os.environ.get(
        "SUWAYOMI_PATH", r"\\192.168.2.2\root\Temp\managdl\mangas")
    KAVITA_PATH = os.environ.get(
        "KAVITA_PATH", r"\\192.168.2.2\root\ServerData\Kavita\test")
    KAVITA_URL = os.environ.get("KAVITA_URL", "http://192.168.2.2:5000")
    # SECURITY: an API key was previously hard-coded here; a key that has
    # been committed should be rotated.  No default on purpose — when
    # KAVITA_API_KEY is unset, SuwayomiMover skips the person sync.
    KAVITA_KEY = os.environ.get("KAVITA_API_KEY")

    mover = SuwayomiMover(
        SUWAYOMI_PATH,
        KAVITA_PATH,
        kavita_base_url=KAVITA_URL,
        kavita_api_key=KAVITA_KEY,
        delete_source=False,
    )

    # Process a single series
    result = mover.process_series("Yofukashi no Uta")
    ok = sum(1 for c in result["chapters"] if c["ok"])
    failed = sum(1 for c in result["chapters"] if not c["ok"])
    print(f"\nDone: {ok} ok, {failed} failed")
    for c in result["chapters"]:
        if not c["ok"]:
            print(f"  Chapter {c['chapter']}: {c['error']}")

    # Or process everything at once:
    # results = mover.process_all()