Added Mover and other improvements
This commit is contained in:
+20
-3
@@ -365,8 +365,11 @@ class ComicInfoBuilder:
|
||||
add("Volume", volume)
|
||||
|
||||
# ----- Description with MAL stats -----------------------------------
|
||||
mal_id = self._mal_resolver.find_mal_id(
|
||||
md.get("title") or self._manga_title)
|
||||
# Prefer the MAL ID from MangaBaka's source map — avoids an extra
|
||||
# Jikan title-search request and is more reliable than fuzzy matching.
|
||||
mal_id = (self._mal_id_from_source(md)
|
||||
or self._mal_resolver.find_mal_id(
|
||||
md.get("title") or self._manga_title))
|
||||
mal_stats = self._mal_resolver.get_stats(mal_id)
|
||||
add("Summary", self._build_summary(md, sd, mal_stats))
|
||||
|
||||
@@ -417,7 +420,8 @@ class ComicInfoBuilder:
|
||||
pass
|
||||
|
||||
# ----- ISBN (GTIN) from volume work ---------------------------------
|
||||
isbn = (work or {}).get('identifiers')[0].get("id")
|
||||
identifiers = (work or {}).get("identifiers") or []
|
||||
isbn = identifiers[0].get("id") if identifiers else None
|
||||
add("GTIN", isbn)
|
||||
|
||||
# ----- SeriesGroup from related works -------------------------------
|
||||
@@ -741,6 +745,19 @@ class ComicInfoBuilder:
|
||||
return str(info["id"])
|
||||
return None
|
||||
|
||||
@staticmethod
def _mal_id_from_source(md: dict) -> "int | None":
    """
    Look up the MyAnimeList ID in the ``source`` mapping of a metadata dict.

    Every entry of ``md["source"]`` is inspected; an entry counts as the
    MAL entry when its normalised key is "myanimelist" or "mal".  The
    first such entry whose ``id`` converts cleanly to ``int`` wins.

    Returns the integer ID, or None when no usable entry exists.
    """
    sources = md.get("source") or {}
    for tracker, entry in sources.items():
        if _normalise_key(tracker) not in ("myanimelist", "mal"):
            continue
        if not isinstance(entry, dict):
            continue
        candidate = entry.get("id")
        if candidate is None:
            continue
        try:
            return int(candidate)
        except (TypeError, ValueError):
            # Unusable value: keep scanning the remaining entries.
            continue
    return None
|
||||
|
||||
@staticmethod
|
||||
def _publishers_by_type(md: dict, ptype: str) -> "str | None":
|
||||
names = [p.get("name") for p in (md.get("publishers") or [])
|
||||
|
||||
@@ -416,7 +416,7 @@ if __name__ == "__main__":
|
||||
updater = KavitaPersonUpdater(KAVITA_URL, KAVITA_KEY)
|
||||
|
||||
mal = MALResolver()
|
||||
mal_id = mal.find_mal_id("One Punch-Man")
|
||||
mal_id = mal.find_mal_id("よふかしのうた")
|
||||
print("MAL ID:", mal_id)
|
||||
|
||||
if mal_id:
|
||||
|
||||
+4
-1
@@ -171,7 +171,10 @@ class MALResolver:
|
||||
|
||||
detailed = self.get_characters_detailed(mal_id)
|
||||
names = [e["name"] for e in detailed if e.get("name")]
|
||||
self._char_names_cache[mal_id] = names
|
||||
if names:
|
||||
# Only cache a successful result — empty could be a transient
|
||||
# API failure and we want the next call to retry.
|
||||
self._char_names_cache[mal_id] = names
|
||||
return names
|
||||
|
||||
def get_characters_for_manga(self, title: str) -> list[str]:
|
||||
|
||||
@@ -0,0 +1,415 @@
|
||||
"""
|
||||
suwayomi_mover.py
|
||||
=================
|
||||
|
||||
Moves Suwayomi-downloaded manga chapters to a Kavita library path,
|
||||
generating enriched ComicInfo.xml metadata and packing each chapter
|
||||
folder into a CBZ archive. Optionally syncs Kavita person / character
|
||||
records with MyAnimeList data after each series is processed.
|
||||
|
||||
Suwayomi folder structure (input)
|
||||
----------------------------------
|
||||
<suwayomi_path>/
|
||||
<Source (lang)>/ e.g. "ComicK Fanmade (EN)"
|
||||
<Manga Title>/ e.g. "Yofukashi no Uta"
|
||||
Official_Chapter 1/ chapter folder — any prefix is fine
|
||||
001.webp
|
||||
...
|
||||
ComicInfo.xml Suwayomi's own basic XML (read + replaced)
|
||||
|
||||
Kavita folder structure (output)
|
||||
---------------------------------
|
||||
<kavita_path>/
|
||||
<Manga Title>/
|
||||
Official_Chapter 1.cbz CBZ archive: images + enriched ComicInfo.xml
|
||||
Official_Chapter 2.cbz
|
||||
...
|
||||
|
||||
Cover naming convention
|
||||
-----------------------
|
||||
The cover image is saved as "000.<ext>" inside each chapter folder so that
|
||||
it sorts before "001.webp", "002.webp", … in alphabetical order. This
|
||||
ensures the <Pages Image="0" Type="FrontCover"> assignment in ComicInfo.xml
|
||||
matches the actual file order inside the CBZ archive.
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
requests -> pip install requests
|
||||
Pillow -> pip install pillow (optional, for image dimensions)
|
||||
|
||||
ComicInfoBuilder, MangadexVolumeResolver, MangaBakaWorksResolver,
|
||||
MALResolver, KavitaPersonUpdater must reside in the same directory.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
from ComicInfoBuilder import ComicInfoBuilder
|
||||
from MangadexVolumeResolver import MangaDexVolumeResolver
|
||||
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
||||
from MALResolver import MALResolver
|
||||
from KavitaPersonUpdater import KavitaPersonUpdater
|
||||
|
||||
|
||||
# Lower-case file suffixes (with the leading dot) that count as page images.
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
# Finds "Chapter <n>" / "chapter <n>" anywhere in a name and captures the
# number, including an optional decimal part such as "10.5".
_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')

# Parenthetical source labels that Suwayomi appends to series names.
# These are not part of the actual title and confuse MangaBaka searches.
# Only a single label at the very end of the string is matched.
_SOURCE_LABEL_RE = re.compile(
    r'\s*\(\s*(?:official|unofficial|fan(?:\s*made)?|scanlation|'
    r'bato(?:to)?|mangadex|manga\s*plus|viz|yen\s*press|webtoon)\s*\)\s*$',
    re.IGNORECASE,
)
|
||||
|
||||
|
||||
def _natural_key(name: str) -> list:
    """
    Build a sort key that orders embedded numbers numerically.

    The name is split on runs of decimal digits; digit runs become ints
    and the text in between is lower-cased, so "2.png" sorts before
    "10.png" and case is ignored.

    Example: "Page10B.PNG" -> ["page", 10, "b.png"]
    """
    # re.split with a capturing group always alternates text / digits,
    # starting with a (possibly empty) text chunk.  Classifying by position
    # instead of str.isdigit() matters: isdigit() is also true for
    # characters such as "²" or "②" that \d does not match and int()
    # rejects, which used to raise ValueError for names like "1²".
    return [int(chunk) if index % 2 else chunk.lower()
            for index, chunk in enumerate(re.split(r"(\d+)", name))]
|
||||
|
||||
|
||||
# ComicInfo.xml element names worth keeping from Suwayomi's own file;
# every other element is ignored by _read_suwayomi_fields.
_SUWAYOMI_WANTED = {
    "Title", "Series", "Number", "Summary",
    "Writer", "Penciller", "Genre", "Web",
    "Year", "Month", "Day",
}


def _read_suwayomi_fields(chapter_dir: Path) -> dict:
    """
    Collect the interesting fields of ``<chapter_dir>/ComicInfo.xml``.

    Only direct children of the root element whose (namespace-stripped)
    tag is listed in ``_SUWAYOMI_WANTED`` and whose text is non-blank are
    returned, with surrounding whitespace removed, e.g.:
        {"Number": "3", "Series": "Dungeon Odyssey", "Title": "Chapter 3"}

    An empty dict is returned when the file does not exist or is not
    well-formed XML.
    """
    source_xml = chapter_dir / "ComicInfo.xml"
    if not source_xml.is_file():
        return {}

    try:
        tree = ET.parse(source_xml)
    except ET.ParseError:
        return {}

    fields = {}
    for element in tree.getroot():
        # "{namespace}Tag" -> "Tag"; tags without a namespace are unchanged.
        local_name = element.tag.split("}")[-1]
        if local_name not in _SUWAYOMI_WANTED:
            continue
        text = (element.text or "").strip()
        if text:
            fields[local_name] = text
    return fields
|
||||
|
||||
|
||||
def _clean_suwayomi_title(title: str) -> str:
    """
    Strip a trailing Suwayomi source annotation from a series title.

    Suwayomi may append the source type in parentheses, for example
    "Wistoria: Wand and Sword (Official)".  Such a label is not part of
    the canonical title and gets in the way of MangaBaka / MAL lookups,
    so a label matched by ``_SOURCE_LABEL_RE`` is removed and the result
    is trimmed of surrounding whitespace.
    """
    without_label = _SOURCE_LABEL_RE.sub("", title)
    return without_label.strip()
|
||||
|
||||
|
||||
def _mal_id_from_metadata(md: dict) -> "int | None":
    """
    Pull the MyAnimeList ID out of a MangaBaka series dict.

    Tracker IDs are expected under md["source"], e.g.:
        {"myanimelist": {"id": 121480}, "mangadex": {"id": "..."}, ...}

    Keys are compared after lower-casing and dropping everything that is
    not a letter or digit, so "My Anime List" and "MAL" both qualify.

    Returns the ID as an int, or None when no entry yields one.
    """
    trackers = md.get("source") or {}
    for tracker_name, entry in trackers.items():
        normalised = re.sub(r"[^a-z0-9]", "", tracker_name.lower())
        if normalised not in ("myanimelist", "mal"):
            continue
        if not isinstance(entry, dict):
            continue
        candidate = entry.get("id")
        if candidate is None:
            continue
        try:
            return int(candidate)
        except (TypeError, ValueError):
            # Not convertible: fall through to any remaining entries.
            continue
    return None
|
||||
|
||||
|
||||
def _extract_chapter_num(folder_name: str) -> "str | None":
    """
    Fallback parser: read the chapter number out of a folder name.

    "Chapter 10" -> "10", "Official_Chapter 10.5" -> "10.5".
    Returns None when the name holds no "Chapter <n>" part.
    """
    found = _CHAPTER_RE.search(folder_name)
    if found is None:
        return None
    return found.group(1)
|
||||
|
||||
|
||||
def _chapter_sort_key(folder_name: str) -> tuple:
    """
    Sort key ordering chapter folders by chapter number.

    Folders without a recognisable number sort after all numbered ones;
    the folder name itself breaks ties.
    """
    chapter = _extract_chapter_num(folder_name)
    position = float("inf") if chapter is None else float(chapter)
    return (position, folder_name)
|
||||
|
||||
|
||||
def _pack_to_cbz(folder: Path, dest: Path) -> None:
    """
    Packs all files in `folder` into a CBZ archive at `dest`.

    Images are stored first, in natural-sort order (so "000.jpg" <
    "001.webp" < "2.png" < "10.png").  Every non-image file (such as
    ComicInfo.xml) follows, sorted by name, so image positions in the
    archive are not shifted by other files.
    Files are stored without compression (ZIP_STORED) since the source
    images are already compressed (webp / jpg / png / ...).

    The archive is first written to "<dest name>.part" next to `dest` and
    renamed only once it is complete, so a failure half-way through never
    leaves a truncated .cbz at `dest`.  Sub-directories of `folder` are
    ignored.  Any exception raised while packing propagates to the caller.
    """
    images = []
    extras = []
    # Single directory scan: classify each regular file by its suffix.
    for entry in folder.iterdir():
        if not entry.is_file():
            continue
        if entry.suffix.lower() in _IMAGE_EXTS:
            images.append(entry)
        else:
            extras.append(entry)
    images.sort(key=lambda p: _natural_key(p.name))
    # iterdir() order is arbitrary; sort so the archive layout is stable.
    extras.sort(key=lambda p: p.name)

    partial = dest.with_name(dest.name + ".part")
    try:
        with zipfile.ZipFile(partial, "w", zipfile.ZIP_STORED) as zf:
            for f in images + extras:
                zf.write(f, f.name)
        partial.replace(dest)
    finally:
        # After a successful rename the temp file no longer exists; after
        # a failure this removes the incomplete archive.
        if partial.exists():
            partial.unlink()
|
||||
|
||||
|
||||
class SuwayomiMover:
    """
    Scans a Suwayomi download directory, generates enriched ComicInfo.xml
    for each chapter, packs each chapter folder into a CBZ archive, and
    moves the result to a Kavita library path.

    Parameters
    ----------
    suwayomi_path   : Root of Suwayomi downloads.
                      Expected layout: <root>/<Source>/<Title>/<Chapter N>/
    kavita_path     : Root of the Kavita library.
                      Series sub-directories are created automatically.
    kavita_base_url : Kavita server URL — required only for person sync,
                      e.g. "http://192.168.2.2:5000".
    kavita_api_key  : Kavita API key — required only for person sync.
    language        : ComicInfo LanguageISO and SeriesSort language ("en").
    request_timeout : HTTP timeout in seconds for all API / image requests.
    delete_source   : Remove the source chapter folder after successful pack.
    """

    # Characters Windows rejects in file names (this set also contains the
    # POSIX path separator) plus ASCII control characters.  Series titles
    # come from a remote API and are used as directory names.
    _UNSAFE_DIR_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')

    def __init__(self,
                 suwayomi_path,
                 kavita_path,
                 *,
                 kavita_base_url: "str | None" = None,
                 kavita_api_key: "str | None" = None,
                 language: str = "en",
                 request_timeout: int = 30,
                 delete_source: bool = True):
        self._src = Path(suwayomi_path)
        self._dst = Path(kavita_path)
        self._language = language
        self._timeout = request_timeout
        self._delete_source = delete_source

        # Shared HTTP session and resolvers — reused across all series/chapters
        # to maximise cache hits and minimise API round-trips.
        session = requests.Session()
        # A new Session already carries a default "python-requests/x.y"
        # User-Agent, so setdefault() would silently keep that one; assign
        # the header explicitly instead.
        session.headers["User-Agent"] = "SuwayomiMover/1.0"
        self._session = session

        self._mal = MALResolver(request_timeout=request_timeout)
        self._vol_resolver = MangaDexVolumeResolver(
            request_timeout=request_timeout, session=session)
        self._works_resolver = MangaBakaWorksResolver(
            request_timeout=request_timeout, session=session)

        # Person sync is enabled only when both URL and key are supplied.
        self._person_updater: "KavitaPersonUpdater | None" = None
        if kavita_base_url and kavita_api_key:
            self._person_updater = KavitaPersonUpdater(
                kavita_base_url, kavita_api_key,
                mal_resolver=self._mal,
                request_timeout=request_timeout)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def process_all(self) -> dict:
        """
        Processes every manga series found under the Suwayomi root.

        Walks two directory levels deep:
            <suwayomi_path>/<Source dir>/<Manga Title>/

        Returns a dict keyed by manga title (the folder name), each value
        being the result dict from _process_series_dir.  If the same title
        exists under several sources, the last one processed wins.
        """
        results: dict = {}
        for source_dir in sorted(self._src.iterdir()):
            if not source_dir.is_dir():
                continue
            for manga_dir in sorted(source_dir.iterdir()):
                if not manga_dir.is_dir():
                    continue
                title = manga_dir.name
                print(f"[SuwayomiMover] {title}")
                results[title] = self._process_series_dir(manga_dir)
        return results

    def process_series(self, manga_title: str) -> dict:
        """
        Processes all chapters for a single series, located by title.

        Searches every source sub-directory under the Suwayomi root (in
        sorted order) for a directory whose name matches `manga_title`
        exactly and processes the first match.
        Raises FileNotFoundError if no matching directory is found.
        """
        for source_dir in sorted(self._src.iterdir()):
            if not source_dir.is_dir():
                continue
            candidate = source_dir / manga_title
            if candidate.is_dir():
                return self._process_series_dir(candidate)
        raise FileNotFoundError(
            f"No Suwayomi directory found for '{manga_title}' under {self._src}")

    # ------------------------------------------------------------------
    # Internal: series
    # ------------------------------------------------------------------
    @classmethod
    def _safe_dir_name(cls, title: str) -> str:
        """
        Turn a series title into a single, portable directory name.

        Characters matched by ``_UNSAFE_DIR_CHARS`` become "_", and
        surrounding whitespace plus trailing dots/spaces are removed
        (Windows does not allow names ending in either).  A title that
        ends up empty (for example "..") becomes "_".
        """
        cleaned = cls._UNSAFE_DIR_CHARS.sub("_", title).strip().rstrip(". ")
        return cleaned or "_"

    def _process_series_dir(self, manga_dir: Path) -> dict:
        """
        Converts every chapter folder of one series directory.

        Returns {"chapters": [<per-chapter result dict>, ...],
                 "persons": <person-sync result dict or None>}.
        """
        manga_title = manga_dir.name

        chapter_dirs = sorted(
            (d for d in manga_dir.iterdir() if d.is_dir()),
            key=lambda p: _chapter_sort_key(p.name),
        )

        # Read all chapter XMLs upfront to resolve chapter numbers and series name.
        chapter_items: list[tuple[Path, dict, str]] = []
        for chapter_dir in chapter_dirs:
            fields = _read_suwayomi_fields(chapter_dir)
            chapter_num = (fields.get("Number")
                           or _extract_chapter_num(chapter_dir.name))
            if chapter_num is None:
                print(f"  [skip] {chapter_dir.name} — no chapter number")
                continue
            chapter_items.append((chapter_dir, fields, chapter_num))

        # <Series> from the first chapter's XML → strip source labels → clean title
        # for the MangaBaka search.  Folder name is the last resort.
        raw_series = manga_title
        if chapter_items:
            xml_series = chapter_items[0][1].get("Series")
            if xml_series:
                raw_series = xml_series
        builder_title = _clean_suwayomi_title(raw_series)

        # One builder per series — metadata fetched once, reused for all chapters.
        builder = ComicInfoBuilder(
            builder_title, chapter=1,
            language=self._language,
            request_timeout=self._timeout,
            session=self._session,
            volume_resolver=self._vol_resolver,
            works_resolver=self._works_resolver,
            mal_resolver=self._mal,
        )

        # Fetch MangaBaka metadata now to get the canonical title and MAL ID.
        # Best effort: on any failure the folder name is used instead.
        md: "dict | None" = None
        mangabaka_title = manga_title
        try:
            md = builder.fetch_metadata()
            mangabaka_title = md.get("title") or manga_title
        except Exception as exc:
            print(f"  [warn] metadata fetch failed: {exc}")

        # Destination folder uses the MangaBaka canonical title, made safe
        # for use as a single path component ("Fate/stay night" must not
        # create nested directories, ":" / "?" must not fail on Windows).
        dest_series = self._dst / self._safe_dir_name(mangabaka_title)
        dest_series.mkdir(parents=True, exist_ok=True)

        chapter_results: list[dict] = []
        for chapter_dir, _fields, chapter_num in chapter_items:
            result = self._process_chapter(
                builder, chapter_num, chapter_dir, dest_series)
            chapter_results.append(result)
            status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
            print(f"  Chapter {chapter_num}: {status}")

        # Sync Kavita persons once per series.
        # Prefer the MAL ID from the MangaBaka metadata; fall back to a title
        # search when there is no metadata or it carries no MAL ID.
        person_result: "dict | None" = None
        if self._person_updater:
            # Two explicit steps: "a if md else None or b" would bind as
            # "a if md else (None or b)" and skip the fallback whenever
            # metadata exists but has no MAL entry.
            mal_id = _mal_id_from_metadata(md) if md else None
            if not mal_id:
                mal_id = self._mal.find_mal_id(builder_title)
            if mal_id:
                try:
                    person_result = self._person_updater.update_for_manga(mal_id)
                    print(f"  Persons: chars={person_result['characters'].get('updated')} "
                          f"staff={person_result['staff'].get('updated')}")
                except Exception as exc:
                    person_result = {"error": str(exc)}
                    print(f"  Persons: ERROR {exc}")

        return {"chapters": chapter_results, "persons": person_result}

    # ------------------------------------------------------------------
    # Internal: chapter
    # ------------------------------------------------------------------
    def _process_chapter(self,
                         builder: "ComicInfoBuilder",
                         chapter_num: str,
                         chapter_dir: Path,
                         dest_series: Path) -> dict:
        """
        Generates ComicInfo.xml for one chapter, packs it to CBZ, and
        optionally removes the source folder.

        The builder is asked to name the cover "000" so that it sorts
        before the numbered story pages in the archive (Image=0 in the
        <Pages> element is then the front cover).

        Never raises: returns {"chapter", "cbz", "ok": True} on success
        and the same dict with "ok": False plus "error" on any failure.
        The source folder is deleted only after packing succeeded.
        """
        cbz_path = dest_series / f"{chapter_dir.name}.cbz"
        try:
            builder.chapter = chapter_num
            builder.add_pages_from_folder(chapter_dir, cover_filename="000")
            builder.save_xml(chapter_dir)
            _pack_to_cbz(chapter_dir, cbz_path)
            if self._delete_source:
                shutil.rmtree(chapter_dir)
            return {"chapter": chapter_num, "cbz": str(cbz_path), "ok": True}
        except Exception as exc:
            # Per-chapter boundary: one broken chapter must not abort the
            # rest of the series, so the error is reported in the result.
            return {"chapter": chapter_num, "cbz": str(cbz_path),
                    "ok": False, "error": str(exc)}
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Usage example
|
||||
# --------------------------------------------------------------------------
|
||||
if __name__ == "__main__":
    import os

    # Locations default to the example values and can be overridden through
    # the environment variables named below.
    SUWAYOMI_PATH = os.environ.get(
        "SUWAYOMI_PATH", r"\\192.168.2.2\root\Temp\managdl\mangas")
    KAVITA_PATH = os.environ.get(
        "KAVITA_PATH", r"\\192.168.2.2\root\ServerData\Kavita\test")
    KAVITA_URL = os.environ.get("KAVITA_URL", "http://192.168.2.2:5000")
    # The API key is a credential and must not be committed to source
    # control; it is read from the environment only.  Without it the mover
    # still converts chapters and simply skips the Kavita person sync.
    KAVITA_KEY = os.environ.get("KAVITA_API_KEY")
    if not KAVITA_KEY:
        print("KAVITA_API_KEY not set; Kavita person sync disabled")

    mover = SuwayomiMover(
        SUWAYOMI_PATH,
        KAVITA_PATH,
        kavita_base_url=KAVITA_URL,
        kavita_api_key=KAVITA_KEY,
        delete_source=False,
    )

    # Process a single series
    result = mover.process_series("Yofukashi no Uta")
    ok = sum(1 for c in result["chapters"] if c["ok"])
    failed = sum(1 for c in result["chapters"] if not c["ok"])
    print(f"\nDone: {ok} ok, {failed} failed")
    for c in result["chapters"]:
        if not c["ok"]:
            print(f"  Chapter {c['chapter']}: {c['error']}")

    # Or process everything at once:
    # results = mover.process_all()
|
||||
Reference in New Issue
Block a user