diff --git a/src/ComicInfoBuilder.py b/src/ComicInfoBuilder.py
index 91b8df1..d3a28ce 100644
--- a/src/ComicInfoBuilder.py
+++ b/src/ComicInfoBuilder.py
@@ -365,8 +365,11 @@ class ComicInfoBuilder:
         add("Volume", volume)
 
         # ----- Description with MAL stats -----------------------------------
-        mal_id = self._mal_resolver.find_mal_id(
-            md.get("title") or self._manga_title)
+        # Prefer the MAL ID from MangaBaka's source map — avoids an extra
+        # Jikan title-search request and is more reliable than fuzzy matching.
+        mal_id = (self._mal_id_from_source(md)
+                  or self._mal_resolver.find_mal_id(
+                      md.get("title") or self._manga_title))
         mal_stats = self._mal_resolver.get_stats(mal_id)
         add("Summary", self._build_summary(md, sd, mal_stats))
 
@@ -417,7 +420,8 @@ class ComicInfoBuilder:
             pass
 
         # ----- ISBN (GTIN) from volume work ---------------------------------
-        isbn = (work or {}).get('identifiers')[0].get("id")
+        identifiers = (work or {}).get("identifiers") or []
+        isbn = identifiers[0].get("id") if identifiers else None
         add("GTIN", isbn)
 
         # ----- SeriesGroup from related works -------------------------------
@@ -741,6 +745,19 @@ class ComicInfoBuilder:
                     return str(info["id"])
         return None
 
+    @staticmethod
+    def _mal_id_from_source(md: dict) -> "int | None":
+        for raw_key, info in (md.get("source") or {}).items():
+            if _normalise_key(raw_key) in ("myanimelist", "mal"):
+                if isinstance(info, dict):
+                    mid = info.get("id")
+                    if mid is not None:
+                        try:
+                            return int(mid)
+                        except (TypeError, ValueError):
+                            pass
+        return None
+
     @staticmethod
     def _publishers_by_type(md: dict, ptype: str) -> "str | None":
         names = [p.get("name") for p in (md.get("publishers") or [])
diff --git a/src/KavitaPersonUpdater.py b/src/KavitaPersonUpdater.py
index 859f05a..77e1f58 100644
--- a/src/KavitaPersonUpdater.py
+++ b/src/KavitaPersonUpdater.py
@@ -416,7 +416,7 @@ if __name__ == "__main__":
     updater = KavitaPersonUpdater(KAVITA_URL, KAVITA_KEY)
     mal = MALResolver()
 
-    mal_id = mal.find_mal_id("One Punch-Man")
+    mal_id = mal.find_mal_id("よふかしのうた")
     print("MAL ID:", mal_id)
 
     if mal_id:
diff --git a/src/MALResolver.py b/src/MALResolver.py
index 1f6b7f6..08ecf09 100644
--- a/src/MALResolver.py
+++ b/src/MALResolver.py
@@ -171,7 +171,10 @@ class MALResolver:
 
         detailed = self.get_characters_detailed(mal_id)
         names = [e["name"] for e in detailed if e.get("name")]
-        self._char_names_cache[mal_id] = names
+        if names:
+            # Only cache a successful result — empty could be a transient
+            # API failure and we want the next call to retry.
+            self._char_names_cache[mal_id] = names
         return names
 
     def get_characters_for_manga(self, title: str) -> list[str]:
diff --git a/src/SuwayomiMover.py b/src/SuwayomiMover.py
new file mode 100644
index 0000000..ab29468
--- /dev/null
+++ b/src/SuwayomiMover.py
@@ -0,0 +1,453 @@
+"""
+SuwayomiMover.py
+================
+
+Moves Suwayomi-downloaded manga chapters to a Kavita library path,
+generating enriched ComicInfo.xml metadata and packing each chapter
+folder into a CBZ archive. Optionally syncs Kavita person / character
+records with MyAnimeList data after each series is processed.
+
+Suwayomi folder structure (input)
+----------------------------------
+    <suwayomi_path>/
+        <Source dir>/                e.g. "ComicK Fanmade (EN)"
+            <Manga Title>/           e.g. "Yofukashi no Uta"
+                Official_Chapter 1/  chapter folder — any prefix is fine
+                    001.webp
+                    ...
+                    ComicInfo.xml    Suwayomi's own basic XML (read + replaced)
+
+Kavita folder structure (output)
+---------------------------------
+    <kavita_path>/
+        <Series title>/
+            Official_Chapter 1.cbz   CBZ archive: images + enriched ComicInfo.xml
+            Official_Chapter 2.cbz
+            ...
+
+Cover naming convention
+-----------------------
+The cover image is saved as "000.<ext>" inside each chapter folder so that
+it sorts before "001.webp", "002.webp", … in alphabetical order. This
+ensures the Image=0 (front cover) entry of the <Pages> element in
+ComicInfo.xml matches the actual file order inside the CBZ archive.
+
+Dependencies
+------------
+    requests -> pip install requests
+    Pillow   -> pip install pillow (optional, for image dimensions)
+
+    ComicInfoBuilder, MangadexVolumeResolver, MangaBakaWorksResolver,
+    MALResolver, KavitaPersonUpdater must reside in the same directory.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import shutil
+import xml.etree.ElementTree as ET
+import zipfile
+from pathlib import Path
+
+import requests
+
+from ComicInfoBuilder import ComicInfoBuilder
+from MangadexVolumeResolver import MangaDexVolumeResolver
+from MangaBakaWorksResolver import MangaBakaWorksResolver
+from MALResolver import MALResolver
+from KavitaPersonUpdater import KavitaPersonUpdater
+
+
+_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
+_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
+
+# Parenthetical source labels that Suwayomi appends to series names.
+# These are not part of the actual title and confuse MangaBaka searches.
+_SOURCE_LABEL_RE = re.compile(
+    r'\s*\(\s*(?:official|unofficial|fan(?:\s*made)?|scanlation|'
+    r'bato(?:to)?|mangadex|manga\s*plus|viz|yen\s*press|webtoon)\s*\)\s*$',
+    re.IGNORECASE,
+)
+
+# Characters that are illegal in Windows file names (and "/" on POSIX).
+# Series titles such as "Wistoria: Wand and Sword" contain them.
+_UNSAFE_PATH_CHARS_RE = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
+
+
+def _natural_key(name: str) -> list:
+    """Sort key that orders embedded digit runs numerically ("2" < "10")."""
+    return [int(p) if p.isdigit() else p.lower()
+            for p in re.split(r"(\d+)", name)]
+
+
+def _safe_dirname(title: str) -> str:
+    """
+    Turns a series title into a name usable as a single directory component.
+
+    Reserved characters are replaced by "_" and trailing dots / spaces
+    (rejected by Windows) are stripped. Never returns an empty string.
+    """
+    cleaned = _UNSAFE_PATH_CHARS_RE.sub("_", title).strip().rstrip(". ")
+    return cleaned or "_"
+
+
+_SUWAYOMI_WANTED = {"Title", "Series", "Number", "Summary",
+                    "Writer", "Penciller", "Genre", "Web",
+                    "Year", "Month", "Day"}
+
+
+def _read_suwayomi_fields(chapter_dir: Path) -> dict:
+    """
+    Reads metadata from Suwayomi's ComicInfo.xml inside a chapter folder.
+
+    Returns a dict of whichever fields are present, e.g.:
+        {"Number": "3", "Series": "Dungeon Odyssey", "Title": "Chapter 3", ...}
+    Returns an empty dict if the file is missing, unreadable or unparseable.
+    """
+    xml_path = chapter_dir / "ComicInfo.xml"
+    if not xml_path.is_file():
+        return {}
+    try:
+        root = ET.parse(xml_path).getroot()
+    except (ET.ParseError, OSError):
+        # A single broken / unreadable XML must not abort the whole series;
+        # the caller falls back to the folder name.
+        return {}
+    result = {}
+    for child in root:
+        tag = child.tag.split("}")[-1]  # drop an XML namespace prefix
+        if tag in _SUWAYOMI_WANTED and child.text and child.text.strip():
+            result[tag] = child.text.strip()
+    return result
+
+
+def _clean_suwayomi_title(title: str) -> str:
+    """
+    Removes Suwayomi source annotations from a series title.
+
+    Suwayomi sometimes appends the translation group / source type in
+    parentheses, e.g. "Wistoria: Wand and Sword (Official)". These labels
+    are not part of the canonical title and break MangaBaka / MAL lookups.
+    """
+    return _SOURCE_LABEL_RE.sub("", title).strip()
+
+
+def _mal_id_from_metadata(md: dict) -> "int | None":
+    """
+    Extracts the MAL ID directly from a MangaBaka series dict.
+
+    MangaBaka stores tracker IDs in md["source"], e.g.:
+        {"myanimelist": {"id": 121480}, "mangadex": {"id": "..."}, ...}
+
+    Returns the integer MAL ID, or None if not present.
+    """
+    for raw_key, info in (md.get("source") or {}).items():
+        if re.sub(r"[^a-z0-9]", "", raw_key.lower()) in ("myanimelist", "mal"):
+            if isinstance(info, dict):
+                mal_id = info.get("id")
+                if mal_id is not None:
+                    try:
+                        return int(mal_id)
+                    except (TypeError, ValueError):
+                        pass
+    return None
+
+
+def _extract_chapter_num(folder_name: str) -> "str | None":
+    """
+    Fallback: extracts chapter number from the folder name.
+    Examples: "Chapter 10" -> "10", "Official_Chapter 10.5" -> "10.5"
+    """
+    m = _CHAPTER_RE.search(folder_name)
+    return m.group(1) if m else None
+
+
+def _chapter_sort_key(folder_name: str) -> tuple:
+    """Numeric sort key for chapter folder names."""
+    num = _extract_chapter_num(folder_name)
+    if num is None:
+        return (float("inf"), folder_name)
+    return (float(num), folder_name)
+
+
+def _pack_to_cbz(folder: Path, dest: Path) -> None:
+    """
+    Packs all files in `folder` into a CBZ archive at `dest`.
+
+    Images are stored in natural-sort order (so "000.jpg" < "001.webp").
+    ComicInfo.xml is appended last so image indices in the archive match
+    the <Page> entries written by ComicInfoBuilder.
+    Files are stored without compression (ZIP_STORED) since the source
+    images are already compressed (webp / jpg / png / …).
+
+    The archive is written to "<dest>.part" and renamed on success, so a
+    failed run never leaves a truncated CBZ in the Kavita library.
+    """
+    files = [f for f in folder.iterdir() if f.is_file()]
+    images = sorted(
+        (f for f in files if f.suffix.lower() in _IMAGE_EXTS),
+        key=lambda p: _natural_key(p.name),
+    )
+    extras = [f for f in files if f.suffix.lower() not in _IMAGE_EXTS]
+
+    tmp = dest.with_name(dest.name + ".part")
+    try:
+        with zipfile.ZipFile(tmp, "w", zipfile.ZIP_STORED) as zf:
+            for f in images + extras:
+                zf.write(f, f.name)
+        tmp.replace(dest)
+    finally:
+        # No-op after a successful rename; removes the partial file otherwise.
+        tmp.unlink(missing_ok=True)
+
+
+class SuwayomiMover:
+    """
+    Scans a Suwayomi download directory, generates enriched ComicInfo.xml
+    for each chapter, packs each chapter folder into a CBZ archive, and
+    moves the result to a Kavita library path.
+
+    Parameters
+    ----------
+    suwayomi_path   : Root of Suwayomi downloads.
+                      Expected layout:
+                      <suwayomi_path>/<Source dir>/<Manga Title>/<Chapter N>/
+    kavita_path     : Root of the Kavita library.
+                      Series sub-directories are created automatically.
+    kavita_base_url : Kavita server URL — required only for person sync,
+                      e.g. "http://192.168.2.2:5000".
+    kavita_api_key  : Kavita API key — required only for person sync.
+    language        : ComicInfo LanguageISO and SeriesSort language ("en").
+    request_timeout : HTTP timeout in seconds for all API / image requests.
+    delete_source   : Remove the source chapter folder after successful pack.
+    """
+
+    def __init__(self,
+                 suwayomi_path,
+                 kavita_path,
+                 *,
+                 kavita_base_url: "str | None" = None,
+                 kavita_api_key: "str | None" = None,
+                 language: str = "en",
+                 request_timeout: int = 30,
+                 delete_source: bool = True):
+        self._src = Path(suwayomi_path)
+        self._dst = Path(kavita_path)
+        self._language = language
+        self._timeout = request_timeout
+        self._delete_source = delete_source
+
+        # Shared HTTP session and resolvers — reused across all series/chapters
+        # to maximise cache hits and minimise API round-trips.
+        session = requests.Session()
+        # Assign (not setdefault): a Session already carries a default UA.
+        session.headers["User-Agent"] = "SuwayomiMover/1.0"
+        self._session = session
+
+        self._mal = MALResolver(request_timeout=request_timeout)
+        self._vol_resolver = MangaDexVolumeResolver(
+            request_timeout=request_timeout, session=session)
+        self._works_resolver = MangaBakaWorksResolver(
+            request_timeout=request_timeout, session=session)
+
+        self._person_updater: "KavitaPersonUpdater | None" = None
+        if kavita_base_url and kavita_api_key:
+            self._person_updater = KavitaPersonUpdater(
+                kavita_base_url, kavita_api_key,
+                mal_resolver=self._mal,
+                request_timeout=request_timeout)
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    def process_all(self) -> dict:
+        """
+        Processes every manga series found under the Suwayomi root.
+
+        Walks two directory levels deep:
+            <suwayomi_path>/<Source dir>/<Manga Title>/
+
+        Returns a dict keyed by manga title, each value being the result
+        dict from _process_series_dir.
+        """
+        results: dict = {}
+        for source_dir in sorted(self._src.iterdir()):
+            if not source_dir.is_dir():
+                continue
+            for manga_dir in sorted(source_dir.iterdir()):
+                if not manga_dir.is_dir():
+                    continue
+                title = manga_dir.name
+                print(f"[SuwayomiMover] {title}")
+                results[title] = self._process_series_dir(manga_dir)
+        return results
+
+    def process_series(self, manga_title: str) -> dict:
+        """
+        Processes all chapters for a single series, located by title.
+
+        Searches every source sub-directory under the Suwayomi root for a
+        directory whose name matches `manga_title` exactly.
+        Raises FileNotFoundError if no matching directory is found.
+        """
+        for source_dir in sorted(self._src.iterdir()):
+            if not source_dir.is_dir():
+                continue
+            candidate = source_dir / manga_title
+            if candidate.is_dir():
+                return self._process_series_dir(candidate)
+        raise FileNotFoundError(
+            f"No Suwayomi directory found for '{manga_title}' under {self._src}")
+
+    # ------------------------------------------------------------------
+    # Internal: series
+    # ------------------------------------------------------------------
+    def _process_series_dir(self, manga_dir: Path) -> dict:
+        """
+        Processes every chapter folder of one series directory.
+
+        Returns {"chapters": [<per-chapter result dict>, ...],
+                 "persons": <person-sync result dict or None>}.
+        """
+        manga_title = manga_dir.name
+
+        chapter_dirs = sorted(
+            (d for d in manga_dir.iterdir() if d.is_dir()),
+            key=lambda p: _chapter_sort_key(p.name),
+        )
+
+        # Read all chapter XMLs upfront to resolve chapter numbers and series name.
+        chapter_items: list[tuple[Path, dict, str]] = []
+        for chapter_dir in chapter_dirs:
+            fields = _read_suwayomi_fields(chapter_dir)
+            chapter_num = (fields.get("Number")
+                           or _extract_chapter_num(chapter_dir.name))
+            if chapter_num is None:
+                print(f"  [skip] {chapter_dir.name} — no chapter number")
+                continue
+            chapter_items.append((chapter_dir, fields, chapter_num))
+
+        # <Series> from the first chapter's XML → strip source labels → clean title
+        # for the MangaBaka search. Folder name is the last resort.
+        raw_series = manga_title
+        if chapter_items:
+            xml_series = chapter_items[0][1].get("Series")
+            if xml_series:
+                raw_series = xml_series
+        builder_title = _clean_suwayomi_title(raw_series)
+
+        # One builder per series — metadata fetched once, reused for all chapters.
+        builder = ComicInfoBuilder(
+            builder_title, chapter=1,
+            language=self._language,
+            request_timeout=self._timeout,
+            session=self._session,
+            volume_resolver=self._vol_resolver,
+            works_resolver=self._works_resolver,
+            mal_resolver=self._mal,
+        )
+
+        # Fetch MangaBaka metadata now to get the canonical title and MAL ID.
+        md: "dict | None" = None
+        mangabaka_title = manga_title
+        try:
+            md = builder.fetch_metadata()
+            mangabaka_title = (md or {}).get("title") or manga_title
+        except Exception as exc:
+            print(f"  [warn] metadata fetch failed: {exc}")
+
+        # Destination folder uses the MangaBaka canonical title, sanitised so
+        # that titles containing ":" / "?" / "/" … still yield a valid folder.
+        dest_series = self._dst / _safe_dirname(mangabaka_title)
+        dest_series.mkdir(parents=True, exist_ok=True)
+
+        chapter_results: list[dict] = []
+        for chapter_dir, _fields, chapter_num in chapter_items:
+            result = self._process_chapter(
+                builder, chapter_num, chapter_dir, dest_series)
+            chapter_results.append(result)
+            status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
+            print(f"  Chapter {chapter_num}: {status}")
+
+        # Sync Kavita persons once per series.
+        # Prefer the MAL ID supplied by MangaBaka; fall back to a Jikan title
+        # search only when MangaBaka has none.
+        person_result: "dict | None" = None
+        if self._person_updater:
+            try:
+                # Parentheses matter: without them the conditional expression
+                # swallows the `or` and the title-search fallback never runs
+                # when metadata exists but carries no MAL ID.
+                mal_id = ((_mal_id_from_metadata(md) if md else None)
+                          or self._mal.find_mal_id(builder_title))
+                if mal_id:
+                    person_result = self._person_updater.update_for_manga(mal_id)
+                    print(f"  Persons: chars={person_result['characters'].get('updated')} "
+                          f"staff={person_result['staff'].get('updated')}")
+            except Exception as exc:
+                person_result = {"error": str(exc)}
+                print(f"  Persons: ERROR {exc}")
+
+        return {"chapters": chapter_results, "persons": person_result}
+
+    # ------------------------------------------------------------------
+    # Internal: chapter
+    # ------------------------------------------------------------------
+    def _process_chapter(self,
+                         builder: ComicInfoBuilder,
+                         chapter_num: str,
+                         chapter_dir: Path,
+                         dest_series: Path) -> dict:
+        """
+        Generates ComicInfo.xml for one chapter, packs it to CBZ, and
+        optionally removes the source folder.
+
+        The cover image is saved as "000.<ext>" so it sorts before the
+        numbered story pages in the archive (ensuring Image=0 in the
+        <Pages> element correctly points to the front cover).
+        """
+        cbz_path = dest_series / f"{chapter_dir.name}.cbz"
+        try:
+            builder.chapter = chapter_num
+            builder.add_pages_from_folder(chapter_dir, cover_filename="000")
+            builder.save_xml(chapter_dir)
+            _pack_to_cbz(chapter_dir, cbz_path)
+            if self._delete_source:
+                shutil.rmtree(chapter_dir)
+            return {"chapter": chapter_num, "cbz": str(cbz_path), "ok": True}
+        except Exception as exc:
+            return {"chapter": chapter_num, "cbz": str(cbz_path),
+                    "ok": False, "error": str(exc)}
+
+
+# --------------------------------------------------------------------------
+# Usage example
+# --------------------------------------------------------------------------
+if __name__ == "__main__":
+    SUWAYOMI_PATH = r"\\192.168.2.2\root\Temp\managdl\mangas"
+    KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
+    KAVITA_URL = "http://192.168.2.2:5000"
+    # Never commit credentials: the API key is read from the environment.
+    # Person sync is skipped automatically when the variable is unset.
+    KAVITA_KEY = os.environ.get("KAVITA_API_KEY")
+
+    mover = SuwayomiMover(
+        SUWAYOMI_PATH,
+        KAVITA_PATH,
+        kavita_base_url=KAVITA_URL,
+        kavita_api_key=KAVITA_KEY,
+        delete_source=False
+    )
+
+    # Process a single series
+    result = mover.process_series("Yofukashi no Uta")
+    ok = sum(1 for c in result["chapters"] if c["ok"])
+    failed = sum(1 for c in result["chapters"] if not c["ok"])
+    print(f"\nDone: {ok} ok, {failed} failed")
+    for c in result["chapters"]:
+        if not c["ok"]:
+            print(f"  Chapter {c['chapter']}: {c['error']}")
+
+    # Or process everything at once:
+    # results = mover.process_all()