Added Mover and other improvements

2026-05-23 16:09:13 +02:00
parent 3dab98cb41
commit 377aff34d0
4 changed files with 440 additions and 5 deletions
@@ -365,8 +365,11 @@ class ComicInfoBuilder:
        add("Volume", volume)

        # ----- Description with MAL stats -----------------------------------
-        mal_id = self._mal_resolver.find_mal_id(
-            md.get("title") or self._manga_title)
+        # Prefer the MAL ID from MangaBaka's source map — avoids an extra
+        # Jikan title-search request and is more reliable than fuzzy matching.
+        mal_id = (self._mal_id_from_source(md)
+                  or self._mal_resolver.find_mal_id(
+                      md.get("title") or self._manga_title))
        mal_stats = self._mal_resolver.get_stats(mal_id)
        add("Summary", self._build_summary(md, sd, mal_stats))

@@ -417,7 +420,8 @@ class ComicInfoBuilder:
                pass

        # ----- ISBN (GTIN) from volume work ---------------------------------
-        isbn = (work or {}).get('identifiers')[0].get("id")
+        identifiers = (work or {}).get("identifiers") or []
+        isbn = identifiers[0].get("id") if identifiers else None
        add("GTIN", isbn)

        # ----- SeriesGroup from related works -------------------------------
@@ -741,6 +745,19 @@ class ComicInfoBuilder:
                    return str(info["id"])
        return None

+    @staticmethod
+    def _mal_id_from_source(md: dict) -> "int | None":
+        """
+        Looks up the MyAnimeList ID in the ``source`` mapping of ``md``.
+
+        Scans ``md["source"]`` for an entry whose key, once passed through
+        ``_normalise_key``, equals "myanimelist" or "mal", and returns that
+        entry's "id" converted to ``int``.  Entries that are not dicts,
+        have no "id", or hold an id that ``int()`` rejects are skipped.
+
+        Returns None when no usable entry exists.
+        """
+        sources = md.get("source") or {}
+        for key, entry in sources.items():
+            if _normalise_key(key) not in ("myanimelist", "mal"):
+                continue
+            if not isinstance(entry, dict):
+                continue
+            candidate = entry.get("id")
+            if candidate is None:
+                continue
+            try:
+                return int(candidate)
+            except (TypeError, ValueError):
+                # Unusable id — keep scanning the remaining entries.
+                continue
+        return None
+
    @staticmethod
    def _publishers_by_type(md: dict, ptype: str) -> "str | None":
        names = [p.get("name") for p in (md.get("publishers") or [])
@@ -416,7 +416,7 @@ if __name__ == "__main__":
    updater = KavitaPersonUpdater(KAVITA_URL, KAVITA_KEY)

    mal = MALResolver()
-    mal_id = mal.find_mal_id("One Punch-Man")
+    mal_id = mal.find_mal_id("よふかしのうた")
    print("MAL ID:", mal_id)

    if mal_id:
@@ -171,7 +171,10 @@ class MALResolver:

        detailed = self.get_characters_detailed(mal_id)
        names = [e["name"] for e in detailed if e.get("name")]
-        self._char_names_cache[mal_id] = names
+        if names:
+            # Only cache a successful result — empty could be a transient
+            # API failure and we want the next call to retry.
+            self._char_names_cache[mal_id] = names
        return names

    def get_characters_for_manga(self, title: str) -> list[str]:
@@ -0,0 +1,415 @@
+"""
+suwayomi_mover.py
+=================
+
+Moves Suwayomi-downloaded manga chapters to a Kavita library path,
+generating enriched ComicInfo.xml metadata and packing each chapter
+folder into a CBZ archive.  Optionally syncs Kavita person / character
+records with MyAnimeList data after each series is processed.
+
+Suwayomi folder structure (input)
+----------------------------------
+  <suwayomi_path>/
+    <Source (lang)>/                    e.g. "ComicK Fanmade (EN)"
+      <Manga Title>/                    e.g. "Yofukashi no Uta"
+        Official_Chapter 1/             chapter folder — any prefix is fine
+          001.webp
+          ...
+          ComicInfo.xml                 Suwayomi's own basic XML (read + replaced)
+
+Kavita folder structure (output)
+---------------------------------
+  <kavita_path>/
+    <Manga Title>/
+      Official_Chapter 1.cbz           CBZ archive: images + enriched ComicInfo.xml
+      Official_Chapter 2.cbz
+      ...
+
+Cover naming convention
+-----------------------
+The cover image is saved as "000.<ext>" inside each chapter folder so that
+it sorts before "001.webp", "002.webp", … when the images are ordered by
+name.  This ensures the <Page Image="0" Type="FrontCover"> entry under
+<Pages> in ComicInfo.xml refers to the first file in the CBZ archive.
+
+Dependencies
+------------
+    requests    -> pip install requests
+    Pillow      -> pip install pillow   (optional, for image dimensions)
+
+    ComicInfoBuilder, MangadexVolumeResolver, MangaBakaWorksResolver,
+    MALResolver, KavitaPersonUpdater must reside in the same directory.
+"""
+
+from __future__ import annotations
+
+import re
+import shutil
+import xml.etree.ElementTree as ET
+import zipfile
+from pathlib import Path
+
+import requests
+
+from ComicInfoBuilder import ComicInfoBuilder
+from MangadexVolumeResolver import MangaDexVolumeResolver
+from MangaBakaWorksResolver import MangaBakaWorksResolver
+from MALResolver import MALResolver
+from KavitaPersonUpdater import KavitaPersonUpdater
+
+
+# File extensions (lower-case, including the leading dot) that are treated
+# as page images.
+_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
+# Matches "Chapter" / "chapter" followed by whitespace and an integer or
+# decimal number; group 1 captures the number, e.g. "10" or "10.5".
+_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
+
+# Parenthetical source labels that Suwayomi appends to series names.
+# These are not part of the actual title and confuse MangaBaka searches.
+# The pattern is anchored at the end of the string and case-insensitive,
+# so only one trailing "(label)" is matched.
+_SOURCE_LABEL_RE = re.compile(
+    r'\s*\(\s*(?:official|unofficial|fan(?:\s*made)?|scanlation|'
+    r'bato(?:to)?|mangadex|manga\s*plus|viz|yen\s*press|webtoon)\s*\)\s*$',
+    re.IGNORECASE,
+)
+
+
+def _natural_key(name: str) -> list:
+    """
+    Builds a natural-sort key so that "2.png" orders before "10.png".
+
+    The name is split into alternating text / digit-run pieces; digit runs
+    become ints and text pieces are lower-cased so the comparison is
+    case-insensitive.
+    """
+    # str.isdecimal() rather than str.isdigit(): isdigit() is also true for
+    # characters such as "²", which int() rejects with ValueError.
+    return [int(piece) if piece.isdecimal() else piece.lower()
+            for piece in re.split(r"(\d+)", name)]
+
+
+# ComicInfo.xml element names that are picked up from Suwayomi's own file;
+# every other element is ignored.
+_SUWAYOMI_WANTED = {"Title", "Series", "Number", "Summary",
+                    "Writer", "Penciller", "Genre", "Web",
+                    "Year", "Month", "Day"}
+
+
+def _read_suwayomi_fields(chapter_dir: Path) -> dict:
+    """
+    Reads metadata from Suwayomi's ComicInfo.xml inside a chapter folder.
+
+    Only direct children of the root element whose (namespace-stripped)
+    tag is listed in ``_SUWAYOMI_WANTED`` and whose text is non-blank are
+    returned; values are whitespace-trimmed.
+
+    Returns a dict of whichever fields are present, e.g.:
+      {"Number": "3", "Series": "Dungeon Odyssey", "Title": "Chapter 3", ...}
+    Returns an empty dict if the file is missing, unreadable or unparseable.
+    """
+    xml_path = chapter_dir / "ComicInfo.xml"
+    if not xml_path.is_file():
+        return {}
+    try:
+        root = ET.parse(xml_path).getroot()
+    except (ET.ParseError, OSError):
+        # OSError: the file exists but cannot be read.  Treat it like a
+        # missing file so one bad chapter does not abort the whole series;
+        # callers fall back to the folder name for the chapter number.
+        return {}
+    result = {}
+    for child in root:
+        # Drop a "{namespace}" prefix, if any, to get the bare element name.
+        tag = child.tag.split("}")[-1]
+        text = (child.text or "").strip()
+        if tag in _SUWAYOMI_WANTED and text:
+            result[tag] = text
+    return result
+
+
+def _clean_suwayomi_title(title: str) -> str:
+    """
+    Strips a trailing Suwayomi source annotation from a series title.
+
+    A parenthesised label at the end of the title that matches
+    ``_SOURCE_LABEL_RE`` — for example the "(Official)" in
+    "Wistoria: Wand and Sword (Official)" — is removed, then surrounding
+    whitespace is trimmed.  Such labels are not part of the canonical
+    title and break MangaBaka / MAL lookups.
+    """
+    without_label = _SOURCE_LABEL_RE.sub("", title)
+    return without_label.strip()
+
+
+def _mal_id_from_metadata(md: dict) -> "int | None":
+    """
+    Pulls the MAL ID straight out of a MangaBaka series dict.
+
+    Tracker IDs are read from md["source"], which is expected to look like:
+      {"myanimelist": {"id": 121480}, "mangadex": {"id": "..."}, ...}
+
+    Keys are compared after lower-casing and dropping every character
+    outside a-z / 0-9, so "My_Anime-List" and "MAL" are both recognised.
+    Entries that are not dicts, have no "id", or carry an id that int()
+    rejects are skipped.
+
+    Returns the integer MAL ID, or None if no usable entry is present.
+    """
+    sources = md.get("source") or {}
+    for key, entry in sources.items():
+        normalised = re.sub(r"[^a-z0-9]", "", key.lower())
+        if normalised not in ("myanimelist", "mal"):
+            continue
+        if not isinstance(entry, dict):
+            continue
+        candidate = entry.get("id")
+        if candidate is None:
+            continue
+        try:
+            return int(candidate)
+        except (TypeError, ValueError):
+            # Unusable id — keep scanning the remaining entries.
+            continue
+    return None
+
+
+def _extract_chapter_num(folder_name: str) -> "str | None":
+    """
+    Fallback: pulls the chapter number out of a chapter folder name.
+
+    The first "Chapter <number>" occurrence found by ``_CHAPTER_RE`` wins;
+    the number is returned as the matched string, not converted.
+    Examples: "Chapter 10" -> "10", "Official_Chapter 10.5" -> "10.5"
+    Returns None when the folder name contains no such pattern.
+    """
+    match = _CHAPTER_RE.search(folder_name)
+    if match is None:
+        return None
+    return match.group(1)
+
+
+def _chapter_sort_key(folder_name: str) -> tuple:
+    """
+    Sort key that orders chapter folders by their numeric chapter value.
+
+    Folders without a recognisable chapter number get +infinity as the
+    numeric part and therefore sort after every numbered chapter; the
+    folder name itself breaks ties.
+    """
+    chapter = _extract_chapter_num(folder_name)
+    numeric = float("inf") if chapter is None else float(chapter)
+    return (numeric, folder_name)
+
+
+def _pack_to_cbz(folder: Path, dest: Path) -> None:
+    """
+    Packs all files in `folder` into a CBZ archive at `dest`.
+
+    Images are stored first, in natural-sort order (so "000.jpg" <
+    "001.webp").  Every other file (e.g. ComicInfo.xml) is appended after
+    them, sorted by name, so non-image entries never shift the position of
+    an image inside the archive.  Sub-directories of `folder` are ignored.
+    Files are stored without compression (ZIP_STORED) since the source
+    images are already compressed (webp / jpg / png / …).
+
+    The archive is written next to `dest` under a temporary ".part" name
+    and renamed to `dest` only once it is complete, so a failure part-way
+    through never leaves a truncated CBZ at `dest`.  Any exception from
+    reading the folder or writing the archive propagates to the caller.
+    """
+    images = []
+    extras = []
+    # Single directory scan; the listing is split by extension.
+    for entry in folder.iterdir():
+        if not entry.is_file():
+            continue
+        if entry.suffix.lower() in _IMAGE_EXTS:
+            images.append(entry)
+        else:
+            extras.append(entry)
+    images.sort(key=lambda p: _natural_key(p.name))
+    # iterdir() order is arbitrary; sort for a reproducible archive layout.
+    extras.sort(key=lambda p: p.name)
+
+    partial = dest.with_name(dest.name + ".part")
+    try:
+        with zipfile.ZipFile(partial, "w", zipfile.ZIP_STORED) as zf:
+            for entry in images + extras:
+                zf.write(entry, entry.name)
+        partial.replace(dest)
+    finally:
+        # No-op after a successful rename; removes the leftover otherwise.
+        if partial.exists():
+            partial.unlink()
+
+
+class SuwayomiMover:
+    """
+    Scans a Suwayomi download directory, generates enriched ComicInfo.xml
+    for each chapter, packs each chapter folder into a CBZ archive, and
+    moves the result to a Kavita library path.
+
+    Parameters
+    ----------
+    suwayomi_path   : Root of Suwayomi downloads.
+                      Expected layout: <root>/<Source>/<Title>/<Chapter N>/
+    kavita_path     : Root of the Kavita library.
+                      Series sub-directories are created automatically.
+    kavita_base_url : Kavita server URL — required only for person sync,
+                      e.g. "http://192.168.2.2:5000".
+    kavita_api_key  : Kavita API key   — required only for person sync.
+                      Person sync is disabled unless both the URL and the
+                      key are given.
+    language        : ComicInfo LanguageISO and SeriesSort language ("en").
+    request_timeout : HTTP timeout in seconds for all API / image requests.
+    delete_source   : Remove the source chapter folder after successful pack.
+    """
+
+    def __init__(self,
+                 suwayomi_path,
+                 kavita_path,
+                 *,
+                 kavita_base_url: "str | None" = None,
+                 kavita_api_key: "str | None" = None,
+                 language: str = "en",
+                 request_timeout: int = 30,
+                 delete_source: bool = True):
+        """Stores the configuration and builds the shared session/resolvers."""
+        self._src = Path(suwayomi_path)
+        self._dst = Path(kavita_path)
+        self._language = language
+        self._timeout = request_timeout
+        self._delete_source = delete_source
+
+        # Shared HTTP session and resolvers — reused across all series/chapters
+        # to maximise cache hits and minimise API round-trips.
+        session = requests.Session()
+        session.headers.setdefault("User-Agent", "SuwayomiMover/1.0")
+        self._session = session
+
+        self._mal = MALResolver(request_timeout=request_timeout)
+        self._vol_resolver = MangaDexVolumeResolver(
+            request_timeout=request_timeout, session=session)
+        self._works_resolver = MangaBakaWorksResolver(
+            request_timeout=request_timeout, session=session)
+
+        # Stays None (person sync disabled) unless both URL and key are set.
+        self._person_updater: "KavitaPersonUpdater | None" = None
+        if kavita_base_url and kavita_api_key:
+            self._person_updater = KavitaPersonUpdater(
+                kavita_base_url, kavita_api_key,
+                mal_resolver=self._mal,
+                request_timeout=request_timeout)
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    def process_all(self) -> dict:
+        """
+        Processes every manga series found under the Suwayomi root.
+
+        Walks two directory levels deep:
+          <suwayomi_path>/<Source dir>/<Manga Title>/
+
+        Returns a dict keyed by manga title (the folder name), each value
+        being the result dict from _process_series_dir.  If the same title
+        exists under more than one source directory, the entry of the
+        source processed last replaces the earlier one.
+        """
+        results: dict = {}
+        for source_dir in sorted(self._src.iterdir()):
+            if not source_dir.is_dir():
+                continue
+            for manga_dir in sorted(source_dir.iterdir()):
+                if not manga_dir.is_dir():
+                    continue
+                title = manga_dir.name
+                print(f"[SuwayomiMover] {title}")
+                results[title] = self._process_series_dir(manga_dir)
+        return results
+
+    def process_series(self, manga_title: str) -> dict:
+        """
+        Processes all chapters for a single series, located by title.
+
+        Searches every source sub-directory under the Suwayomi root (in
+        sorted order) for a directory whose name matches `manga_title`
+        exactly; the first match is processed and its result returned.
+        Raises FileNotFoundError if no matching directory is found.
+        """
+        for source_dir in sorted(self._src.iterdir()):
+            if not source_dir.is_dir():
+                continue
+            candidate = source_dir / manga_title
+            if candidate.is_dir():
+                return self._process_series_dir(candidate)
+        raise FileNotFoundError(
+            f"No Suwayomi directory found for '{manga_title}' under {self._src}")
+
+    # ------------------------------------------------------------------
+    # Internal: series
+    # ------------------------------------------------------------------
+    def _process_series_dir(self, manga_dir: Path) -> dict:
+        """
+        Processes every chapter folder of one series directory.
+
+        Steps: collect chapter folders and their numbers, derive a clean
+        search title, fetch series metadata once, pack each chapter into
+        the destination series folder, then (if person sync is enabled)
+        update Kavita persons once for the series.
+
+        Returns {"chapters": [per-chapter result dicts], "persons": dict or
+        None}.  "persons" is None when person sync is disabled or no MAL ID
+        could be determined, and {"error": ...} when the sync raised.
+        """
+        manga_title = manga_dir.name
+
+        chapter_dirs = sorted(
+            (d for d in manga_dir.iterdir() if d.is_dir()),
+            key=lambda p: _chapter_sort_key(p.name),
+        )
+
+        # Read all chapter XMLs upfront to resolve chapter numbers and series name.
+        chapter_items: list[tuple[Path, dict, str]] = []
+        for chapter_dir in chapter_dirs:
+            fields = _read_suwayomi_fields(chapter_dir)
+            chapter_num = (fields.get("Number")
+                           or _extract_chapter_num(chapter_dir.name))
+            if chapter_num is None:
+                print(f"  [skip] {chapter_dir.name} — no chapter number")
+                continue
+            chapter_items.append((chapter_dir, fields, chapter_num))
+
+        # <Series> from the first chapter's XML → strip source labels → clean title
+        # for the MangaBaka search.  Folder name is the last resort.
+        raw_series = manga_title
+        if chapter_items:
+            xml_series = chapter_items[0][1].get("Series")
+            if xml_series:
+                raw_series = xml_series
+        builder_title = _clean_suwayomi_title(raw_series)
+
+        # One builder per series — metadata fetched once, reused for all chapters.
+        builder = ComicInfoBuilder(
+            builder_title, chapter=1,
+            language=self._language,
+            request_timeout=self._timeout,
+            session=self._session,
+            volume_resolver=self._vol_resolver,
+            works_resolver=self._works_resolver,
+            mal_resolver=self._mal,
+        )
+
+        # Fetch MangaBaka metadata now to get the canonical title and MAL ID.
+        # Best effort: on any failure the folder name is used instead.
+        md: "dict | None" = None
+        mangabaka_title = manga_title
+        try:
+            md = builder.fetch_metadata()
+            mangabaka_title = md.get("title") or manga_title
+        except Exception as exc:
+            print(f"  [warn] metadata fetch failed: {exc}")
+
+        # Destination folder uses the MangaBaka canonical title.
+        dest_series = self._dst / mangabaka_title
+        dest_series.mkdir(parents=True, exist_ok=True)
+
+        chapter_results: list[dict] = []
+        for chapter_dir, _fields, chapter_num in chapter_items:
+            result = self._process_chapter(
+                builder, chapter_num, chapter_dir, dest_series)
+            chapter_results.append(result)
+            status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
+            print(f"  Chapter {chapter_num}: {status}")
+
+        # Sync Kavita persons once per series.
+        # Prefer the MAL ID carried by the MangaBaka metadata; only when it
+        # is unavailable fall back to a Jikan title search.
+        person_result: "dict | None" = None
+        if self._person_updater:
+            # The parentheses matter: a conditional expression binds looser
+            # than `or`, so without them the title-search fallback would be
+            # skipped whenever `md` is truthy.
+            mal_id = ((_mal_id_from_metadata(md) if md else None)
+                      or self._mal.find_mal_id(builder_title))
+            if mal_id:
+                try:
+                    person_result = self._person_updater.update_for_manga(mal_id)
+                    print(f"  Persons: chars={person_result['characters'].get('updated')} "
+                          f"staff={person_result['staff'].get('updated')}")
+                except Exception as exc:
+                    person_result = {"error": str(exc)}
+                    print(f"  Persons: ERROR {exc}")
+
+        return {"chapters": chapter_results, "persons": person_result}
+
+    # ------------------------------------------------------------------
+    # Internal: chapter
+    # ------------------------------------------------------------------
+    def _process_chapter(self,
+                         builder: ComicInfoBuilder,
+                         chapter_num: str,
+                         chapter_dir: Path,
+                         dest_series: Path) -> dict:
+        """
+        Generates ComicInfo.xml for one chapter, packs it to CBZ, and
+        optionally removes the source folder.
+
+        The cover image is saved as "000.<ext>" so it sorts before the
+        numbered story pages in the archive (ensuring Image=0 in the
+        <Pages> element correctly points to the front cover).
+
+        Never raises: any exception is captured and reported in the result.
+        Returns {"chapter", "cbz", "ok"} plus "error" when "ok" is False.
+        """
+        cbz_path = dest_series / f"{chapter_dir.name}.cbz"
+        try:
+            builder.chapter = chapter_num
+            builder.add_pages_from_folder(chapter_dir, cover_filename="000")
+            builder.save_xml(chapter_dir)
+            _pack_to_cbz(chapter_dir, cbz_path)
+            # Only reached when packing succeeded, so a failed chapter
+            # always keeps its source folder.
+            if self._delete_source:
+                shutil.rmtree(chapter_dir)
+            return {"chapter": chapter_num, "cbz": str(cbz_path), "ok": True}
+        except Exception as exc:
+            return {"chapter": chapter_num, "cbz": str(cbz_path),
+                    "ok": False, "error": str(exc)}
+
+
+# --------------------------------------------------------------------------
+# Usage example
+# --------------------------------------------------------------------------
+if __name__ == "__main__":
+    import os
+
+    # Settings come from the environment so that credentials are never
+    # committed to the repository.  The literals below are defaults for the
+    # non-secret values only.
+    SUWAYOMI_PATH = os.environ.get(
+        "SUWAYOMI_PATH", r"\\192.168.2.2\root\Temp\managdl\mangas")
+    KAVITA_PATH   = os.environ.get(
+        "KAVITA_PATH", r"\\192.168.2.2\root\ServerData\Kavita\test")
+    KAVITA_URL    = os.environ.get("KAVITA_URL", "http://192.168.2.2:5000")
+    # Deliberately no default: without a key SuwayomiMover skips the Kavita
+    # person sync.  NOTE(review): a key that was previously committed here
+    # should be treated as leaked and rotated.
+    KAVITA_KEY    = os.environ.get("KAVITA_API_KEY")
+
+    mover = SuwayomiMover(
+        SUWAYOMI_PATH,
+        KAVITA_PATH,
+        kavita_base_url=KAVITA_URL,
+        kavita_api_key=KAVITA_KEY,
+        delete_source=False
+    )
+
+    # Process a single series
+    result = mover.process_series("Yofukashi no Uta")
+    ok     = sum(1 for c in result["chapters"] if c["ok"])
+    failed = sum(1 for c in result["chapters"] if not c["ok"])
+    print(f"\nDone: {ok} ok, {failed} failed")
+    for c in result["chapters"]:
+        if not c["ok"]:
+            print(f"  Chapter {c['chapter']}: {c['error']}")
+
+    # Or process everything at once:
+    # results = mover.process_all()