manga-mover-and-metadata-co…/src/ComicInfoBuilder.py

"""
ComicInfoBuilder.py
===================

Generates a ComicInfo.xml (compatible with Kavita v0.9.0.2 / ComicInfo v2.1)
from series metadata provided by the MangaBaka API, enriched with data from
MangaDex (volume mapping), MangaBaka works (volume covers / ISBN / dates),
and MyAnimeList / Jikan (statistics and characters).

Dependencies
------------
    requests        (required  -> API calls / cover download)
    Pillow (PIL)    (optional  -> image dimensions for <Page> entries)

        pip install requests pillow

    The modules MangadexVolumeResolver, MangaBakaWorksResolver and
    MALResolver must reside in the same directory.

API address note
----------------
The official MangaBaka API is hosted at  https://api.mangabaka.dev/v1
(domain ".dev", not ".org").  Use the `api_base_url` constructor parameter
to override this if needed.

Data source notes
-----------------
* Volume assignment per chapter is resolved via MangaDex
  (MangaDexVolumeResolver).  Chapters missing from MangaDex are estimated
  from neighbouring volume boundaries and MangaBaka page-count data.
* Volume-specific covers, ISBNs and publication dates come from MangaBaka
  works (MangaBakaWorksResolver).  If no volume is assigned the series
  cover is used instead.
* MAL statistics and character names are fetched via the Jikan API
  (MALResolver).
"""

from __future__ import annotations

import difflib
import re
import xml.etree.ElementTree as ET
from pathlib import Path

import requests

from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver

try:
    from PIL import Image
    _HAS_PIL = True
except ImportError:
    _HAS_PIL = False


# --------------------------------------------------------------------------
# Constants
# --------------------------------------------------------------------------
# File suffixes (lowercase, including the leading dot) that are treated as
# images: used to filter the chapter folder and to accept a cover URL's suffix.
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}

# Maps the series `content_rating` value to the text written to <AgeRating>.
# Ratings missing from this map are emitted as "Unknown" by the builder.
_AGE_RATING_MAP = {
    "safe": "Everyone",
    "suggestive": "Teen",
    "erotica": "Mature 17+",
    "pornographic": "Adults Only 18+",
}

# URL templates for tracker sites; `{id}` is filled with the ID found in the
# series `source` map when the <Web> links are collected.
_TRACKER_URL_TEMPLATES = {
    # Keys are normalised via _normalise_key (alphanumeric only, lowercase),
    # so e.g. the source key "anime_news_network" matches "animenewsnetwork".
    "anilist":          "https://anilist.co/manga/{id}",
    "myanimelist":      "https://myanimelist.net/manga/{id}",
    "mal":              "https://myanimelist.net/manga/{id}",
    "mangaupdates":     "https://www.mangaupdates.com/series.html?id={id}",
    "mangadex":         "https://mangadex.org/title/{id}",
    "kitsu":            "https://kitsu.app/manga/{id}",
    "animenewsnetwork": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
    "ann":              "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
    "animeplanet":      "https://www.anime-planet.com/manga/{id}",
    "shikimori":        "https://shikimori.one/mangas/{id}",
}

# MangaDex relationship types that indicate child works (spin-offs, sequels …)
# If any of these appears as a key in a series' relations, the series' own
# title is used as its SeriesGroup.
_CHILD_RELATION_TYPES = {"side_story", "spin_off", "sequel", "prequel",
                         "doujinshi", "adapted_from", "alternative_story",
                         "alternative_version"}


# --------------------------------------------------------------------------
# Module helpers
# --------------------------------------------------------------------------
def _natural_key(name: str):
    return [int(p) if p.isdigit() else p.lower()
            for p in re.split(r"(\d+)", name)]


def _normalise_key(key) -> str:
    return re.sub(r"[^a-z0-9]", "", str(key).lower())


def _format_term(value: str) -> str:
    """Converts a MangaBaka genre slug ('slice_of_life') to display form."""
    return str(value).replace("_", " ").strip().title() if value else ""


# --------------------------------------------------------------------------
# Main class
# --------------------------------------------------------------------------
class ComicInfoBuilder:
    """
    Builds a ComicInfo.xml for a single manga chapter.

    Constructor arguments
    ---------------------
    manga_title : Title of the manga (used for the API search).
    chapter     : Chapter number (int, float, or str — e.g. "10.5").

    Setter behaviour
    ----------------
    * Changing `manga_title` discards both the cached API metadata
      AND the current results (pages / cover).
    * Changing `chapter`     discards only the current results;
      the API metadata is kept.
    """

    def __init__(self, manga_title, chapter, *,
                 api_base_url: str = "https://api.mangabaka.dev/v1",
                 language: str = "en",
                 request_timeout: int = 30,
                 session: "requests.Session | None" = None,
                 volume_resolver: "MangaDexVolumeResolver | None" = None,
                 works_resolver: "MangaBakaWorksResolver | None" = None,
                 mal_resolver: "MALResolver | None" = None):
        """
        Creates a builder for one chapter of one series.

        Arguments
        ---------
        manga_title     : Series title used for the MangaBaka search; must not
                          be empty or whitespace-only.
        chapter         : Chapter number (stored as given).
        api_base_url    : Base URL of the MangaBaka API; a trailing "/" is
                          removed.
        language        : Language code written to <LanguageISO> and used to
                          select the SeriesSort title.
        request_timeout : Timeout passed to every HTTP request made here and
                          to the resolvers created here.
        session         : Optional requests session to reuse.
        volume_resolver / works_resolver / mal_resolver :
                          Optional pre-built resolvers; defaults are created
                          when omitted.

        Raises
        ------
        ValueError if `manga_title` is empty.
        """
        if not manga_title or not str(manga_title).strip():
            raise ValueError("manga_title must not be empty.")

        self._manga_title = str(manga_title).strip()
        self._chapter = chapter

        self.api_base_url = api_base_url.rstrip("/")
        self.language = language
        self.request_timeout = request_timeout

        if session is None:
            # A fresh requests session already carries a default
            # "python-requests/x.y" User-Agent, so setdefault() would never
            # apply ours -- assign it explicitly on sessions we own.
            session = requests.Session()
            session.headers["User-Agent"] = "ComicInfoBuilder/1.0"
        else:
            # Caller-supplied session: respect whatever it already sends.
            session.headers.setdefault("User-Agent", "ComicInfoBuilder/1.0")
        self._session = session

        self._volume_resolver = (volume_resolver
                                 or MangaDexVolumeResolver(
                                     request_timeout=request_timeout,
                                     session=self._session))
        # Hand the normalised base URL (no trailing slash) to the works
        # resolver so both components address the same endpoint root.
        self._works_resolver = (works_resolver
                                or MangaBakaWorksResolver(
                                    api_base_url=self.api_base_url,
                                    request_timeout=request_timeout,
                                    session=self._session))
        # MALResolver is a Singleton — it manages its own session and caches.
        self._mal_resolver = mal_resolver or MALResolver(
            request_timeout=request_timeout)

        # Cached series metadata (None until first fetched).
        self._metadata: "dict | None" = None
        # Per-chapter results, reset by _clear_results().
        self._pages: list[dict] = []
        self._cover_path: "Path | None" = None
        self._suwayomi_data: dict = {}

    # ----- Repr -----------------------------------------------------------
    def __repr__(self) -> str:
        return (f"ComicInfoBuilder(manga_title={self._manga_title!r}, "
                f"chapter={self._chapter!r})")

    # ======================================================================
    # Properties / setters
    # ======================================================================
    @property
    def manga_title(self) -> str:
        """The series title used for the MangaBaka search (stored stripped)."""
        return self._manga_title

    @manga_title.setter
    def manga_title(self, value):
        value = str(value).strip()
        if not value:
            raise ValueError("manga_title must not be empty.")
        if value == self._manga_title:
            return
        self._manga_title = value
        self._metadata = None
        self._clear_results()

    @property
    def chapter(self):
        """The chapter number exactly as it was supplied (int, float or str)."""
        return self._chapter

    @chapter.setter
    def chapter(self, value):
        if value == self._chapter:
            return
        self._chapter = value
        self._clear_results()

    def _clear_results(self) -> None:
        self._pages = []
        self._cover_path = None
        self._suwayomi_data = {}

    # ======================================================================
    # Public XML functions
    # ======================================================================
    def to_xml_string(self, *, pretty: bool = True) -> str:
        """Returns the ComicInfo.xml as a string."""
        tree = self._build_tree()
        if pretty:
            try:
                ET.indent(tree, space="  ")
            except AttributeError:
                pass
        body = ET.tostring(tree.getroot(), encoding="unicode")
        return '<?xml version="1.0" encoding="UTF-8"?>\n' + body

    def save_xml(self, path) -> Path:
        """
        Writes the ComicInfo.xml to `path`.
        If a directory is passed, ComicInfo.xml is created inside it.
        Returns the actual file path used.
        """
        path = Path(path)
        if path.is_dir():
            path = path / "ComicInfo.xml"
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(self.to_xml_string(), encoding="utf-8")
        return path

    # ======================================================================
    # Optional: analyse an image folder
    # ======================================================================
    def add_pages_from_folder(self, folder, *,
                              download_cover: bool = True,
                              cover_filename: str = "cover") -> dict:
        """
        Analyses a chapter image folder and fills the <Page> list.

        Steps:
          1. Read supplementary fields from an existing ComicInfo.xml in the
             folder (if any).
          2. Optionally download the cover into the folder.
          3. Collect all image files except the cover, sorted naturally by
             file name.
          4. Record index, type, dimensions, byte size and a double-page flag
             (width > height) for the cover followed by the story images.

        Returns a dict with `page_count`, `cover` (path string or None) and
        `suwayomi_fields`.  Raises NotADirectoryError if `folder` is not a
        directory.
        """
        folder = Path(folder)
        if not folder.is_dir():
            raise NotADirectoryError(f"Folder not found: {folder}")

        self._suwayomi_data = self._read_existing_comicinfo(folder)

        self._cover_path = None
        if download_cover:
            self._cover_path = self._download_cover(folder, cover_filename)
        cover_real = self._cover_path.resolve() if self._cover_path else None

        def is_story_image(candidate: Path) -> bool:
            # Regular image files only; the downloaded cover is excluded so
            # it is not counted a second time as a story page.
            if not candidate.is_file():
                return False
            if candidate.suffix.lower() not in _IMAGE_EXTS:
                return False
            return not (cover_real and candidate.resolve() == cover_real)

        story_images = sorted(
            (item for item in folder.iterdir() if is_story_image(item)),
            key=lambda item: _natural_key(item.name))

        sequence: list[tuple[Path, str]] = (
            [(self._cover_path, "FrontCover")] if self._cover_path else [])
        sequence += [(image, "Story") for image in story_images]

        self._pages = []
        for position, (image_path, kind) in enumerate(sequence):
            width, height = self._image_dimensions(image_path)
            try:
                byte_size = image_path.stat().st_size
            except OSError:
                byte_size = None
            self._pages.append({
                "image": position,
                "type": kind,
                "width": width,
                "height": height,
                "size": byte_size,
                "double": bool(width and height and width > height),
            })

        cover_text = str(self._cover_path) if self._cover_path else None
        return {
            "page_count": len(self._pages),
            "cover": cover_text,
            "suwayomi_fields": dict(self._suwayomi_data),
        }

    # ======================================================================
    # Metadata retrieval (MangaBaka API)
    # ======================================================================
    def fetch_metadata(self, *, force: bool = False) -> dict:
        """Fetches (and caches) the series metadata. Pass force=True to refresh."""
        return self._get_metadata(force=force)

    def _get_metadata(self, *, force: bool = False) -> dict:
        if self._metadata is not None and not force:
            return self._metadata

        series = self._search_best_series(self._manga_title)
        if series is None:
            raise RuntimeError(
                f"No series found for '{self._manga_title}' on MangaBaka.")

        if series.get("state") == "merged" and series.get("merged_with"):
            series = self._fetch_series_by_id(series["merged_with"])

        self._metadata = series
        return series

    def _search_best_series(self, title: str):
        """Searches for `title` and returns the best matching series entry."""
        url = f"{self.api_base_url}/series/search"
        resp = self._session.get(
            url, params={"q": title, "page": 1, "limit": 1},
            timeout=self.request_timeout)
        resp.raise_for_status()
        data = resp.json().get("data") or []

        return data[0] # I trust the API's relevance sorting and just take the first result, if any

    def _fetch_series_by_id(self, series_id) -> dict:
        url = f"{self.api_base_url}/series/{series_id}"
        resp = self._session.get(url, timeout=self.request_timeout)
        resp.raise_for_status()
        data = resp.json().get("data")
        if not data:
            raise RuntimeError(f"Series with ID {series_id} not found.")
        return data

    # ======================================================================
    # XML construction
    # ======================================================================
    def _build_tree(self) -> "ET.ElementTree":
        """
        Assembles the complete <ComicInfo> element tree for the current
        chapter.

        Data sources used, in order of appearance:
          * series metadata (`_get_metadata`, cached),
          * fields read from an existing ComicInfo.xml (`_suwayomi_data`),
          * the resolved volume and its MangaBaka work (date, identifiers),
          * the MAL resolver (stats for the summary, character names),
          * the page list prepared by `add_pages_from_folder` (if any).

        Child elements are appended in statement order, so the order of the
        `add(...)` calls below is the element order of the document.
        Elements whose value is None or blank are omitted.

        NOTE(review): the resolver calls presumably perform network requests;
        only the volume/work/series-group lookups are wrapped in try/except
        by their helper methods — the MAL calls here are not.
        """
        md = self._get_metadata()
        sd = self._suwayomi_data

        volume = self._determine_volume()
        work = self._get_work_for_volume(md, volume) if volume else None

        root = ET.Element("ComicInfo", {
            "xmlns:xsd": "http://www.w3.org/2001/XMLSchema",
            "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
        })

        def add(tag: str, value) -> None:
            # Appends <tag>value</tag> to the root; None and blank values
            # (after str() + strip()) produce no element at all.
            if value is None:
                return
            text = str(value).strip()
            if text:
                ET.SubElement(root, tag).text = text

        # ----- Title / Series -----------------------------------------------
        add("Title", sd.get("Title") or f"Chapter {self._chapter}")
        add("Series", md.get("title") or self._manga_title)
        add("LocalizedSeries",
            md.get("native_title") or md.get("romanized_title"))
        add("SeriesSort", self._get_sort_title(md))
        add("Number", sd.get("Number") or self._chapter)
        add("Count", md.get("total_chapters"))
        add("Volume", volume)

        # ----- Description with MAL stats -----------------------------------
        # Prefer the MAL ID from MangaBaka's source map — avoids an extra
        # Jikan title-search request and is more reliable than fuzzy matching.
        mal_id = (self._mal_id_from_source(md)
                  or self._mal_resolver.find_mal_id(
                      md.get("title") or self._manga_title))
        mal_stats = self._mal_resolver.get_stats(mal_id)
        add("Summary", self._build_summary(md, sd, mal_stats))

        # ----- Release date -------------------------------------------------
        # Volume publication date takes precedence over the chapter date.
        vol_year, vol_month, vol_day = self._parse_work_date(work)
        add("Year",  vol_year  or sd.get("Year")  or md.get("year"))
        add("Month", vol_month or sd.get("Month"))
        add("Day",   vol_day   or sd.get("Day"))

        # ----- Contributors -------------------------------------------------
        add("Writer",     ", ".join(md.get("authors") or []))
        add("Penciller",  ", ".join(md.get("artists") or []))
        add("Translator", sd.get("Translator"))

        # ----- Publisher ----------------------------------------------------
        # English publisher wins; the original publisher is then demoted to
        # <Imprint> so both remain visible.
        eng_pub  = self._publishers_by_type(md, "English")
        orig_pub = self._publishers_by_type(md, "Original")
        add("Publisher", eng_pub or orig_pub)
        if eng_pub and orig_pub:
            add("Imprint", orig_pub)

        # ----- Genres / Tags ------------------------------------------------
        # Genres come back as lowercase snake_case ("slice_of_life"); convert
        # to display form ("Slice Of Life") so Kavita / readers show them
        # consistently with the (already-titled-cased) Tags field.
        add("Genre", ", ".join(_format_term(g) for g in (md.get("genres") or [])))
        add("Tags",  ", ".join(md.get("tags") or []))

        # ----- Characters from MAL ------------------------------------------
        characters = self._mal_resolver.get_characters(mal_id)
        add("Characters", ", ".join(characters) if characters else None)

        # ----- Web links ----------------------------------------------------
        add("Web", " ".join(self._collect_web_links(md, sd)))

        # ----- Miscellaneous ------------------------------------------------
        add("LanguageISO", self.language)
        add("Manga",       self._manga_flag(md))
        add("AgeRating",   _AGE_RATING_MAP.get(md.get("content_rating"), "Unknown"))

        if md.get("rating") is not None:
            try:
                # MangaBaka rating is on a 0..100 scale  ->  ComicInfo
                # CommunityRating uses 0..5.
                add("CommunityRating", round(float(md["rating"]) / 20, 1))
            except (TypeError, ValueError):
                # Non-numeric rating: leave the element out.
                pass

        # ----- ISBN (GTIN) from volume work ---------------------------------
        # Only the first identifier of the work is used.
        identifiers = (work or {}).get("identifiers") or []
        isbn = identifiers[0].get("id") if identifiers else None
        add("GTIN", isbn)

        # ----- SeriesGroup from related works -------------------------------
        add("SeriesGroup", self._determine_series_group(md))

        # ----- Alternate title notes ----------------------------------------
        add("Notes", self._build_notes(md))

        # ----- Pages --------------------------------------------------------
        # Emitted only when add_pages_from_folder() has populated the list.
        if self._pages:
            add("PageCount", len(self._pages))
            pages_el = ET.SubElement(root, "Pages")
            for page in self._pages:
                attrs = {"Image": str(page["image"]), "Type": page["type"]}
                if page.get("size") is not None:
                    attrs["ImageSize"] = str(page["size"])
                if page.get("width"):
                    attrs["ImageWidth"] = str(page["width"])
                if page.get("height"):
                    attrs["ImageHeight"] = str(page["height"])
                if page.get("double"):
                    attrs["DoublePage"] = "true"
                ET.SubElement(pages_el, "Page", attrs)

        return ET.ElementTree(root)

    # ======================================================================
    # Volume determination
    # ======================================================================
    def _determine_volume(self) -> "str | None":
        """
        Resolves the volume for the current chapter via MangaDex.
        Falls back to estimation when the chapter is absent from MangaDex.
        Returns None if no volume can be determined.
        """
        md = self._get_metadata()
        try:
            manga_id = self._mangadex_id_from_source(md)
            if not manga_id:
                manga_id = self._volume_resolver.find_manga_id(
                    md.get("native_title") or self._manga_title)
            if not manga_id:
                return None

            series_id = str(md.get("id") or "")
            page_counts = {}
            if series_id:
                page_counts = self._works_resolver.get_page_counts(series_id)

            return self._volume_resolver.volume_for_chapter(
                manga_id, self._chapter,
                volume_page_counts=page_counts or None)
        except Exception:
            return None

    def _get_work_for_volume(self, md: dict,
                             volume: "str | None") -> "dict | None":
        """Returns the MangaBaka work dict for the current volume, or None."""
        if not volume:
            return None
        series_id = str(md.get("id") or "")
        if not series_id:
            return None
        try:
            return self._works_resolver.get_work_for_volume(series_id, volume)
        except Exception:
            return None

    # ======================================================================
    # Cover download
    # ======================================================================
    def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None":
        """
        Fetches the cover image and stores it in `folder`.

        A volume-specific cover from MangaBaka works is preferred when the
        chapter's volume is known; otherwise the series cover is used.  The
        file is named `cover_filename` plus an extension guessed from the URL
        or the response's Content-Type.

        Returns the written path, or None when no cover URL is available or
        the download fails.
        """
        md = self._get_metadata()
        volume = self._determine_volume()

        # A volume cover can only be looked up with both volume and series ID.
        series_id = str(md.get("id") or "") if volume else ""
        cover_url: "str | None" = None
        if series_id:
            try:
                cover_url = self._works_resolver.get_cover_for_volume(
                    series_id, volume)
            except Exception:
                # Best effort: fall back to the series cover below.
                pass

        cover_url = cover_url or _pick_cover_url(md.get("cover"))
        if not cover_url:
            return None

        try:
            response = self._session.get(cover_url,
                                         timeout=self.request_timeout)
            response.raise_for_status()
        except requests.RequestException:
            return None

        suffix = _guess_extension(cover_url,
                                  response.headers.get("Content-Type", ""))
        destination = folder / f"{cover_filename}{suffix}"
        destination.write_bytes(response.content)
        return destination

    # ======================================================================
    # Series group
    # ======================================================================
    def _determine_series_group(self, md: dict) -> "str | None":
        """
        Determines the SeriesGroup value from MangaDex relationships.

        - If the series has a `main_story` parent -> use that title.
        - If the series itself has child works (spin-offs, sequels …)
          -> use the series own title so all related works are grouped.
        - Otherwise -> None (no SeriesGroup).
        """
        manga_id = self._mangadex_id_from_source(md)
        if not manga_id:
            return None
        try:
            relations = self._volume_resolver.get_series_relations(manga_id)
        except Exception:
            return None

        if not relations:
            return None

        main_stories = relations.get("main_story") or []
        if main_stories:
            return main_stories[0]

        if any(t in relations for t in _CHILD_RELATION_TYPES):
            return md.get("title") or self._manga_title

        return None

    # ======================================================================
    # Title helpers
    # ======================================================================
    def _get_sort_title(self, md: dict) -> "str | None":
        """
        Returns the SeriesSort title in the configured language.
        Looks for an alt-title with matching language code first;
        falls back to the primary title.
        """
        lang = self.language.lower()
        alt_titles = self._collect_alt_titles(md)
        if lang in alt_titles:
            return alt_titles[lang]
        # For 'en' the primary MangaBaka title is usually already English
        return md.get("title") or self._manga_title

    def _collect_alt_titles(self, md: dict) -> "dict[str, str]":
        """
        Returns {lang_code: title} for EN, DE, JP kanji and JP romaji.

        MangaBaka stores alt-titles in the `titles` list, where each entry is
        a dict {language, title, traits, is_primary, note}.
        Important caveats observed against the real API:
          * `romanized_title` is the romanization of whatever the series'
            native script is — for a Japanese manga with a Korean licence it
            can hold the Korean romanization, NOT the Japanese romaji.
            Always prefer `titles[language="ja-Latn"]` for romaji instead.
          * `native_title` holds the kanji form for Japanese manga, but
            `titles[language="ja", traits contains "native"]` is more
            reliable when present.
          * Each language can have several entries; primary + official
            traits win over generic ones.
        """
        titles = md.get("titles") or md.get("alt_titles") or []

        def pick(language_codes: tuple, prefer_trait: "str | None" = None
                 ) -> "str | None":
            """Picks the best title entry for any of the given language codes."""
            if not isinstance(titles, list):
                return None
            best_score = -1
            best_title: "str | None" = None
            for entry in titles:
                if not isinstance(entry, dict):
                    continue
                lang = (entry.get("language") or entry.get("lang") or "").lower()
                if lang not in language_codes:
                    continue
                title = entry.get("title")
                if not title:
                    continue
                traits = entry.get("traits") or []
                score = 0
                if prefer_trait and prefer_trait in traits:
                    score += 4
                if "official" in traits:
                    score += 2
                if entry.get("is_primary"):
                    score += 1
                if score > best_score:
                    best_score, best_title = score, title
            return best_title

        result: dict[str, str] = {}

        # JP kanji (prefer entry with "native" trait, fall back to native_title)
        kanji = pick(("ja",), prefer_trait="native") or md.get("native_title")
        if kanji:
            result["jp"] = kanji

        # JP romaji — explicitly from "ja-Latn" entries.  Do NOT fall back to
        # `romanized_title` blindly; that field can hold a non-Japanese
        # romanization (e.g. Korean) for the same series.
        romaji = pick(("ja-latn", "ja-romaji"))
        if not romaji:
            # Heuristic fallback only when romanized_title looks Latin
            rt = md.get("romanized_title") or ""
            if rt and all(ord(c) < 128 for c in rt):
                romaji = rt
        if romaji:
            result["romaji"] = romaji

        # English (prefer official + primary)
        en = pick(("en",))
        if not en:
            en = md.get("title") if md.get("title") else None
        if en:
            result["en"] = en

        # German
        de = pick(("de",))
        if de:
            result["de"] = de

        return result

    # ======================================================================
    # Summary / notes
    # ======================================================================
    def _build_summary(self, md: dict, sd: dict,
                       mal_stats: "dict | None") -> "str | None":
        """
        Builds the <Summary> content.
        Appends a MAL statistics table (if available) after the description.
        """
        desc = (md.get("description") or sd.get("Summary") or "").strip()

        if not mal_stats:
            return desc or None

        as_of   = mal_stats.get("as_of", "")
        score   = mal_stats.get("score")
        rank    = mal_stats.get("rank")
        scored  = mal_stats.get("scored_by")
        pop     = mal_stats.get("popularity")
        members = mal_stats.get("members")
        favs    = mal_stats.get("favorites")
        url     = mal_stats.get("url", "")

        rows: list[str] = []
        if score   is not None: rows.append(f"Score\t{score}")
        if rank    is not None: rows.append(f"Ranked\t#{rank}")
        if scored  is not None: rows.append(f"Scored by\t{scored:,} users")
        if pop     is not None: rows.append(f"Popularity\t#{pop}")
        if members is not None: rows.append(f"Members\t{members:,}")
        if favs    is not None: rows.append(f"Favorites\t{favs:,}")

        if not rows:
            return desc or None

        table = f"[MyAnimeList]({url}) stats as of {as_of}:\n" + "\n".join(rows)
        return f"{desc}\n\n{table}" if desc else table

    def _build_notes(self, md: dict) -> "str | None":
        """
        Builds the <Notes> field containing alternate titles and the
        MangaBaka metadata source URL.
        """
        parts: list[str] = []

        alt = self._collect_alt_titles(md)
        if alt:
            label_map = {"en": "EN", "de": "DE",
                         "romaji": "Romaji", "jp": "JP (kanji)"}
            lines = []
            for code in ("en", "de", "romaji", "jp"):
                if code in alt:
                    lines.append(f"• {label_map[code]}: {alt[code]}")
            if lines:
                parts.append("Alternate titles:\n" + "\n".join(lines))

        series_id = str(md.get("id") or "")
        if series_id:
            parts.append(f"Metadata source: https://mangabaka.org/{series_id}")

        return "\n\n".join(parts) if parts else None

    # ======================================================================
    # Static helpers
    # ======================================================================
    @staticmethod
    def _parse_work_date(work: "dict | None") -> tuple:
        """Returns (year, month, day) strings from a MangaBaka work dict."""
        if not work:
            return (None, None, None)
        raw = (work.get("release_date") or work.get("publication_date") or "")
        if not raw:
            return (None, None, None)
        parts = str(raw).split("-")
        year  = parts[0] if len(parts) > 0 and parts[0] else None
        month = parts[1] if len(parts) > 1 and parts[1] else None
        day   = parts[2] if len(parts) > 2 and parts[2] else None
        return (year, month, day)

    @staticmethod
    def _mangadex_id_from_source(md: dict) -> "str | None":
        for raw_key, info in (md.get("source") or {}).items():
            if _normalise_key(raw_key) in ("mangadex", "mangadexorg", "md"):
                if isinstance(info, dict) and info.get("id") is not None:
                    return str(info["id"])
        return None

    @staticmethod
    def _mal_id_from_source(md: dict) -> "int | None":
        for raw_key, info in (md.get("source") or {}).items():
            if _normalise_key(raw_key) in ("myanimelist", "mal"):
                if isinstance(info, dict):
                    mid = info.get("id")
                    if mid is not None:
                        try:
                            return int(mid)
                        except (TypeError, ValueError):
                            pass
        return None

    @staticmethod
    def _publishers_by_type(md: dict, ptype: str) -> "str | None":
        names = [p.get("name") for p in (md.get("publishers") or [])
                 if p.get("type") == ptype and p.get("name")]
        return ", ".join(names) if names else None

    @staticmethod
    def _manga_flag(md: dict) -> str:
        mtype = (md.get("type") or "").lower()
        if mtype == "manga":
            return "YesAndRightToLeft"
        if mtype in ("manhwa", "manhua", "oel"):
            return "Yes"
        return "Unknown"

    def _collect_web_links(self, md: dict, sd: dict) -> list[str]:
        links: list[str] = []

        links.extend(l for l in (md.get("links") or []) if l)

        for raw_key, info in (md.get("source") or {}).items():
            template = _TRACKER_URL_TEMPLATES.get(_normalise_key(raw_key))
            if not template or not isinstance(info, dict):
                continue
            source_id = info.get("id")
            if source_id is not None:
                links.append(template.format(id=source_id))

        if sd.get("Web"):
            links.extend(str(sd["Web"]).split())

        seen: set[str] = set()
        unique: list[str] = []
        for link in links:
            if link not in seen:
                seen.add(link)
                unique.append(link)
        return unique

    @staticmethod
    def _read_existing_comicinfo(folder: Path) -> dict:
        xml_path = folder / "ComicInfo.xml"
        if not xml_path.is_file():
            return {}
        try:
            root = ET.parse(xml_path).getroot()
        except ET.ParseError:
            return {}

        wanted = {"Title", "Series", "Number", "Summary", "Writer",
                  "Penciller", "Translator", "Genre", "Web",
                  "Year", "Month", "Day"}
        data: dict = {}
        for child in root:
            tag = child.tag.split("}")[-1]
            if tag in wanted and child.text and child.text.strip():
                data[tag] = child.text.strip()
        return data

    @staticmethod
    def _image_dimensions(path: Path):
        """
        Returns the image's (width, height) as reported by Pillow, or
        (None, None) when Pillow is not installed or the file cannot be
        opened as an image.
        """
        if _HAS_PIL:
            try:
                with Image.open(path) as handle:
                    return handle.size
            except Exception:
                # Unreadable or unsupported file: report unknown dimensions.
                pass
        return (None, None)


# --------------------------------------------------------------------------
# Module-level helpers (shared with MangaBakaWorksResolver logic)
# --------------------------------------------------------------------------
def _pick_cover_url(cover) -> "str | None":
    """
    Selects the best cover URL from a MangaBaka cover object.

    Real API shape (from `GET /v1/series/{id}` and `/works`):
        {
          "raw":  {"url": "...", "size": ..., "height": ..., "width": ...},
          "x150": {"x1": "...", "x2": "...", "x3": "..."},
          "x250": {"x1": "...", "x2": "...", "x3": "..."},
          "x350": {"x1": "...", "x2": "...", "x3": "..."}
        }

    Order of preference: raw original  >  x350@x3  >  x250@x3  >  x150@x3
    (falling through to lower densities and sizes as needed).
    """
    if not cover:
        return None
    if isinstance(cover, str):
        return cover
    if not isinstance(cover, dict):
        return None

    # 1) Preferred: the unscaled "raw" image
    raw = cover.get("raw")
    if isinstance(raw, dict):
        url = raw.get("url")
        if isinstance(url, str) and url:
            return url
    elif isinstance(raw, str) and raw:
        return raw

    # 2) Fallback: size-keyed variants, largest first, highest density first
    for size_key in ("x350", "x250", "x150"):
        variant = cover.get(size_key)
        if isinstance(variant, dict):
            for density in ("x3", "x2", "x1"):
                url = variant.get(density)
                if isinstance(url, str) and url:
                    return url
        elif isinstance(variant, str) and variant:
            return variant

    # 3) Last-ditch fallback: any http URL anywhere in the structure
    for val in cover.values():
        if isinstance(val, str) and val.startswith("http"):
            return val
        if isinstance(val, dict):
            for sub in val.values():
                if isinstance(sub, str) and sub.startswith("http"):
                    return sub
    return None


def _guess_extension(url: str, content_type: str) -> str:
    """
    Guesses the file extension (with leading dot) for a downloaded image.

    The suffix of the URL path wins when it is a known image extension;
    query string and fragment are ignored for that check.  Otherwise the
    Content-Type header decides.  AVIF and BMP are recognised as well —
    both are listed in `_IMAGE_EXTS`, but used to be stored as ".jpg".
    Unknown or missing types default to ".jpg".
    """
    path_part = url.split("?", 1)[0].split("#", 1)[0]
    url_ext = Path(path_part).suffix.lower()
    if url_ext in _IMAGE_EXTS:
        return url_ext

    ct = (content_type or "").lower()
    # Substring match keeps parameterised or vendor-prefixed types working
    # (e.g. "image/png; charset=binary", "image/x-ms-bmp").
    by_content_type = (
        ("png", ".png"),
        ("webp", ".webp"),
        ("gif", ".gif"),
        ("avif", ".avif"),
        ("bmp", ".bmp"),
    )
    for token, ext in by_content_type:
        if token in ct:
            return ext
    return ".jpg"


# --------------------------------------------------------------------------
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
    # Example run: build the ComicInfo.xml for one downloaded chapter folder.
    builder = ComicInfoBuilder("Yofukashi no Uta", 66)

    chapter_dir = (
        r"\\192.168.2.2\root\Temp\managdl\mangas\ComicK Fanmade (EN)"
        r"\Yofukashi no Uta\Official_Chapter 66")

    builder.add_pages_from_folder(chapter_dir)
    builder.save_xml(chapter_dir + r"\ComicInfo.xml")

    # Setter behaviour:
    # builder.chapter = 2          # only results discarded, metadata is kept
    # builder.manga_title = "X"    # metadata + results discarded