""" comicinfo_builder.py ==================== Generates a ComicInfo.xml (compatible with Kavita v0.9.0.2 / ComicInfo v2.1) from series metadata provided by the MangaBaka API, enriched with data from MangaDex (volume mapping), MangaBaka works (volume covers / ISBN / dates), and MyAnimeList / Jikan (statistics and characters). Dependencies ------------ requests (required -> API calls / cover download) Pillow (PIL) (optional -> image dimensions for entries) pip install requests pillow The modules MangadexVolumeResolver, MangaBakaWorksResolver and MALResolver must reside in the same directory. API address note ---------------- The official MangaBaka API is hosted at https://api.mangabaka.dev/v1 (domain ".dev", not ".org"). Use the `api_base_url` constructor parameter to override this if needed. Data source notes ----------------- * Volume assignment per chapter is resolved via MangaDex (MangaDexVolumeResolver). Chapters missing from MangaDex are estimated from neighbouring volume boundaries and MangaBaka page-count data. * Volume-specific covers, ISBNs and publication dates come from MangaBaka works (MangaBakaWorksResolver). If no volume is assigned the series cover is used instead. * MAL statistics and character names are fetched via the Jikan API (MALResolver). """ from __future__ import annotations import difflib import re import xml.etree.ElementTree as ET from pathlib import Path import requests from MangadexVolumeResolver import MangaDexVolumeResolver from MangaBakaWorksResolver import MangaBakaWorksResolver from MALResolver import MALResolver try: from PIL import Image _HAS_PIL = True except ImportError: _HAS_PIL = False # -------------------------------------------------------------------------- # Constants # -------------------------------------------------------------------------- _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"} _AGE_RATING_MAP = { "safe": "Everyone", "suggestive": "Teen", "erotica": "Mature 17+", "pornographic": "Adults Only 18+", } _TRACKER_URL_TEMPLATES = { # Keys are normalised via _normalise_key (alphanumeric only, lowercase), # so e.g. the source key "anime_news_network" matches "animenewsnetwork". "anilist": "https://anilist.co/manga/{id}", "myanimelist": "https://myanimelist.net/manga/{id}", "mal": "https://myanimelist.net/manga/{id}", "mangaupdates": "https://www.mangaupdates.com/series.html?id={id}", "mangadex": "https://mangadex.org/title/{id}", "kitsu": "https://kitsu.app/manga/{id}", "animenewsnetwork": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}", "ann": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}", "animeplanet": "https://www.anime-planet.com/manga/{id}", "shikimori": "https://shikimori.one/mangas/{id}", } # MangaDex relationship types that indicate child works (spin-offs, sequels …) _CHILD_RELATION_TYPES = {"side_story", "spin_off", "sequel", "prequel", "doujinshi", "adapted_from", "alternative_story", "alternative_version"} # -------------------------------------------------------------------------- # Module helpers # -------------------------------------------------------------------------- def _natural_key(name: str): return [int(p) if p.isdigit() else p.lower() for p in re.split(r"(\d+)", name)] def _normalise_key(key) -> str: return re.sub(r"[^a-z0-9]", "", str(key).lower()) def _format_term(value: str) -> str: """Converts a MangaBaka genre slug ('slice_of_life') to display form.""" return str(value).replace("_", " ").strip().title() if value else "" # -------------------------------------------------------------------------- # Main class # -------------------------------------------------------------------------- class ComicInfoBuilder: """ Builds a ComicInfo.xml for a single manga chapter. Constructor arguments --------------------- manga_title : Title of the manga (used for the API search). chapter : Chapter number (int, float, or str — e.g. "10.5"). Setter behaviour ---------------- * Changing `manga_title` discards both the cached API metadata AND the current results (pages / cover). * Changing `chapter` discards only the current results; the API metadata is kept. """ def __init__(self, manga_title, chapter, *, api_base_url: str = "https://api.mangabaka.dev/v1", language: str = "en", request_timeout: int = 30, session: "requests.Session | None" = None, volume_resolver: "MangaDexVolumeResolver | None" = None, works_resolver: "MangaBakaWorksResolver | None" = None, mal_resolver: "MALResolver | None" = None): if not manga_title or not str(manga_title).strip(): raise ValueError("manga_title must not be empty.") self._manga_title = str(manga_title).strip() self._chapter = chapter self.api_base_url = api_base_url.rstrip("/") self.language = language self.request_timeout = request_timeout self._session = session or requests.Session() self._session.headers.setdefault("User-Agent", "ComicInfoBuilder/1.0") self._volume_resolver = (volume_resolver or MangaDexVolumeResolver( request_timeout=request_timeout, session=self._session)) self._works_resolver = (works_resolver or MangaBakaWorksResolver( api_base_url=api_base_url, request_timeout=request_timeout, session=self._session)) # MALResolver is a Singleton — it manages its own session and caches. self._mal_resolver = mal_resolver or MALResolver( request_timeout=request_timeout) self._metadata: "dict | None" = None self._pages: list[dict] = [] self._cover_path: "Path | None" = None self._suwayomi_data: dict = {} # ----- Repr ----------------------------------------------------------- def __repr__(self) -> str: return (f"ComicInfoBuilder(manga_title={self._manga_title!r}, " f"chapter={self._chapter!r})") # ====================================================================== # Properties / setters # ====================================================================== @property def manga_title(self) -> str: return self._manga_title @manga_title.setter def manga_title(self, value): value = str(value).strip() if not value: raise ValueError("manga_title must not be empty.") if value == self._manga_title: return self._manga_title = value self._metadata = None self._clear_results() @property def chapter(self): return self._chapter @chapter.setter def chapter(self, value): if value == self._chapter: return self._chapter = value self._clear_results() def _clear_results(self) -> None: self._pages = [] self._cover_path = None self._suwayomi_data = {} # ====================================================================== # Public XML functions # ====================================================================== def to_xml_string(self, *, pretty: bool = True) -> str: """Returns the ComicInfo.xml as a string.""" tree = self._build_tree() if pretty: try: ET.indent(tree, space=" ") except AttributeError: pass body = ET.tostring(tree.getroot(), encoding="unicode") return '\n' + body def save_xml(self, path) -> Path: """ Writes the ComicInfo.xml to `path`. If a directory is passed, ComicInfo.xml is created inside it. Returns the actual file path used. """ path = Path(path) if path.is_dir(): path = path / "ComicInfo.xml" path.parent.mkdir(parents=True, exist_ok=True) path.write_text(self.to_xml_string(), encoding="utf-8") return path # ====================================================================== # Optional: analyse an image folder # ====================================================================== def add_pages_from_folder(self, folder, *, download_cover: bool = True, cover_filename: str = "cover") -> dict: """ Scans a chapter image folder and populates entries. Reads an existing Suwayomi ComicInfo.xml for supplementary fields. Downloads the cover (volume-specific if a volume is found, otherwise the series default cover). """ folder = Path(folder) if not folder.is_dir(): raise NotADirectoryError(f"Folder not found: {folder}") self._suwayomi_data = self._read_existing_comicinfo(folder) self._cover_path = None if download_cover: self._cover_path = self._download_cover(folder, cover_filename) cover_resolved = self._cover_path.resolve() if self._cover_path else None story_images: list[Path] = [] for entry in folder.iterdir(): if not entry.is_file(): continue if entry.suffix.lower() not in _IMAGE_EXTS: continue if cover_resolved and entry.resolve() == cover_resolved: continue story_images.append(entry) story_images.sort(key=lambda p: _natural_key(p.name)) ordered: list[tuple[Path, str]] = [] if self._cover_path: ordered.append((self._cover_path, "FrontCover")) ordered.extend((img, "Story") for img in story_images) self._pages = [] for index, (img_path, page_type) in enumerate(ordered): width, height = self._image_dimensions(img_path) try: size = img_path.stat().st_size except OSError: size = None self._pages.append({ "image": index, "type": page_type, "width": width, "height": height, "size": size, "double": bool(width and height and width > height), }) return { "page_count": len(self._pages), "cover": str(self._cover_path) if self._cover_path else None, "suwayomi_fields": dict(self._suwayomi_data), } # ====================================================================== # Metadata retrieval (MangaBaka API) # ====================================================================== def fetch_metadata(self, *, force: bool = False) -> dict: """Fetches (and caches) the series metadata. Pass force=True to refresh.""" return self._get_metadata(force=force) def _get_metadata(self, *, force: bool = False) -> dict: if self._metadata is not None and not force: return self._metadata series = self._search_best_series(self._manga_title) if series is None: raise RuntimeError( f"No series found for '{self._manga_title}' on MangaBaka.") if series.get("state") == "merged" and series.get("merged_with"): series = self._fetch_series_by_id(series["merged_with"]) self._metadata = series return series def _search_best_series(self, title: str): """Searches for `title` and returns the best matching series entry.""" url = f"{self.api_base_url}/series/search" resp = self._session.get( url, params={"q": title, "page": 1, "limit": 1}, timeout=self.request_timeout) resp.raise_for_status() data = resp.json().get("data") or [] return data[0] # I trust the API's relevance sorting and just take the first result, if any def _fetch_series_by_id(self, series_id) -> dict: url = f"{self.api_base_url}/series/{series_id}" resp = self._session.get(url, timeout=self.request_timeout) resp.raise_for_status() data = resp.json().get("data") if not data: raise RuntimeError(f"Series with ID {series_id} not found.") return data # ====================================================================== # XML construction # ====================================================================== def _build_tree(self) -> "ET.ElementTree": md = self._get_metadata() sd = self._suwayomi_data volume = self._determine_volume() work = self._get_work_for_volume(md, volume) if volume else None root = ET.Element("ComicInfo", { "xmlns:xsd": "http://www.w3.org/2001/XMLSchema", "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", }) def add(tag: str, value) -> None: if value is None: return text = str(value).strip() if text: ET.SubElement(root, tag).text = text # ----- Title / Series ----------------------------------------------- add("Title", sd.get("Title") or f"Chapter {self._chapter}") add("Series", md.get("title") or self._manga_title) add("LocalizedSeries", md.get("native_title") or md.get("romanized_title")) add("SeriesSort", self._get_sort_title(md)) add("Number", sd.get("Number") or self._chapter) add("Count", md.get("total_chapters")) add("Volume", volume) # ----- Description with MAL stats ----------------------------------- # Prefer the MAL ID from MangaBaka's source map — avoids an extra # Jikan title-search request and is more reliable than fuzzy matching. mal_id = (self._mal_id_from_source(md) or self._mal_resolver.find_mal_id( md.get("title") or self._manga_title)) mal_stats = self._mal_resolver.get_stats(mal_id) add("Summary", self._build_summary(md, sd, mal_stats)) # ----- Release date ------------------------------------------------- # Volume publication date takes precedence over the chapter date. vol_year, vol_month, vol_day = self._parse_work_date(work) add("Year", vol_year or sd.get("Year") or md.get("year")) add("Month", vol_month or sd.get("Month")) add("Day", vol_day or sd.get("Day")) # ----- Contributors ------------------------------------------------- add("Writer", ", ".join(md.get("authors") or [])) add("Penciller", ", ".join(md.get("artists") or [])) add("Translator", sd.get("Translator")) # ----- Publisher ---------------------------------------------------- eng_pub = self._publishers_by_type(md, "English") orig_pub = self._publishers_by_type(md, "Original") add("Publisher", eng_pub or orig_pub) if eng_pub and orig_pub: add("Imprint", orig_pub) # ----- Genres / Tags ------------------------------------------------ # Genres come back as lowercase snake_case ("slice_of_life"); convert # to display form ("Slice Of Life") so Kavita / readers show them # consistently with the (already-titled-cased) Tags field. add("Genre", ", ".join(_format_term(g) for g in (md.get("genres") or []))) add("Tags", ", ".join(md.get("tags") or [])) # ----- Characters from MAL ------------------------------------------ characters = self._mal_resolver.get_characters(mal_id) add("Characters", ", ".join(characters) if characters else None) # ----- Web links ---------------------------------------------------- add("Web", " ".join(self._collect_web_links(md, sd))) # ----- Miscellaneous ------------------------------------------------ add("LanguageISO", self.language) add("Manga", self._manga_flag(md)) add("AgeRating", _AGE_RATING_MAP.get(md.get("content_rating"), "Unknown")) if md.get("rating") is not None: try: # MangaBaka rating is on a 0..100 scale -> ComicInfo # CommunityRating uses 0..5. add("CommunityRating", round(float(md["rating"]) / 20, 1)) except (TypeError, ValueError): pass # ----- ISBN (GTIN) from volume work --------------------------------- identifiers = (work or {}).get("identifiers") or [] isbn = identifiers[0].get("id") if identifiers else None add("GTIN", isbn) # ----- SeriesGroup from related works ------------------------------- add("SeriesGroup", self._determine_series_group(md)) # ----- Alternate title notes ---------------------------------------- add("Notes", self._build_notes(md)) # ----- Pages -------------------------------------------------------- if self._pages: add("PageCount", len(self._pages)) pages_el = ET.SubElement(root, "Pages") for page in self._pages: attrs = {"Image": str(page["image"]), "Type": page["type"]} if page.get("size") is not None: attrs["ImageSize"] = str(page["size"]) if page.get("width"): attrs["ImageWidth"] = str(page["width"]) if page.get("height"): attrs["ImageHeight"] = str(page["height"]) if page.get("double"): attrs["DoublePage"] = "true" ET.SubElement(pages_el, "Page", attrs) return ET.ElementTree(root) # ====================================================================== # Volume determination # ====================================================================== def _determine_volume(self) -> "str | None": """ Resolves the volume for the current chapter via MangaDex. Falls back to estimation when the chapter is absent from MangaDex. Returns None if no volume can be determined. """ md = self._get_metadata() try: manga_id = self._mangadex_id_from_source(md) if not manga_id: manga_id = self._volume_resolver.find_manga_id( md.get("native_title") or self._manga_title) if not manga_id: return None series_id = str(md.get("id") or "") page_counts = {} if series_id: page_counts = self._works_resolver.get_page_counts(series_id) return self._volume_resolver.volume_for_chapter( manga_id, self._chapter, volume_page_counts=page_counts or None) except Exception: return None def _get_work_for_volume(self, md: dict, volume: "str | None") -> "dict | None": """Returns the MangaBaka work dict for the current volume, or None.""" if not volume: return None series_id = str(md.get("id") or "") if not series_id: return None try: return self._works_resolver.get_work_for_volume(series_id, volume) except Exception: return None # ====================================================================== # Cover download # ====================================================================== def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None": """ Downloads the cover for the current chapter/volume. If a volume is known and a volume-specific cover exists in MangaBaka works, that cover is used. Otherwise the series default cover is downloaded (raw variant preferred). """ md = self._get_metadata() volume = self._determine_volume() cover_url: "str | None" = None if volume: series_id = str(md.get("id") or "") if series_id: try: cover_url = self._works_resolver.get_cover_for_volume( series_id, volume) except Exception: pass if not cover_url: cover_url = _pick_cover_url(md.get("cover")) if not cover_url: return None try: resp = self._session.get(cover_url, timeout=self.request_timeout) resp.raise_for_status() except requests.RequestException: return None ext = _guess_extension(cover_url, resp.headers.get("Content-Type", "")) target = folder / f"{cover_filename}{ext}" target.write_bytes(resp.content) return target # ====================================================================== # Series group # ====================================================================== def _determine_series_group(self, md: dict) -> "str | None": """ Determines the SeriesGroup value from MangaDex relationships. - If the series has a `main_story` parent -> use that title. - If the series itself has child works (spin-offs, sequels …) -> use the series own title so all related works are grouped. - Otherwise -> None (no SeriesGroup). """ manga_id = self._mangadex_id_from_source(md) if not manga_id: return None try: relations = self._volume_resolver.get_series_relations(manga_id) except Exception: return None if not relations: return None main_stories = relations.get("main_story") or [] if main_stories: return main_stories[0] if any(t in relations for t in _CHILD_RELATION_TYPES): return md.get("title") or self._manga_title return None # ====================================================================== # Title helpers # ====================================================================== def _get_sort_title(self, md: dict) -> "str | None": """ Returns the SeriesSort title in the configured language. Looks for an alt-title with matching language code first; falls back to the primary title. """ lang = self.language.lower() alt_titles = self._collect_alt_titles(md) if lang in alt_titles: return alt_titles[lang] # For 'en' the primary MangaBaka title is usually already English return md.get("title") or self._manga_title def _collect_alt_titles(self, md: dict) -> "dict[str, str]": """ Returns {lang_code: title} for EN, DE, JP kanji and JP romaji. MangaBaka stores alt-titles in the `titles` list, where each entry is a dict {language, title, traits, is_primary, note}. Important caveats observed against the real API: * `romanized_title` is the romanization of whatever the series' native script is — for a Japanese manga with a Korean licence it can hold the Korean romanization, NOT the Japanese romaji. Always prefer `titles[language="ja-Latn"]` for romaji instead. * `native_title` holds the kanji form for Japanese manga, but `titles[language="ja", traits contains "native"]` is more reliable when present. * Each language can have several entries; primary + official traits win over generic ones. """ titles = md.get("titles") or md.get("alt_titles") or [] def pick(language_codes: tuple, prefer_trait: "str | None" = None ) -> "str | None": """Picks the best title entry for any of the given language codes.""" if not isinstance(titles, list): return None best_score = -1 best_title: "str | None" = None for entry in titles: if not isinstance(entry, dict): continue lang = (entry.get("language") or entry.get("lang") or "").lower() if lang not in language_codes: continue title = entry.get("title") if not title: continue traits = entry.get("traits") or [] score = 0 if prefer_trait and prefer_trait in traits: score += 4 if "official" in traits: score += 2 if entry.get("is_primary"): score += 1 if score > best_score: best_score, best_title = score, title return best_title result: dict[str, str] = {} # JP kanji (prefer entry with "native" trait, fall back to native_title) kanji = pick(("ja",), prefer_trait="native") or md.get("native_title") if kanji: result["jp"] = kanji # JP romaji — explicitly from "ja-Latn" entries. Do NOT fall back to # `romanized_title` blindly; that field can hold a non-Japanese # romanization (e.g. Korean) for the same series. romaji = pick(("ja-latn", "ja-romaji")) if not romaji: # Heuristic fallback only when romanized_title looks Latin rt = md.get("romanized_title") or "" if rt and all(ord(c) < 128 for c in rt): romaji = rt if romaji: result["romaji"] = romaji # English (prefer official + primary) en = pick(("en",)) if not en: en = md.get("title") if md.get("title") else None if en: result["en"] = en # German de = pick(("de",)) if de: result["de"] = de return result # ====================================================================== # Summary / notes # ====================================================================== def _build_summary(self, md: dict, sd: dict, mal_stats: "dict | None") -> "str | None": """ Builds the content. Appends a MAL statistics table (if available) after the description. """ desc = (md.get("description") or sd.get("Summary") or "").strip() if not mal_stats: return desc or None as_of = mal_stats.get("as_of", "") score = mal_stats.get("score") rank = mal_stats.get("rank") scored = mal_stats.get("scored_by") pop = mal_stats.get("popularity") members = mal_stats.get("members") favs = mal_stats.get("favorites") url = mal_stats.get("url", "") rows: list[str] = [] if score is not None: rows.append(f"Score\t{score}") if rank is not None: rows.append(f"Ranked\t#{rank}") if scored is not None: rows.append(f"Scored by\t{scored:,} users") if pop is not None: rows.append(f"Popularity\t#{pop}") if members is not None: rows.append(f"Members\t{members:,}") if favs is not None: rows.append(f"Favorites\t{favs:,}") if not rows: return desc or None table = f"[MyAnimeList]({url}) stats as of {as_of}:\n" + "\n".join(rows) return f"{desc}\n\n{table}" if desc else table def _build_notes(self, md: dict) -> "str | None": """ Builds the field containing alternate titles and the MangaBaka metadata source URL. """ parts: list[str] = [] alt = self._collect_alt_titles(md) if alt: label_map = {"en": "EN", "de": "DE", "romaji": "Romaji", "jp": "JP (kanji)"} lines = [] for code in ("en", "de", "romaji", "jp"): if code in alt: lines.append(f"• {label_map[code]}: {alt[code]}") if lines: parts.append("Alternate titles:\n" + "\n".join(lines)) series_id = str(md.get("id") or "") if series_id: parts.append(f"Metadata source: https://mangabaka.org/{series_id}") return "\n\n".join(parts) if parts else None # ====================================================================== # Static helpers # ====================================================================== @staticmethod def _parse_work_date(work: "dict | None") -> tuple: """Returns (year, month, day) strings from a MangaBaka work dict.""" if not work: return (None, None, None) raw = (work.get("release_date") or work.get("publication_date") or "") if not raw: return (None, None, None) parts = str(raw).split("-") year = parts[0] if len(parts) > 0 and parts[0] else None month = parts[1] if len(parts) > 1 and parts[1] else None day = parts[2] if len(parts) > 2 and parts[2] else None return (year, month, day) @staticmethod def _mangadex_id_from_source(md: dict) -> "str | None": for raw_key, info in (md.get("source") or {}).items(): if _normalise_key(raw_key) in ("mangadex", "mangadexorg", "md"): if isinstance(info, dict) and info.get("id") is not None: return str(info["id"]) return None @staticmethod def _mal_id_from_source(md: dict) -> "int | None": for raw_key, info in (md.get("source") or {}).items(): if _normalise_key(raw_key) in ("myanimelist", "mal"): if isinstance(info, dict): mid = info.get("id") if mid is not None: try: return int(mid) except (TypeError, ValueError): pass return None @staticmethod def _publishers_by_type(md: dict, ptype: str) -> "str | None": names = [p.get("name") for p in (md.get("publishers") or []) if p.get("type") == ptype and p.get("name")] return ", ".join(names) if names else None @staticmethod def _manga_flag(md: dict) -> str: mtype = (md.get("type") or "").lower() if mtype == "manga": return "YesAndRightToLeft" if mtype in ("manhwa", "manhua", "oel"): return "Yes" return "Unknown" def _collect_web_links(self, md: dict, sd: dict) -> list[str]: links: list[str] = [] links.extend(l for l in (md.get("links") or []) if l) for raw_key, info in (md.get("source") or {}).items(): template = _TRACKER_URL_TEMPLATES.get(_normalise_key(raw_key)) if not template or not isinstance(info, dict): continue source_id = info.get("id") if source_id is not None: links.append(template.format(id=source_id)) if sd.get("Web"): links.extend(str(sd["Web"]).split()) seen: set[str] = set() unique: list[str] = [] for link in links: if link not in seen: seen.add(link) unique.append(link) return unique @staticmethod def _read_existing_comicinfo(folder: Path) -> dict: xml_path = folder / "ComicInfo.xml" if not xml_path.is_file(): return {} try: root = ET.parse(xml_path).getroot() except ET.ParseError: return {} wanted = {"Title", "Series", "Number", "Summary", "Writer", "Penciller", "Translator", "Genre", "Web", "Year", "Month", "Day"} data: dict = {} for child in root: tag = child.tag.split("}")[-1] if tag in wanted and child.text and child.text.strip(): data[tag] = child.text.strip() return data @staticmethod def _image_dimensions(path: Path): if not _HAS_PIL: return (None, None) try: with Image.open(path) as im: return im.size except Exception: return (None, None) # -------------------------------------------------------------------------- # Module-level helpers (shared with MangaBakaWorksResolver logic) # -------------------------------------------------------------------------- def _pick_cover_url(cover) -> "str | None": """ Selects the best cover URL from a MangaBaka cover object. Real API shape (from `GET /v1/series/{id}` and `/works`): { "raw": {"url": "...", "size": ..., "height": ..., "width": ...}, "x150": {"x1": "...", "x2": "...", "x3": "..."}, "x250": {"x1": "...", "x2": "...", "x3": "..."}, "x350": {"x1": "...", "x2": "...", "x3": "..."} } Order of preference: raw original > x350@x3 > x250@x3 > x150@x3 (falling through to lower densities and sizes as needed). """ if not cover: return None if isinstance(cover, str): return cover if not isinstance(cover, dict): return None # 1) Preferred: the unscaled "raw" image raw = cover.get("raw") if isinstance(raw, dict): url = raw.get("url") if isinstance(url, str) and url: return url elif isinstance(raw, str) and raw: return raw # 2) Fallback: size-keyed variants, largest first, highest density first for size_key in ("x350", "x250", "x150"): variant = cover.get(size_key) if isinstance(variant, dict): for density in ("x3", "x2", "x1"): url = variant.get(density) if isinstance(url, str) and url: return url elif isinstance(variant, str) and variant: return variant # 3) Last-ditch fallback: any http URL anywhere in the structure for val in cover.values(): if isinstance(val, str) and val.startswith("http"): return val if isinstance(val, dict): for sub in val.values(): if isinstance(sub, str) and sub.startswith("http"): return sub return None def _guess_extension(url: str, content_type: str) -> str: url_ext = Path(url.split("?")[0]).suffix.lower() if url_ext in _IMAGE_EXTS: return url_ext ct = (content_type or "").lower() if "png" in ct: return ".png" if "webp" in ct: return ".webp" if "gif" in ct: return ".gif" return ".jpg" # -------------------------------------------------------------------------- # Usage example # -------------------------------------------------------------------------- if __name__ == "__main__": builder = ComicInfoBuilder("Yofukashi no Uta", 66) builder.add_pages_from_folder( r"\\192.168.2.2\root\Temp\managdl\mangas\ComicK Fanmade (EN)" r"\Yofukashi no Uta\Official_Chapter 66") builder.save_xml( r"\\192.168.2.2\root\Temp\managdl\mangas\ComicK Fanmade (EN)" r"\Yofukashi no Uta\Official_Chapter 66\ComicInfo.xml") # Setter behaviour: # builder.chapter = 2 # only results discarded, metadata is kept # builder.manga_title = "X" # metadata + results discarded