""" comicinfo_builder.py ==================== Generates a ComicInfo.xml (compatible with Kavita v0.9.0.2 / ComicInfo v2.1) from series metadata provided by the MangaBaka API, enriched with data from MangaDex (volume mapping), MangaBaka works (volume covers / ISBN / dates), and MyAnimeList / Jikan (statistics and characters). Dependencies ------------ requests (required -> API calls / cover download) Pillow (PIL) (optional -> image dimensions for entries) pip install requests pillow The modules MangadexVolumeResolver, MangaBakaWorksResolver and MALResolver must reside in the same directory. API address note ---------------- The official MangaBaka API is hosted at https://api.mangabaka.dev/v1 (domain ".dev", not ".org"). Use the `api_base_url` constructor parameter to override this if needed. Data source notes ----------------- * Volume assignment per chapter is resolved via MangaDex (MangaDexVolumeResolver). Chapters missing from MangaDex are estimated from neighbouring volume boundaries and MangaBaka page-count data. * Volume-specific covers, ISBNs and publication dates come from MangaBaka works (MangaBakaWorksResolver). If no volume is assigned the series cover is used instead. * MAL statistics and character names are fetched via the Jikan API (MALResolver). """ from __future__ import annotations import difflib import re import xml.etree.ElementTree as ET from pathlib import Path import requests from MangadexVolumeResolver import MangaDexVolumeResolver from MangaBakaWorksResolver import MangaBakaWorksResolver from MALResolver import MALResolver from AniListResolver import AniListResolver from MatchesCache import MatchesCache from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit try: from PIL import Image _HAS_PIL = True except ImportError: _HAS_PIL = False # -------------------------------------------------------------------------- # Constants # -------------------------------------------------------------------------- _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"} # Series types accepted by the MangaBaka search endpoint. Light/web novels # are filtered out because this pipeline only handles image-based manga. # Passed to `requests` as a list so each value becomes its own `&type=...` # query parameter (MangaBaka's API expects repeated keys, not a CSV list). _SEARCH_TYPES = ["manga", "manhwa", "manhua"] _AGE_RATING_MAP = { "safe": "Everyone", "suggestive": "Teen", "erotica": "Mature 17+", "pornographic": "Adults Only 18+", } _TRACKER_URL_TEMPLATES = { # Keys are normalised via _normalise_key (alphanumeric only, lowercase), # so e.g. the source key "anime_news_network" matches "animenewsnetwork". "anilist": "https://anilist.co/manga/{id}", "myanimelist": "https://myanimelist.net/manga/{id}", "mal": "https://myanimelist.net/manga/{id}", "mangaupdates": "https://www.mangaupdates.com/series.html?id={id}", "mangadex": "https://mangadex.org/title/{id}", "kitsu": "https://kitsu.app/manga/{id}", "animenewsnetwork": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}", "ann": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}", "animeplanet": "https://www.anime-planet.com/manga/{id}", "shikimori": "https://shikimori.one/mangas/{id}", } # MangaDex relationship types that indicate child works (spin-offs, sequels …) _CHILD_RELATION_TYPES = {"side_story", "spin_off", "sequel", "prequel", "doujinshi", "adapted_from", "alternative_story", "alternative_version"} # -------------------------------------------------------------------------- # Module helpers # -------------------------------------------------------------------------- def _natural_key(name: str): return [int(p) if p.isdigit() else p.lower() for p in re.split(r"(\d+)", name)] def _normalise_key(key) -> str: return re.sub(r"[^a-z0-9]", "", str(key).lower()) def _format_term(value: str) -> str: """Converts a MangaBaka genre slug ('slice_of_life') to display form.""" return str(value).replace("_", " ").strip().title() if value else "" # Markdown backslash escape sequences recognised by CommonMark (e.g. \- → -) _MD_ESCAPE_RE = re.compile(r'\\([\\`*_{}\[\]()\#+\-.!|~])') def _md_to_html(text: str) -> str: """ Converts a subset of Markdown (as produced by MangaBaka) to HTML. Handles: backslash escapes, [text](url) links, **bold**, *italic*, blank-line paragraph splits, and single-newline line breaks. Produces compact HTML with no raw newline characters — Kavita renders every bare \\n as a
, so all line-breaks must be explicit. """ if not text: return "" # Unescape Markdown backslash sequences (\- → -, \* → *, …) text = _MD_ESCAPE_RE.sub(r'\1', text) # [text](url) → text text = re.sub( r'\[([^\]]+)\]\(([^)]+)\)', lambda m: f'{m.group(1)}', text, ) # **bold** before *italic* so ** is not mistaken for two * text = re.sub(r'\*\*(.+?)\*\*', r'\1', text, flags=re.DOTALL) text = re.sub(r'\*(.+?)\*', r'\1', text, flags=re.DOTALL) # Split on blank lines →

blocks; single newlines →
parts: list[str] = [] for para in re.split(r'\n{2,}', text.strip()): para = para.strip() if para: parts.append(f"

{para.replace(chr(10), '
')}

") return "".join(parts) # no raw \n — every \n becomes a
in Kavita # -------------------------------------------------------------------------- # Main class # -------------------------------------------------------------------------- class ComicInfoBuilder: """ Builds a ComicInfo.xml for a single manga chapter. Constructor arguments --------------------- manga_title : Title of the manga (used for the API search). chapter : Chapter number (int, float, or str — e.g. "10.5"). Setter behaviour ---------------- * Changing `manga_title` discards both the cached API metadata AND the current results (pages / cover). * Changing `chapter` discards only the current results; the API metadata is kept. """ def __init__(self, manga_title, chapter, *, api_base_url: str = "https://api.mangabaka.dev/v1", language: str = "en", request_timeout: int = 30, session: "requests.Session | None" = None, volume_resolver: "MangaDexVolumeResolver | None" = None, works_resolver: "MangaBakaWorksResolver | None" = None, mal_resolver: "MALResolver | None" = None, al_resolver: "AniListResolver | None" = None, matches_cache: "MatchesCache | None" = None): if not manga_title or not str(manga_title).strip(): raise ValueError("manga_title must not be empty.") self._manga_title = str(manga_title).strip() self._chapter = chapter self.api_base_url = api_base_url.rstrip("/") self.language = language self.request_timeout = request_timeout self._session = session or requests.Session() self._session.headers.setdefault("User-Agent", "ComicInfoBuilder/1.0") # Throttle every call to api.mangabaka.dev (idempotent — safe even # when the session was already prepared by a parent class). _apply_mangabaka_rate_limit(self._session) self._volume_resolver = (volume_resolver or MangaDexVolumeResolver( request_timeout=request_timeout, session=self._session)) self._works_resolver = (works_resolver or MangaBakaWorksResolver( api_base_url=api_base_url, request_timeout=request_timeout, session=self._session)) # Both resolvers are Singletons — they manage their own sessions/caches. self._mal_resolver = mal_resolver or MALResolver( request_timeout=request_timeout) self._al_resolver = al_resolver or AniListResolver( request_timeout=request_timeout) self._matches_cache = matches_cache self._metadata: "dict | None" = None self._pages: list[dict] = [] self._cover_path: "Path | None" = None self._suwayomi_data: dict = {} # ----- Repr ----------------------------------------------------------- def __repr__(self) -> str: return (f"ComicInfoBuilder(manga_title={self._manga_title!r}, " f"chapter={self._chapter!r})") # ====================================================================== # Properties / setters # ====================================================================== @property def manga_title(self) -> str: return self._manga_title @manga_title.setter def manga_title(self, value): value = str(value).strip() if not value: raise ValueError("manga_title must not be empty.") if value == self._manga_title: return self._manga_title = value self._metadata = None self._clear_results() @property def chapter(self): return self._chapter @chapter.setter def chapter(self, value): if value == self._chapter: return self._chapter = value self._clear_results() def _clear_results(self) -> None: self._pages = [] self._cover_path = None self._suwayomi_data = {} # ====================================================================== # Public XML functions # ====================================================================== def to_xml_string(self, *, pretty: bool = True) -> str: """Returns the ComicInfo.xml as a string.""" tree = self._build_tree() if pretty: try: ET.indent(tree, space=" ") except AttributeError: pass body = ET.tostring(tree.getroot(), encoding="unicode") return '\n' + body def save_xml(self, path) -> Path: """ Writes the ComicInfo.xml to `path`. If a directory is passed, ComicInfo.xml is created inside it. Returns the actual file path used. """ path = Path(path) if path.is_dir(): path = path / "ComicInfo.xml" path.parent.mkdir(parents=True, exist_ok=True) path.write_text(self.to_xml_string(), encoding="utf-8") return path # ====================================================================== # Optional: analyse an image folder # ====================================================================== def add_pages_from_folder(self, folder, *, download_cover: bool = True, cover_filename: str = "cover") -> dict: """ Scans a chapter image folder and populates entries. Reads an existing Suwayomi ComicInfo.xml for supplementary fields. Downloads the cover (volume-specific if a volume is found, otherwise the series default cover). """ folder = Path(folder) if not folder.is_dir(): raise NotADirectoryError(f"Folder not found: {folder}") self._suwayomi_data = self._read_existing_comicinfo(folder) self._cover_path = None if download_cover: self._cover_path = self._download_cover(folder, cover_filename) cover_resolved = self._cover_path.resolve() if self._cover_path else None story_images: list[Path] = [] for entry in folder.iterdir(): if not entry.is_file(): continue if entry.suffix.lower() not in _IMAGE_EXTS: continue if cover_resolved and entry.resolve() == cover_resolved: continue story_images.append(entry) story_images.sort(key=lambda p: _natural_key(p.name)) ordered: list[tuple[Path, str]] = [] if self._cover_path: ordered.append((self._cover_path, "FrontCover")) ordered.extend((img, "Story") for img in story_images) self._pages = [] for index, (img_path, page_type) in enumerate(ordered): width, height = self._image_dimensions(img_path) try: size = img_path.stat().st_size except OSError: size = None self._pages.append({ "image": index, "type": page_type, "width": width, "height": height, "size": size, "double": bool(width and height and width > height), }) return { "page_count": len(self._pages), "cover": str(self._cover_path) if self._cover_path else None, "suwayomi_fields": dict(self._suwayomi_data), } # ====================================================================== # Metadata retrieval (MangaBaka API) # ====================================================================== def fetch_metadata(self, *, force: bool = False) -> dict: """Fetches (and caches) the series metadata. Pass force=True to refresh.""" return self._get_metadata(force=force) def _get_metadata(self, *, force: bool = False) -> dict: if self._metadata is not None and not force: return self._metadata series = self._search_best_series(self._manga_title) if series is None: raise RuntimeError( f"No series found for '{self._manga_title}' on MangaBaka.") if series.get("state") == "merged" and series.get("merged_with"): series = self._fetch_series_by_id(series["merged_with"]) self._metadata = series return series def _search_best_series(self, title: str): """ Resolves `title` to a MangaBaka series. Lookup order: 1. matches.json cache (if attached) — uses the stored series ID to fetch the full series, skipping the search step entirely. 2. Fresh MangaBaka search — top hit. The match is persisted to matches.json before being returned so it survives a crash. """ if self._matches_cache is not None: cached = self._matches_cache.get(title) if cached and cached.get("mangabakaId"): try: return self._fetch_series_by_id(cached["mangabakaId"]) except Exception as exc: print(f"[ComicInfoBuilder] cached id " f"{cached['mangabakaId']} for {title!r} failed " f"({exc}); falling back to fresh search", flush=True) url = f"{self.api_base_url}/series/search" resp = self._session.get( url, params={"q": title, "type": _SEARCH_TYPES, "page": 1, "limit": 1}, timeout=self.request_timeout) resp.raise_for_status() data = resp.json().get("data") or [] series = data[0] if data else None if series and self._matches_cache is not None: self._matches_cache.add( title, mangabaka_id=series.get("id"), mangabaka_name=series.get("title") or "", image_url=_pick_cover_url(series.get("cover")), ) return series def _fetch_series_by_id(self, series_id) -> dict: url = f"{self.api_base_url}/series/{series_id}" resp = self._session.get(url, timeout=self.request_timeout) resp.raise_for_status() data = resp.json().get("data") if not data: raise RuntimeError(f"Series with ID {series_id} not found.") return data # ====================================================================== # XML construction # ====================================================================== def _build_tree(self) -> "ET.ElementTree": md = self._get_metadata() sd = self._suwayomi_data volume = self._determine_volume() work = self._get_work_for_volume(md, volume) if volume else None root = ET.Element("ComicInfo", { "xmlns:xsd": "http://www.w3.org/2001/XMLSchema", "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance", }) def add(tag: str, value) -> None: if value is None: return text = str(value).strip() if text: ET.SubElement(root, tag).text = text # ----- Title / Series ----------------------------------------------- add("Title", sd.get("Title") or f"Chapter {self._chapter}") add("Series", md.get("title") or self._manga_title) add("LocalizedSeries", md.get("native_title") or md.get("romanized_title")) add("SeriesSort", self._get_sort_title(md)) add("Number", sd.get("Number") or self._chapter) add("Count", md.get("total_chapters")) add("Volume", volume) # ----- Description with MAL stats ----------------------------------- # Prefer the MAL ID from MangaBaka's source map — avoids an extra # Jikan title-search request and is more reliable than fuzzy matching. mal_id = (self._mal_id_from_source(md) or self._mal_resolver.find_mal_id( md.get("title") or self._manga_title)) al_id = self._al_id_from_source(md) mal_stats = self._mal_resolver.get_stats(mal_id) add("Summary", self._build_summary(md, sd, mal_stats)) # ----- Release date ------------------------------------------------- # Volume publication date takes precedence over the chapter date. vol_year, vol_month, vol_day = self._parse_work_date(work) add("Year", vol_year or sd.get("Year") or md.get("year")) add("Month", vol_month or sd.get("Month")) add("Day", vol_day or sd.get("Day")) # ----- Contributors ------------------------------------------------- add("Writer", ", ".join(md.get("authors") or [])) add("Penciller", ", ".join(md.get("artists") or [])) add("Translator", sd.get("Translator")) # ----- Publisher ---------------------------------------------------- eng_pub = self._publishers_by_type(md, "English") orig_pub = self._publishers_by_type(md, "Original") add("Publisher", eng_pub or orig_pub) if eng_pub and orig_pub: add("Imprint", orig_pub) # ----- Genres / Tags ------------------------------------------------ # Genres come back as lowercase snake_case ("slice_of_life"); convert # to display form ("Slice Of Life") so Kavita / readers show them # consistently with the (already-titled-cased) Tags field. add("Genre", ", ".join(_format_term(g) for g in (md.get("genres") or []))) add("Tags", ", ".join(_format_term(t) for t in (md.get("tags") or []))) # ----- Characters — MAL first, AniList fallback --------------------- characters = self._mal_resolver.get_characters(mal_id) if not characters and al_id: characters = self._al_resolver.get_characters(al_id) add("Characters", ", ".join(characters) if characters else None) # ----- Web links ---------------------------------------------------- add("Web", " ".join(self._collect_web_links(md, sd))) # ----- Miscellaneous ------------------------------------------------ add("LanguageISO", self.language) add("Manga", self._manga_flag(md)) add("AgeRating", _AGE_RATING_MAP.get(md.get("content_rating"), "Unknown")) if md.get("rating") is not None: try: # MangaBaka rating is on a 0..100 scale -> ComicInfo # CommunityRating uses 0..5. add("CommunityRating", round(float(md["rating"]) / 20, 1)) except (TypeError, ValueError): pass # ----- ISBN (GTIN) from volume work --------------------------------- identifiers = (work or {}).get("identifiers") or [] isbn = identifiers[0].get("id") if identifiers else None add("GTIN", isbn) # ----- SeriesGroup from related works ------------------------------- add("SeriesGroup", self._determine_series_group(md)) # ----- Alternate title notes ---------------------------------------- add("Notes", self._build_notes(md)) # ----- Pages -------------------------------------------------------- if self._pages: add("PageCount", len(self._pages)) pages_el = ET.SubElement(root, "Pages") for page in self._pages: attrs = {"Image": str(page["image"]), "Type": page["type"]} if page.get("size") is not None: attrs["ImageSize"] = str(page["size"]) if page.get("width"): attrs["ImageWidth"] = str(page["width"]) if page.get("height"): attrs["ImageHeight"] = str(page["height"]) if page.get("double"): attrs["DoublePage"] = "true" ET.SubElement(pages_el, "Page", attrs) return ET.ElementTree(root) # ====================================================================== # Volume determination # ====================================================================== def _determine_volume(self) -> "str | None": """ Resolves the volume for the current chapter via MangaDex. Falls back to estimation when the chapter is absent from MangaDex. Returns None if no volume can be determined. """ md = self._get_metadata() try: manga_id = self._mangadex_id_from_source(md) if not manga_id: manga_id = self._volume_resolver.find_manga_id( md.get("native_title") or self._manga_title) if not manga_id: return None series_id = str(md.get("id") or "") page_counts = {} if series_id: page_counts = self._works_resolver.get_page_counts(series_id) return self._volume_resolver.volume_for_chapter( manga_id, self._chapter, volume_page_counts=page_counts or None) except Exception: return None def _get_work_for_volume(self, md: dict, volume: "str | None") -> "dict | None": """Returns the MangaBaka work dict for the current volume, or None.""" if not volume: return None series_id = str(md.get("id") or "") if not series_id: return None try: return self._works_resolver.get_work_for_volume(series_id, volume) except Exception: return None # ====================================================================== # Cover download # ====================================================================== def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None": """ Downloads the cover for the current chapter/volume. If a volume is known and a volume-specific cover exists in MangaBaka works, that cover is used. Otherwise the series default cover is downloaded (raw variant preferred). """ md = self._get_metadata() volume = self._determine_volume() cover_url: "str | None" = None if volume: series_id = str(md.get("id") or "") if series_id: try: cover_url = self._works_resolver.get_cover_for_volume( series_id, volume) except Exception: pass if not cover_url: cover_url = _pick_cover_url(md.get("cover")) if not cover_url: return None try: resp = self._session.get(cover_url, timeout=self.request_timeout) resp.raise_for_status() except requests.RequestException: return None ext = _guess_extension(cover_url, resp.headers.get("Content-Type", "")) target = folder / f"{cover_filename}{ext}" target.write_bytes(resp.content) return target # ====================================================================== # Series group # ====================================================================== def _determine_series_group(self, md: dict) -> "str | None": """ Determines SeriesGroup from MangaBaka's relationships_v2 field. - If the series has a 'parent' relationship entry → fetch the parent series and return its MangaBaka title (so arcs/sequels appear under the root series in Kavita). - Otherwise → return the series' own title (it is the root, or a standalone series with no parent). """ for rel in (md.get("relationships_v2") or []): if rel.get("relation_type") == "parent": parent_id = rel.get("to_series_id") if parent_id is not None: try: parent_md = self._fetch_series_by_id(parent_id) parent_title = parent_md.get("title") if parent_title: return parent_title except Exception: pass break return md.get("title") or self._manga_title # ====================================================================== # Title helpers # ====================================================================== def _get_sort_title(self, md: dict) -> "str | None": """ Returns the SeriesSort title in the configured language. Looks for an alt-title with matching language code first; falls back to the primary title. """ lang = self.language.lower() alt_titles = self._collect_alt_titles(md) if lang in alt_titles: return alt_titles[lang] # For 'en' the primary MangaBaka title is usually already English return md.get("title") or self._manga_title def _collect_alt_titles(self, md: dict) -> "dict[str, str]": """ Returns {lang_code: title} for EN, DE, JP kanji and JP romaji. MangaBaka stores alt-titles in the `titles` list, where each entry is a dict {language, title, traits, is_primary, note}. Important caveats observed against the real API: * `romanized_title` is the romanization of whatever the series' native script is — for a Japanese manga with a Korean licence it can hold the Korean romanization, NOT the Japanese romaji. Always prefer `titles[language="ja-Latn"]` for romaji instead. * `native_title` holds the kanji form for Japanese manga, but `titles[language="ja", traits contains "native"]` is more reliable when present. * Each language can have several entries; primary + official traits win over generic ones. """ titles = md.get("titles") or md.get("alt_titles") or [] def pick(language_codes: tuple, prefer_trait: "str | None" = None ) -> "str | None": """Picks the best title entry for any of the given language codes.""" if not isinstance(titles, list): return None best_score = -1 best_title: "str | None" = None for entry in titles: if not isinstance(entry, dict): continue lang = (entry.get("language") or entry.get("lang") or "").lower() if lang not in language_codes: continue title = entry.get("title") if not title: continue traits = entry.get("traits") or [] score = 0 if prefer_trait and prefer_trait in traits: score += 4 if "official" in traits: score += 2 if entry.get("is_primary"): score += 1 if score > best_score: best_score, best_title = score, title return best_title result: dict[str, str] = {} # JP kanji (prefer entry with "native" trait, fall back to native_title) kanji = pick(("ja",), prefer_trait="native") or md.get("native_title") if kanji: result["jp"] = kanji # JP romaji — explicitly from "ja-Latn" entries. Do NOT fall back to # `romanized_title` blindly; that field can hold a non-Japanese # romanization (e.g. Korean) for the same series. romaji = pick(("ja-latn", "ja-romaji")) if not romaji: # Heuristic fallback only when romanized_title looks Latin rt = md.get("romanized_title") or "" if rt and all(ord(c) < 128 for c in rt): romaji = rt if romaji: result["romaji"] = romaji # English (prefer official + primary) en = pick(("en",)) if not en: en = md.get("title") if md.get("title") else None if en: result["en"] = en # German de = pick(("de",)) if de: result["de"] = de return result def _collect_all_alt_titles(self, md: dict) -> "dict[str, list[str]]": """ Returns all known title variants grouped by language/script. Groups collected (skipped when empty): "en" – English (language = "en") "de" – German (language = "de") "ja" – Japanese native kanji (language = "ja") "ja-romaji" – Japanese romanized (language = "ja-Latn" / "ja-romaji") "ko" – Korean native (language = "ko") "ko-romaji" – Korean romanized (language = "ko-Latn" / "ko-romaji") "zh" – Chinese native (language = "zh" / "zh-hk" / "zh-tw" / …) "zh-romaji" – Chinese romanized (language = "zh-Latn") All variants are included (not just primary), preserving API order. Duplicates within a group are removed. """ _GROUPS: "dict[str, tuple]" = { "en": ("en",), "de": ("de",), "ja": ("ja",), "ja-romaji": ("ja-latn", "ja-romaji"), "ko": ("ko",), "ko-romaji": ("ko-latn", "ko-romaji"), "zh": ("zh", "zh-hk", "zh-tw", "zh-hans", "zh-hant"), "zh-romaji": ("zh-latn",), } # Pre-build a flat lang → group mapping for O(1) lookup lang_to_group: "dict[str, str]" = { lang: group for group, langs in _GROUPS.items() for lang in langs } result: "dict[str, list[str]]" = {} seen: "dict[str, set[str]]" = {} for entry in (md.get("titles") or md.get("alt_titles") or []): if not isinstance(entry, dict): continue lang = (entry.get("language") or entry.get("lang") or "").lower() group = lang_to_group.get(lang) if not group: continue title = (entry.get("title") or "").strip() if not title: continue if group not in result: result[group] = [] seen[group] = set() if title not in seen[group]: result[group].append(title) seen[group].add(title) return result # ====================================================================== # Summary / notes # ====================================================================== def _build_summary(self, md: dict, sd: dict, mal_stats: "dict | None") -> "str | None": """ Builds as HTML (Kavita supports HTML in this field). Structure (top → bottom): 1. MAL statistics — HTML link + table with padded columns 2. Series description — Markdown converted to HTML 3. Alternate titles — HTML table """ # Inline style applied to label cells for readable column spacing. _TD = 'style="padding-right:1.5em"' parts: list[str] = [] # 1. MAL stats table (top) ---------------------------------------- if mal_stats: url = mal_stats.get("url", "") as_of = mal_stats.get("as_of", "") score = mal_stats.get("score") rank = mal_stats.get("rank") scored = mal_stats.get("scored_by") pop = mal_stats.get("popularity") members = mal_stats.get("members") favs = mal_stats.get("favorites") rows: list[str] = [] if score is not None: rows.append(f"Score{score}") if rank is not None: rows.append(f"Ranked#{rank}") if scored is not None: rows.append(f"Scored by{scored:,} users") if pop is not None: rows.append(f"Popularity#{pop}") if members is not None: rows.append(f"Members{members:,}") if favs is not None: rows.append(f"Favorites{favs:,}") if rows: link = f'MyAnimeList' if url else "MyAnimeList" parts.append(f"

{link} stats as of {as_of}:

{''.join(rows)}
") # 2. Description — Markdown → HTML (middle) ----------------------- desc_raw = (md.get("description") or sd.get("Summary") or "").strip() if desc_raw: parts.append(_md_to_html(desc_raw)) # 3. Alternate titles table (bottom) — all variants per language ------ all_alt = self._collect_all_alt_titles(md) if all_alt: label_map = { "en": "EN", "de": "DE", "ja": "JA", "ja-romaji": "JA Romaji", "ko": "KO", "ko-romaji": "KO Romaji", "zh": "ZH", "zh-romaji": "ZH Romaji", } alt_rows: list[str] = [] for group in ("en", "de", "ja", "ja-romaji", "ko", "ko-romaji", "zh", "zh-romaji"): titles = all_alt.get(group) if not titles: continue label = label_map[group] cell = "
".join(titles) alt_rows.append(f"{label}{cell}") if alt_rows: parts.append(f"{''.join(alt_rows)}
") return "
".join(parts) if parts else None def _build_notes(self, md: dict) -> "str | None": """Builds the field with the MangaBaka metadata source URL.""" series_id = str(md.get("id") or "") return f"Metadata source: https://mangabaka.org/{series_id}" if series_id else None # ====================================================================== # Static helpers # ====================================================================== @staticmethod def _parse_work_date(work: "dict | None") -> tuple: """Returns (year, month, day) strings from a MangaBaka work dict.""" if not work: return (None, None, None) raw = (work.get("release_date") or work.get("publication_date") or "") if not raw: return (None, None, None) parts = str(raw).split("-") year = parts[0] if len(parts) > 0 and parts[0] else None month = parts[1] if len(parts) > 1 and parts[1] else None day = parts[2] if len(parts) > 2 and parts[2] else None return (year, month, day) @staticmethod def _mangadex_id_from_source(md: dict) -> "str | None": for raw_key, info in (md.get("source") or {}).items(): if _normalise_key(raw_key) in ("mangadex", "mangadexorg", "md"): if isinstance(info, dict) and info.get("id") is not None: return str(info["id"]) return None @staticmethod def _mal_id_from_source(md: dict) -> "int | None": for raw_key, info in (md.get("source") or {}).items(): if _normalise_key(raw_key) in ("myanimelist", "mal"): if isinstance(info, dict): mid = info.get("id") if mid is not None: try: return int(mid) except (TypeError, ValueError): pass return None @staticmethod def _al_id_from_source(md: dict) -> "int | None": for raw_key, info in (md.get("source") or {}).items(): if _normalise_key(raw_key) == "anilist": if isinstance(info, dict): mid = info.get("id") if mid is not None: try: return int(mid) except (TypeError, ValueError): pass return None @staticmethod def _publishers_by_type(md: dict, ptype: str) -> "str | None": names = [p.get("name") for p in (md.get("publishers") or []) if p.get("type") == ptype and p.get("name")] return ", ".join(names) if names else None @staticmethod def _manga_flag(md: dict) -> str: mtype = (md.get("type") or "").lower() if mtype == "manga": return "YesAndRightToLeft" if mtype in ("manhwa", "manhua", "oel"): return "Yes" return "Unknown" def _collect_web_links(self, md: dict, sd: dict) -> list[str]: links: list[str] = [] links.extend(l for l in (md.get("links") or []) if l) for raw_key, info in (md.get("source") or {}).items(): template = _TRACKER_URL_TEMPLATES.get(_normalise_key(raw_key)) if not template or not isinstance(info, dict): continue source_id = info.get("id") if source_id is not None: links.append(template.format(id=source_id)) if sd.get("Web"): links.extend(str(sd["Web"]).split()) seen: set[str] = set() unique: list[str] = [] for link in links: if link not in seen: seen.add(link) unique.append(link) return unique @staticmethod def _read_existing_comicinfo(folder: Path) -> dict: xml_path = folder / "ComicInfo.xml" if not xml_path.is_file(): return {} try: root = ET.parse(xml_path).getroot() except ET.ParseError: return {} wanted = {"Title", "Series", "Number", "Summary", "Writer", "Penciller", "Translator", "Genre", "Web", "Year", "Month", "Day"} data: dict = {} for child in root: tag = child.tag.split("}")[-1] if tag in wanted and child.text and child.text.strip(): data[tag] = child.text.strip() return data @staticmethod def _image_dimensions(path: Path): if not _HAS_PIL: return (None, None) try: with Image.open(path) as im: return im.size except Exception: return (None, None) # -------------------------------------------------------------------------- # Module-level helpers (shared with MangaBakaWorksResolver logic) # -------------------------------------------------------------------------- def _pick_cover_url(cover) -> "str | None": """ Selects the best cover URL from a MangaBaka cover object. Real API shape (from `GET /v1/series/{id}` and `/works`): { "raw": {"url": "...", "size": ..., "height": ..., "width": ...}, "x150": {"x1": "...", "x2": "...", "x3": "..."}, "x250": {"x1": "...", "x2": "...", "x3": "..."}, "x350": {"x1": "...", "x2": "...", "x3": "..."} } Order of preference: raw original > x350@x3 > x250@x3 > x150@x3 (falling through to lower densities and sizes as needed). """ if not cover: return None if isinstance(cover, str): return cover if not isinstance(cover, dict): return None # 1) Preferred: the unscaled "raw" image raw = cover.get("raw") if isinstance(raw, dict): url = raw.get("url") if isinstance(url, str) and url: return url elif isinstance(raw, str) and raw: return raw # 2) Fallback: size-keyed variants, largest first, highest density first for size_key in ("x350", "x250", "x150"): variant = cover.get(size_key) if isinstance(variant, dict): for density in ("x3", "x2", "x1"): url = variant.get(density) if isinstance(url, str) and url: return url elif isinstance(variant, str) and variant: return variant # 3) Last-ditch fallback: any http URL anywhere in the structure for val in cover.values(): if isinstance(val, str) and val.startswith("http"): return val if isinstance(val, dict): for sub in val.values(): if isinstance(sub, str) and sub.startswith("http"): return sub return None def _guess_extension(url: str, content_type: str) -> str: url_ext = Path(url.split("?")[0]).suffix.lower() if url_ext in _IMAGE_EXTS: return url_ext ct = (content_type or "").lower() if "png" in ct: return ".png" if "webp" in ct: return ".webp" if "gif" in ct: return ".gif" return ".jpg" # -------------------------------------------------------------------------- # Usage example # -------------------------------------------------------------------------- if __name__ == "__main__": builder = ComicInfoBuilder("Yofukashi no Uta", 66) builder.add_pages_from_folder( r"\\192.168.2.2\root\Temp\managdl\mangas\ComicK Fanmade (EN)" r"\Yofukashi no Uta\Official_Chapter 66") builder.save_xml( r"\\192.168.2.2\root\Temp\managdl\mangas\ComicK Fanmade (EN)" r"\Yofukashi no Uta\Official_Chapter 66\ComicInfo.xml") # Setter behaviour: # builder.chapter = 2 # only results discarded, metadata is kept # builder.manga_title = "X" # metadata + results discarded