commit d5817e908a7245a9bfeb992bba0914534429a1c2 Author: JohannesBOT Date: Fri May 22 22:42:04 2026 +0200 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..76c8a67 --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# General +.DS_Store +.idea +.vscode/ + +# Node modules (all subprojects) +__pycache__ + +# Environment files +.env +.env.local + +# Logs +*.log + +# SSL Certificates - ignore all except default +certificates/* +!certificates/default/ +!certificates/default/** +!certificates/package.json +!certificates/generate-cert.js +!certificates/README.md +certificates/node_modules/ + +# AI docs (generated) +doc/ai +output/ +test/ +tmp/ +temp/ + diff --git a/doc/ComicInfo.md b/doc/ComicInfo.md new file mode 100644 index 0000000..12f4824 --- /dev/null +++ b/doc/ComicInfo.md @@ -0,0 +1,59 @@ +# ComicInfo.xml ↔ Kavita – Feldübersicht + +Kavita **v0.9.0.2** unterstützt das ComicInfo.xml-Schema **v2.1 (Draft)** des Anansi-Projekts. + +Die Datei muss exakt `ComicInfo.xml` heißen und im **Wurzelverzeichnis** des Archivs (`.cbz`, `.cbr`, `.cb7`, `.cbt`) liegen. Sie überschreibt dann alle aus dem Dateinamen geparsten Informationen. + +## Feldtabelle + +| Feld in ComicInfo.xml | Entspricht in Kavita | Mehrfach möglich | Erklärung | +|---|---|---|---| +| `Title` | Kapitel-/Issue-Titel | nein | Titel des einzelnen Issues/Kapitels. | +| `Series` | Name (der Serie) | nein | Name der Serie. Wird zur Gruppierung der Dateien genutzt. | +| `LocalizedSeries` | Localized Name | nein | Lokalisierter Serienname (z. B. englischer Titel). Beide Namen werden durchsuchbar, Dateien mit gleichem lokalisiertem Namen werden zusammengefasst. *Nicht-Standard-Tag.* | +| `SeriesSort` | Name (Sortiertitel) | nein | Sortiertitel der Serie. Kavita bevorzugt diesen Wert gegenüber `Series`. *Nicht-Standard-Tag.* | +| `Number` | Issue-/Kapitelnummer | nein | Nummer des Issues/Kapitels. Kann auch Bereiche (`1-5`) oder `TPB1` für Sammelbände enthalten. 
| +| `Count` | Publication Status | nein | Gesamtzahl der Issues/Bände. Steuert, ob Kavita die Serie als „Ongoing", „Ended" oder „Completed" einstuft. | +| `Volume` | Volume | nein | Band-/Volume-Nummer. Bei Comics häufig das Erscheinungsjahr. | +| `AlternateSeries` | Leseliste | ja (Komma) | Alternative Serie – wird in Kavita als Leseliste angelegt. | +| `AlternateNumber` | Leseliste (Reihenfolge) | ja (Komma) | Position innerhalb der alternativen Serie/Leseliste. | +| `AlternateCount` | Leseliste | ja (Komma) | Gesamtzahl in der alternativen Serie. | +| `Summary` | Zusammenfassung | nein | Beschreibung. Die der ersten Datei wird zur Serien-Zusammenfassung; sonst Issue-/Kapitel-Zusammenfassung. | +| `Notes` | Notizen | nein | Freitext-Notizen (z. B. Herkunft der Metadaten). | +| `Review` | Review | nein | Rezensionstext. | +| `Year` / `Month` / `Day` | Release Date | nein | Veröffentlichungsdatum. Für die Serie nimmt Kavita das früheste Jahr (Release Year). | +| `Writer` | Writer | ja (Komma) | Autor(en). | +| `Penciller` | Penciller | ja (Komma) | Zeichner (Bleistift). | +| `Inker` | Inker | ja (Komma) | Tuscher. | +| `Colorist` | Colorist | ja (Komma) | Kolorist. | +| `Letterer` | Letterer | ja (Komma) | Letterer (Schrift/Text). | +| `CoverArtist` | Cover Artist | ja (Komma) | Cover-Künstler. | +| `Editor` | Editor | ja (Komma) | Redakteur/Lektor. | +| `Translator` | Translator | ja (Komma) | Übersetzer (v2.1-Feld). | +| `Publisher` | Publisher | ja (Komma) | Verlag. | +| `Imprint` | Imprint | ja (Komma) | Imprint (Verlagsmarke). | +| `Genre` | Genres | ja (Komma) | Genre(s). | +| `Tags` | Tags | ja (Komma) | Freie Schlagwörter (v2.1-Feld). | +| `Characters` | Characters | ja (Komma) | Auftretende Charaktere. | +| `Teams` | Teams | ja (Komma) | Teams/Gruppierungen. | +| `Locations` | Locations | ja (Komma) | Schauplätze. | +| `Web` | Web Links | ja (Leerzeichen) | URLs zu externen Quellen; wird auch zum Matching in CBL-Importen genutzt. 
| +| `PageCount` | Length | nein | Seitenanzahl. Wird auf Serienebene aufsummiert. | +| `LanguageISO` | Language | nein | Sprache als ISO-Code (z. B. `de`, `en`, `ja`). | +| `Format` | Special | nein | Format. Bestimmte Werte (Special, Annual, TPB, Omnibus, One-Shot, FCBD u. a.) markieren die Datei als „Special". | +| `BlackAndWhite` | – (gelesen, intern) | nein | `Yes` / `No` / `Unknown`. | +| `Manga` | Leserichtung | nein | `Yes`, `No`, `YesAndRightToLeft`, `Unknown`. Steuert u. a. die Leserichtung. | +| `AgeRating` | Age Rating | nein | Altersfreigabe. Serie übernimmt die strengste Freigabe aller Dateien. Werte sind im Standard fest vorgegeben. | +| `CommunityRating` | Rating | nein | Numerische Bewertung (z. B. `4.5`). | +| `ScanInformation` | Scan-Info | nein | Information zur Scan-Quelle. | +| `GTIN` | ISBN | nein | ISBN/GTIN-Nummer (v2.1-Feld). | +| `SeriesGroup` | Collections | ja (Komma) | Legt Sammlungen an/aktualisiert sie – nur wenn in der Bibliothek „Manage Collections" aktiv ist. | +| `StoryArc` | Leselisten | ja (Komma) | Story-Arc(s) – werden in Kavita zu Leselisten. | +| `StoryArcNumber` | Leselisten (Reihenfolge) | ja (Komma) | Position innerhalb des jeweiligen Story-Arcs (v2.1-Feld). | +| `Pages` / `Page` | Seiten-Metadaten | ja (`<Page>`-Elemente) | Pro Seite: `Image`, `Type` (FrontCover, Story, Advertisement, BackCover …), `DoublePage`, `ImageSize`, `ImageWidth`, `ImageHeight`, `Bookmark`, `Key`. | + +## Hinweise zum Verhalten von Kavita + +- **`AgeRating`** akzeptiert nur feste Werte. Von am wenigsten zu am strengsten geordnet: Unknown, Rating Pending, Early Childhood, Everyone, G, Everyone 10+, PG, Kids to Adults, Teen, MA15+, Mature 17+, M, R18+, Adults Only 18+, X18+. +- **`Count`** steuert den Publication Status: Ist mindestens ein `Count` in der Serie definiert und nicht 0, gilt die Serie als „Ended". Stimmt der Count mit der vorhandenen Band-/Kapitelzahl überein, gilt sie als „Completed". Ohne `Count` gilt sie als „Ongoing". 
def print_hi(name):
    """Print a one-line greeting of the form ``Hi, NAME`` to standard output.

    ``name`` is rendered with its default string formatting, so any object
    may be passed.
    """
    greeting = "Hi, " + format(name)
    print(greeting)
Use the `api_base_url` constructor parameter +to override this if needed. + +Data source notes +----------------- +* Volume assignment per chapter is resolved via MangaDex + (MangaDexVolumeResolver). Chapters missing from MangaDex are estimated + from neighbouring volume boundaries and MangaBaka page-count data. +* Volume-specific covers, ISBNs and publication dates come from MangaBaka + works (MangaBakaWorksResolver). If no volume is assigned the series + cover is used instead. +* MAL statistics and character names are fetched via the Jikan API + (MALResolver). +""" + +from __future__ import annotations + +import difflib +import re +import xml.etree.ElementTree as ET +from pathlib import Path + +import requests + +from MangadexVolumeResolver import MangaDexVolumeResolver +from MangaBakaWorksResolver import MangaBakaWorksResolver +from MALResolver import MALResolver + +try: + from PIL import Image + _HAS_PIL = True +except ImportError: + _HAS_PIL = False + + +# -------------------------------------------------------------------------- +# Constants +# -------------------------------------------------------------------------- +_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"} + +_AGE_RATING_MAP = { + "safe": "Everyone", + "suggestive": "Teen", + "erotica": "Mature 17+", + "pornographic": "Adults Only 18+", +} + +_TRACKER_URL_TEMPLATES = { + "anilist": "https://anilist.co/manga/{id}", + "myanimelist": "https://myanimelist.net/manga/{id}", + "mal": "https://myanimelist.net/manga/{id}", + "mangaupdates": "https://www.mangaupdates.com/series.html?id={id}", + "mangadex": "https://mangadex.org/title/{id}", + "kitsu": "https://kitsu.app/manga/{id}", + "animenewsnetwork": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}", + "ann": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}", +} + +# MangaDex relationship types that indicate child works (spin-offs, sequels …) +_CHILD_RELATION_TYPES = {"side_story", "spin_off", 
def _natural_key(name: str):
    """Return a sort key that compares embedded digit runs numerically.

    The name is split on digit runs; digit parts become ints and all other
    parts are lower-cased, so "page2" sorts before "page10".
    """
    return [int(p) if p.isdigit() else p.lower()
            for p in re.split(r"(\d+)", name)]


def _normalise_key(key) -> str:
    """Lower-case ``key`` and drop every character that is not a-z or 0-9."""
    return re.sub(r"[^a-z0-9]", "", str(key).lower())


# --------------------------------------------------------------------------
# Main class
# --------------------------------------------------------------------------
class ComicInfoBuilder:
    """
    Builds a ComicInfo.xml for a single manga chapter.

    Series metadata is looked up on MangaBaka by title.  The volume of the
    chapter, the volume-specific work (cover / ISBN / date) and the
    MyAnimeList statistics and characters are obtained through the injected
    resolver objects.

    Constructor arguments
    ---------------------
    manga_title     : Title of the manga (used for the API search).
    chapter         : Chapter number (int, float, or str - e.g. "10.5").
    api_base_url    : Base URL of the MangaBaka API; a trailing "/" is removed.
    language        : Value written to <LanguageISO>; also selects the
                      alternate title used for <SeriesSort>.
    request_timeout : Timeout (seconds) passed to every HTTP request.
    session         : Optional requests.Session shared with the resolvers.
    volume_resolver, works_resolver, mal_resolver :
                      Optional pre-built resolvers; defaults are created
                      when omitted.

    Setter behaviour
    ----------------
    * Changing `manga_title` discards both the cached API metadata
      AND the current results (pages / cover).
    * Changing `chapter` discards only the current results;
      the API metadata is kept.
    """

    # Prolog written in front of the serialised document.  save_xml() writes
    # the text as UTF-8, so the declared encoding matches the file on disk.
    _XML_DECLARATION = '<?xml version="1.0" encoding="utf-8"?>'

    def __init__(self, manga_title, chapter, *,
                 api_base_url: str = "https://api.mangabaka.dev/v1",
                 language: str = "en",
                 request_timeout: int = 30,
                 session: "requests.Session | None" = None,
                 volume_resolver: "MangaDexVolumeResolver | None" = None,
                 works_resolver: "MangaBakaWorksResolver | None" = None,
                 mal_resolver: "MALResolver | None" = None):
        """Store the configuration and create any resolver not supplied.

        Raises
        ------
        ValueError : if `manga_title` is empty or whitespace only.
        """
        if not manga_title or not str(manga_title).strip():
            raise ValueError("manga_title must not be empty.")

        self._manga_title = str(manga_title).strip()
        self._chapter = chapter

        self.api_base_url = api_base_url.rstrip("/")
        self.language = language
        self.request_timeout = request_timeout
        self._session = session or requests.Session()
        # setdefault keeps a User-Agent the caller already configured.
        self._session.headers.setdefault("User-Agent", "ComicInfoBuilder/1.0")

        self._volume_resolver = (volume_resolver
                                 or MangaDexVolumeResolver(
                                     request_timeout=request_timeout,
                                     session=self._session))
        # Hand over the normalised URL so builder and resolver agree on it.
        self._works_resolver = (works_resolver
                                or MangaBakaWorksResolver(
                                    api_base_url=self.api_base_url,
                                    request_timeout=request_timeout,
                                    session=self._session))
        self._mal_resolver = (mal_resolver
                              or MALResolver(
                                  request_timeout=request_timeout,
                                  session=self._session))

        self._metadata: "dict | None" = None        # cached MangaBaka series
        self._pages: list[dict] = []                # <Page> entries
        self._cover_path: "Path | None" = None      # downloaded cover file
        self._suwayomi_data: dict = {}              # fields of an existing XML

    # ----- Repr -----------------------------------------------------------
    def __repr__(self) -> str:
        return (f"ComicInfoBuilder(manga_title={self._manga_title!r}, "
                f"chapter={self._chapter!r})")

    # ======================================================================
    # Properties / setters
    # ======================================================================
    @property
    def manga_title(self) -> str:
        """Title used for the MangaBaka search."""
        return self._manga_title

    @manga_title.setter
    def manga_title(self, value):
        value = str(value).strip()
        if not value:
            raise ValueError("manga_title must not be empty.")
        if value == self._manga_title:
            return
        self._manga_title = value
        # A different title means a different series: drop everything.
        self._metadata = None
        self._clear_results()

    @property
    def chapter(self):
        """Chapter number the XML is generated for."""
        return self._chapter

    @chapter.setter
    def chapter(self, value):
        if value == self._chapter:
            return
        self._chapter = value
        # The series stays the same, so only per-chapter results are reset.
        self._clear_results()

    def _clear_results(self) -> None:
        """Forget pages, cover and the fields read from an existing XML."""
        self._pages = []
        self._cover_path = None
        self._suwayomi_data = {}

    # ======================================================================
    # Public XML functions
    # ======================================================================
    def to_xml_string(self, *, pretty: bool = True) -> str:
        """Return the ComicInfo.xml document as a string.

        The result starts with an XML declaration followed by a newline and
        the serialised <ComicInfo> element.  With `pretty=True` the tree is
        indented when the running Python provides `ET.indent`.
        """
        tree = self._build_tree()
        if pretty:
            try:
                ET.indent(tree, space=" ")
            except AttributeError:
                # ET.indent is missing on older interpreters; the
                # unindented document is still valid.
                pass
        body = ET.tostring(tree.getroot(), encoding="unicode")
        return self._XML_DECLARATION + '\n' + body

    def save_xml(self, path) -> Path:
        """
        Writes the ComicInfo.xml to `path`.
        If a directory is passed, ComicInfo.xml is created inside it.
        Missing parent directories are created.
        Returns the actual file path used.
        """
        path = Path(path)
        if path.is_dir():
            path = path / "ComicInfo.xml"
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(self.to_xml_string(), encoding="utf-8")
        return path

    # ======================================================================
    # Optional: analyse an image folder
    # ======================================================================
    def add_pages_from_folder(self, folder, *,
                              download_cover: bool = True,
                              cover_filename: str = "cover") -> dict:
        """
        Scans a chapter image folder and populates the <Page> entries.

        Reads an existing ComicInfo.xml in the folder for supplementary
        fields.  When `download_cover` is true the cover is downloaded into
        the folder and becomes page 0 ("FrontCover"); every other image file
        follows in natural filename order as a "Story" page.

        Returns a dict with `page_count`, `cover` (path string or None) and
        `suwayomi_fields` (copy of the fields read from the existing XML).

        Raises
        ------
        NotADirectoryError : if `folder` is not an existing directory.
        """
        folder = Path(folder)
        if not folder.is_dir():
            raise NotADirectoryError(f"Folder not found: {folder}")

        self._suwayomi_data = self._read_existing_comicinfo(folder)

        self._cover_path = None
        if download_cover:
            self._cover_path = self._download_cover(folder, cover_filename)

        # The cover lives in the same folder; keep it out of the story pages.
        cover_resolved = self._cover_path.resolve() if self._cover_path else None
        story_images: list[Path] = []
        for entry in folder.iterdir():
            if not entry.is_file():
                continue
            if entry.suffix.lower() not in _IMAGE_EXTS:
                continue
            if cover_resolved and entry.resolve() == cover_resolved:
                continue
            story_images.append(entry)
        story_images.sort(key=lambda p: _natural_key(p.name))

        ordered: list[tuple[Path, str]] = []
        if self._cover_path:
            ordered.append((self._cover_path, "FrontCover"))
        ordered.extend((img, "Story") for img in story_images)

        self._pages = []
        for index, (img_path, page_type) in enumerate(ordered):
            width, height = self._image_dimensions(img_path)
            try:
                size = img_path.stat().st_size
            except OSError:
                size = None
            self._pages.append({
                "image": index,
                "type": page_type,
                "width": width,
                "height": height,
                "size": size,
                # Landscape images are treated as double-page spreads.
                "double": bool(width and height and width > height),
            })

        return {
            "page_count": len(self._pages),
            "cover": str(self._cover_path) if self._cover_path else None,
            "suwayomi_fields": dict(self._suwayomi_data),
        }

    # ======================================================================
    # Metadata retrieval (MangaBaka API)
    # ======================================================================
    def fetch_metadata(self, *, force: bool = False) -> dict:
        """Fetches (and caches) the series metadata. Pass force=True to refresh."""
        return self._get_metadata(force=force)

    def _get_metadata(self, *, force: bool = False) -> dict:
        """Return the cached series dict, querying MangaBaka when needed.

        A search hit in state "merged" is replaced by the series named in
        its `merged_with` field.

        Raises
        ------
        RuntimeError : if the search yields no series.
        """
        if self._metadata is not None and not force:
            return self._metadata

        series = self._search_best_series(self._manga_title)
        if series is None:
            raise RuntimeError(
                f"No series found for '{self._manga_title}' on MangaBaka.")

        if series.get("state") == "merged" and series.get("merged_with"):
            series = self._fetch_series_by_id(series["merged_with"])

        self._metadata = series
        return series

    def _search_best_series(self, title: str):
        """Searches for `title` and returns the first hit, or None.

        The API's own relevance ordering is trusted, so only one result is
        requested.  An empty result list yields None so the caller can
        report "no series found" instead of failing with an IndexError.
        """
        url = f"{self.api_base_url}/series/search"
        resp = self._session.get(
            url, params={"q": title, "page": 1, "limit": 1},
            timeout=self.request_timeout)
        resp.raise_for_status()
        data = resp.json().get("data") or []
        return data[0] if data else None

    def _fetch_series_by_id(self, series_id) -> dict:
        """Fetch one series by ID; raises RuntimeError if the payload is empty."""
        url = f"{self.api_base_url}/series/{series_id}"
        resp = self._session.get(url, timeout=self.request_timeout)
        resp.raise_for_status()
        data = resp.json().get("data")
        if not data:
            raise RuntimeError(f"Series with ID {series_id} not found.")
        return data

    # ======================================================================
    # XML construction
    # ======================================================================
    def _build_tree(self) -> "ET.ElementTree":
        """Assemble the <ComicInfo> element tree from all data sources.

        Elements whose value is None or blank are omitted.
        """
        md = self._get_metadata()
        sd = self._suwayomi_data

        volume = self._determine_volume()
        work = self._get_work_for_volume(md, volume) if volume else None

        root = ET.Element("ComicInfo", {
            "xmlns:xsd": "http://www.w3.org/2001/XMLSchema",
            "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
        })

        def add(tag: str, value) -> None:
            # Append <tag> only when there is non-blank text to store.
            if value is None:
                return
            text = str(value).strip()
            if text:
                ET.SubElement(root, tag).text = text

        # ----- Title / Series -----------------------------------------------
        add("Title", sd.get("Title") or f"Chapter {self._chapter}")
        add("Series", md.get("title") or self._manga_title)
        add("LocalizedSeries",
            md.get("native_title") or md.get("romanized_title"))
        add("SeriesSort", self._get_sort_title(md))
        add("Number", sd.get("Number") or self._chapter)
        add("Count", md.get("total_chapters"))
        add("Volume", volume)

        # ----- Description with MAL stats -----------------------------------
        mal_id = self._mal_resolver.find_mal_id(
            md.get("title") or self._manga_title)
        mal_stats = self._mal_resolver.get_stats(mal_id)
        add("Summary", self._build_summary(md, sd, mal_stats))

        # ----- Release date -------------------------------------------------
        # Volume publication date takes precedence over the chapter date.
        vol_year, vol_month, vol_day = self._parse_work_date(work)
        add("Year", vol_year or sd.get("Year") or md.get("year"))
        add("Month", vol_month or sd.get("Month"))
        add("Day", vol_day or sd.get("Day"))

        # ----- Contributors -------------------------------------------------
        add("Writer", ", ".join(md.get("authors") or []))
        add("Penciller", ", ".join(md.get("artists") or []))
        add("Translator", sd.get("Translator"))

        # ----- Publisher ----------------------------------------------------
        eng_pub = self._publishers_by_type(md, "English")
        orig_pub = self._publishers_by_type(md, "Original")
        add("Publisher", eng_pub or orig_pub)
        if eng_pub and orig_pub:
            add("Imprint", orig_pub)

        # ----- Genres / Tags ------------------------------------------------
        add("Genre", ", ".join(md.get("genres") or []))
        add("Tags", ", ".join(md.get("tags") or []))

        # ----- Characters from MAL ------------------------------------------
        characters = self._mal_resolver.get_characters(mal_id)
        add("Characters", ", ".join(characters) if characters else None)

        # ----- Web links ----------------------------------------------------
        add("Web", " ".join(self._collect_web_links(md, sd)))

        # ----- Miscellaneous ------------------------------------------------
        add("LanguageISO", self.language)
        add("Manga", self._manga_flag(md))
        add("AgeRating", _AGE_RATING_MAP.get(md.get("content_rating"), "Unknown"))

        if md.get("rating"):
            try:
                add("CommunityRating", round(float(md["rating"]) / 2, 1))
            except (TypeError, ValueError):
                pass

        # ----- ISBN (GTIN) from volume work ---------------------------------
        # `work` is None whenever no volume could be resolved, so the lookup
        # must tolerate a missing work and a missing/empty identifier list.
        add("GTIN", self._isbn_from_work(work))

        # ----- SeriesGroup from related works -------------------------------
        add("SeriesGroup", self._determine_series_group(md))

        # ----- Alternate title notes ----------------------------------------
        add("Notes", self._build_notes(md))

        # ----- Pages --------------------------------------------------------
        if self._pages:
            add("PageCount", len(self._pages))
            pages_el = ET.SubElement(root, "Pages")
            for page in self._pages:
                attrs = {"Image": str(page["image"]), "Type": page["type"]}
                if page.get("size") is not None:
                    attrs["ImageSize"] = str(page["size"])
                if page.get("width"):
                    attrs["ImageWidth"] = str(page["width"])
                if page.get("height"):
                    attrs["ImageHeight"] = str(page["height"])
                if page.get("double"):
                    attrs["DoublePage"] = "true"
                ET.SubElement(pages_el, "Page", attrs)

        return ET.ElementTree(root)

    @staticmethod
    def _isbn_from_work(work: "dict | None"):
        """Return the `id` of the first identifier of `work`, or None.

        None is returned when `work` is missing, has no `identifiers` list,
        the list is empty, or its first entry is not a dict.
        """
        identifiers = (work or {}).get("identifiers")
        if not isinstance(identifiers, (list, tuple)) or not identifiers:
            return None
        first = identifiers[0]
        if not isinstance(first, dict):
            return None
        return first.get("id")

    # ======================================================================
    # Volume determination
    # ======================================================================
    def _determine_volume(self) -> "str | None":
        """
        Resolves the volume for the current chapter via the volume resolver.

        The MangaDex ID is taken from the series' `source` mapping, or looked
        up by title when absent.  Page counts from the works resolver are
        passed along as a hint.  Any failure is treated as "no volume"
        (best effort) and yields None.
        """
        md = self._get_metadata()
        try:
            manga_id = self._mangadex_id_from_source(md)
            if not manga_id:
                manga_id = self._volume_resolver.find_manga_id(
                    md.get("native_title") or self._manga_title)
            if not manga_id:
                return None

            series_id = str(md.get("id") or "")
            page_counts = {}
            if series_id:
                page_counts = self._works_resolver.get_page_counts(series_id)

            return self._volume_resolver.volume_for_chapter(
                manga_id, self._chapter,
                volume_page_counts=page_counts or None)
        except Exception:
            # Volume data is optional enrichment; never fail the build on it.
            return None

    def _get_work_for_volume(self, md: dict,
                             volume: "str | None") -> "dict | None":
        """Returns the MangaBaka work dict for the current volume, or None."""
        if not volume:
            return None
        series_id = str(md.get("id") or "")
        if not series_id:
            return None
        try:
            return self._works_resolver.get_work_for_volume(series_id, volume)
        except Exception:
            # Optional enrichment, same best-effort policy as the volume.
            return None

    # ======================================================================
    # Cover download
    # ======================================================================
    def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None":
        """
        Downloads the cover for the current chapter/volume into `folder`.

        A volume-specific cover from the works resolver is preferred; when
        none is available the series cover is used.  Returns the written
        file path, or None when no URL is known or the download fails.
        """
        md = self._get_metadata()
        volume = self._determine_volume()
        cover_url: "str | None" = None

        if volume:
            series_id = str(md.get("id") or "")
            if series_id:
                try:
                    cover_url = self._works_resolver.get_cover_for_volume(
                        series_id, volume)
                except Exception:
                    # Fall through to the series cover below.
                    pass

        if not cover_url:
            cover_url = _pick_cover_url(md.get("cover"))

        if not cover_url:
            return None

        try:
            resp = self._session.get(cover_url, timeout=self.request_timeout)
            resp.raise_for_status()
        except requests.RequestException:
            return None

        ext = _guess_extension(cover_url, resp.headers.get("Content-Type", ""))
        target = folder / f"{cover_filename}{ext}"
        target.write_bytes(resp.content)
        return target

    # ======================================================================
    # Series group
    # ======================================================================
    def _determine_series_group(self, md: dict) -> "str | None":
        """
        Determines the SeriesGroup value from MangaDex relationships.

        - If the series has a `main_story` parent -> use that title.
        - If the series itself has child works (spin-offs, sequels ...)
          -> use the series' own title so all related works are grouped.
        - Otherwise, or when the relations cannot be fetched -> None.
        """
        manga_id = self._mangadex_id_from_source(md)
        if not manga_id:
            return None
        try:
            relations = self._volume_resolver.get_series_relations(manga_id)
        except Exception:
            return None

        if not relations:
            return None

        main_stories = relations.get("main_story") or []
        if main_stories:
            return main_stories[0]

        if any(t in relations for t in _CHILD_RELATION_TYPES):
            return md.get("title") or self._manga_title

        return None

    # ======================================================================
    # Title helpers
    # ======================================================================
    def _get_sort_title(self, md: dict) -> "str | None":
        """
        Returns the SeriesSort title in the configured language.
        Looks for an alt-title with matching language code first;
        falls back to the primary title.
        """
        lang = self.language.lower()
        alt_titles = self._collect_alt_titles(md)
        if lang in alt_titles:
            return alt_titles[lang]
        return md.get("title") or self._manga_title

    def _collect_alt_titles(self, md: dict) -> "dict[str, str]":
        """
        Returns {code: title} for "en", "de", "romaji" and "jp".

        `alt_titles` / `titles` may be a list of dicts (either
        {"lang": ..., "title": ...} or a single {"en": "..."} pair) or a
        plain {lang: title} dict.  When no English alternate title exists,
        the primary title is used for "en".
        """
        result: dict[str, str] = {}

        if md.get("romanized_title"):
            result["romaji"] = md["romanized_title"]
        if md.get("native_title"):
            result["jp"] = md["native_title"]

        alt = md.get("alt_titles") or md.get("titles") or []
        if isinstance(alt, list):
            for entry in alt:
                if not isinstance(entry, dict):
                    continue
                lang = entry.get("lang")
                title = entry.get("title")
                if not lang or not title:
                    # Single-key format: {"en": "Call of the Night"}
                    for k, v in entry.items():
                        if isinstance(v, str) and len(k) <= 10:
                            lang, title = k, v
                            break
                # Guard against a non-string language value before lower().
                if (isinstance(lang, str) and title
                        and lang.lower() in ("en", "de")):
                    result[lang.lower()] = title
        elif isinstance(alt, dict):
            for lang, title in alt.items():
                if isinstance(title, str) and lang.lower() in ("en", "de"):
                    result[lang.lower()] = title

        if "en" not in result and md.get("title"):
            result["en"] = md["title"]

        return result

    # ======================================================================
    # Summary / notes
    # ======================================================================
    def _build_summary(self, md: dict, sd: dict,
                       mal_stats: "dict | None") -> "str | None":
        """
        Builds the <Summary> content.

        Uses the MangaBaka description (or the summary of an existing XML)
        and appends a tab-separated MAL statistics table when stats are
        available.  Returns None when there is nothing to write.
        """
        desc = (md.get("description") or sd.get("Summary") or "").strip()

        if not mal_stats:
            return desc or None

        def thousands(value) -> str:
            # Group digits for real numbers; show anything else unchanged
            # instead of failing on the "," format specifier.
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                return f"{value:,}"
            return str(value)

        as_of = mal_stats.get("as_of", "")
        score = mal_stats.get("score")
        rank = mal_stats.get("rank")
        scored = mal_stats.get("scored_by")
        pop = mal_stats.get("popularity")
        members = mal_stats.get("members")
        favs = mal_stats.get("favorites")
        url = mal_stats.get("url", "")

        rows: list[str] = []
        if score is not None:
            rows.append(f"Score\t{score}")
        if rank is not None:
            rows.append(f"Ranked\t#{rank}")
        if scored is not None:
            rows.append(f"Scored by\t{thousands(scored)} users")
        if pop is not None:
            rows.append(f"Popularity\t#{pop}")
        if members is not None:
            rows.append(f"Members\t{thousands(members)}")
        if favs is not None:
            rows.append(f"Favorites\t{thousands(favs)}")

        if not rows:
            return desc or None

        table = f"[MyAnimeList]({url}) stats as of {as_of}:\n" + "\n".join(rows)
        return f"{desc}\n\n{table}" if desc else table

    def _build_notes(self, md: dict) -> "str | None":
        """
        Builds the <Notes> field containing the alternate titles and the
        MangaBaka metadata source URL.  Returns None when both are absent.
        """
        parts: list[str] = []

        alt = self._collect_alt_titles(md)
        if alt:
            label_map = {"en": "EN", "de": "DE",
                         "romaji": "Romaji", "jp": "JP (kanji)"}
            lines = [f"• {label_map[code]}: {alt[code]}"
                     for code in ("en", "de", "romaji", "jp")
                     if code in alt]
            if lines:
                parts.append("Alternate titles:\n" + "\n".join(lines))

        series_id = str(md.get("id") or "")
        if series_id:
            parts.append(f"Metadata source: https://mangabaka.org/{series_id}")

        return "\n\n".join(parts) if parts else None

    # ======================================================================
    # Static helpers
    # ======================================================================
    @staticmethod
    def _parse_work_date(work: "dict | None") -> tuple:
        """Returns (year, month, day) strings from a MangaBaka work dict.

        Reads `release_date`, falling back to `publication_date`, and splits
        it on "-".  Missing or empty components are returned as None.
        """
        if not work:
            return (None, None, None)
        raw = (work.get("release_date") or work.get("publication_date") or "")
        if not raw:
            return (None, None, None)
        parts = str(raw).split("-")
        year = parts[0] if len(parts) > 0 and parts[0] else None
        month = parts[1] if len(parts) > 1 and parts[1] else None
        day = parts[2] if len(parts) > 2 and parts[2] else None
        return (year, month, day)

    @staticmethod
    def _mangadex_id_from_source(md: dict) -> "str | None":
        """Return the MangaDex ID from the series' `source` mapping, if any."""
        for raw_key, info in (md.get("source") or {}).items():
            if _normalise_key(raw_key) in ("mangadex", "mangadexorg", "md"):
                if isinstance(info, dict) and info.get("id") is not None:
                    return str(info["id"])
        return None

    @staticmethod
    def _publishers_by_type(md: dict, ptype: str) -> "str | None":
        """Return the comma-joined names of publishers of type `ptype`, or None."""
        names = [p.get("name") for p in (md.get("publishers") or [])
                 if p.get("type") == ptype and p.get("name")]
        return ", ".join(names) if names else None

    @staticmethod
    def _manga_flag(md: dict) -> str:
        """Map the series `type` to the ComicInfo <Manga> value."""
        mtype = (md.get("type") or "").lower()
        if mtype == "manga":
            return "YesAndRightToLeft"
        if mtype in ("manhwa", "manhua", "oel"):
            return "Yes"
        return "Unknown"

    def _collect_web_links(self, md: dict, sd: dict) -> list[str]:
        """Collect the <Web> URLs without duplicates, keeping first-seen order.

        Sources, in order: the series `links`, URLs built from known tracker
        IDs in `source`, and the whitespace-separated `Web` field of an
        existing XML.
        """
        links: list[str] = []

        links.extend(l for l in (md.get("links") or []) if l)

        for raw_key, info in (md.get("source") or {}).items():
            template = _TRACKER_URL_TEMPLATES.get(_normalise_key(raw_key))
            if not template or not isinstance(info, dict):
                continue
            source_id = info.get("id")
            if source_id is not None:
                links.append(template.format(id=source_id))

        if sd.get("Web"):
            links.extend(str(sd["Web"]).split())

        seen: set[str] = set()
        unique: list[str] = []
        for link in links:
            if link not in seen:
                seen.add(link)
                unique.append(link)
        return unique

    @staticmethod
    def _read_existing_comicinfo(folder: Path) -> dict:
        """Read selected fields from `folder`/ComicInfo.xml.

        Returns {} when the file is missing or not well-formed.  Namespace
        prefixes on tags are ignored and values are stripped.
        """
        xml_path = folder / "ComicInfo.xml"
        if not xml_path.is_file():
            return {}
        try:
            root = ET.parse(xml_path).getroot()
        except ET.ParseError:
            return {}

        wanted = {"Title", "Series", "Number", "Summary", "Writer",
                  "Penciller", "Translator", "Genre", "Web",
                  "Year", "Month", "Day"}
        data: dict = {}
        for child in root:
            tag = child.tag.split("}")[-1]
            if tag in wanted and child.text and child.text.strip():
                data[tag] = child.text.strip()
        return data

    @staticmethod
    def _image_dimensions(path: Path):
        """Return (width, height) via Pillow, or (None, None) if unavailable."""
        if not _HAS_PIL:
            return (None, None)
        try:
            with Image.open(path) as im:
                return im.size
        except Exception:
            # Unreadable or unsupported image: dimensions are optional.
            return (None, None)
dict): + for sub in ("x2", "x1"): + if isinstance(val.get(sub), str) and val[sub]: + return val[sub] + for val in cover.values(): + if isinstance(val, str) and val.startswith("http"): + return val + if isinstance(val, dict): + for sub in val.values(): + if isinstance(sub, str) and sub.startswith("http"): + return sub + return None + + +def _guess_extension(url: str, content_type: str) -> str: + url_ext = Path(url.split("?")[0]).suffix.lower() + if url_ext in _IMAGE_EXTS: + return url_ext + ct = (content_type or "").lower() + if "png" in ct: return ".png" + if "webp" in ct: return ".webp" + if "gif" in ct: return ".gif" + return ".jpg" + + +# -------------------------------------------------------------------------- +# Usage example +# -------------------------------------------------------------------------- +if __name__ == "__main__": + builder = ComicInfoBuilder("Yofukashi no Uta", 66) + + builder.add_pages_from_folder( + r"\\192.168.2.2\root\Temp\managdl\mangas\ComicK Fanmade (EN)" + r"\Yofukashi no Uta\Official_Chapter 66") + builder.save_xml( + r"\\192.168.2.2\root\Temp\managdl\mangas\ComicK Fanmade (EN)" + r"\Yofukashi no Uta\Official_Chapter 66\ComicInfo.xml") + + # Setter behaviour: + # builder.chapter = 2 # only results discarded, metadata is kept + # builder.manga_title = "X" # metadata + results discarded diff --git a/src/MALResolver.py b/src/MALResolver.py new file mode 100644 index 0000000..08f9ba5 --- /dev/null +++ b/src/MALResolver.py @@ -0,0 +1,231 @@ +""" +mal_resolver.py +=============== + +Fetches and caches MyAnimeList manga metadata (statistics and characters) +using the public Jikan REST API v4. + +Jikan API: https://api.jikan.moe/v4 (no authentication required) +Rate limit: 3 req/s, 60 req/min -> a 400 ms delay between calls is applied. 
+ +Provided features +----------------- +- Title-based MAL ID lookup with best-match scoring (cached) +- MAL statistics: score, rank, scored_by, popularity, members, favorites +- Character list for a manga (names only, cached) +- Convenience: get_characters_for_manga(title) -> list[str] + +Dependencies +------------ + requests -> pip install requests +""" + +from __future__ import annotations + +import datetime +import difflib +import time + +import requests + + +class MALResolver: + """ + Fetches and caches MyAnimeList manga data via the Jikan API v4. + """ + + JIKAN_BASE = "https://api.jikan.moe/v4" + + def __init__(self, *, + request_timeout: int = 30, + session: "requests.Session | None" = None): + self.request_timeout = request_timeout + self._session = session or requests.Session() + self._session.headers.setdefault("User-Agent", "MALResolver/1.0") + + self._id_cache: dict[str, "int | None"] = {} # title_lower -> mal_id + self._stats_cache: dict[int, dict] = {} # mal_id -> stats dict + self._char_cache: dict[int, list[str]] = {} # mal_id -> [name, ...] + + self._last_request_at: float = 0.0 + + # ------------------------------------------------------------------ + # Public: ID lookup + # ------------------------------------------------------------------ + def find_mal_id(self, title: str) -> "int | None": + """ + Searches MAL for a manga by title and returns the best-matching MAL ID. + Returns None on failure or when no result is found. 
+ """ + if not title or not title.strip(): + return None + + key = title.strip().lower() + if key in self._id_cache: + return self._id_cache[key] + + try: + data = self._get(f"{self.JIKAN_BASE}/manga", + {"q": title, "limit": 5, "type": "manga"}) + results = data.get("data") or [] + except requests.RequestException: + return None + + if not results: + self._id_cache[key] = None + return None + + results.sort(key=lambda e: _score_title(title, e), reverse=True) + mal_id = results[0].get("mal_id") + self._id_cache[key] = mal_id + return mal_id + + # ------------------------------------------------------------------ + # Public: statistics + # ------------------------------------------------------------------ + def get_stats(self, mal_id: "int | None") -> "dict | None": + """ + Returns a statistics dict for the given MAL manga ID: + + { + "score": float | None, + "rank": int | None, + "scored_by": int | None, + "popularity": int | None, + "members": int | None, + "favorites": int | None, + "url": str, + "title": str, + "as_of": str (DD-MM-YYYY), + } + + Returns None if mal_id is None or on network failure. 
+ """ + if mal_id is None: + return None + + if mal_id in self._stats_cache: + return self._stats_cache[mal_id] + + try: + data = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}") + entry = data.get("data") or {} + except requests.RequestException: + return None + + stats: dict = { + "score": entry.get("score"), + "rank": entry.get("rank"), + "scored_by": entry.get("scored_by"), + "popularity": entry.get("popularity"), + "members": entry.get("members"), + "favorites": entry.get("favorites"), + "url": (entry.get("url") + or f"https://myanimelist.net/manga/{mal_id}"), + "title": entry.get("title") or "", + "as_of": datetime.date.today().strftime("%d-%m-%Y"), + } + self._stats_cache[mal_id] = stats + return stats + + def get_stats_for_manga(self, title: str) -> "dict | None": + """Convenience: find MAL ID by title, then return stats.""" + return self.get_stats(self.find_mal_id(title)) + + # ------------------------------------------------------------------ + # Public: characters + # ------------------------------------------------------------------ + def get_characters(self, mal_id: "int | None") -> list[str]: + """ + Returns a list of character names (strings) for the manga. + Returns an empty list on failure. + """ + if mal_id is None: + return [] + + if mal_id in self._char_cache: + return self._char_cache[mal_id] + + try: + data = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}/characters") + entries = data.get("data") or [] + except requests.RequestException: + return [] + + names = [] + for entry in entries: + char = entry.get("character") or {} + name = char.get("name") + if name: + names.append(name) + + self._char_cache[mal_id] = names + return names + + def get_characters_for_manga(self, title: str) -> list[str]: + """ + Convenience: search for manga by title, then return its characters. 
+ """ + return self.get_characters(self.find_mal_id(title)) + + # ------------------------------------------------------------------ + # Public: cache management + # ------------------------------------------------------------------ + def clear_cache(self) -> None: + """Clears all internal caches.""" + self._id_cache.clear() + self._stats_cache.clear() + self._char_cache.clear() + + # ------------------------------------------------------------------ + # Internal: rate-limited HTTP + # ------------------------------------------------------------------ + def _get(self, url: str, params: "dict | None" = None) -> dict: + """Rate-limited GET request (respects Jikan's 3 req/s limit).""" + elapsed = time.monotonic() - self._last_request_at + if elapsed < 0.4: + time.sleep(0.4 - elapsed) + resp = self._session.get(url, params=params, timeout=self.request_timeout) + self._last_request_at = time.monotonic() + resp.raise_for_status() + return resp.json() + + +# -------------------------------------------------------------------------- +# Module helper +# -------------------------------------------------------------------------- +def _score_title(query: str, entry: dict) -> float: + """Returns the best title-similarity score for a Jikan manga entry.""" + candidates = [ + entry.get("title") or "", + entry.get("title_english") or "", + entry.get("title_japanese") or "", + ] + for alt in (entry.get("titles") or []): + candidates.append(alt.get("title") or "") + + best = 0.0 + q = query.lower() + for t in candidates: + if t: + ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio() + best = max(best, ratio) + return best + + +# -------------------------------------------------------------------------- +# Usage example +# -------------------------------------------------------------------------- +if __name__ == "__main__": + resolver = MALResolver() + + mal_id = resolver.find_mal_id("Yofukashi no Uta") + print("MAL ID :", mal_id) + + stats = resolver.get_stats(mal_id) + if 
class MangaBakaWorksResolver:
    """
    Fetches and caches MangaBaka volume (work) data for a series.
    Only works with a non-empty "images" entry are retained in the cache.
    """

    # Page size requested from the works endpoint; a shorter page marks
    # the end of the listing.
    _PAGE_SIZE = 50

    def __init__(self, api_base_url: str = "https://api.mangabaka.dev/v1",
                 request_timeout: int = 30,
                 session: "requests.Session | None" = None):
        """
        api_base_url    : Base URL of the MangaBaka API (a trailing "/" is
                          removed).
        request_timeout : HTTP request timeout in seconds.
        session         : Optional reusable requests.Session.  A
                          "User-Agent" header is added only if the session
                          does not define one yet.
        """
        self.api_base_url = api_base_url.rstrip("/")
        self.request_timeout = request_timeout
        self._session = session or requests.Session()
        self._session.headers.setdefault("User-Agent",
                                         "MangaBakaWorksResolver/1.0")

        # Cache: series_id (str) -> list of work dicts (only those with covers)
        self._cache: dict[str, list[dict]] = {}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get_works(self, series_id: str) -> list[dict]:
        """
        Returns volume-level works for a series, filtered to those that
        have a non-empty "images" entry.  Results are cached per series.

        Pages through the API until a page comes back empty or shorter
        than the page size, collecting all works before applying the
        filter.

        If a request fails before anything was collected, an empty list is
        returned and nothing is cached.  If it fails after at least one
        page, the works collected so far are filtered, cached and returned.
        """
        if not series_id:
            return []

        if series_id in self._cache:
            return self._cache[series_id]

        all_works: list[dict] = []
        page = 1
        try:
            while True:
                resp = self._session.get(
                    f"{self.api_base_url}/series/{series_id}/works",
                    params={"limit": self._PAGE_SIZE, "page": page},
                    timeout=self.request_timeout,
                )
                resp.raise_for_status()
                page_data = resp.json().get("data") or []
                all_works.extend(page_data)
                # An empty or short page is the last one.
                if len(page_data) < self._PAGE_SIZE:
                    break
                page += 1
        except requests.RequestException:
            if not all_works:
                return []
            # NOTE(review): a partial listing is cached from here on and is
            # only refreshed after clear_cache() -- confirm this best-effort
            # behaviour is intended.

        # Discard works that carry no usable cover
        works_with_cover = [w for w in all_works
                            if isinstance(w, dict) and w.get("images")]
        self._cache[series_id] = works_with_cover
        return works_with_cover

    def get_work_for_volume(self, series_id: str, volume) -> "dict | None":
        """
        Returns the work dict for a specific volume number, or None.

        `volume` and each work's "sequence_string" are compared after
        normalisation with `_norm_vol` (e.g. "1.0" == "1").
        """
        target = _norm_vol(volume)
        for work in self.get_works(series_id):
            if _norm_vol(work.get("sequence_string")) == target:
                return work
        return None

    def get_cover_for_volume(self, series_id: str, volume) -> "str | None":
        """
        Returns the cover URL for a specific volume, or None if not found.

        The URL is taken from the "image" value of the first item in the
        work's "images" list.
        """
        work = self.get_work_for_volume(series_id, volume)
        if not work:
            return None
        images = work.get("images")
        # Guard the shape instead of indexing blindly: anything other than
        # a non-empty list of dicts simply means "no cover".
        first = images[0] if isinstance(images, (list, tuple)) and images else None
        if not isinstance(first, dict):
            return None
        return self._pick_cover_url(first.get("image"))

    def get_page_counts(self, series_id: str) -> "dict[str, int]":
        """
        Returns {volume_str: page_count} for the works of a series.
        Used by MangaDexVolumeResolver for chapter-to-volume estimation.

        Works without a volume, without a page count or with a page count
        that cannot be converted to int are left out.
        """
        result: dict[str, int] = {}
        for work in self.get_works(series_id):
            # NOTE(review): this reads "volume" while get_work_for_volume
            # matches on "sequence_string" -- confirm which field the API
            # actually provides.
            vol = _norm_vol(work.get("volume"))
            pages = work.get("pages")
            if not vol or pages is None:
                continue
            try:
                result[vol] = int(pages)
            except (TypeError, ValueError):
                continue
        return result

    def clear_cache(self) -> None:
        """Clears the internal works cache."""
        self._cache.clear()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _pick_cover_url(cover) -> "str | None":
        """
        Selects the best (raw-preferred) cover URL from a cover object.

        A non-empty string is returned unchanged.  For a dict the "url"
        of its "raw" entry is preferred; if that is unavailable the first
        value (or nested value) starting with "http" is used.  Returns
        None when nothing usable is found.
        """
        if not cover:
            return None
        if isinstance(cover, str):
            return cover
        if not isinstance(cover, dict):
            return None

        # "raw" may be absent or not a dict; only then is the generic
        # fallback below reachable at all.
        raw = cover.get("raw")
        if isinstance(raw, dict):
            url = raw.get("url")
            if url:
                return url

        # Generic fallback: any HTTP URL in the dict, one level deep
        for val in cover.values():
            pool = val.values() if isinstance(val, dict) else (val,)
            for item in pool:
                if isinstance(item, str) and item.startswith("http"):
                    return item
        return None
-------------------------------------------------------------------------- +def _norm_vol(value) -> str: + """Normalises a volume identifier: strips whitespace, removes trailing .0.""" + text = str(value or "").strip() + try: + f = float(text) + if f.is_integer(): + return str(int(f)) + except ValueError: + pass + return text diff --git a/src/MangadexVolumeResolver.py b/src/MangadexVolumeResolver.py new file mode 100644 index 0000000..61462dd --- /dev/null +++ b/src/MangadexVolumeResolver.py @@ -0,0 +1,346 @@ +""" +mangadex_volume_resolver.py +=========================== + +Resolves chapter numbers to their corresponding volumes (tankobon) using +the public MangaDex API. + +Background +---------- +The MangaBaka API only provides series-level data. MangaDex, however, +stores a volume attribute per chapter. The endpoint + + GET /manga/{id}/aggregate + +returns a chapter overview grouped by volume. This class encapsulates +that lookup so that `ComicInfoBuilder._determine_volume()` stays clean. + +All available translations are queried (no language filter on the +aggregate endpoint) so that chapters only published in non-English +languages are still covered. + +Chapter estimation +------------------ +When a chapter is not present in the MangaDex aggregate at all (e.g. +because it has never been uploaded to MangaDex in any language), the +`estimate_volume_for_chapter()` method infers the most likely volume by +examining the known chapter-to-volume boundaries on both sides of the +target chapter. If MangaBaka page-count data is supplied, the page-count +per chapter is used to estimate where a volume boundary falls within the +gap; otherwise a simple midpoint heuristic is used. + +Series relations +---------------- +`get_series_relations()` returns related manga titles keyed by MangaDex +relationship type ("main_story", "spin_off", "sequel", …). This is used +by `ComicInfoBuilder` to populate the `` element. 
+ +Dependencies +------------ + requests -> pip install requests +""" + +from __future__ import annotations + +import difflib +import re + +import requests + + +def _normalise_chapter(value) -> str: + """ + Converts a chapter number into a canonical comparison string. + + Examples: 1 -> "1" | 1.0 -> "1" | "01" -> "1" + 1.5 -> "1.5" | "1.50" -> "1.5" + """ + text = str(value).strip() + try: + number = float(text) + except ValueError: + return text.lower() + if number.is_integer(): + return str(int(number)) + return ("%f" % number).rstrip("0").rstrip(".") + + +class MangaDexVolumeResolver: + """ + Resolves chapter numbers to their volume numbers via the MangaDex API. + + Typical usage + ------------- + resolver = MangaDexVolumeResolver() + manga_id = resolver.find_manga_id("Yofukashi no Uta") + volume = resolver.volume_for_chapter(manga_id, 1) + """ + + def __init__(self, *, + base_url: str = "https://api.mangadex.org", + request_timeout: int = 30, + session: "requests.Session | None" = None): + """ + base_url : Base URL of the MangaDex API. + request_timeout : HTTP request timeout in seconds. + session : Optional reusable requests.Session. + """ + self.base_url = base_url.rstrip("/") + self.request_timeout = request_timeout + self._session = session or requests.Session() + self._session.headers.setdefault("User-Agent", + "MangaDexVolumeResolver/1.0") + # Cache: manga_id -> {chapter_number: volume} + self._cache: dict[str, dict] = {} + # Cache: manga_id -> {relation_type: [title, ...]} + self._relations_cache: dict[str, dict] = {} + + # ---------------------------------------------------------------------- + # Locate the manga ID + # ---------------------------------------------------------------------- + def find_manga_id(self, title: str) -> "str | None": + """ + Searches MangaDex for `title` and returns the best matching manga + ID, or None if no result is found. 
+ """ + if not title or not title.strip(): + return None + + resp = self._session.get( + f"{self.base_url}/manga", + params={"title": title, "limit": 5, + "contentRating[]": ["safe", "suggestive", + "erotica", "pornographic"]}, + timeout=self.request_timeout) + resp.raise_for_status() + results = resp.json().get("data") or [] + if not results: + return None + + def score(entry) -> float: + attrs = entry.get("attributes", {}) + names: list[str] = [] + names.extend(str(v) for v in (attrs.get("title") or {}).values()) + for alt in (attrs.get("altTitles") or []): + names.extend(str(v) for v in alt.values()) + best = 0.0 + for name in names: + ratio = difflib.SequenceMatcher( + None, title.lower(), name.lower()).ratio() + best = max(best, ratio) + return best + + results.sort(key=score, reverse=True) + return results[0].get("id") + + # ---------------------------------------------------------------------- + # Main function: retrieve and return volume / chapter data + # ---------------------------------------------------------------------- + def get_chapter_volume_map(self, manga_id: str, *, + use_cache: bool = True) -> dict: + """ + Retrieves the complete chapter-to-volume mapping for a series. + + All available languages are queried so that chapters only published + in non-English translations are still included. + + Returns: dict { chapter_number (str) : volume (str) or None } + Example: { "1": "1", "2": "1", "11": "2", "57": None } + + Chapters without a volume assignment are mapped to None. + """ + if not manga_id: + return {} + if use_cache and manga_id in self._cache: + return self._cache[manga_id] + + # No language filter: query all available translations so that every + # chapter appears in the aggregate, regardless of translation status. 
+ resp = self._session.get( + f"{self.base_url}/manga/{manga_id}/aggregate", + timeout=self.request_timeout) + resp.raise_for_status() + volumes = resp.json().get("volumes") or {} + + chapter_map: dict[str, "str | None"] = {} + for volume_key, volume_data in volumes.items(): + if str(volume_key).lower() in ("none", ""): + volume_value = None + else: + volume_value = str(volume_data.get("volume") or volume_key) + + for chapter_key in (volume_data.get("chapters") or {}): + chapter_map[_normalise_chapter(chapter_key)] = volume_value + + if use_cache: + self._cache[manga_id] = chapter_map + return chapter_map + + # ---------------------------------------------------------------------- + # Convenience: look up the volume for a single chapter number + # ---------------------------------------------------------------------- + def volume_for_chapter(self, manga_id: str, chapter, + *, use_cache: bool = True, + volume_page_counts: "dict | None" = None) -> "str | None": + """ + Returns the volume for the given chapter number. + + Falls back to `estimate_volume_for_chapter` when the chapter is not + directly present in the MangaDex aggregate. + + volume_page_counts : optional {volume_str: page_count} dict from + MangaBakaWorksResolver.get_page_counts(). + Improves estimation accuracy when provided. 
+ """ + chapter_map = self.get_chapter_volume_map(manga_id, use_cache=use_cache) + result = chapter_map.get(_normalise_chapter(chapter)) + if result is None and chapter_map: + result = self.estimate_volume_for_chapter( + chapter_map, chapter, volume_page_counts) + print(result) + return result + + # ---------------------------------------------------------------------- + # Chapter estimation for unmapped chapters + # ---------------------------------------------------------------------- + def estimate_volume_for_chapter(self, chapter_map: dict, chapter, + volume_page_counts: "dict | None" = None, + ) -> "str | None": + """ + Estimates the volume for a chapter that is absent from chapter_map. + + Algorithm + --------- + 1. Sort all chapters that have a known volume assignment. + 2. Find the nearest mapped chapters before and after the target. + 3. If both neighbors belong to the same volume -> return that volume. + 4. If they differ (volume boundary somewhere in the gap): + a. If page-count data is provided, estimate where the boundary + falls based on average pages-per-chapter and remaining page + budget of the left volume. + b. Otherwise use a midpoint heuristic (favour the left volume). + + Returns None if no suitable estimate can be made. 
+ """ + target = float(_normalise_chapter(chapter)) + + known = sorted( + [(float(k), v) for k, v in chapter_map.items() if v is not None], + key=lambda x: x[0], + ) + if not known: + return None + + # Insertion point: first index where known[i][0] > target + pos = next((i for i, (c, _) in enumerate(known) if c > target), + len(known)) + + if pos == 0: + return known[0][1] + if pos == len(known): + return known[-1][1] + + ch_left, vol_left = known[pos - 1] + ch_right, vol_right = known[pos] + + if vol_left == vol_right: + return vol_left + + # Volume boundary lies somewhere in (ch_left, ch_right) + vol_left_chapters = [c for c, v in known if v == vol_left] + + if volume_page_counts: + # Estimate average pages per chapter across all known volumes. + total_pages = sum(volume_page_counts.values()) + total_chapters = len(known) + avg_pages = total_pages / total_chapters if total_chapters else 20.0 + + left_vol_pages = volume_page_counts.get(vol_left) + if left_vol_pages: + expected_chaps = max(len(vol_left_chapters), + round(left_vol_pages / avg_pages)) + remaining_slots = expected_chaps - len(vol_left_chapters) + boundary = max(vol_left_chapters) + max(0, remaining_slots) + return vol_left if target <= boundary else vol_right + + # Fallback: use average volume size to estimate the boundary. + vol_sizes: dict[str, int] = {} + for _, v in known: + if v: + vol_sizes[v] = vol_sizes.get(v, 0) + 1 + avg_size = sum(vol_sizes.values()) / len(vol_sizes) if vol_sizes else 10.0 + boundary = ch_left + max(1.0, avg_size - len(vol_left_chapters)) + return vol_left if target <= boundary else vol_right + + # ---------------------------------------------------------------------- + # Related series (for SeriesGroup) + # ---------------------------------------------------------------------- + def get_series_relations(self, manga_id: str) -> "dict[str, list[str]]": + """ + Returns related manga titles grouped by relationship type. 
+ + Example return value: + {"main_story": ["Call of the Night"], "spin_off": ["Side Story A"]} + + The MangaDex `?includes[]=manga` parameter is used to embed + related manga attributes so their titles are available without + additional requests. + """ + if not manga_id: + return {} + + if manga_id in self._relations_cache: + return self._relations_cache[manga_id] + + try: + resp = self._session.get( + f"{self.base_url}/manga/{manga_id}", + params={"includes[]": "manga"}, + timeout=self.request_timeout, + ) + resp.raise_for_status() + data = resp.json().get("data") or {} + except requests.RequestException: + return {} + + relations: dict[str, list[str]] = {} + for rel in (data.get("relationships") or []): + if rel.get("type") != "manga": + continue + rel_type = rel.get("related") + if not rel_type: + continue + attrs = rel.get("attributes") or {} + if not attrs: + continue + titles: dict = attrs.get("title") or {} + # Prefer English, then romanized Japanese, then any available + title = (titles.get("en") + or titles.get("ja-ro") + or next(iter(titles.values()), None)) + if title: + relations.setdefault(rel_type, []).append(title) + + self._relations_cache[manga_id] = relations + return relations + + # ---------------------------------------------------------------------- + def clear_cache(self) -> None: + """Clears all internal caches.""" + self._cache.clear() + self._relations_cache.clear() + + +# -------------------------------------------------------------------------- +# Usage example +# -------------------------------------------------------------------------- +if __name__ == "__main__": + resolver = MangaDexVolumeResolver() + + mid = resolver.find_manga_id("Yofukashi no Uta") + print("MangaDex ID :", mid) + + if mid: + print("Volume for ch. 1 :", resolver.volume_for_chapter(mid, 66)) + print("Full chapter map :", resolver.get_chapter_volume_map(mid)) + print("Relations :", resolver.get_series_relations(mid))