""" mangadex_volume_resolver.py =========================== Resolves chapter numbers to their corresponding volumes (tankobon) using the public MangaDex API. Background ---------- The MangaBaka API only provides series-level data. MangaDex, however, stores a volume attribute per chapter. The endpoint GET /manga/{id}/aggregate returns a chapter overview grouped by volume. This class encapsulates that lookup so that `ComicInfoBuilder._determine_volume()` stays clean. All available translations are queried (no language filter on the aggregate endpoint) so that chapters only published in non-English languages are still covered. Chapter estimation ------------------ When a chapter is not present in the MangaDex aggregate at all (e.g. because it has never been uploaded to MangaDex in any language), the `estimate_volume_for_chapter()` method infers the most likely volume by examining the known chapter-to-volume boundaries on both sides of the target chapter. If MangaBaka page-count data is supplied, the page-count per chapter is used to estimate where a volume boundary falls within the gap; otherwise a simple midpoint heuristic is used. Series relations ---------------- `get_series_relations()` returns related manga titles keyed by MangaDex relationship type ("main_story", "spin_off", "sequel", …). This is used by `ComicInfoBuilder` to populate the `` element. Dependencies ------------ requests -> pip install requests """ from __future__ import annotations import difflib import requests def _normalise_chapter(value) -> str: """ Converts a chapter number into a canonical comparison string. Examples: 1 -> "1" | 1.0 -> "1" | "01" -> "1" 1.5 -> "1.5" | "1.50" -> "1.5" """ text = str(value).strip() try: number = float(text) except ValueError: return text.lower() if number.is_integer(): return str(int(number)) return ("%f" % number).rstrip("0").rstrip(".") class MangaDexVolumeResolver: """ Resolves chapter numbers to their volume numbers via the MangaDex API. 
Typical usage ------------- resolver = MangaDexVolumeResolver() manga_id = resolver.find_manga_id("Yofukashi no Uta") volume = resolver.volume_for_chapter(manga_id, 1) """ def __init__(self, *, base_url: str = "https://api.mangadex.org", request_timeout: int = 30, session: "requests.Session | None" = None): """ base_url : Base URL of the MangaDex API. request_timeout : HTTP request timeout in seconds. session : Optional reusable requests.Session. """ self.base_url = base_url.rstrip("/") self.request_timeout = request_timeout self._session = session or requests.Session() self._session.headers.setdefault("User-Agent", "MangaDexVolumeResolver/1.0") # Cache: manga_id -> {chapter_number: volume} self._cache: dict[str, dict] = {} # Cache: manga_id -> {relation_type: [title, ...]} self._relations_cache: dict[str, dict] = {} # ---------------------------------------------------------------------- # Locate the manga ID # ---------------------------------------------------------------------- def find_manga_id(self, title: str) -> "str | None": """ Searches MangaDex for `title` and returns the best matching manga ID, or None if no result is found. 
""" if not title or not title.strip(): return None resp = self._session.get( f"{self.base_url}/manga", params={"title": title, "limit": 5, "contentRating[]": ["safe", "suggestive", "erotica", "pornographic"]}, timeout=self.request_timeout) resp.raise_for_status() results = resp.json().get("data") or [] if not results: return None def score(entry) -> float: attrs = entry.get("attributes", {}) names: list[str] = [] names.extend(str(v) for v in (attrs.get("title") or {}).values()) for alt in (attrs.get("altTitles") or []): names.extend(str(v) for v in alt.values()) best = 0.0 for name in names: ratio = difflib.SequenceMatcher( None, title.lower(), name.lower()).ratio() best = max(best, ratio) return best results.sort(key=score, reverse=True) return results[0].get("id") # ---------------------------------------------------------------------- # Main function: retrieve and return volume / chapter data # ---------------------------------------------------------------------- def get_chapter_volume_map(self, manga_id: str, *, use_cache: bool = True) -> dict: """ Retrieves the complete chapter-to-volume mapping for a series. All available languages are queried so that chapters only published in non-English translations are still included. Returns: dict { chapter_number (str) : volume (str) or None } Example: { "1": "1", "2": "1", "11": "2", "57": None } Chapters without a volume assignment are mapped to None. """ if not manga_id: return {} if use_cache and manga_id in self._cache: return self._cache[manga_id] # No language filter: query all available translations so that every # chapter appears in the aggregate, regardless of translation status. 
resp = self._session.get( f"{self.base_url}/manga/{manga_id}/aggregate", timeout=self.request_timeout) resp.raise_for_status() volumes = resp.json().get("volumes") or {} chapter_map: dict[str, "str | None"] = {} for volume_key, volume_data in volumes.items(): if str(volume_key).lower() in ("none", ""): volume_value = None else: volume_value = str(volume_data.get("volume") or volume_key) for chapter_key in (volume_data.get("chapters") or {}): chapter_map[_normalise_chapter(chapter_key)] = volume_value if use_cache: self._cache[manga_id] = chapter_map return chapter_map # ---------------------------------------------------------------------- # Convenience: look up the volume for a single chapter number # ---------------------------------------------------------------------- def volume_for_chapter(self, manga_id: str, chapter, *, use_cache: bool = True, volume_page_counts: "dict | None" = None) -> "str | None": """ Returns the volume for the given chapter number. Falls back to `estimate_volume_for_chapter` when the chapter is not directly present in the MangaDex aggregate. volume_page_counts : optional {volume_str: page_count} dict from MangaBakaWorksResolver.get_page_counts(). Improves estimation accuracy when provided. """ chapter_map = self.get_chapter_volume_map(manga_id, use_cache=use_cache) result = chapter_map.get(_normalise_chapter(chapter)) if result is None and chapter_map: result = self.estimate_volume_for_chapter( chapter_map, chapter, volume_page_counts) return result # ---------------------------------------------------------------------- # Chapter estimation for unmapped chapters # ---------------------------------------------------------------------- def estimate_volume_for_chapter(self, chapter_map: dict, chapter, volume_page_counts: "dict | None" = None, ) -> "str | None": """ Estimates the volume for a chapter that is absent from chapter_map. Algorithm --------- 1. Sort all chapters that have a known volume assignment. 2. 
Find the nearest mapped chapters before and after the target. 3. If both neighbors belong to the same volume -> return that volume. 4. If they differ (volume boundary somewhere in the gap): a. If page-count data is provided, estimate where the boundary falls based on average pages-per-chapter and remaining page budget of the left volume. b. Otherwise use a midpoint heuristic (favour the left volume). Returns None if no suitable estimate can be made. """ target = float(_normalise_chapter(chapter)) known = sorted( [(float(k), v) for k, v in chapter_map.items() if v is not None], key=lambda x: x[0], ) if not known: return None # Insertion point: first index where known[i][0] > target pos = next((i for i, (c, _) in enumerate(known) if c > target), len(known)) if pos == 0: return known[0][1] if pos == len(known): return known[-1][1] ch_left, vol_left = known[pos - 1] ch_right, vol_right = known[pos] if vol_left == vol_right: return vol_left # Volume boundary lies somewhere in (ch_left, ch_right) vol_left_chapters = [c for c, v in known if v == vol_left] if volume_page_counts: # Estimate average pages per chapter across all known volumes. total_pages = sum(volume_page_counts.values()) total_chapters = len(known) avg_pages = total_pages / total_chapters if total_chapters else 20.0 left_vol_pages = volume_page_counts.get(vol_left) if left_vol_pages: expected_chaps = max(len(vol_left_chapters), round(left_vol_pages / avg_pages)) remaining_slots = expected_chaps - len(vol_left_chapters) boundary = max(vol_left_chapters) + max(0, remaining_slots) return vol_left if target <= boundary else vol_right # Fallback: use average volume size to estimate the boundary. 
vol_sizes: dict[str, int] = {} for _, v in known: if v: vol_sizes[v] = vol_sizes.get(v, 0) + 1 avg_size = sum(vol_sizes.values()) / len(vol_sizes) if vol_sizes else 10.0 boundary = ch_left + max(1.0, avg_size - len(vol_left_chapters)) return vol_left if target <= boundary else vol_right # ---------------------------------------------------------------------- # Related series (for SeriesGroup) # ---------------------------------------------------------------------- def get_series_relations(self, manga_id: str) -> "dict[str, list[str]]": """ Returns related manga titles grouped by relationship type. Example return value: {"main_story": ["Call of the Night"], "spin_off": ["Side Story A"]} The MangaDex `?includes[]=manga` parameter is used to embed related manga attributes so their titles are available without additional requests. """ if not manga_id: return {} if manga_id in self._relations_cache: return self._relations_cache[manga_id] try: resp = self._session.get( f"{self.base_url}/manga/{manga_id}", params={"includes[]": "manga"}, timeout=self.request_timeout, ) resp.raise_for_status() data = resp.json().get("data") or {} except requests.RequestException: return {} relations: dict[str, list[str]] = {} for rel in (data.get("relationships") or []): if rel.get("type") != "manga": continue rel_type = rel.get("related") if not rel_type: continue attrs = rel.get("attributes") or {} if not attrs: continue titles: dict = attrs.get("title") or {} # Prefer English, then romanized Japanese, then any available title = (titles.get("en") or titles.get("ja-ro") or next(iter(titles.values()), None)) if title: relations.setdefault(rel_type, []).append(title) self._relations_cache[manga_id] = relations return relations # ---------------------------------------------------------------------- def clear_cache(self) -> None: """Clears all internal caches.""" self._cache.clear() self._relations_cache.clear() # -------------------------------------------------------------------------- # 
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
    # Small manual smoke test against the live MangaDex API (needs network).
    resolver = MangaDexVolumeResolver()
    example_chapter = 66

    mid = resolver.find_manga_id("Yofukashi no Uta")
    print("MangaDex ID :", mid)
    if mid:
        # The label is derived from the queried chapter so the two cannot
        # drift apart (the label used to say "ch. 1" while asking for 66).
        print(f"Volume for ch. {example_chapter} :",
              resolver.volume_for_chapter(mid, example_chapter))
        print("Full chapter map :", resolver.get_chapter_volume_map(mid))
        print("Relations :", resolver.get_series_relations(mid))