345 lines
13 KiB
Python
345 lines
13 KiB
Python
"""
|
|
mangadex_volume_resolver.py
===========================

Resolves chapter numbers to their corresponding volumes (tankobon) using
the public MangaDex API.

Background
----------
The MangaBaka API only provides series-level data. MangaDex, however,
stores a volume attribute per chapter. The endpoint

    GET /manga/{id}/aggregate

returns a chapter overview grouped by volume. The `MangaDexVolumeResolver`
class encapsulates that lookup so that
`ComicInfoBuilder._determine_volume()` stays clean.

All available translations are queried (no language filter on the
aggregate endpoint) so that chapters only published in non-English
languages are still covered.

Chapter estimation
------------------
When a chapter is not present in the MangaDex aggregate at all (e.g.
because it has never been uploaded to MangaDex in any language), the
`estimate_volume_for_chapter()` method infers the most likely volume by
examining the known chapter-to-volume boundaries on both sides of the
target chapter. If MangaBaka page-count data is supplied, the page count
per chapter is used to estimate where a volume boundary falls within the
gap; otherwise the average number of known chapters per volume is used
to place the boundary.

Series relations
----------------
`get_series_relations()` returns related manga titles keyed by MangaDex
relationship type ("main_story", "spin_off", "sequel", …). This is used
by `ComicInfoBuilder` to populate the `<SeriesGroup>` element.

Dependencies
------------
    requests  ->  pip install requests
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import difflib
|
|
|
|
import requests
|
|
|
|
|
|
def _normalise_chapter(value) -> str:
|
|
"""
|
|
Converts a chapter number into a canonical comparison string.
|
|
|
|
Examples: 1 -> "1" | 1.0 -> "1" | "01" -> "1"
|
|
1.5 -> "1.5" | "1.50" -> "1.5"
|
|
"""
|
|
text = str(value).strip()
|
|
try:
|
|
number = float(text)
|
|
except ValueError:
|
|
return text.lower()
|
|
if number.is_integer():
|
|
return str(int(number))
|
|
return ("%f" % number).rstrip("0").rstrip(".")
|
|
|
|
|
|
class MangaDexVolumeResolver:
    """
    Resolves chapter numbers to their volume numbers via the MangaDex API.

    Typical usage
    -------------
        resolver = MangaDexVolumeResolver()
        manga_id = resolver.find_manga_id("Yofukashi no Uta")
        volume   = resolver.volume_for_chapter(manga_id, 1)
    """

    # User-Agent applied to HTTP sessions.
    _USER_AGENT = "MangaDexVolumeResolver/1.0"

    def __init__(self, *,
                 base_url: str = "https://api.mangadex.org",
                 request_timeout: int = 30,
                 session: "requests.Session | None" = None):
        """
        base_url        : Base URL of the MangaDex API.
        request_timeout : HTTP request timeout in seconds.
        session         : Optional reusable requests.Session. Its existing
                          User-Agent header, if any, is left untouched.
        """
        self.base_url = base_url.rstrip("/")
        self.request_timeout = request_timeout
        if session is None:
            self._session = requests.Session()
            # A fresh requests.Session already carries a default
            # "python-requests/x.y" User-Agent, so setdefault() would never
            # apply ours; assign it explicitly on sessions we own.
            self._session.headers["User-Agent"] = self._USER_AGENT
        else:
            self._session = session
            # Respect whatever the caller configured on their own session.
            self._session.headers.setdefault("User-Agent", self._USER_AGENT)
        # Cache: manga_id -> {chapter_number: volume}
        self._cache: dict[str, dict] = {}
        # Cache: manga_id -> {relation_type: [title, ...]}
        self._relations_cache: dict[str, dict] = {}

    # ----------------------------------------------------------------------
    # Locate the manga ID
    # ----------------------------------------------------------------------
    def find_manga_id(self, title: str) -> "str | None":
        """
        Searches MangaDex for `title` and returns the best matching manga
        ID, or None if no result is found.

        The first five search hits are ranked by the best fuzzy match
        (difflib ratio, case-insensitive) between `title` and any of the
        hit's main or alternative titles.

        Raises requests.HTTPError (via raise_for_status) on an HTTP error
        response; other request errors propagate unchanged.
        """
        if not title or not title.strip():
            return None
        query = title.strip()

        resp = self._session.get(
            f"{self.base_url}/manga",
            params={"title": query, "limit": 5,
                    "contentRating[]": ["safe", "suggestive",
                                        "erotica", "pornographic"]},
            timeout=self.request_timeout)
        resp.raise_for_status()
        results = resp.json().get("data") or []
        if not results:
            return None

        wanted = query.lower()

        def score(entry) -> float:
            # `attributes` may be missing or null; treat both as "no titles".
            attrs = entry.get("attributes") or {}
            names: list[str] = [
                str(v) for v in (attrs.get("title") or {}).values()]
            for alt in (attrs.get("altTitles") or []):
                if isinstance(alt, dict):
                    names.extend(str(v) for v in alt.values())
            return max(
                (difflib.SequenceMatcher(None, wanted, name.lower()).ratio()
                 for name in names),
                default=0.0)

        # max() keeps the first of equally scored hits, i.e. API order
        # breaks ties, and leaves the result list unmodified.
        return max(results, key=score).get("id")

    # ----------------------------------------------------------------------
    # Main function: retrieve and return volume / chapter data
    # ----------------------------------------------------------------------
    @staticmethod
    def _keyed_entries(container, key_field: str):
        """
        Yields (key, entry) pairs from a JSON collection.

        For a dict the pairs are its items. For a list, the key is taken
        from each dict entry's `key_field`. Any other shape yields nothing.
        """
        if isinstance(container, dict):
            yield from container.items()
        elif isinstance(container, list):
            # NOTE(review): defensive only - tolerates a list-shaped payload
            # in case the API serialises a collection as an array; confirm
            # against live responses.
            for entry in container:
                if isinstance(entry, dict):
                    yield entry.get(key_field), entry

    def get_chapter_volume_map(self, manga_id: str, *,
                               use_cache: bool = True) -> dict:
        """
        Retrieves the complete chapter-to-volume mapping for a series.

        All available languages are queried so that chapters only published
        in non-English translations are still included.

        Returns:  dict { chapter_number (str) : volume (str) or None }
        Example:  { "1": "1", "2": "1", "11": "2", "57": None }

        Chapters without a volume assignment are mapped to None. An empty
        dict is returned for a falsy `manga_id`. With `use_cache=True` the
        cached dict object itself is returned, and a fresh result is
        stored; with `use_cache=False` the cache is neither read nor
        written.

        Raises requests.HTTPError (via raise_for_status) on an HTTP error
        response.
        """
        if not manga_id:
            return {}
        if use_cache and manga_id in self._cache:
            return self._cache[manga_id]

        # No language filter: query all available translations so that every
        # chapter appears in the aggregate, regardless of translation status.
        resp = self._session.get(
            f"{self.base_url}/manga/{manga_id}/aggregate",
            timeout=self.request_timeout)
        resp.raise_for_status()
        volumes = resp.json().get("volumes") or {}

        chapter_map: dict[str, "str | None"] = {}
        for volume_key, volume_data in self._keyed_entries(volumes, "volume"):
            volume_data = volume_data or {}
            # The "none" (or empty) key groups chapters without a volume.
            if str(volume_key).lower() in ("none", ""):
                volume_value = None
            else:
                volume_value = str(volume_data.get("volume") or volume_key)

            chapters = volume_data.get("chapters") or {}
            for chapter_key, _ in self._keyed_entries(chapters, "chapter"):
                chapter_map[_normalise_chapter(chapter_key)] = volume_value

        if use_cache:
            self._cache[manga_id] = chapter_map
        return chapter_map

    # ----------------------------------------------------------------------
    # Convenience: look up the volume for a single chapter number
    # ----------------------------------------------------------------------
    def volume_for_chapter(self, manga_id: str, chapter,
                           *, use_cache: bool = True,
                           volume_page_counts: "dict | None" = None) -> "str | None":
        """
        Returns the volume for the given chapter number.

        Falls back to `estimate_volume_for_chapter` when the lookup yields
        no volume, i.e. the chapter is absent from the MangaDex aggregate
        or present without a volume assignment. No estimate is attempted
        when the chapter map is empty.

        volume_page_counts : optional {volume_str: page_count} dict from
                             MangaBakaWorksResolver.get_page_counts().
                             Improves estimation accuracy when provided.
        """
        chapter_map = self.get_chapter_volume_map(manga_id, use_cache=use_cache)
        result = chapter_map.get(_normalise_chapter(chapter))
        if result is None and chapter_map:
            result = self.estimate_volume_for_chapter(
                chapter_map, chapter, volume_page_counts)
        return result

    # ----------------------------------------------------------------------
    # Chapter estimation for unmapped chapters
    # ----------------------------------------------------------------------
    @staticmethod
    def _chapter_number(value) -> "float | None":
        """Parses a chapter label into a finite float; None if not numeric."""
        try:
            number = float(str(value).strip())
        except ValueError:
            return None
        # Reject NaN / infinity: they cannot be ordered among chapters.
        if number != number or number in (float("inf"), float("-inf")):
            return None
        return number

    @staticmethod
    def _average_pages_per_chapter(known, volume_page_counts) -> "float | None":
        """
        Average pages per known chapter, using only volumes that have both
        known chapters and a positive numeric page count. None if there is
        no such volume.
        """
        chapters_per_volume: dict = {}
        for _, volume in known:
            chapters_per_volume[volume] = chapters_per_volume.get(volume, 0) + 1

        pages = 0
        chapters = 0
        for volume, count in chapters_per_volume.items():
            volume_pages = volume_page_counts.get(volume)
            if isinstance(volume_pages, (int, float)) and volume_pages > 0:
                pages += volume_pages
                chapters += count
        return pages / chapters if chapters else None

    def estimate_volume_for_chapter(self, chapter_map: dict, chapter,
                                    volume_page_counts: "dict | None" = None,
                                    ) -> "str | None":
        """
        Estimates the volume for a chapter that is absent from chapter_map.

        Algorithm
        ---------
        1. Sort all numerically labelled chapters that have a known volume
           assignment (non-numeric labels such as "none" are ignored).
        2. Find the nearest mapped chapters before and after the target.
           If the target lies before the first or after the last known
           chapter, that chapter's volume is returned.
        3. If both neighbors belong to the same volume -> return that volume.
        4. If they differ (volume boundary somewhere in the gap):
           a. If page-count data is provided for the left volume, estimate
              how many chapters it should hold from the average pages per
              chapter and extend the left volume by the missing count.
           b. Otherwise extend the left volume up to the average number of
              known chapters per volume, but by at least one chapter
              (favouring the left volume).

        Returns None if no suitable estimate can be made, i.e. the target
        is not a finite number or no usable known chapter exists.
        """
        target = self._chapter_number(chapter)
        if target is None:
            return None

        known = []
        for key, volume in chapter_map.items():
            number = self._chapter_number(key)
            if number is not None and volume is not None:
                known.append((number, volume))
        if not known:
            return None
        known.sort(key=lambda pair: pair[0])

        # Insertion point: first index where known[i][0] > target
        pos = next((i for i, (number, _) in enumerate(known) if number > target),
                   len(known))

        if pos == 0:
            return known[0][1]
        if pos == len(known):
            return known[-1][1]

        ch_left, vol_left = known[pos - 1]
        _, vol_right = known[pos]

        if vol_left == vol_right:
            return vol_left

        # Volume boundary lies somewhere in (ch_left, ch_right)
        left_count = sum(1 for _, volume in known if volume == vol_left)

        if volume_page_counts:
            left_vol_pages = volume_page_counts.get(vol_left)
            if isinstance(left_vol_pages, (int, float)) and left_vol_pages > 0:
                avg_pages = self._average_pages_per_chapter(
                    known, volume_page_counts)
                if avg_pages:
                    expected_chaps = max(left_count,
                                         round(left_vol_pages / avg_pages))
                    # Anchor on ch_left: chapters of the left volume that
                    # lie beyond the gap must not move the boundary.
                    boundary = ch_left + (expected_chaps - left_count)
                    return vol_left if target <= boundary else vol_right

        # Fallback: use average volume size to estimate the boundary.
        vol_sizes: dict[str, int] = {}
        for _, volume in known:
            if volume:
                vol_sizes[volume] = vol_sizes.get(volume, 0) + 1
        avg_size = sum(vol_sizes.values()) / len(vol_sizes) if vol_sizes else 10.0
        boundary = ch_left + max(1.0, avg_size - left_count)
        return vol_left if target <= boundary else vol_right

    # ----------------------------------------------------------------------
    # Related series (for SeriesGroup)
    # ----------------------------------------------------------------------
    def get_series_relations(self, manga_id: str) -> "dict[str, list[str]]":
        """
        Returns related manga titles grouped by relationship type.

        Example return value:
            {"main_story": ["Call of the Night"], "spin_off": ["Side Story A"]}

        The MangaDex `?includes[]=manga` parameter is used to embed
        related manga attributes so their titles are available without
        additional requests.

        This lookup is best-effort: request failures and undecodable
        responses yield an empty dict (which is not cached) instead of
        raising. Successful results are cached per manga ID.
        """
        if not manga_id:
            return {}

        if manga_id in self._relations_cache:
            return self._relations_cache[manga_id]

        try:
            resp = self._session.get(
                f"{self.base_url}/manga/{manga_id}",
                params={"includes[]": "manga"},
                timeout=self.request_timeout,
            )
            resp.raise_for_status()
            data = resp.json().get("data") or {}
        except (requests.RequestException, ValueError):
            # ValueError covers JSON decoding failures on requests versions
            # whose decode error is not a RequestException.
            return {}

        relations: dict[str, list[str]] = {}
        for rel in (data.get("relationships") or []):
            if rel.get("type") != "manga":
                continue
            rel_type = rel.get("related")
            if not rel_type:
                continue
            # Without embedded attributes there is no title to report.
            attrs = rel.get("attributes") or {}
            if not attrs:
                continue
            titles: dict = attrs.get("title") or {}
            # Prefer English, then romanized Japanese, then any available
            title = (titles.get("en")
                     or titles.get("ja-ro")
                     or next(iter(titles.values()), None))
            if title:
                relations.setdefault(rel_type, []).append(title)

        self._relations_cache[manga_id] = relations
        return relations

    # ----------------------------------------------------------------------
    def clear_cache(self) -> None:
        """Clears all internal caches."""
        self._cache.clear()
        self._relations_cache.clear()
|
|
|
|
|
|
# --------------------------------------------------------------------------
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
    # Performs live requests against the MangaDex API.
    resolver = MangaDexVolumeResolver()

    mid = resolver.find_manga_id("Yofukashi no Uta")
    print("MangaDex ID       :", mid)

    if mid:
        # Keep the printed label and the queried chapter in sync.
        demo_chapter = 66
        print(f"Volume for ch. {demo_chapter} :",
              resolver.volume_for_chapter(mid, demo_chapter))
        print("Full chapter map  :", resolver.get_chapter_volume_map(mid))
        print("Relations         :", resolver.get_series_relations(mid))
|