Files
manga-mover-and-metadata-co…/src/manga/MangadexVolumeResolver.py
T
johannesbot 216771f709
Build and Deploy / build (push) Successful in 59s
Build and Deploy / deploy (push) Successful in 24s
merged ln metadata into manga mover
2026-06-14 10:47:47 +02:00

345 lines
13 KiB
Python

"""
mangadex_volume_resolver.py
===========================
Resolves chapter numbers to their corresponding volumes (tankobon) using
the public MangaDex API.
Background
----------
The MangaBaka API only provides series-level data. MangaDex, however,
stores a volume attribute per chapter. The endpoint
GET /manga/{id}/aggregate
returns a chapter overview grouped by volume. This class encapsulates
that lookup so that `ComicInfoBuilder._determine_volume()` stays clean.
All available translations are queried (no language filter on the
aggregate endpoint) so that chapters only published in non-English
languages are still covered.
Chapter estimation
------------------
When a chapter is not present in the MangaDex aggregate at all (e.g.
because it has never been uploaded to MangaDex in any language), the
`estimate_volume_for_chapter()` method infers the most likely volume by
examining the known chapter-to-volume boundaries on both sides of the
target chapter. If MangaBaka page-count data is supplied, the average page
count per chapter is used to estimate where a volume boundary falls within
the gap; otherwise the boundary is estimated from the average number of
known chapters per volume.
Series relations
----------------
`get_series_relations()` returns related manga titles keyed by MangaDex
relationship type ("main_story", "spin_off", "sequel", …). This is used
by `ComicInfoBuilder` to populate the `<SeriesGroup>` element.
Dependencies
------------
requests -> pip install requests
"""
from __future__ import annotations
import difflib
import requests
def _normalise_chapter(value) -> str:
"""
Converts a chapter number into a canonical comparison string.
Examples: 1 -> "1" | 1.0 -> "1" | "01" -> "1"
1.5 -> "1.5" | "1.50" -> "1.5"
"""
text = str(value).strip()
try:
number = float(text)
except ValueError:
return text.lower()
if number.is_integer():
return str(int(number))
return ("%f" % number).rstrip("0").rstrip(".")
class MangaDexVolumeResolver:
    """
    Resolves chapter numbers to their volume numbers via the MangaDex API.

    Typical usage
    -------------
        resolver = MangaDexVolumeResolver()
        manga_id = resolver.find_manga_id("Yofukashi no Uta")
        volume = resolver.volume_for_chapter(manga_id, 1)

    Chapter maps and series relations are cached per manga ID on the
    instance; call `clear_cache()` to discard them.
    """

    def __init__(self, *,
                 base_url: str = "https://api.mangadex.org",
                 request_timeout: int = 30,
                 session: "requests.Session | None" = None):
        """
        base_url        : Base URL of the MangaDex API (a trailing "/" is
                          stripped).
        request_timeout : HTTP request timeout in seconds.
        session         : Optional reusable requests.Session. A new one is
                          created when omitted.
        """
        self.base_url = base_url.rstrip("/")
        self.request_timeout = request_timeout
        self._session = session if session is not None else requests.Session()
        # setdefault: keep a User-Agent the caller already configured.
        self._session.headers.setdefault("User-Agent",
                                         "MangaDexVolumeResolver/1.0")
        # Cache: manga_id -> {chapter_number: volume}
        self._cache: dict[str, dict] = {}
        # Cache: manga_id -> {relation_type: [title, ...]}
        self._relations_cache: dict[str, dict] = {}

    # ----------------------------------------------------------------------
    # Internal helpers
    # ----------------------------------------------------------------------
    @staticmethod
    def _title_similarity(wanted: str, entry: dict) -> float:
        """
        Return the best similarity ratio (0.0 .. 1.0) between `wanted`
        (already lower-cased) and any main or alternative title of `entry`.
        """
        attrs = entry.get("attributes") or {}
        names = [str(v) for v in (attrs.get("title") or {}).values()]
        for alt in (attrs.get("altTitles") or []):
            names.extend(str(v) for v in alt.values())
        return max(
            (difflib.SequenceMatcher(None, wanted, name.lower()).ratio()
             for name in names),
            default=0.0,
        )

    @staticmethod
    def _chapter_as_float(value) -> "float | None":
        """Return `value` as a float, or None when it is not numeric."""
        try:
            return float(str(value).strip())
        except ValueError:
            return None

    # ----------------------------------------------------------------------
    # Locate the manga ID
    # ----------------------------------------------------------------------
    def find_manga_id(self, title: str) -> "str | None":
        """
        Searches MangaDex for `title` and returns the best matching manga
        ID, or None if `title` is blank or no result is found.

        The first five search results are compared against `title` by
        fuzzy string similarity over all of their main and alternative
        titles; the highest-scoring entry wins (the earliest result wins
        ties).

        HTTP error responses are raised via `raise_for_status()`.
        """
        if not title or not title.strip():
            return None
        resp = self._session.get(
            f"{self.base_url}/manga",
            params={"title": title, "limit": 5,
                    "contentRating[]": ["safe", "suggestive",
                                        "erotica", "pornographic"]},
            timeout=self.request_timeout)
        resp.raise_for_status()
        results = resp.json().get("data") or []
        if not results:
            return None
        wanted = title.lower()
        # max() returns the first maximal element, which matches the result
        # of a stable descending sort followed by taking element 0.
        best_match = max(
            results,
            key=lambda entry: self._title_similarity(wanted, entry))
        return best_match.get("id")

    # ----------------------------------------------------------------------
    # Main function: retrieve and return volume / chapter data
    # ----------------------------------------------------------------------
    def get_chapter_volume_map(self, manga_id: str, *,
                               use_cache: bool = True) -> dict:
        """
        Retrieves the complete chapter-to-volume mapping for a series.

        No language filter is sent, so that chapters only published in
        non-English translations are still included.

        manga_id  : MangaDex manga ID; a falsy value yields {}.
        use_cache : When True, a previously fetched map is returned and a
                    freshly fetched one is stored. When False, the cache is
                    neither read nor written.

        Returns: dict { chapter_number (str) : volume (str) or None }
        Example: { "1": "1", "2": "1", "11": "2", "57": None }
        Chapters without a volume assignment are mapped to None. When a
        chapter is listed both with and without a volume, the volume wins.

        HTTP error responses are raised via `raise_for_status()`.
        """
        if not manga_id:
            return {}
        if use_cache and manga_id in self._cache:
            return self._cache[manga_id]
        # No language filter: query all available translations so that every
        # chapter appears in the aggregate, regardless of translation status.
        resp = self._session.get(
            f"{self.base_url}/manga/{manga_id}/aggregate",
            timeout=self.request_timeout)
        resp.raise_for_status()
        volumes = resp.json().get("volumes") or {}
        chapter_map: dict[str, "str | None"] = {}
        for volume_key, volume_data in volumes.items():
            if str(volume_key).lower() in ("none", ""):
                volume_value = None
            else:
                volume_value = str(volume_data.get("volume") or volume_key)
            for chapter_key in (volume_data.get("chapters") or {}):
                chapter = _normalise_chapter(chapter_key)
                # A "no volume" listing must not erase a real assignment
                # recorded for the same chapter under another volume key;
                # otherwise the result would depend on iteration order.
                if (volume_value is None
                        and chapter_map.get(chapter) is not None):
                    continue
                chapter_map[chapter] = volume_value
        if use_cache:
            self._cache[manga_id] = chapter_map
        return chapter_map

    # ----------------------------------------------------------------------
    # Convenience: look up the volume for a single chapter number
    # ----------------------------------------------------------------------
    def volume_for_chapter(self, manga_id: str, chapter,
                           *, use_cache: bool = True,
                           volume_page_counts: "dict | None" = None) -> "str | None":
        """
        Returns the volume for the given chapter number.

        Falls back to `estimate_volume_for_chapter` when the lookup yields
        no volume, i.e. the chapter is missing from the MangaDex aggregate
        or is listed there without a volume. No estimate is attempted when
        the aggregate is empty.

        volume_page_counts : optional {volume_str: page_count} dict from
                             MangaBakaWorksResolver.get_page_counts().
                             Improves estimation accuracy when provided.
        """
        chapter_map = self.get_chapter_volume_map(manga_id, use_cache=use_cache)
        result = chapter_map.get(_normalise_chapter(chapter))
        if result is None and chapter_map:
            result = self.estimate_volume_for_chapter(
                chapter_map, chapter, volume_page_counts)
        return result

    # ----------------------------------------------------------------------
    # Chapter estimation for unmapped chapters
    # ----------------------------------------------------------------------
    def estimate_volume_for_chapter(self, chapter_map: dict, chapter,
                                    volume_page_counts: "dict | None" = None,
                                    ) -> "str | None":
        """
        Estimates the volume for a chapter that has no volume in chapter_map.

        Algorithm
        ---------
        1. Sort all numeric chapters that have a known volume assignment.
           Non-numeric chapter labels are ignored.
        2. Find the nearest mapped chapters before and after the target.
           If the target lies before the first / after the last mapped
           chapter, the first / last known volume is returned.
        3. If both neighbors belong to the same volume -> return that volume.
        4. If they differ (volume boundary somewhere in the gap):
           a. If page-count data is provided for the left volume, estimate
              how many chapters that volume should hold from the average
              pages-per-chapter, and extend the left volume by the slots
              that are still unfilled.
           b. Otherwise assume the left volume holds the average number of
              known chapters per volume, and always attribute at least one
              chapter past the left neighbor to the left volume.

        Returns None if the target chapter is not numeric or no chapter
        with a known volume is available.
        """
        target = self._chapter_as_float(chapter)
        if target is None:
            return None

        known = []
        for key, volume in chapter_map.items():
            if volume is None:
                continue
            number = self._chapter_as_float(key)
            # Textual chapter labels cannot be placed on the number line;
            # skipping them keeps one odd key from aborting the estimate.
            if number is not None:
                known.append((number, volume))
        known.sort(key=lambda pair: pair[0])
        if not known:
            return None

        # Insertion point: first index where known[i][0] > target
        pos = next((i for i, (c, _) in enumerate(known) if c > target),
                   len(known))
        if pos == 0:
            return known[0][1]
        if pos == len(known):
            return known[-1][1]

        ch_left, vol_left = known[pos - 1]
        _, vol_right = known[pos]
        if vol_left == vol_right:
            return vol_left

        # Volume boundary lies somewhere in (ch_left, ch_right)
        vol_left_chapters = [c for c, v in known if v == vol_left]
        if volume_page_counts:
            # Estimate average pages per chapter across all known volumes.
            # Empty entries (None / 0) carry no information and are skipped.
            total_pages = sum(p for p in volume_page_counts.values() if p)
            avg_pages = total_pages / len(known)
            left_vol_pages = volume_page_counts.get(vol_left)
            if left_vol_pages and avg_pages > 0:
                expected_chaps = max(len(vol_left_chapters),
                                     round(left_vol_pages / avg_pages))
                remaining_slots = expected_chaps - len(vol_left_chapters)
                boundary = max(vol_left_chapters) + max(0, remaining_slots)
                return vol_left if target <= boundary else vol_right

        # Fallback: use average volume size to estimate the boundary.
        vol_sizes: dict[str, int] = {}
        for _, v in known:
            if v:
                vol_sizes[v] = vol_sizes.get(v, 0) + 1
        avg_size = sum(vol_sizes.values()) / len(vol_sizes) if vol_sizes else 10.0
        # max(1.0, ...) keeps the chapter right after the left neighbor in
        # the left volume even when that volume already looks "full".
        boundary = ch_left + max(1.0, avg_size - len(vol_left_chapters))
        return vol_left if target <= boundary else vol_right

    # ----------------------------------------------------------------------
    # Related series (for SeriesGroup)
    # ----------------------------------------------------------------------
    def get_series_relations(self, manga_id: str) -> "dict[str, list[str]]":
        """
        Returns related manga titles grouped by relationship type.

        Example return value:
            {"main_story": ["Call of the Night"], "spin_off": ["Side Story A"]}

        The MangaDex `?includes[]=manga` parameter is used to embed
        related manga attributes so their titles are available without
        additional requests.

        This lookup is best-effort: a falsy `manga_id` or a failed request
        (`requests.RequestException`) yields {}, and failures are not cached.
        """
        if not manga_id:
            return {}
        if manga_id in self._relations_cache:
            return self._relations_cache[manga_id]
        try:
            resp = self._session.get(
                f"{self.base_url}/manga/{manga_id}",
                params={"includes[]": "manga"},
                timeout=self.request_timeout,
            )
            resp.raise_for_status()
            data = resp.json().get("data") or {}
        except requests.RequestException:
            return {}
        relations: dict[str, list[str]] = {}
        for rel in (data.get("relationships") or []):
            # Only manga-to-manga relationships are of interest here.
            if rel.get("type") != "manga":
                continue
            rel_type = rel.get("related")
            if not rel_type:
                continue
            # Without embedded attributes there is no title to report.
            attrs = rel.get("attributes") or {}
            if not attrs:
                continue
            titles: dict = attrs.get("title") or {}
            # Prefer English, then romanized Japanese, then any available
            title = (titles.get("en")
                     or titles.get("ja-ro")
                     or next(iter(titles.values()), None))
            if title:
                relations.setdefault(rel_type, []).append(title)
        self._relations_cache[manga_id] = relations
        return relations

    # ----------------------------------------------------------------------
    def clear_cache(self) -> None:
        """Clears all internal caches."""
        self._cache.clear()
        self._relations_cache.clear()
# --------------------------------------------------------------------------
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
    # Manual smoke test: performs live requests against the MangaDex API.
    resolver = MangaDexVolumeResolver()
    mid = resolver.find_manga_id("Yofukashi no Uta")
    print("MangaDex ID :", mid)
    if mid:
        demo_chapter = 66
        # The label is built from the queried chapter so the printed
        # chapter number always matches the one actually looked up.
        print(f"Volume for ch. {demo_chapter} :",
              resolver.volume_for_chapter(mid, demo_chapter))
        print("Full chapter map :", resolver.get_chapter_volume_map(mid))
        print("Relations :", resolver.get_series_relations(mid))