merged ln metadata into manga mover
This commit is contained in:
@@ -0,0 +1,344 @@
|
||||
"""
|
||||
mangadex_volume_resolver.py
|
||||
===========================
|
||||
|
||||
Resolves chapter numbers to their corresponding volumes (tankobon) using
|
||||
the public MangaDex API.
|
||||
|
||||
Background
|
||||
----------
|
||||
The MangaBaka API only provides series-level data. MangaDex, however,
|
||||
stores a volume attribute per chapter. The endpoint
|
||||
|
||||
GET /manga/{id}/aggregate
|
||||
|
||||
returns a chapter overview grouped by volume. This class encapsulates
|
||||
that lookup so that `ComicInfoBuilder._determine_volume()` stays clean.
|
||||
|
||||
All available translations are queried (no language filter on the
|
||||
aggregate endpoint) so that chapters only published in non-English
|
||||
languages are still covered.
|
||||
|
||||
Chapter estimation
|
||||
------------------
|
||||
When a chapter is not present in the MangaDex aggregate at all (e.g.
|
||||
because it has never been uploaded to MangaDex in any language), the
|
||||
`estimate_volume_for_chapter()` method infers the most likely volume by
|
||||
examining the known chapter-to-volume boundaries on both sides of the
|
||||
target chapter. If MangaBaka page-count data is supplied, the page-count
|
||||
per chapter is used to estimate where a volume boundary falls within the
|
||||
gap; otherwise the average number of known chapters per volume is used.
|
||||
|
||||
Series relations
|
||||
----------------
|
||||
`get_series_relations()` returns related manga titles keyed by MangaDex
|
||||
relationship type ("main_story", "spin_off", "sequel", …). This is used
|
||||
by `ComicInfoBuilder` to populate the `<SeriesGroup>` element.
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
requests -> pip install requests
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import difflib
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def _normalise_chapter(value) -> str:
|
||||
"""
|
||||
Converts a chapter number into a canonical comparison string.
|
||||
|
||||
Examples: 1 -> "1" | 1.0 -> "1" | "01" -> "1"
|
||||
1.5 -> "1.5" | "1.50" -> "1.5"
|
||||
"""
|
||||
text = str(value).strip()
|
||||
try:
|
||||
number = float(text)
|
||||
except ValueError:
|
||||
return text.lower()
|
||||
if number.is_integer():
|
||||
return str(int(number))
|
||||
return ("%f" % number).rstrip("0").rstrip(".")
|
||||
|
||||
|
||||
class MangaDexVolumeResolver:
    """
    Resolves chapter numbers to their volume numbers via the MangaDex API.

    Results of the aggregate and relation lookups are cached per manga ID
    on the instance; call `clear_cache()` to drop them.

    Typical usage
    -------------
        resolver = MangaDexVolumeResolver()
        manga_id = resolver.find_manga_id("Yofukashi no Uta")
        volume   = resolver.volume_for_chapter(manga_id, 1)
    """

    _USER_AGENT = "MangaDexVolumeResolver/1.0"

    def __init__(self, *,
                 base_url: str = "https://api.mangadex.org",
                 request_timeout: int = 30,
                 session: "requests.Session | None" = None):
        """
        base_url        : Base URL of the MangaDex API (trailing "/" removed).
        request_timeout : HTTP request timeout in seconds.
        session         : Optional reusable requests.Session. A caller-supplied
                          session keeps its own User-Agent if it has one.
        """
        self.base_url = base_url.rstrip("/")
        self.request_timeout = request_timeout

        if session is None:
            session = requests.Session()
            # A fresh requests.Session already carries a default
            # "python-requests/x.y" User-Agent, so setdefault() would never
            # apply ours; set it explicitly on sessions we create.
            session.headers["User-Agent"] = self._USER_AGENT
        else:
            # Respect whatever the caller configured on their own session.
            session.headers.setdefault("User-Agent", self._USER_AGENT)
        self._session = session

        # Cache: manga_id -> {chapter_number: volume}
        self._cache: dict[str, dict] = {}
        # Cache: manga_id -> {relation_type: [title, ...]}
        self._relations_cache: dict[str, dict] = {}

    # ----------------------------------------------------------------------
    # Internal helpers
    # ----------------------------------------------------------------------
    @staticmethod
    def _as_number(value) -> "float | None":
        """Returns `value` as a finite float, or None if it is not numeric."""
        try:
            number = float(str(value).strip())
        except ValueError:
            return None
        # NaN / infinity cannot be ordered meaningfully among chapters.
        if number != number or number in (float("inf"), float("-inf")):
            return None
        return number

    @staticmethod
    def _keyed_entries(payload, key_field: str):
        """
        Yields (key, entry) pairs from a mapping- or list-shaped payload.

        For a dict the pairs are its items. For a list, each dict element is
        paired with its own `key_field` value. Any other shape yields nothing.

        NOTE(review): list-shaped payloads are handled defensively; confirm
        against the MangaDex API whether they actually occur.
        """
        if isinstance(payload, dict):
            yield from payload.items()
        elif isinstance(payload, list):
            for entry in payload:
                if isinstance(entry, dict):
                    yield entry.get(key_field), entry

    # ----------------------------------------------------------------------
    # Locate the manga ID
    # ----------------------------------------------------------------------
    def find_manga_id(self, title: str) -> "str | None":
        """
        Searches MangaDex for `title` and returns the best matching manga
        ID, or None if `title` is blank or no result is found.

        The first five search results are ranked by the highest
        difflib similarity between `title` and any of their main or
        alternative titles (case-insensitive).

        Raises requests.HTTPError (via raise_for_status) on a failed request.
        """
        if not title or not title.strip():
            return None

        resp = self._session.get(
            f"{self.base_url}/manga",
            params={"title": title, "limit": 5,
                    "contentRating[]": ["safe", "suggestive",
                                        "erotica", "pornographic"]},
            timeout=self.request_timeout)
        resp.raise_for_status()
        results = resp.json().get("data") or []
        if not results:
            return None

        wanted = title.lower()

        def score(entry) -> float:
            attrs = entry.get("attributes") or {}
            names = [str(v) for v in (attrs.get("title") or {}).values()]
            for alt in (attrs.get("altTitles") or []):
                names.extend(str(v) for v in alt.values())
            return max(
                (difflib.SequenceMatcher(None, wanted, name.lower()).ratio()
                 for name in names),
                default=0.0)

        # max() returns the first of equally scored entries, i.e. the one
        # the API ranked higher.
        return max(results, key=score).get("id")

    # ----------------------------------------------------------------------
    # Main function: retrieve and return volume / chapter data
    # ----------------------------------------------------------------------
    def get_chapter_volume_map(self, manga_id: str, *,
                               use_cache: bool = True) -> dict:
        """
        Retrieves the complete chapter-to-volume mapping for a series.

        No language filter is sent, so the aggregate is not restricted to
        a single translation language.

        Returns: dict { chapter_number (str) : volume (str) or None }
        Example: { "1": "1", "2": "1", "11": "2", "57": None }

        Chapters without a volume assignment are mapped to None. An empty
        `manga_id` yields {} without a request. With `use_cache=True` the
        cached dict object itself is returned (not a copy).

        Raises requests.HTTPError (via raise_for_status) on a failed request.
        """
        if not manga_id:
            return {}
        if use_cache and manga_id in self._cache:
            return self._cache[manga_id]

        resp = self._session.get(
            f"{self.base_url}/manga/{manga_id}/aggregate",
            timeout=self.request_timeout)
        resp.raise_for_status()
        volumes = resp.json().get("volumes") or {}

        chapter_map: dict[str, "str | None"] = {}
        for volume_key, volume_data in self._keyed_entries(volumes, "volume"):
            if not isinstance(volume_data, dict):
                continue
            if str(volume_key).lower() in ("none", ""):
                # Bucket of chapters that have no volume assigned.
                volume_value = None
            else:
                volume_value = str(volume_data.get("volume") or volume_key)

            chapters = volume_data.get("chapters")
            for chapter_key, _ in self._keyed_entries(chapters, "chapter"):
                if chapter_key is None:
                    continue
                chapter_map[_normalise_chapter(chapter_key)] = volume_value

        if use_cache:
            self._cache[manga_id] = chapter_map
        return chapter_map

    # ----------------------------------------------------------------------
    # Convenience: look up the volume for a single chapter number
    # ----------------------------------------------------------------------
    def volume_for_chapter(self, manga_id: str, chapter,
                           *, use_cache: bool = True,
                           volume_page_counts: "dict | None" = None) -> "str | None":
        """
        Returns the volume for the given chapter number.

        Falls back to `estimate_volume_for_chapter` when the lookup yields
        None, i.e. the chapter is absent from the MangaDex aggregate or is
        listed without a volume, provided the aggregate is not empty.

        volume_page_counts : optional {volume_str: page_count} dict from
                             MangaBakaWorksResolver.get_page_counts().
                             Improves estimation accuracy when provided.
        """
        chapter_map = self.get_chapter_volume_map(manga_id, use_cache=use_cache)
        result = chapter_map.get(_normalise_chapter(chapter))
        if result is None and chapter_map:
            result = self.estimate_volume_for_chapter(
                chapter_map, chapter, volume_page_counts)
        return result

    # ----------------------------------------------------------------------
    # Chapter estimation for unmapped chapters
    # ----------------------------------------------------------------------
    def estimate_volume_for_chapter(self, chapter_map: dict, chapter,
                                    volume_page_counts: "dict | None" = None,
                                    ) -> "str | None":
        """
        Estimates the volume for a chapter that has no volume in chapter_map.

        Algorithm
        ---------
        1. Sort all numerically labelled chapters that have a known volume
           (non-numeric labels such as "none" are ignored).
        2. Find the nearest mapped chapters before and after the target.
           If the target lies before the first / after the last known
           chapter, return that chapter's volume.
        3. If both neighbors belong to the same volume -> return that volume.
        4. If they differ (volume boundary somewhere in the gap):
           a. If page-count data is provided for the left volume, estimate
              how many chapters that volume should hold from the average
              pages per known chapter, and place the boundary after the
              missing ones.
           b. Otherwise assume the left volume holds the average number of
              known chapters per volume (at least one more chapter).

        Returns None if `chapter` is not numeric or no chapter in
        chapter_map has both a numeric label and a volume.
        """
        target = self._as_number(chapter)
        if target is None:
            return None

        known = []
        for key, volume in chapter_map.items():
            if volume is None:
                continue
            number = self._as_number(key)
            if number is not None:
                known.append((number, volume))
        if not known:
            return None
        known.sort(key=lambda pair: pair[0])

        # Insertion point: first index where known[i][0] > target
        pos = next((i for i, (c, _) in enumerate(known) if c > target),
                   len(known))

        if pos == 0:
            return known[0][1]
        if pos == len(known):
            return known[-1][1]

        ch_left, vol_left = known[pos - 1]
        _, vol_right = known[pos]

        if vol_left == vol_right:
            return vol_left

        # Volume boundary lies somewhere in (ch_left, ch_right)
        vol_left_chapters = [c for c, v in known if v == vol_left]

        # Number of known chapters per volume.
        vol_sizes: dict[str, int] = {}
        for _, v in known:
            if v:
                vol_sizes[v] = vol_sizes.get(v, 0) + 1

        if volume_page_counts:
            # Average pages per chapter, using only volumes for which both a
            # usable page count and known chapters exist, so numerator and
            # denominator describe the same volumes.
            pages = 0
            chapters = 0
            for v, count in vol_sizes.items():
                page_count = volume_page_counts.get(v)
                if isinstance(page_count, (int, float)) and page_count > 0:
                    pages += page_count
                    chapters += count

            left_vol_pages = volume_page_counts.get(vol_left)
            if (chapters
                    and isinstance(left_vol_pages, (int, float))
                    and left_vol_pages > 0):
                avg_pages = pages / chapters
                expected_chaps = max(len(vol_left_chapters),
                                     round(left_vol_pages / avg_pages))
                remaining_slots = expected_chaps - len(vol_left_chapters)
                boundary = max(vol_left_chapters) + max(0, remaining_slots)
                return vol_left if target <= boundary else vol_right

        # Fallback: use average volume size to estimate the boundary.
        avg_size = sum(vol_sizes.values()) / len(vol_sizes) if vol_sizes else 10.0
        boundary = ch_left + max(1.0, avg_size - len(vol_left_chapters))
        return vol_left if target <= boundary else vol_right

    # ----------------------------------------------------------------------
    # Related series (for SeriesGroup)
    # ----------------------------------------------------------------------
    def get_series_relations(self, manga_id: str) -> "dict[str, list[str]]":
        """
        Returns related manga titles grouped by relationship type.

        Example return value:
            {"main_story": ["Call of the Night"], "spin_off": ["Side Story A"]}

        The MangaDex `?includes[]=manga` parameter is used to embed
        related manga attributes so their titles are available without
        additional requests.

        This lookup is best-effort: a failed request or an undecodable
        response yields {} (not cached) instead of raising. Successful
        results are cached per manga ID.
        """
        if not manga_id:
            return {}

        if manga_id in self._relations_cache:
            return self._relations_cache[manga_id]

        try:
            resp = self._session.get(
                f"{self.base_url}/manga/{manga_id}",
                params={"includes[]": "manga"},
                timeout=self.request_timeout,
            )
            resp.raise_for_status()
            data = resp.json().get("data") or {}
        except (requests.RequestException, ValueError):
            # ValueError covers JSON decode errors that are not raised as a
            # RequestException subclass.
            return {}

        relations: dict[str, list[str]] = {}
        for rel in (data.get("relationships") or []):
            if rel.get("type") != "manga":
                continue
            rel_type = rel.get("related")
            attrs = rel.get("attributes") or {}
            if not rel_type or not attrs:
                # Skip entries with no relation kind or no embedded attributes.
                continue
            titles: dict = attrs.get("title") or {}
            # Prefer English, then romanized Japanese, then any available
            title = (titles.get("en")
                     or titles.get("ja-ro")
                     or next(iter(titles.values()), None))
            if title:
                relations.setdefault(rel_type, []).append(title)

        self._relations_cache[manga_id] = relations
        return relations

    # ----------------------------------------------------------------------
    def clear_cache(self) -> None:
        """Clears all internal caches."""
        self._cache.clear()
        self._relations_cache.clear()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Usage example
|
||||
# --------------------------------------------------------------------------
|
||||
if __name__ == "__main__":
    # Small manual demo; performs live requests against the MangaDex API.
    resolver = MangaDexVolumeResolver()

    mid = resolver.find_manga_id("Yofukashi no Uta")
    print("MangaDex ID       :", mid)

    if mid:
        # The label is derived from the queried chapter so the two cannot
        # drift apart (the label used to say "ch. 1" while querying 66).
        demo_chapter = 66
        print(f"Volume for ch. {demo_chapter} :",
              resolver.volume_for_chapter(mid, demo_chapter))
        print("Full chapter map  :", resolver.get_chapter_volume_map(mid))
        print("Relations         :", resolver.get_series_relations(mid))
|
||||
Reference in New Issue
Block a user