307 lines
11 KiB
Python
307 lines
11 KiB
Python
"""
|
|
mangabaka_works_resolver.py
|
|
===========================
|
|
|
|
Fetches volume-level (work) data and volume cover images from the MangaBaka API.
|
|
|
|
Each "work" is a physical tankobon volume and may carry:
|
|
- volume number
|
|
- ISBN / GTIN
|
|
- page count (used for chapter-to-volume estimation)
|
|
- release date
|
|
- cover image (raw / default / small variants)
|
|
|
|
Cover resolution order (per volume)
|
|
------------------------------------
|
|
1. GET /v1/series/{id}/images — covers that exist independently of a work
|
|
(some series have covers but no works). English edition preferred;
|
|
original language used when no English cover is available.
|
|
2. GET /v1/series/{id}/works — physical tankobon data including covers.
|
|
Fallback when /images returns nothing for the requested volume.
|
|
|
|
If no volume cover is found at all, callers fall back to the series-level
|
|
default cover from the series object itself.
|
|
|
|
Dependencies
|
|
------------
|
|
requests -> pip install requests
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import requests
|
|
|
|
|
|
# --------------------------------------------------------------------------
|
|
# Generic image-block URL picker (shared by /images and /works responses)
|
|
# --------------------------------------------------------------------------
|
|
def _pick_image_url(image) -> "str | None":
|
|
"""
|
|
Returns the best URL from a MangaBaka image block.
|
|
|
|
Handles the common ``{raw, x150, x250, x350}`` structure used by both
|
|
the ``cover`` field on series/work objects and the ``image`` field on
|
|
``/images`` endpoint items::
|
|
|
|
{
|
|
"raw": {"url": "...", "size": ..., "height": ..., "width": ...},
|
|
"x150": {"x1": "...", "x2": "...", "x3": "..."},
|
|
"x250": {...},
|
|
"x350": {...}
|
|
}
|
|
|
|
Preference: raw original > x350@x3 > x250@x3 > x150@x3 > … (falling
|
|
through to lower densities and sizes as needed).
|
|
"""
|
|
if not image:
|
|
return None
|
|
if isinstance(image, str):
|
|
return image
|
|
if not isinstance(image, dict):
|
|
return None
|
|
|
|
# 1) Raw / unscaled image
|
|
raw = image.get("raw")
|
|
if isinstance(raw, dict):
|
|
url = raw.get("url")
|
|
if isinstance(url, str) and url:
|
|
return url
|
|
elif isinstance(raw, str) and raw:
|
|
return raw
|
|
|
|
# 2) Size-keyed CDN variants, largest first, highest density first
|
|
for size_key in ("x350", "x250", "x150"):
|
|
variant = image.get(size_key)
|
|
if isinstance(variant, dict):
|
|
for density in ("x3", "x2", "x1"):
|
|
url = variant.get(density)
|
|
if isinstance(url, str) and url:
|
|
return url
|
|
elif isinstance(variant, str) and variant:
|
|
return variant
|
|
|
|
# 3) Last-ditch: any HTTP URL anywhere in the structure
|
|
for val in image.values():
|
|
if isinstance(val, str) and val.startswith("http"):
|
|
return val
|
|
if isinstance(val, dict):
|
|
for sub_val in val.values():
|
|
if isinstance(sub_val, str) and sub_val.startswith("http"):
|
|
return sub_val
|
|
return None
|
|
|
|
|
|
class MangaBakaWorksResolver:
    """
    Fetches and caches MangaBaka volume (work) data and cover images.

    Cover lookup order per volume
    ------------------------------
    1. ``/v1/series/{id}/images`` — edition covers (English preferred,
       otherwise the first language listed for that volume).
    2. ``/v1/series/{id}/works``  — physical tankobon covers.

    Only works that carry a non-empty ``images`` field are retained in the
    works cache.

    Caching notes
    -------------
    * Results are cached per ``series_id`` until :meth:`clear_cache` is called.
    * When a fetch fails before any item was received, nothing is cached, so
      the next call retries.  When a fetch fails part-way, the items received
      so far are used and cached.
    * Cached lists/dicts are returned by reference; callers should treat them
      as read-only.
    """

    _USER_AGENT = "MangaBakaWorksResolver/1.0"
    _PAGE_SIZE = 50
    # Safety valve: stop paging even if the server keeps returning full pages
    # (e.g. because it ignores the ``page`` parameter).
    _MAX_PAGES = 1000

    def __init__(self, api_base_url: str = "https://api.mangabaka.dev/v1",
                 request_timeout: int = 30,
                 session: "requests.Session | None" = None):
        """
        Args:
            api_base_url: Base URL of the API; a trailing ``/`` is stripped.
            request_timeout: Timeout passed to every HTTP request.
            session: Optional pre-configured session.  Its existing
                ``User-Agent`` header, if any, is left untouched.
        """
        self.api_base_url = api_base_url.rstrip("/")
        self.request_timeout = request_timeout

        if session is None:
            self._session = requests.Session()
            # A fresh requests.Session already carries a default
            # "python-requests/x.y" User-Agent, so ``setdefault`` would be a
            # no-op here; assign explicitly on sessions we own.
            self._session.headers["User-Agent"] = self._USER_AGENT
        else:
            self._session = session
            self._session.headers.setdefault("User-Agent", self._USER_AGENT)

        # Cache: series_id (str) -> list of work dicts (only those with covers)
        self._cache: "dict[str, list[dict]]" = {}
        # Cache: series_id (str) -> {norm_vol (str): url (str)}
        self._images_cache: "dict[str, dict[str, str]]" = {}

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _fetch_all_pages(self, series_id: str, resource: str) -> "tuple[list, bool]":
        """
        Pages through ``/series/{series_id}/{resource}`` and collects ``data``.

        Returns ``(items, complete)``.  ``complete`` is False when a request
        error, an HTTP error status, undecodable JSON, or an unexpected
        payload shape interrupted the fetch; ``items`` then holds whatever
        was received before the interruption.
        """
        items: list = []
        url = f"{self.api_base_url}/series/{series_id}/{resource}"
        try:
            for page in range(1, self._MAX_PAGES + 1):
                resp = self._session.get(
                    url,
                    params={"limit": self._PAGE_SIZE, "page": page},
                    timeout=self.request_timeout,
                )
                resp.raise_for_status()
                payload = resp.json()
                if not isinstance(payload, dict):
                    return items, False
                page_data = payload.get("data")
                if not page_data:
                    break  # empty / missing page marks the end
                if not isinstance(page_data, list):
                    return items, False
                items.extend(page_data)
                if len(page_data) < self._PAGE_SIZE:
                    break  # short page marks the end
        except (requests.RequestException, ValueError):
            # ValueError covers JSON decoding failures on requests versions
            # whose decode error is not a RequestException subclass.
            return items, False
        return items, True

    @staticmethod
    def _work_volume(work: dict, *keys: str) -> str:
        """
        Returns the normalised volume label of *work*, taken from the first
        of *keys* that yields a non-empty value ("" when none does).

        NOTE(review): the lookups in this class historically read different
        keys (``sequence_string`` vs ``volume``); both are now consulted —
        confirm the authoritative field against the API schema.
        """
        for key in keys:
            label = _norm_vol(work.get(key))
            if label:
                return label
        return ""

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get_works(self, series_id: str) -> list[dict]:
        """
        Returns volume-level works for a series, filtered to those that have
        a non-empty ``images`` field.  Results are cached per series.

        Pages through the API (limit=50) until an empty or short page is
        returned, collecting all works before applying the cover filter.

        Returns ``[]`` (uncached) for a falsy ``series_id`` or when the fetch
        failed before any work was received.
        """
        if not series_id:
            return []

        cached = self._cache.get(series_id)
        if cached is not None:
            return cached

        all_works, complete = self._fetch_all_pages(series_id, "works")
        if not complete and not all_works:
            return []

        # Discard works that carry no cover information.
        works_with_cover = [
            w for w in all_works if isinstance(w, dict) and w.get("images")
        ]
        self._cache[series_id] = works_with_cover
        return works_with_cover

    def get_work_for_volume(self, series_id: str, volume) -> "dict | None":
        """
        Returns the work dict for a specific volume number, or None.

        Volume comparison normalises trailing ".0" (e.g. "1.0" == "1").
        The work's volume is read from ``sequence_string``, falling back to
        ``volume`` when the former is missing or empty.
        """
        works = self.get_works(series_id)
        if not works:
            return None

        target = _norm_vol(volume)
        for work in works:
            if self._work_volume(work, "sequence_string", "volume") == target:
                return work
        return None

    def get_volume_covers(self, series_id: str) -> "dict[str, str]":
        """
        Fetches all volume-type cover images for a series from
        ``/v1/series/{id}/images`` and returns a
        ``{normalised_volume_str: url}`` mapping.

        English-edition covers are preferred; the first available language
        is used as fallback when no English cover exists for a volume.
        Results are cached per series, except when the fetch failed before
        any item was received — then ``{}`` is returned without caching so
        that a later call can retry.
        """
        if not series_id:
            return {}

        cached = self._images_cache.get(series_id)
        if cached is not None:
            return cached

        raw_items, complete = self._fetch_all_pages(series_id, "images")
        if not complete and not raw_items:
            return {}

        # Group by normalised volume index; collect all languages per volume.
        by_volume: "dict[str, dict[str, str]]" = {}  # norm_vol -> {lang: url}
        for item in raw_items:
            if not isinstance(item, dict) or item.get("type") != "volume":
                continue
            idx = item.get("index_numeric")
            if idx is None:
                continue
            url = _pick_image_url(item.get("image"))
            if not url:
                continue
            lang = str(item.get("language") or "").lower() or "unknown"
            # First entry per language wins (API order reflects quality/rank).
            by_volume.setdefault(_norm_vol(idx), {}).setdefault(lang, url)

        # Pick best language per volume: English first, then first available.
        # Every inner mapping holds at least one non-empty URL by construction.
        result = {
            norm: lang_map.get("en") or next(iter(lang_map.values()))
            for norm, lang_map in by_volume.items()
        }

        self._images_cache[series_id] = result
        return result

    def get_cover_for_volume_from_images(self, series_id: str,
                                         volume) -> "str | None":
        """
        Returns the cover URL for a specific volume from the /images endpoint,
        or None if not available.
        """
        return self.get_volume_covers(series_id).get(_norm_vol(volume))

    def get_cover_for_volume(self, series_id: str, volume) -> "str | None":
        """
        Returns the best cover URL for a specific volume.

        Tries the ``/images`` endpoint first (covers that exist even when no
        physical work has been catalogued), then falls back to the ``/works``
        endpoint, where the first image entry that yields a usable URL is
        taken.  Returns None if neither source has a cover for the volume.
        """
        # 1. /images endpoint (covers without works)
        url = self.get_cover_for_volume_from_images(series_id, volume)
        if url:
            return url

        # 2. /works endpoint fallback
        work = self.get_work_for_volume(series_id, volume)
        if not work:
            return None

        images = work.get("images")
        if isinstance(images, dict):
            images = [images]  # tolerate a single entry instead of a list
        elif not isinstance(images, (list, tuple)):
            return None

        for entry in images:
            if not isinstance(entry, dict):
                continue
            url = _pick_image_url(entry.get("image"))
            if url:
                return url
        return None

    def get_page_counts(self, series_id: str) -> "dict[str, int]":
        """
        Returns {volume_str: page_count} for the works returned by
        :meth:`get_works` (i.e. only works that carry images; this fetches
        them if they are not cached yet).

        The volume is read from ``volume``, falling back to
        ``sequence_string``.  Works without a volume label, without a
        ``pages`` value, or whose ``pages`` cannot be converted to ``int``
        are skipped.

        Used by MangaDexVolumeResolver for chapter-to-volume estimation.
        """
        result: "dict[str, int]" = {}
        for work in self.get_works(series_id):
            vol = self._work_volume(work, "volume", "sequence_string")
            pages = work.get("pages")
            if not vol or pages is None:
                continue
            try:
                result[vol] = int(pages)
            except (TypeError, ValueError, OverflowError):
                continue
        return result

    def clear_cache(self) -> None:
        """Clears both the works cache and the images cover cache."""
        self._cache.clear()
        self._images_cache.clear()
|
|
|
|
|
|
# --------------------------------------------------------------------------
|
|
# Module helper
|
|
# --------------------------------------------------------------------------
|
|
def _norm_vol(value) -> str:
|
|
"""Normalises a volume identifier: strips whitespace, removes trailing .0."""
|
|
text = str(value or "").strip()
|
|
try:
|
|
f = float(text)
|
|
if f.is_integer():
|
|
return str(int(f))
|
|
except ValueError:
|
|
pass
|
|
return text
|