This commit is contained in:
2026-05-22 22:42:04 +02:00
commit d5817e908a
8 changed files with 1666 additions and 0 deletions
+169
View File
@@ -0,0 +1,169 @@
"""
mangabaka_works_resolver.py
===========================
Fetches volume-level (work) data from the MangaBaka API.
Each "work" is a physical tankobon volume and may carry:
- volume number
- ISBN / GTIN
- page count (used for chapter-to-volume estimation)
- release date
- cover image (raw / default / small variants)
Only works that have a usable cover are kept in the cache.
Works without a cover are discarded at fetch time.
If no volume is assigned for a chapter, callers fall back to the
default series cover from the series object itself.
Dependencies
------------
requests -> pip install requests
"""
from __future__ import annotations
import requests
class MangaBakaWorksResolver:
    """
    Fetches and caches MangaBaka volume (work) data for a series.

    Only works that have a non-empty ``images`` entry are retained.
    Complete result sets are cached per series id; results from a fetch
    that was interrupted by a request error are returned but not cached,
    so a later call can retry.
    """

    # Number of works requested per page; a shorter page marks the last one.
    _PAGE_SIZE = 50
    # User-Agent sent by sessions this class creates itself.
    _USER_AGENT = "MangaBakaWorksResolver/1.0"

    def __init__(self, api_base_url: str = "https://api.mangabaka.dev/v1",
                 request_timeout: int = 30,
                 session: "requests.Session | None" = None):
        """
        Parameters
        ----------
        api_base_url    : Base URL of the API; a trailing "/" is stripped.
        request_timeout : Timeout passed to every HTTP request.
        session         : Optional pre-configured session. When omitted, a
                          new ``requests.Session`` is created.
        """
        self.api_base_url = api_base_url.rstrip("/")
        self.request_timeout = request_timeout
        if session is None:
            # A fresh requests.Session already carries a default
            # "python-requests/x" User-Agent, so setdefault() would never
            # apply ours; set it explicitly on sessions we own.
            self._session = requests.Session()
            self._session.headers["User-Agent"] = self._USER_AGENT
        else:
            # Respect whatever User-Agent the caller configured.
            self._session = session
            self._session.headers.setdefault("User-Agent", self._USER_AGENT)
        # Cache: series_id (str) -> list of work dicts (only those with covers)
        self._cache: dict[str, list[dict]] = {}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get_works(self, series_id: str) -> list[dict]:
        """
        Returns volume-level works for a series, filtered to those that have
        a non-empty ``images`` entry.

        Pages through the API (``_PAGE_SIZE`` works per request) until a page
        is empty or shorter than the page size, then applies the cover filter.

        A falsy ``series_id`` yields ``[]``. If a request fails before any
        work was collected, ``[]`` is returned. If it fails after some pages
        were collected, the partial (filtered) list is returned but NOT
        cached, so the incomplete data does not stick for the lifetime of
        the resolver. Only complete result sets are cached.
        """
        if not series_id:
            return []
        if series_id in self._cache:
            return self._cache[series_id]

        all_works: list[dict] = []
        page = 1
        complete = False
        try:
            while True:
                resp = self._session.get(
                    f"{self.api_base_url}/series/{series_id}/works",
                    params={"limit": self._PAGE_SIZE, "page": page},
                    timeout=self.request_timeout,
                )
                resp.raise_for_status()
                payload = resp.json()
                # Tolerate unexpected payload shapes instead of raising
                # AttributeError / extending with garbage.
                page_data = payload.get("data") if isinstance(payload, dict) else None
                if not isinstance(page_data, list) or not page_data:
                    break
                all_works.extend(page_data)
                if len(page_data) < self._PAGE_SIZE:
                    break
                page += 1
            complete = True
        except (requests.RequestException, ValueError):
            # ValueError covers invalid JSON bodies on requests versions
            # whose JSON decode error is not a RequestException.
            if not all_works:
                return []

        # Discard works that carry no usable cover
        works_with_cover = [
            w for w in all_works if isinstance(w, dict) and w.get("images")
        ]
        if complete:
            self._cache[series_id] = works_with_cover
        return works_with_cover

    def get_work_for_volume(self, series_id: str, volume) -> "dict | None":
        """
        Returns the first work whose normalised ``sequence_string`` equals the
        normalised ``volume``, or None when there is no match.

        Volume comparison normalises trailing ".0" (e.g. "1.0" == "1").
        """
        works = self.get_works(series_id)
        if not works:
            return None
        target = _norm_vol(volume)
        for work in works:
            # NOTE(review): this reads "sequence_string" while get_page_counts
            # reads "volume" -- confirm against the API schema which key
            # carries the volume number.
            if _norm_vol(work.get("sequence_string")) == target:
                return work
        return None

    def get_cover_for_volume(self, series_id: str, volume) -> "str | None":
        """
        Returns the cover URL for a specific volume, or None if not found.

        The URL is taken from the ``image`` field of the first entry of the
        work's ``images`` list. Any other ``images`` shape yields None
        rather than raising.
        """
        work = self.get_work_for_volume(series_id, volume)
        if not work:
            return None
        images = work.get("images")
        first = images[0] if isinstance(images, list) and images else None
        if not isinstance(first, dict):
            return None
        return self._pick_cover_url(first.get("image"))

    def get_page_counts(self, series_id: str) -> "dict[str, int]":
        """
        Returns {volume_str: page_count} for the works of a series.

        Works without a volume value, without a ``pages`` value, or whose
        ``pages`` cannot be converted with ``int()`` are skipped.
        Used by MangaDexVolumeResolver for chapter-to-volume estimation.
        """
        result: dict[str, int] = {}
        for work in self.get_works(series_id):
            # NOTE(review): this reads "volume" while get_work_for_volume
            # reads "sequence_string" -- confirm which key the API provides.
            vol = _norm_vol(work.get("volume"))
            pages = work.get("pages")
            if not vol or pages is None:
                continue
            try:
                result[vol] = int(pages)
            except (TypeError, ValueError):
                # Non-numeric page counts are ignored on purpose.
                continue
        return result

    def clear_cache(self) -> None:
        """Clears the internal works cache."""
        self._cache.clear()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _pick_cover_url(cover) -> "str | None":
        """
        Selects the best (raw-preferred) cover URL from a cover object.

        * falsy input           -> None
        * a string              -> returned unchanged
        * a dict                -> ``cover["raw"]["url"]`` when present and
                                   truthy; otherwise the first string value
                                   starting with "http", searched one nesting
                                   level deep
        * anything else         -> None
        """
        if not cover:
            return None
        if isinstance(cover, str):
            return cover
        if not isinstance(cover, dict):
            return None

        # "raw" may be missing or None; guard before dereferencing so the
        # generic fallback below stays reachable.
        raw = cover.get("raw")
        if isinstance(raw, dict):
            url = raw.get("url")
            if url:
                return url
        elif isinstance(raw, str) and raw.startswith("http"):
            return raw

        # Generic fallback: any HTTP URL in the dict
        for val in cover.values():
            if isinstance(val, str) and val.startswith("http"):
                return val
            if isinstance(val, dict):
                for sub_val in val.values():
                    if isinstance(sub_val, str) and sub_val.startswith("http"):
                        return sub_val
        return None
# --------------------------------------------------------------------------
# Module helper
# --------------------------------------------------------------------------
def _norm_vol(value) -> str:
"""Normalises a volume identifier: strips whitespace, removes trailing .0."""
text = str(value or "").strip()
try:
f = float(text)
if f.is_integer():
return str(int(f))
except ValueError:
pass
return text