missing cover fix

2026-05-30 09:23:58 +02:00
parent 054f974ddc
commit 97e4b10ac8
3 changed files with 193 additions and 129 deletions
@@ -45,7 +45,7 @@ from pathlib import Path
 import requests

 from MangadexVolumeResolver import MangaDexVolumeResolver
-from MangaBakaWorksResolver import MangaBakaWorksResolver
+from MangaBakaWorksResolver import MangaBakaWorksResolver, _pick_image_url
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from MatchesCache import MatchesCache
@@ -998,59 +998,12 @@ class ComicInfoBuilder:


 # --------------------------------------------------------------------------
-# Module-level helpers (shared with MangaBakaWorksResolver logic)
+# Module-level helpers
 # --------------------------------------------------------------------------
-def _pick_cover_url(cover) -> "str | None":
-    """
-    Selects the best cover URL from a MangaBaka cover object.

-    Real API shape (from `GET /v1/series/{id}` and `/works`):
-        {
-          "raw":  {"url": "...", "size": ..., "height": ..., "width": ...},
-          "x150": {"x1": "...", "x2": "...", "x3": "..."},
-          "x250": {"x1": "...", "x2": "...", "x3": "..."},
-          "x350": {"x1": "...", "x2": "...", "x3": "..."}
-        }
-
-    Order of preference: raw original  >  x350@x3  >  x250@x3  >  x150@x3
-    (falling through to lower densities and sizes as needed).
-    """
-    if not cover:
-        return None
-    if isinstance(cover, str):
-        return cover
-    if not isinstance(cover, dict):
-        return None
-
-    # 1) Preferred: the unscaled "raw" image
-    raw = cover.get("raw")
-    if isinstance(raw, dict):
-        url = raw.get("url")
-        if isinstance(url, str) and url:
-            return url
-    elif isinstance(raw, str) and raw:
-        return raw
-
-    # 2) Fallback: size-keyed variants, largest first, highest density first
-    for size_key in ("x350", "x250", "x150"):
-        variant = cover.get(size_key)
-        if isinstance(variant, dict):
-            for density in ("x3", "x2", "x1"):
-                url = variant.get(density)
-                if isinstance(url, str) and url:
-                    return url
-        elif isinstance(variant, str) and variant:
-            return variant
-
-    # 3) Last-ditch fallback: any http URL anywhere in the structure
-    for val in cover.values():
-        if isinstance(val, str) and val.startswith("http"):
-            return val
-        if isinstance(val, dict):
-            for sub in val.values():
-                if isinstance(sub, str) and sub.startswith("http"):
-                    return sub
-    return None
+# Alias: _pick_image_url (from MangaBakaWorksResolver) is the canonical
+# generic image-block picker; _pick_cover_url is kept for backward compat.
+_pick_cover_url = _pick_image_url


 def _pick_thumbnail_url(cover) -> "str | None":
@@ -2,7 +2,7 @@
 mangabaka_works_resolver.py
 ===========================

-Fetches volume-level (work) data from the MangaBaka API.
+Fetches volume-level (work) data and volume cover images from the MangaBaka API.

 Each "work" is a physical tankobon volume and may carry:
  - volume number
@@ -11,10 +11,16 @@ Each "work" is a physical tankobon volume and may carry:
  - release date
  - cover image  (raw / default / small variants)

-Only works that have a usable cover are kept in the cache.
-Works without a cover are discarded at fetch time.
-If no volume is assigned for a chapter, callers fall back to the
-default series cover from the series object itself.
+Cover resolution order (per volume)
+------------------------------------
+1. GET /v1/series/{id}/images  — covers that exist independently of a work
+   (some series have covers but no works).  English edition preferred;
+   otherwise the first language listed for that volume is used.
+2. GET /v1/series/{id}/works   — physical tankobon data including covers.
+   Fallback when /images returns nothing for the requested volume.
+
+If no volume cover is found at all, callers fall back to the series-level
+default cover from the series object itself.

 Dependencies
 ------------
@@ -26,10 +32,75 @@ from __future__ import annotations
 import requests


+# --------------------------------------------------------------------------
+# Generic image-block URL picker (shared by /images and /works responses)
+# --------------------------------------------------------------------------
+def _pick_image_url(image) -> "str | None":
+    """
+    Returns the best URL from a MangaBaka image block.
+
+    Handles the common ``{raw, x150, x250, x350}`` structure used by both
+    the ``cover`` field on series/work objects and the ``image`` field on
+    ``/images`` endpoint items::
+
+        {
+          "raw":  {"url": "...", "size": ..., "height": ..., "width": ...},
+          "x150": {"x1": "...", "x2": "...", "x3": "..."},
+          "x250": {...},
+          "x350": {...}
+        }
+
+    Preference: raw original > x350 > x250 > x150; within each size the
+    highest available density (x3, then x2, then x1) is used.
+    """
+    if not image:
+        return None
+    if isinstance(image, str):
+        return image
+    if not isinstance(image, dict):
+        return None
+
+    # 1) Raw / unscaled image
+    raw = image.get("raw")
+    if isinstance(raw, dict):
+        url = raw.get("url")
+        if isinstance(url, str) and url:
+            return url
+    elif isinstance(raw, str) and raw:
+        return raw
+
+    # 2) Size-keyed CDN variants, largest first, highest density first
+    for size_key in ("x350", "x250", "x150"):
+        variant = image.get(size_key)
+        if isinstance(variant, dict):
+            for density in ("x3", "x2", "x1"):
+                url = variant.get(density)
+                if isinstance(url, str) and url:
+                    return url
+        elif isinstance(variant, str) and variant:
+            return variant
+
+    # 3) Last-ditch: any HTTP URL anywhere in the structure
+    for val in image.values():
+        if isinstance(val, str) and val.startswith("http"):
+            return val
+        if isinstance(val, dict):
+            for sub_val in val.values():
+                if isinstance(sub_val, str) and sub_val.startswith("http"):
+                    return sub_val
+    return None
+
+
 class MangaBakaWorksResolver:
    """
-    Fetches and caches MangaBaka volume (work) data for a series.
-    Only works that have a cover image are retained in the cache.
+    Fetches and caches MangaBaka volume (work) data and cover images.
+
+    Cover lookup order per volume
+    ------------------------------
+    1. ``/v1/series/{id}/images`` — edition covers (English > first available).
+    2. ``/v1/series/{id}/works``  — physical tankobon covers.
+
+    Only works that carry a cover image are retained in the works cache.
    """

    def __init__(self, api_base_url: str = "https://api.mangabaka.dev/v1",
@@ -42,6 +113,8 @@ class MangaBakaWorksResolver:

        # Cache: series_id (str) -> list of work dicts (only those with covers)
        self._cache: dict[str, list[dict]] = {}
+        # Cache: series_id (str) -> {norm_vol (str): url (str)}
+        self._images_cache: dict[str, dict[str, str]] = {}

    # ------------------------------------------------------------------
    # Public API
@@ -101,12 +174,100 @@ class MangaBakaWorksResolver:
                return work
        return None

-    def get_cover_for_volume(self, series_id: str, volume) -> "str | None":
-        """Returns the cover URL for a specific volume, or None if not found."""
-        work = self.get_work_for_volume(series_id, volume)
-        if not work:
+    def get_volume_covers(self, series_id: str) -> "dict[str, str]":
+        """
+        Fetches all volume-type cover images for a series from
+        ``/v1/series/{id}/images`` and returns a
+        ``{normalised_volume_str: url}`` mapping.
+
+        English-edition covers are preferred; the first available language
+        is used as fallback when no English cover exists for a volume.
+        Results (even partial ones after a request error) are cached per series.
+        """
+        if not series_id:
+            return {}
+
+        if series_id in self._images_cache:
+            return self._images_cache[series_id]
+
+        raw_items: list[dict] = []
+        page = 1
+        try:
+            while True:
+                resp = self._session.get(
+                    f"{self.api_base_url}/series/{series_id}/images",
+                    params={"limit": 50, "page": page},
+                    timeout=self.request_timeout,
+                )
+                resp.raise_for_status()
+                page_data = resp.json().get("data") or []
+                if not page_data:
+                    break
+                raw_items.extend(page_data)
+                if len(page_data) < 50:
+                    break
+                page += 1
+        except requests.RequestException:
+            pass
+
+        # Group by normalised volume index; collect all languages per volume.
+        by_volume: dict[str, dict[str, str]] = {}  # norm_vol -> {lang: url}
+        for item in raw_items:
+            if item.get("type") != "volume":
+                continue
+            idx = item.get("index_numeric")
+            if idx is None:
+                continue
+            norm = _norm_vol(idx)
+            lang = (item.get("language") or "").lower() or "unknown"
+            url  = _pick_image_url(item.get("image"))
+            if not url:
+                continue
+            if norm not in by_volume:
+                by_volume[norm] = {}
+            # First entry per language wins (assumes API order reflects rank; TODO confirm).
+            if lang not in by_volume[norm]:
+                by_volume[norm][lang] = url
+
+        # Pick best language per volume: English first, then first available.
+        result: dict[str, str] = {}
+        for norm, lang_map in by_volume.items():
+            url = lang_map.get("en") or next(iter(lang_map.values()), None)
+            if url:
+                result[norm] = url
+
+        self._images_cache[series_id] = result
+        return result
+
+    def get_cover_for_volume_from_images(self, series_id: str,
+                                         volume) -> "str | None":
+        """
+        Returns the cover URL for a specific volume from the /images endpoint,
+        or None if not available.
+        """
+        covers = self.get_volume_covers(series_id)
+        if not covers:
            return None
-        return self._pick_cover_url(work.get("images")[0].get("image"))
+        return covers.get(_norm_vol(volume))
+
+    def get_cover_for_volume(self, series_id: str, volume) -> "str | None":
+        """
+        Returns the best cover URL for a specific volume.
+
+        Tries the ``/images`` endpoint first (covers that exist even when no
+        physical work has been catalogued), then falls back to the ``/works``
+        endpoint.  Returns None if neither source has a cover for the volume.
+        """
+        # 1. /images endpoint (covers without works)
+        url = self.get_cover_for_volume_from_images(series_id, volume)
+        if url:
+            return url
+
+        # 2. /works endpoint fallback
+        work = self.get_work_for_volume(series_id, volume)
+        if not work or not work.get("images"):
+            return None
+        return _pick_image_url(work["images"][0].get("image"))

    def get_page_counts(self, series_id: str) -> "dict[str, int]":
        """
@@ -125,59 +286,9 @@ class MangaBakaWorksResolver:
        return result

    def clear_cache(self) -> None:
-        """Clears the internal works cache."""
+        """Clears both the works cache and the images cover cache."""
        self._cache.clear()
-
-    # ------------------------------------------------------------------
-    # Helpers
-    # ------------------------------------------------------------------
-    @staticmethod
-    def _pick_cover_url(cover) -> "str | None":
-        """
-        Selects the best cover URL from a MangaBaka cover object.
-
-        Real API shape:
-            "raw":  {"url": "...", "size": ..., "height": ..., "width": ...}
-            "x150": {"x1": "...", "x2": "...", "x3": "..."}
-            "x250": {...}
-            "x350": {...}
-
-        Order: raw original > x350@x3 > x250@x3 > x150@x3 ...
-        """
-        if not cover:
-            return None
-        if isinstance(cover, str):
-            return cover
-        if not isinstance(cover, dict):
-            return None
-
-        raw = cover.get("raw")
-        if isinstance(raw, dict):
-            url = raw.get("url")
-            if isinstance(url, str) and url:
-                return url
-        elif isinstance(raw, str) and raw:
-            return raw
-
-        for size_key in ("x350", "x250", "x150"):
-            variant = cover.get(size_key)
-            if isinstance(variant, dict):
-                for density in ("x3", "x2", "x1"):
-                    url = variant.get(density)
-                    if isinstance(url, str) and url:
-                        return url
-            elif isinstance(variant, str) and variant:
-                return variant
-
-        # Last-ditch: any HTTP URL anywhere in the structure
-        for val in cover.values():
-            if isinstance(val, str) and val.startswith("http"):
-                return val
-            if isinstance(val, dict):
-                for sub_val in val.values():
-                    if isinstance(sub_val, str) and sub_val.startswith("http"):
-                        return sub_val
-        return None
+        self._images_cache.clear()


 # --------------------------------------------------------------------------
@@ -591,21 +591,21 @@ if __name__ == "__main__":
    )

    # ---- Option A: build matches.json only (no moves / no Kavita sync) ----
-    data = mover.build_matches_only()
-    matches = data.get("matches", {})
-    print(f"\n[matches] {len(matches)} entries total — file: {MATCHES_PATH}")
-    for title, entry in list(matches.items())[:10]:
-        print(f"  {title!r:50s}  id={entry.get('mangabakaId')}  "
-              f"name={entry.get('mangabakaName')!r}")
+    # data = mover.build_matches_only()
+    # matches = data.get("matches", {})
+    # print(f"\n[matches] {len(matches)} entries total — file: {MATCHES_PATH}")
+    # for title, entry in list(matches.items())[:10]:
+    #     print(f"  {title!r:50s}  id={entry.get('mangabakaId')}  "
+    #           f"name={entry.get('mangabakaName')!r}")

    # ---- Option B: full pipeline for one series (uses the cache too) ----
-    # result = mover.process_series("Yofukashi no Uta")
-    # ok     = sum(1 for c in result["chapters"] if c["ok"])
-    # failed = sum(1 for c in result["chapters"] if not c["ok"])
-    # print(f"\nDone: {ok} ok, {failed} failed")
-    # for c in result["chapters"]:
-    #     if not c["ok"]:
-    #         print(f"  Chapter {c['chapter']}: {c['error']}")
+    result = mover.process_series("Wistoria - Wand and Sword")
+    ok     = sum(1 for c in result["chapters"] if c["ok"])
+    failed = sum(1 for c in result["chapters"] if not c["ok"])
+    print(f"\nDone: {ok} ok, {failed} failed")
+    for c in result["chapters"]:
+        if not c["ok"]:
+            print(f"  Chapter {c['chapter']}: {c['error']}")

    # Or process everything at once:
    # results = mover.process_all()