Add AniList resolver as MAL fallback; fix SeriesGroup, tag formatting, empty-cache bug

2026-05-23 22:35:08 +02:00
parent ec1342d146
commit b8f897fa2e
6 changed files with 730 additions and 72 deletions
@@ -0,0 +1,507 @@
+"""
+anilist_resolver.py
+===================
+
+Fetches and caches AniList manga metadata (statistics, characters, staff)
+using the public AniList GraphQL API.
+
+AniList API: https://graphql.anilist.co  (no authentication required)
+Rate limit: 90 req/min  ->  a 700 ms guard between calls is applied.
+On HTTP 429 (rate-limit exceeded) the response Retry-After header is
+honoured; the request is retried once automatically.
+
+Singleton
+---------
+Only one instance of this class exists per process.  Subsequent calls to
+AniListResolver() return the same object with its warm caches intact.
+
+Provided features
+-----------------
+- Title-based AniList ID lookup with best-match scoring
+- Manga statistics: score (0–10), rank, popularity, members, favorites
+- Character list for a manga (names only — for <Characters> XML tag)
+- Detailed character list: name, AniList character ID, image URL, role
+- Detailed staff list: name, AniList person ID, image URL, positions
+- Lazy full-detail fetches per character / person (for descriptions)
+
+Dependencies
+------------
+    requests    ->  pip install requests
+"""
+
+from __future__ import annotations
+
+import datetime
+import difflib
+import time
+
+import requests
+
+from MediaResolver import MediaResolver
+
+
+# --------------------------------------------------------------------------
+# GraphQL query strings
+# --------------------------------------------------------------------------
+# Each constant below is a complete GraphQL document.  The search query
+# takes a $search string variable; every other query takes the AniList id
+# of the wanted object as the $id variable.
+
+# Title search: first page only, at most 5 entries of type MANGA with the
+# NOVEL format excluded.  Returns id, the three title variants and siteUrl.
+_SEARCH_MANGA = """
+query ($search: String) {
+  Page(page: 1, perPage: 5) {
+    media(search: $search, type: MANGA, format_not_in: [NOVEL]) {
+      id title { romaji english native } siteUrl
+    }
+  }
+}
+"""
+
+# Statistics for one manga: meanScore, popularity, favourites and the
+# rankings list (rank / type / allTime) plus titles and siteUrl.
+_MANGA_STATS = """
+query ($id: Int) {
+  Media(id: $id, type: MANGA) {
+    id title { romaji english native }
+    meanScore popularity favourites
+    rankings { rank type allTime }
+    siteUrl
+  }
+}
+"""
+
+# Up to 25 characters of one manga, sorted by ROLE then RELEVANCE.
+# `nodes` (character data) and `edges` (role) are requested as two separate
+# lists.
+# NOTE(review): AniListResolver pairs nodes[i] with edges[i] positionally;
+# presumably the API returns both lists in the same order - confirm.
+_MANGA_CHARACTERS = """
+query ($id: Int) {
+  Media(id: $id, type: MANGA) {
+    characters(sort: [ROLE, RELEVANCE], perPage: 25) {
+      nodes { id name { full } image { large } siteUrl }
+      edges { role }
+    }
+  }
+}
+"""
+
+# Up to 25 staff members of one manga; same nodes/edges layout as the
+# character query (no explicit sort is requested here).
+_MANGA_STAFF = """
+query ($id: Int) {
+  Media(id: $id, type: MANGA) {
+    staff(perPage: 25) {
+      nodes { id name { full } image { large } siteUrl }
+      edges { role }
+    }
+  }
+}
+"""
+
+# Full record of a single character; the description is requested with
+# asHtml: false.
+_CHARACTER_DETAILS = """
+query ($id: Int) {
+  Character(id: $id) {
+    id name { full } image { large }
+    description(asHtml: false)
+    favourites siteUrl
+  }
+}
+"""
+
+# Full record of a single staff member.  primaryOccupations and homeTown
+# are fetched here, but AniListResolver.get_person_details does not copy
+# them into its result dict.
+_PERSON_DETAILS = """
+query ($id: Int) {
+  Staff(id: $id) {
+    id name { full native } image { large }
+    description(asHtml: false)
+    favourites siteUrl
+    dateOfBirth { year month day }
+    primaryOccupations
+    homeTown
+  }
+}
+"""
+
+# Endpoint that every GraphQL POST request is sent to.
+_ANILIST_GQL = "https://graphql.anilist.co"
+
+
+class AniListResolver(MediaResolver):
+    """
+    Singleton: fetches and caches AniList manga data via GraphQL API.
+
+    The first call to AniListResolver() creates and initialises the instance;
+    all subsequent calls return the same object.  Constructor arguments
+    passed on later calls are ignored because __init__ runs only once.
+
+    Every public getter catches requests.RequestException and reports the
+    failure through its return value (None or an empty list) instead of
+    raising.
+    """
+
+    _instance: "AniListResolver | None" = None
+
+    # Minimum spacing between two HTTP requests, in seconds
+    # (guard for AniList's 90 req/min limit).
+    _MIN_REQUEST_INTERVAL: float = 0.7
+    # Seconds to wait after HTTP 429 when Retry-After is missing or unusable.
+    _DEFAULT_RETRY_AFTER: int = 60
+
+    # ------------------------------------------------------------------
+    # Singleton machinery
+    # ------------------------------------------------------------------
+    def __new__(cls, **kwargs):
+        # kwargs are accepted only so that AniListResolver(request_timeout=..)
+        # is a legal call; object creation itself does not use them.
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            # Read by __init__ so that initialisation happens exactly once.
+            cls._instance._initialized = False
+        return cls._instance
+
+    def __init__(self, *, request_timeout: int = 30):
+        """
+        Sets up the HTTP session and the empty caches (first call only).
+
+        request_timeout -- timeout in seconds handed to every HTTP request.
+        """
+        if self._initialized:
+            return
+
+        self.request_timeout = request_timeout
+
+        self._session = requests.Session()
+        self._session.headers.update({
+            "User-Agent":   "AniListResolver/1.0",
+            "Content-Type": "application/json",
+            "Accept":       "application/json",
+        })
+
+        # title_lower -> al_id
+        self._id_cache: dict[str, "int | None"] = {}
+        # al_id -> stats dict
+        self._stats_cache: dict[int, dict] = {}
+        # manga_al_id -> [name_str, ...]
+        self._char_names_cache: dict[int, list[str]] = {}
+        # manga_al_id -> [{al_id, name, image_url, role}]
+        self._char_detailed_cache: dict[int, list[dict]] = {}
+        # manga_al_id -> [{al_id, name, image_url, positions}]
+        self._staff_detailed_cache: dict[int, list[dict]] = {}
+        # char_al_id -> {al_id, name, image_url, about, favorites, url}
+        self._char_info_cache: dict[int, dict] = {}
+        # person_al_id -> {al_id, name, image_url, about, favorites, url, ...}
+        self._person_info_cache: dict[int, dict] = {}
+
+        # time.monotonic() value taken after the most recent HTTP attempt.
+        self._last_request_at: float = 0.0
+        self._initialized = True
+
+    # ------------------------------------------------------------------
+    # Public: ID lookup
+    # ------------------------------------------------------------------
+    def find_id(self, title: str) -> "int | None":
+        """
+        Searches AniList for a manga by title and returns the best-matching
+        AniList ID.  Returns None on failure or when no result is found.
+
+        The stripped title is used for the search, for scoring and (lower-
+        cased) as the cache key.  An empty result list is cached as None;
+        a network failure is not cached, so the next call retries.
+        """
+        if not title or not title.strip():
+            return None
+
+        wanted = title.strip()
+        key = wanted.lower()
+        if key in self._id_cache:
+            return self._id_cache[key]
+
+        try:
+            data = self._gql(_SEARCH_MANGA, {"search": wanted})
+        except requests.RequestException:
+            return None
+
+        # "or {}" (not a .get default) also covers an explicit JSON null.
+        page = (data.get("data") or {}).get("Page") or {}
+        results = [entry for entry in (page.get("media") or []) if entry]
+        if not results:
+            self._id_cache[key] = None
+            return None
+
+        # max() returns the first entry among equal scores, i.e. the one
+        # AniList itself listed first.
+        best = max(results, key=lambda entry: _score_title(wanted, entry))
+        al_id = best.get("id")
+        self._id_cache[key] = al_id
+        return al_id
+
+    # ------------------------------------------------------------------
+    # Public: statistics
+    # ------------------------------------------------------------------
+    def get_stats(self, tracker_id: "int | None") -> "dict | None":
+        """
+        Returns a statistics dict for the given AniList manga ID:
+
+            {score, rank, scored_by, popularity, members, favorites,
+             url, title, as_of (DD-MM-YYYY)}
+
+        "scored_by" is always None.  Returns None if tracker_id is None or
+        on network failure.  Successful results are cached per ID.
+        """
+        if tracker_id is None:
+            return None
+        if tracker_id in self._stats_cache:
+            return self._stats_cache[tracker_id]
+
+        try:
+            data = self._gql(_MANGA_STATS, {"id": tracker_id})
+        except requests.RequestException:
+            return None
+        entry = (data.get("data") or {}).get("Media") or {}
+
+        title_obj = entry.get("title") or {}
+        title = (title_obj.get("romaji")
+                 or title_obj.get("english")
+                 or title_obj.get("native") or "")
+
+        # AniList meanScore is 0-100; normalise to 0.0-10.0 for consistency
+        # with the MALResolver stats dict shape.
+        raw_score = entry.get("meanScore")
+        score = round(raw_score / 10, 1) if raw_score is not None else None
+
+        # Rated and popularity ranks live in the rankings array; only the
+        # first all-time entry of each type is used.
+        rated_rank = None
+        popular_rank = None
+        for ranking in entry.get("rankings") or []:
+            if not ranking.get("allTime"):
+                continue
+            kind = ranking.get("type")
+            if kind == "RATED" and rated_rank is None:
+                rated_rank = ranking.get("rank")
+            elif kind == "POPULAR" and popular_rank is None:
+                popular_rank = ranking.get("rank")
+
+        stats: dict = {
+            "score":      score,
+            "rank":       rated_rank,
+            "scored_by":  None,            # not exposed by AniList API
+            "popularity": popular_rank,
+            "members":    entry.get("popularity"),   # AniList's popularity = member count
+            "favorites":  entry.get("favourites"),
+            "url":        entry.get("siteUrl") or f"https://anilist.co/manga/{tracker_id}",
+            "title":      title,
+            "as_of":      datetime.date.today().strftime("%d-%m-%Y"),
+        }
+        self._stats_cache[tracker_id] = stats
+        return stats
+
+    # ------------------------------------------------------------------
+    # Public: character names (for ComicInfo <Characters> tag)
+    # ------------------------------------------------------------------
+    def get_characters(self, tracker_id: "int | None") -> list[str]:
+        """
+        Returns a flat list of character names for the manga.
+
+        Built from get_characters_detailed(); an empty result is not cached.
+        """
+        if tracker_id is None:
+            return []
+        if tracker_id in self._char_names_cache:
+            return self._char_names_cache[tracker_id]
+
+        detailed = self.get_characters_detailed(tracker_id)
+        names = [e["name"] for e in detailed if e.get("name")]
+        if names:
+            self._char_names_cache[tracker_id] = names
+        return names
+
+    # ------------------------------------------------------------------
+    # Public: detailed character data
+    # ------------------------------------------------------------------
+    def get_characters_detailed(self, tracker_id: "int | None") -> list[dict]:
+        """
+        Returns detailed character entries for a manga:
+            [{al_id, mal_id, name, raw_name, image_url, role, about}, ...]
+
+        mal_id and about are always None, raw_name equals name, and role
+        falls back to "SUPPORTING" when the edge carries none.  Characters
+        without a full name are skipped.  Returns [] if tracker_id is None
+        or on network failure; an empty result is not cached.
+        """
+        if tracker_id is None:
+            return []
+        if tracker_id in self._char_detailed_cache:
+            return self._char_detailed_cache[tracker_id]
+
+        try:
+            pairs = self._fetch_media_connection(
+                _MANGA_CHARACTERS, tracker_id, "characters")
+        except requests.RequestException:
+            return []
+
+        results = []
+        for node, edge in pairs:
+            name = (node.get("name") or {}).get("full") or ""
+            if not name:
+                continue
+            results.append({
+                "al_id":     node.get("id"),
+                "mal_id":    None,
+                "name":      name,
+                "raw_name":  name,
+                "image_url": (node.get("image") or {}).get("large"),
+                "role":      edge.get("role") or "SUPPORTING",
+                "about":     None,
+            })
+
+        if results:
+            self._char_detailed_cache[tracker_id] = results
+        return results
+
+    # ------------------------------------------------------------------
+    # Public: detailed staff data
+    # ------------------------------------------------------------------
+    def get_staff_detailed(self, tracker_id: "int | None") -> list[dict]:
+        """
+        Returns detailed staff entries for a manga:
+            [{al_id, mal_id, name, raw_name, image_url, positions, about}, ...]
+
+        mal_id and about are always None, raw_name equals name, and
+        positions holds the edge role as a one-element list (empty when the
+        edge has no role).  Staff without a full name are skipped.  Returns
+        [] if tracker_id is None or on network failure; an empty result is
+        not cached.
+        """
+        if tracker_id is None:
+            return []
+        if tracker_id in self._staff_detailed_cache:
+            return self._staff_detailed_cache[tracker_id]
+
+        try:
+            pairs = self._fetch_media_connection(
+                _MANGA_STAFF, tracker_id, "staff")
+        except requests.RequestException:
+            return []
+
+        results = []
+        for node, edge in pairs:
+            name = (node.get("name") or {}).get("full") or ""
+            if not name:
+                continue
+            role = edge.get("role")
+            results.append({
+                "al_id":     node.get("id"),
+                "mal_id":    None,
+                "name":      name,
+                "raw_name":  name,
+                "image_url": (node.get("image") or {}).get("large"),
+                "positions": [role] if role else [],
+                "about":     None,
+            })
+
+        if results:
+            self._staff_detailed_cache[tracker_id] = results
+        return results
+
+    # ------------------------------------------------------------------
+    # Public: individual character / person details
+    # ------------------------------------------------------------------
+    def get_character_details(self, char_id: "int | None") -> "dict | None":
+        """
+        Returns full details for a single AniList character:
+            {al_id, mal_id, name, image_url, about, favorites, url}
+
+        mal_id is always None.  Returns None if char_id is None or on
+        network failure.  Successful results are cached per ID.
+        """
+        if char_id is None:
+            return None
+        if char_id in self._char_info_cache:
+            return self._char_info_cache[char_id]
+
+        try:
+            data = self._gql(_CHARACTER_DETAILS, {"id": char_id})
+        except requests.RequestException:
+            return None
+        entry = (data.get("data") or {}).get("Character") or {}
+
+        result = {
+            "al_id":     entry.get("id"),
+            "mal_id":    None,
+            "name":      (entry.get("name") or {}).get("full") or "",
+            "image_url": (entry.get("image") or {}).get("large"),
+            "about":     entry.get("description"),
+            "favorites": entry.get("favourites"),
+            "url":       entry.get("siteUrl") or f"https://anilist.co/character/{char_id}",
+        }
+        self._char_info_cache[char_id] = result
+        return result
+
+    def get_person_details(self, person_id: "int | None") -> "dict | None":
+        """
+        Returns full details for a single AniList staff person:
+            {al_id, mal_id, name, given_name, family_name, birthday,
+             image_url, about, favorites, website_url, url}
+
+        mal_id, given_name, family_name and website_url are always None.
+        birthday is a "YYYY-MM-DD" string, or None when no birth year is
+        known.  Returns None if person_id is None or on network failure.
+        Successful results are cached per ID.
+        """
+        if person_id is None:
+            return None
+        if person_id in self._person_info_cache:
+            return self._person_info_cache[person_id]
+
+        try:
+            data = self._gql(_PERSON_DETAILS, {"id": person_id})
+        except requests.RequestException:
+            return None
+        entry = (data.get("data") or {}).get("Staff") or {}
+
+        # dateOfBirth: {year, month, day} -> ISO-style string.  A missing
+        # month or day is filled in with 1.
+        dob = entry.get("dateOfBirth") or {}
+        birthday: "str | None" = None
+        if dob.get("year"):
+            m = dob.get("month") or 1
+            d = dob.get("day") or 1
+            birthday = f"{dob['year']}-{m:02d}-{d:02d}"
+
+        name_obj = entry.get("name") or {}
+        result = {
+            "al_id":       entry.get("id"),
+            "mal_id":      None,
+            "name":        name_obj.get("full") or "",
+            "given_name":  None,      # AniList does not break names into given/family
+            "family_name": None,
+            "birthday":    birthday,
+            "image_url":   (entry.get("image") or {}).get("large"),
+            "about":       entry.get("description"),
+            "favorites":   entry.get("favourites"),
+            "website_url": None,      # not exposed by AniList public API
+            "url":         entry.get("siteUrl") or f"https://anilist.co/staff/{person_id}",
+        }
+        self._person_info_cache[person_id] = result
+        return result
+
+    # ------------------------------------------------------------------
+    # Public: cache management
+    # ------------------------------------------------------------------
+    def clear_cache(self) -> None:
+        """Clears all internal caches (the Singleton instance is retained)."""
+        self._id_cache.clear()
+        self._stats_cache.clear()
+        self._char_names_cache.clear()
+        self._char_detailed_cache.clear()
+        self._staff_detailed_cache.clear()
+        self._char_info_cache.clear()
+        self._person_info_cache.clear()
+
+    # ------------------------------------------------------------------
+    # Internal: shared fetch for the characters / staff connections
+    # ------------------------------------------------------------------
+    def _fetch_media_connection(
+            self, query: str, tracker_id: int, field: str,
+    ) -> list[tuple[dict, dict]]:
+        """
+        Runs *query* for one manga and returns (node, edge) pairs taken from
+        the Media.<field> connection of the response.
+
+        nodes[i] is paired with edges[i]; surplus items of the longer list
+        are dropped.  Missing or null levels of the response yield an empty
+        list, and null nodes / edges are replaced by empty dicts so callers
+        can use .get() unconditionally.
+
+        Raises requests.RequestException on network / HTTP failure.
+        """
+        data = self._gql(query, {"id": tracker_id})
+        media = (data.get("data") or {}).get("Media") or {}
+        connection = media.get(field) or {}
+        nodes = connection.get("nodes") or []
+        edges = connection.get("edges") or []
+        return [(node or {}, edge or {}) for node, edge in zip(nodes, edges)]
+
+    # ------------------------------------------------------------------
+    # Internal: rate-limited GraphQL POST
+    # ------------------------------------------------------------------
+    def _gql(self, query: str, variables: "dict | None" = None) -> dict:
+        """
+        Rate-limited GraphQL POST request (respects AniList's 90 req/min limit).
+
+        On HTTP 429 the Retry-After header is honoured and the request is
+        retried once.
+
+        Returns the decoded JSON body.  Raises requests.RequestException
+        for transport errors, for HTTP error statuses and for a body that
+        is not valid JSON.
+        """
+        payload: dict = {"query": query}
+        if variables:
+            payload["variables"] = variables
+
+        resp = self._post(payload)
+        if resp.status_code == 429:
+            time.sleep(self._retry_delay(resp.headers.get("Retry-After")))
+            resp = self._post(payload)
+
+        resp.raise_for_status()
+        try:
+            return resp.json()
+        except requests.RequestException:
+            raise
+        except ValueError as exc:
+            # Some requests versions raise a plain ValueError here; convert
+            # it so callers need to handle a single exception family.
+            raise requests.RequestException(
+                "AniList returned a non-JSON response body",
+                response=resp) from exc
+
+    def _post(self, payload: dict) -> "requests.Response":
+        """
+        Sends one POST to the AniList endpoint, sleeping first if the
+        previous attempt was less than _MIN_REQUEST_INTERVAL seconds ago.
+        """
+        wait = self._MIN_REQUEST_INTERVAL - (
+            time.monotonic() - self._last_request_at)
+        if wait > 0:
+            time.sleep(wait)
+        try:
+            return self._session.post(
+                _ANILIST_GQL, json=payload, timeout=self.request_timeout)
+        finally:
+            # Recorded even when post() raises, so that a failed attempt
+            # still counts towards the request spacing.
+            self._last_request_at = time.monotonic()
+
+    @classmethod
+    def _retry_delay(cls, header_value: "str | None") -> int:
+        """
+        Converts a Retry-After header value into a non-negative number of
+        seconds.  A missing or non-integer value (for example an HTTP date)
+        yields _DEFAULT_RETRY_AFTER; a negative value yields 0.
+        """
+        try:
+            return max(0, int(header_value))
+        except (TypeError, ValueError):
+            return cls._DEFAULT_RETRY_AFTER
+
+
+# --------------------------------------------------------------------------
+# Module helpers
+# --------------------------------------------------------------------------
+def _score_title(query: str, entry: dict) -> float:
+    """
+    Rates how closely *query* matches an AniList media entry.
+
+    The romaji, english and native titles of *entry* are each compared
+    against *query* (both lower-cased) with difflib.SequenceMatcher, and
+    the highest ratio is returned.  Missing or empty titles are skipped;
+    the result is 0.0 when no title is usable.
+    """
+    titles = entry.get("title") or {}
+    needle = query.lower()
+    ratios = (
+        difflib.SequenceMatcher(None, needle, text.lower()).ratio()
+        for text in (titles.get(k) for k in ("romaji", "english", "native"))
+        if text
+    )
+    return max(ratios, default=0.0)
+
+
+# --------------------------------------------------------------------------
+# Usage example
+# --------------------------------------------------------------------------
+if __name__ == "__main__":
+    # Smoke test: performs live requests against the AniList API.
+    # Two constructor calls must hand back the very same object.
+    resolver, again = AniListResolver(), AniListResolver()
+    assert resolver is again, "AniListResolver must be a Singleton"
+
+    manga_id = resolver.find_id("Yofukashi no Uta")
+    print("AniList ID   :", manga_id)
+
+    summary = resolver.get_stats(manga_id)
+    if summary:
+        for label, field in (
+            ("Score        :", "score"),
+            ("Rank         :", "rank"),
+            ("Members      :", "members"),
+        ):
+            print(label, summary[field])
+
+    cast = resolver.get_characters_detailed(manga_id)
+    print("Characters (first 3):", [member["name"] for member in cast[:3]])
+
+    crew = resolver.get_staff_detailed(manga_id)
+    print("Staff        :", [member["name"] for member in crew])