Add AniList resolver as MAL fallback; fix SeriesGroup, tag formatting, empty-cache bug

This commit is contained in:
2026-05-23 22:35:08 +02:00
parent ec1342d146
commit b8f897fa2e
6 changed files with 730 additions and 72 deletions
+507
View File
@@ -0,0 +1,507 @@
"""
anilist_resolver.py
===================
Fetches and caches AniList manga metadata (statistics, characters, staff)
using the public AniList GraphQL API.
AniList API: https://graphql.anilist.co (no authentication required)
Rate limit: 90 req/min -> a 700 ms guard between calls is applied.
On HTTP 429 (rate-limit exceeded) the response Retry-After header is
honoured; the request is retried once automatically.
Singleton
---------
Only one instance of this class exists per process. Subsequent calls to
AniListResolver() return the same object with its warm caches intact.
Provided features
-----------------
- Title-based AniList ID lookup with best-match scoring
- Manga statistics: score (0–10), rank, popularity, members, favorites
- Character list for a manga (names only — for <Characters> XML tag)
- Detailed character list: name, AniList character ID, image URL, role
- Detailed staff list: name, AniList person ID, image URL, positions
- Lazy full-detail fetches per character / person (for descriptions)
Dependencies
------------
requests -> pip install requests
"""
from __future__ import annotations
import datetime
import difflib
import time
import requests
from MediaResolver import MediaResolver
# --------------------------------------------------------------------------
# GraphQL query strings
# --------------------------------------------------------------------------
# Each constant below is a complete GraphQL document that is sent verbatim as
# the "query" field of the POST body; the matching "variables" dict supplies
# the value for the single $search / $id variable each query declares.

# Title search: first page, at most 5 results, restricted to type MANGA with
# the NOVEL format excluded. Returns id, the three title variants and siteUrl.
_SEARCH_MANGA = """
query ($search: String) {
Page(page: 1, perPage: 5) {
media(search: $search, type: MANGA, format_not_in: [NOVEL]) {
id title { romaji english native } siteUrl
}
}
}
"""
# Statistics for one manga by ID: meanScore, popularity, favourites and the
# rankings list (rank / type / allTime), plus titles and siteUrl.
_MANGA_STATS = """
query ($id: Int) {
Media(id: $id, type: MANGA) {
id title { romaji english native }
meanScore popularity favourites
rankings { rank type allTime }
siteUrl
}
}
"""
# Characters of one manga: at most 25, sorted by ROLE then RELEVANCE.
# Character data comes back in `nodes` and the role in `edges`, i.e. as two
# separate lists that the consumer has to pair up by position.
_MANGA_CHARACTERS = """
query ($id: Int) {
Media(id: $id, type: MANGA) {
characters(sort: [ROLE, RELEVANCE], perPage: 25) {
nodes { id name { full } image { large } siteUrl }
edges { role }
}
}
}
"""
# Staff of one manga: at most 25 entries, same nodes/edges layout as the
# characters query (person data in `nodes`, role text in `edges`).
_MANGA_STAFF = """
query ($id: Int) {
Media(id: $id, type: MANGA) {
staff(perPage: 25) {
nodes { id name { full } image { large } siteUrl }
edges { role }
}
}
}
"""
# Full record for a single character by ID; the description is requested
# with asHtml: false.
_CHARACTER_DETAILS = """
query ($id: Int) {
Character(id: $id) {
id name { full } image { large }
description(asHtml: false)
favourites siteUrl
}
}
"""
# Full record for a single staff member by ID, including the date of birth
# as separate year / month / day fields, primaryOccupations and homeTown.
_PERSON_DETAILS = """
query ($id: Int) {
Staff(id: $id) {
id name { full native } image { large }
description(asHtml: false)
favourites siteUrl
dateOfBirth { year month day }
primaryOccupations
homeTown
}
}
"""
# Endpoint URL that every GraphQL request is POSTed to.
_ANILIST_GQL = "https://graphql.anilist.co"
class AniListResolver(MediaResolver):
    """
    Singleton: fetches and caches AniList manga data via GraphQL API.

    The first call to AniListResolver() creates and initialises the instance;
    all subsequent calls return the same object.  Constructor arguments passed
    on those later calls are ignored because initialisation runs only once.

    Every public getter is best-effort: a failed request yields None (or an
    empty list) instead of raising, and failures are never cached, so a later
    call retries the lookup.
    """

    _instance: "AniListResolver | None" = None

    # Minimum spacing between two requests, in seconds (90 req/min limit).
    _MIN_INTERVAL = 0.7
    # Seconds to wait after HTTP 429 when Retry-After is missing or unusable.
    _DEFAULT_RETRY_AFTER = 60

    # ------------------------------------------------------------------
    # Singleton machinery
    # ------------------------------------------------------------------
    def __new__(cls, **kwargs):
        """Create the shared instance on first use; return it afterwards."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # __init__ runs on every AniListResolver() call; this flag lets
            # it skip re-initialisation so the warm caches survive.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self, *, request_timeout: int = 30):
        """
        Set up the HTTP session and the empty caches (first call only).

        request_timeout: timeout in seconds passed to every HTTP request.
        """
        if self._initialized:
            return
        self.request_timeout = request_timeout
        self._session = requests.Session()
        self._session.headers.update({
            "User-Agent": "AniListResolver/1.0",
            "Content-Type": "application/json",
            "Accept": "application/json",
        })
        # title_lower -> al_id
        self._id_cache: dict[str, "int | None"] = {}
        # al_id -> stats dict
        self._stats_cache: dict[int, dict] = {}
        # manga_al_id -> [name_str, ...]
        self._char_names_cache: dict[int, list[str]] = {}
        # manga_al_id -> [{al_id, name, image_url, role}]
        self._char_detailed_cache: dict[int, list[dict]] = {}
        # manga_al_id -> [{al_id, name, image_url, positions}]
        self._staff_detailed_cache: dict[int, list[dict]] = {}
        # char_al_id -> {al_id, name, image_url, about, favorites, url}
        self._char_info_cache: dict[int, dict] = {}
        # person_al_id -> {al_id, name, image_url, about, favorites, url, ...}
        self._person_info_cache: dict[int, dict] = {}
        # time.monotonic() stamp of the most recent request (0.0 = none yet).
        self._last_request_at: float = 0.0
        self._initialized = True

    # ------------------------------------------------------------------
    # Public: ID lookup
    # ------------------------------------------------------------------
    def find_id(self, title: str) -> "int | None":
        """
        Searches AniList for a manga by title and returns the best-matching
        AniList ID.  Returns None on failure or when no result is found.

        A "no result" answer is cached (as None); a network failure is not,
        so the same title is retried on the next call.
        """
        if not title or not title.strip():
            return None
        # The same normalised title is used for the cache key, the search
        # and the similarity scoring.
        wanted = title.strip()
        key = wanted.lower()
        if key in self._id_cache:
            return self._id_cache[key]
        try:
            data = self._gql(_SEARCH_MANGA, {"search": wanted})
        except requests.RequestException:
            return None
        found = self._dig(data, "data", "Page", "media") or []
        candidates = [e for e in found if isinstance(e, dict)]
        if not candidates:
            self._id_cache[key] = None
            return None
        # max() keeps the first of several equally good matches, i.e. the
        # one AniList itself ranked highest.
        best = max(candidates, key=lambda e: _score_title(wanted, e))
        al_id = best.get("id")
        self._id_cache[key] = al_id
        return al_id

    # ------------------------------------------------------------------
    # Public: statistics
    # ------------------------------------------------------------------
    def get_stats(self, tracker_id: "int | None") -> "dict | None":
        """
        Returns a statistics dict for the given AniList manga ID:
        {score, rank, scored_by, popularity, members, favorites,
        url, title, as_of (DD-MM-YYYY)}

        Returns None if tracker_id is None, on network failure, or when the
        API answers without a media entry.  None results are not cached.
        """
        if tracker_id is None:
            return None
        if tracker_id in self._stats_cache:
            return self._stats_cache[tracker_id]
        try:
            data = self._gql(_MANGA_STATS, {"id": tracker_id})
        except requests.RequestException:
            return None
        entry = self._dig(data, "data", "Media")
        if not isinstance(entry, dict) or not entry:
            # Nothing usable came back; do not poison the cache with an
            # all-None placeholder.
            return None
        title_obj = entry.get("title") or {}
        title = (title_obj.get("romaji")
                 or title_obj.get("english")
                 or title_obj.get("native") or "")
        # AniList meanScore is 0–100; normalise to 0.0–10.0 for consistency
        # with the MALResolver stats dict shape.
        raw_score = entry.get("meanScore")
        score = round(raw_score / 10, 1) if raw_score is not None else None
        # Ranked and popularity ranks are in the rankings array; only the
        # first all-time entry of each type is used.
        rated_rank = None
        popular_rank = None
        for ranking in (entry.get("rankings") or []):
            if not ranking or not ranking.get("allTime"):
                continue
            kind = ranking.get("type")
            if kind == "RATED" and rated_rank is None:
                rated_rank = ranking.get("rank")
            if kind == "POPULAR" and popular_rank is None:
                popular_rank = ranking.get("rank")
        stats: dict = {
            "score": score,
            "rank": rated_rank,
            "scored_by": None,  # not exposed by AniList API
            "popularity": popular_rank,
            "members": entry.get("popularity"),  # AniList's popularity = member count
            "favorites": entry.get("favourites"),
            "url": entry.get("siteUrl") or f"https://anilist.co/manga/{tracker_id}",
            "title": title,
            "as_of": datetime.date.today().strftime("%d-%m-%Y"),
        }
        self._stats_cache[tracker_id] = stats
        return stats

    # ------------------------------------------------------------------
    # Public: character names (for ComicInfo <Characters> tag)
    # ------------------------------------------------------------------
    def get_characters(self, tracker_id: "int | None") -> list[str]:
        """Returns a flat list of character names for the manga."""
        if tracker_id is None:
            return []
        if tracker_id in self._char_names_cache:
            return self._char_names_cache[tracker_id]
        detailed = self.get_characters_detailed(tracker_id)
        names = [e["name"] for e in detailed if e.get("name")]
        # Empty results are not cached so a failed fetch can be retried.
        if names:
            self._char_names_cache[tracker_id] = names
        return names

    # ------------------------------------------------------------------
    # Public: detailed character data
    # ------------------------------------------------------------------
    def get_characters_detailed(self, tracker_id: "int | None") -> list[dict]:
        """
        Returns detailed character entries for a manga:
        [{al_id, mal_id, name, raw_name, image_url, role, about=None}, ...]

        mal_id and about are always None here; role falls back to
        "SUPPORTING" when AniList gives none.  Returns [] if tracker_id is
        None or the request fails; empty results are not cached.
        """
        if tracker_id is None:
            return []
        if tracker_id in self._char_detailed_cache:
            return self._char_detailed_cache[tracker_id]
        pairs = self._fetch_credit_pairs(
            _MANGA_CHARACTERS, tracker_id, "characters")
        results = []
        for node, edge in pairs:
            name = (node.get("name") or {}).get("full") or ""
            if not name:
                continue
            results.append({
                "al_id": node.get("id"),
                "mal_id": None,
                "name": name,
                "raw_name": name,
                "image_url": (node.get("image") or {}).get("large"),
                "role": edge.get("role") or "SUPPORTING",
                "about": None,
            })
        if results:
            self._char_detailed_cache[tracker_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: detailed staff data
    # ------------------------------------------------------------------
    def get_staff_detailed(self, tracker_id: "int | None") -> list[dict]:
        """
        Returns detailed staff entries for a manga:
        [{al_id, mal_id, name, raw_name, image_url, positions, about=None}, ...]

        positions is a list holding the single role string AniList reports
        for the entry (empty when there is none).  Returns [] if tracker_id
        is None or the request fails; empty results are not cached.
        """
        if tracker_id is None:
            return []
        if tracker_id in self._staff_detailed_cache:
            return self._staff_detailed_cache[tracker_id]
        pairs = self._fetch_credit_pairs(_MANGA_STAFF, tracker_id, "staff")
        results = []
        for node, edge in pairs:
            name = (node.get("name") or {}).get("full") or ""
            if not name:
                continue
            role = edge.get("role")
            results.append({
                "al_id": node.get("id"),
                "mal_id": None,
                "name": name,
                "raw_name": name,
                "image_url": (node.get("image") or {}).get("large"),
                "positions": [role] if role else [],
                "about": None,
            })
        if results:
            self._staff_detailed_cache[tracker_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: individual character / person details
    # ------------------------------------------------------------------
    def get_character_details(self, char_id: "int | None") -> "dict | None":
        """
        Returns full details for a single AniList character:
        {al_id, mal_id=None, name, image_url, about, favorites, url}

        Returns None if char_id is None, on network failure, or when the API
        answers without a character entry.  None results are not cached.
        """
        if char_id is None:
            return None
        if char_id in self._char_info_cache:
            return self._char_info_cache[char_id]
        try:
            data = self._gql(_CHARACTER_DETAILS, {"id": char_id})
        except requests.RequestException:
            return None
        entry = self._dig(data, "data", "Character")
        if not isinstance(entry, dict) or not entry:
            return None
        result = {
            "al_id": entry.get("id"),
            "mal_id": None,
            "name": (entry.get("name") or {}).get("full") or "",
            "image_url": (entry.get("image") or {}).get("large"),
            "about": entry.get("description"),
            "favorites": entry.get("favourites"),
            "url": entry.get("siteUrl") or f"https://anilist.co/character/{char_id}",
        }
        self._char_info_cache[char_id] = result
        return result

    def get_person_details(self, person_id: "int | None") -> "dict | None":
        """
        Returns full details for a single AniList staff person:
        {al_id, mal_id=None, name, given_name=None, family_name=None,
        birthday (YYYY-MM-DD or None), image_url, about, favorites,
        website_url=None, url}

        Returns None if person_id is None, on network failure, or when the
        API answers without a staff entry.  None results are not cached.
        """
        if person_id is None:
            return None
        if person_id in self._person_info_cache:
            return self._person_info_cache[person_id]
        try:
            data = self._gql(_PERSON_DETAILS, {"id": person_id})
        except requests.RequestException:
            return None
        entry = self._dig(data, "data", "Staff")
        if not isinstance(entry, dict) or not entry:
            return None
        # dateOfBirth: {year, month, day} → ISO string for _format_birthday.
        # A birthday is only produced when the year is known; a missing
        # month or day is filled in as 1.
        dob = entry.get("dateOfBirth") or {}
        birthday: "str | None" = None
        if dob.get("year"):
            m = dob.get("month") or 1
            d = dob.get("day") or 1
            birthday = f"{dob['year']}-{m:02d}-{d:02d}"
        name_obj = entry.get("name") or {}
        result = {
            "al_id": entry.get("id"),
            "mal_id": None,
            "name": name_obj.get("full") or "",
            "given_name": None,  # AniList does not break names into given/family
            "family_name": None,
            "birthday": birthday,
            "image_url": (entry.get("image") or {}).get("large"),
            "about": entry.get("description"),
            "favorites": entry.get("favourites"),
            "website_url": None,  # not exposed by AniList public API
            "url": entry.get("siteUrl") or f"https://anilist.co/staff/{person_id}",
        }
        self._person_info_cache[person_id] = result
        return result

    # ------------------------------------------------------------------
    # Public: cache management
    # ------------------------------------------------------------------
    def clear_cache(self) -> None:
        """Clears all internal caches (the Singleton instance is retained)."""
        self._id_cache.clear()
        self._stats_cache.clear()
        self._char_names_cache.clear()
        self._char_detailed_cache.clear()
        self._staff_detailed_cache.clear()
        self._char_info_cache.clear()
        self._person_info_cache.clear()

    # ------------------------------------------------------------------
    # Internal: response helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _dig(payload, *path):
        """
        Null-safe nested lookup: follows *path* through nested dicts and
        returns the value found, or None as soon as a level is missing,
        JSON null, or not a dict.
        """
        current = payload
        for key in path:
            if not isinstance(current, dict):
                return None
            current = current.get(key)
        return current

    def _fetch_credit_pairs(self, query: str, tracker_id: int,
                            connection: str) -> list:
        """
        Runs *query* for a manga and returns [(node, edge), ...] taken from
        the `nodes` and `edges` lists of Media.<connection>, paired by
        position.  Returns [] when the request fails.  Null list elements
        are replaced by empty dicts so callers can use .get() on both.
        """
        try:
            data = self._gql(query, {"id": tracker_id})
        except requests.RequestException:
            return []
        nodes = self._dig(data, "data", "Media", connection, "nodes") or []
        edges = self._dig(data, "data", "Media", connection, "edges") or []
        return [(node or {}, edge or {}) for node, edge in zip(nodes, edges)]

    # ------------------------------------------------------------------
    # Internal: rate-limited GraphQL POST
    # ------------------------------------------------------------------
    def _retry_delay(self, header_value) -> int:
        """
        Converts a Retry-After header value into a non-negative number of
        seconds.  Missing, non-integer (e.g. HTTP-date) or otherwise
        unusable values fall back to _DEFAULT_RETRY_AFTER.
        """
        try:
            seconds = int(header_value)
        except (TypeError, ValueError):
            return self._DEFAULT_RETRY_AFTER
        # time.sleep() rejects negative values.
        return max(seconds, 0)

    def _post(self, payload: dict):
        """POSTs *payload* as JSON and records when the request was made."""
        resp = self._session.post(
            _ANILIST_GQL, json=payload, timeout=self.request_timeout)
        self._last_request_at = time.monotonic()
        return resp

    def _gql(self, query: str, variables: "dict | None" = None) -> dict:
        """
        Rate-limited GraphQL POST request (respects AniList's 90 req/min limit).

        On HTTP 429 the Retry-After header is honoured and the request is
        retried once.

        Returns the decoded JSON response body.
        Raises requests.RequestException (or a subclass) on connection
        errors, timeouts, HTTP error statuses and undecodable bodies, so
        callers need to catch only that one exception type.
        """
        wait = self._MIN_INTERVAL - (time.monotonic() - self._last_request_at)
        if wait > 0:
            time.sleep(wait)
        payload: dict = {"query": query}
        if variables:
            payload["variables"] = variables
        resp = self._post(payload)
        if resp.status_code == 429:
            time.sleep(self._retry_delay(resp.headers.get("Retry-After")))
            resp = self._post(payload)
        resp.raise_for_status()
        try:
            return resp.json()
        except requests.RequestException:
            # Newer requests releases already raise a RequestException
            # subclass for bad JSON; pass it through untouched.
            raise
        except ValueError as exc:
            # Older requests releases raise a plain ValueError here, which
            # the public getters would not catch.
            raise requests.RequestException(
                "AniList returned a response that is not valid JSON"
            ) from exc
# --------------------------------------------------------------------------
# Module helpers
# --------------------------------------------------------------------------
def _score_title(query: str, entry: dict) -> float:
"""Returns the best title-similarity score for an AniList media entry."""
title_obj = entry.get("title") or {}
candidates = [
title_obj.get("romaji") or "",
title_obj.get("english") or "",
title_obj.get("native") or "",
]
best = 0.0
q = query.lower()
for t in candidates:
if t:
ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
best = max(best, ratio)
return best
# --------------------------------------------------------------------------
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
    # Smoke test against the live AniList API: checks the singleton
    # behaviour, then resolves one title and prints what was found.
    resolver = AniListResolver()
    same_resolver = AniListResolver()
    assert resolver is same_resolver, "AniListResolver must be a Singleton"

    manga_id = resolver.find_id("Yofukashi no Uta")
    print("AniList ID :", manga_id)

    manga_stats = resolver.get_stats(manga_id)
    if manga_stats:
        for label, field in (
            ("Score :", "score"),
            ("Rank :", "rank"),
            ("Members :", "members"),
        ):
            print(label, manga_stats[field])

    characters = resolver.get_characters_detailed(manga_id)
    first_three = [entry["name"] for entry in characters[:3]]
    print("Characters (first 3):", first_three)

    staff_members = resolver.get_staff_detailed(manga_id)
    staff_names = [entry["name"] for entry in staff_members]
    print("Staff :", staff_names)