Add AniList resolver as MAL fallback; fix SeriesGroup, tag formatting, empty-cache bug
This commit is contained in:
@@ -0,0 +1,507 @@
|
||||
"""
|
||||
anilist_resolver.py
===================

Fetches and caches AniList manga metadata (statistics, characters, staff)
using the public AniList GraphQL API.

AniList API: https://graphql.anilist.co  (no authentication required)
Rate limit:  90 req/min -> a 700 ms guard between calls is applied.
On HTTP 429 (rate-limit exceeded) the response Retry-After header is
honoured; the request is retried once automatically.

Singleton
---------
Only one instance of this class exists per process. Subsequent calls to
AniListResolver() return the same object with its warm caches intact.

Provided features
-----------------
- Title-based AniList ID lookup with best-match scoring
- Manga statistics: score (0–10), rank, popularity, members, favorites
- Character list for a manga (names only — for <Characters> XML tag)
- Detailed character list: name, AniList character ID, image URL, role
- Detailed staff list: name, AniList person ID, image URL, positions
- Lazy full-detail fetches per character / person (for descriptions)

Dependencies
------------
requests  ->  pip install requests
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import difflib
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
from MediaResolver import MediaResolver
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# GraphQL query strings
|
||||
# --------------------------------------------------------------------------
|
||||
_SEARCH_MANGA = """
|
||||
query ($search: String) {
|
||||
Page(page: 1, perPage: 5) {
|
||||
media(search: $search, type: MANGA, format_not_in: [NOVEL]) {
|
||||
id title { romaji english native } siteUrl
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
_MANGA_STATS = """
|
||||
query ($id: Int) {
|
||||
Media(id: $id, type: MANGA) {
|
||||
id title { romaji english native }
|
||||
meanScore popularity favourites
|
||||
rankings { rank type allTime }
|
||||
siteUrl
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
_MANGA_CHARACTERS = """
|
||||
query ($id: Int) {
|
||||
Media(id: $id, type: MANGA) {
|
||||
characters(sort: [ROLE, RELEVANCE], perPage: 25) {
|
||||
nodes { id name { full } image { large } siteUrl }
|
||||
edges { role }
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
_MANGA_STAFF = """
|
||||
query ($id: Int) {
|
||||
Media(id: $id, type: MANGA) {
|
||||
staff(perPage: 25) {
|
||||
nodes { id name { full } image { large } siteUrl }
|
||||
edges { role }
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
_CHARACTER_DETAILS = """
|
||||
query ($id: Int) {
|
||||
Character(id: $id) {
|
||||
id name { full } image { large }
|
||||
description(asHtml: false)
|
||||
favourites siteUrl
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
_PERSON_DETAILS = """
|
||||
query ($id: Int) {
|
||||
Staff(id: $id) {
|
||||
id name { full native } image { large }
|
||||
description(asHtml: false)
|
||||
favourites siteUrl
|
||||
dateOfBirth { year month day }
|
||||
primaryOccupations
|
||||
homeTown
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
_ANILIST_GQL = "https://graphql.anilist.co"
|
||||
|
||||
|
||||
class AniListResolver(MediaResolver):
    """
    Singleton: fetches and caches AniList manga data via GraphQL API.

    The first call to AniListResolver() creates and initialises the instance;
    all subsequent calls return the same object (constructor arguments passed
    on later calls are ignored because __init__ returns early).

    Public lookups are best-effort: a ``requests.RequestException`` raised
    while talking to the API is swallowed and reported as ``None`` / ``[]``.
    Failed lookups and "entity not found" answers are not cached, so they are
    retried on the next call. The one exception is ``find_id``, which does
    remember that a successful search produced no hits.
    """

    # Process-wide instance; set by the first __new__ call.
    _instance: "AniListResolver | None" = None

    # Minimum spacing between two API calls in seconds (keeps the client
    # under AniList's 90 req/min limit).
    _MIN_REQUEST_INTERVAL = 0.7
    # Seconds to wait after HTTP 429 when Retry-After is missing or unusable.
    _DEFAULT_RETRY_AFTER = 60

    # ------------------------------------------------------------------
    # Singleton machinery
    # ------------------------------------------------------------------
    def __new__(cls, **kwargs):
        """Returns the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # Tells __init__ that the caches still have to be set up.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self, *, request_timeout: int = 30):
        """
        Sets up the HTTP session and the empty caches (first call only).

        request_timeout -- timeout in seconds passed to every HTTP POST.
        """
        if self._initialized:
            return

        self.request_timeout = request_timeout

        self._session = requests.Session()
        self._session.headers.update({
            "User-Agent": "AniListResolver/1.0",
            "Content-Type": "application/json",
            "Accept": "application/json",
        })

        # title_lower -> al_id
        self._id_cache: dict[str, "int | None"] = {}
        # al_id -> stats dict
        self._stats_cache: dict[int, dict] = {}
        # manga_al_id -> [name_str, ...]
        self._char_names_cache: dict[int, list[str]] = {}
        # manga_al_id -> [{al_id, name, image_url, role}]
        self._char_detailed_cache: dict[int, list[dict]] = {}
        # manga_al_id -> [{al_id, name, image_url, positions}]
        self._staff_detailed_cache: dict[int, list[dict]] = {}
        # char_al_id -> {al_id, name, image_url, about, favorites, url}
        self._char_info_cache: dict[int, dict] = {}
        # person_al_id -> {al_id, name, image_url, about, favorites, url, ...}
        self._person_info_cache: dict[int, dict] = {}

        # time.monotonic() stamp of the most recent POST attempt.
        self._last_request_at: float = 0.0
        self._initialized = True

    # ------------------------------------------------------------------
    # Public: ID lookup
    # ------------------------------------------------------------------
    def find_id(self, title: str) -> "int | None":
        """
        Searches AniList for a manga by title and returns the best-matching
        AniList ID.  Returns None on failure or when no result is found.

        The title is stripped before it is searched, scored and used
        (lower-cased) as the cache key. A network failure is not cached; an
        empty search result is.
        """
        if not title or not title.strip():
            return None

        wanted = title.strip()
        key = wanted.lower()
        if key in self._id_cache:
            return self._id_cache[key]

        try:
            data = self._gql(_SEARCH_MANGA, {"search": wanted})
        except requests.RequestException:
            return None

        # "data" and "Page" may both be an explicit null in the response.
        page = (data.get("data") or {}).get("Page") or {}
        hits = [hit for hit in (page.get("media") or []) if hit]
        if not hits:
            self._id_cache[key] = None
            return None

        # max() keeps the first of several equally scored hits, i.e. the
        # API's own ordering breaks ties.
        best = max(hits, key=lambda hit: _score_title(wanted, hit))
        al_id = best.get("id")
        self._id_cache[key] = al_id
        return al_id

    # ------------------------------------------------------------------
    # Public: statistics
    # ------------------------------------------------------------------
    def get_stats(self, tracker_id: "int | None") -> "dict | None":
        """
        Returns a statistics dict for the given AniList manga ID:

            {score, rank, scored_by, popularity, members, favorites,
             url, title, as_of (DD-MM-YYYY)}

        Returns None if tracker_id is None, on network failure, or when the
        API response contains no Media entry. None results are not cached.
        """
        if tracker_id is None:
            return None
        if tracker_id in self._stats_cache:
            return self._stats_cache[tracker_id]

        try:
            data = self._gql(_MANGA_STATS, {"id": tracker_id})
        except requests.RequestException:
            return None

        entry = (data.get("data") or {}).get("Media") or {}
        if not entry:
            # Nothing came back: do not cache an all-None placeholder.
            return None

        title_obj = entry.get("title") or {}
        title = (title_obj.get("romaji")
                 or title_obj.get("english")
                 or title_obj.get("native") or "")

        # AniList meanScore is 0–100; normalise to 0.0–10.0 for consistency
        # with the MALResolver stats dict shape.
        raw_score = entry.get("meanScore")
        score = round(raw_score / 10, 1) if raw_score is not None else None

        # Ranked and popularity ranks are in the rankings array; only the
        # first all-time entry of each type is used.
        rated_rank = None
        popular_rank = None
        for ranking in entry.get("rankings") or []:
            if not ranking or not ranking.get("allTime"):
                continue
            kind = ranking.get("type")
            if kind == "RATED" and rated_rank is None:
                rated_rank = ranking.get("rank")
            elif kind == "POPULAR" and popular_rank is None:
                popular_rank = ranking.get("rank")

        stats: dict = {
            "score": score,
            "rank": rated_rank,
            "scored_by": None,  # not exposed by AniList API
            "popularity": popular_rank,
            "members": entry.get("popularity"),  # AniList's popularity = member count
            "favorites": entry.get("favourites"),
            "url": entry.get("siteUrl") or f"https://anilist.co/manga/{tracker_id}",
            "title": title,
            "as_of": datetime.date.today().strftime("%d-%m-%Y"),
        }
        self._stats_cache[tracker_id] = stats
        return stats

    # ------------------------------------------------------------------
    # Public: character names (for ComicInfo <Characters> tag)
    # ------------------------------------------------------------------
    def get_characters(self, tracker_id: "int | None") -> list[str]:
        """
        Returns a flat list of character names for the manga.

        Built from get_characters_detailed(); an empty result is not cached
        so that a later call can try again.
        """
        if tracker_id is None:
            return []
        if tracker_id in self._char_names_cache:
            return self._char_names_cache[tracker_id]

        detailed = self.get_characters_detailed(tracker_id)
        names = [item["name"] for item in detailed if item.get("name")]
        if names:
            self._char_names_cache[tracker_id] = names
        return names

    # ------------------------------------------------------------------
    # Public: detailed character data
    # ------------------------------------------------------------------
    def get_characters_detailed(self, tracker_id: "int | None") -> list[dict]:
        """
        Returns detailed character entries for a manga:
            [{al_id, mal_id=None, name, raw_name, image_url, role,
              about=None}, ...]

        Entries without a full name are skipped; a missing role defaults to
        "SUPPORTING".  Returns [] if tracker_id is None or on network
        failure.  Empty results are not cached.
        """
        if tracker_id is None:
            return []
        if tracker_id in self._char_detailed_cache:
            return self._char_detailed_cache[tracker_id]

        try:
            pairs = self._fetch_connection(
                _MANGA_CHARACTERS, tracker_id, "characters")
        except requests.RequestException:
            return []

        results = []
        for node, edge in pairs:
            name = (node.get("name") or {}).get("full") or ""
            if not name:
                continue
            results.append({
                "al_id": node.get("id"),
                "mal_id": None,
                "name": name,
                "raw_name": name,
                "image_url": (node.get("image") or {}).get("large"),
                "role": edge.get("role") or "SUPPORTING",
                "about": None,
            })

        if results:
            self._char_detailed_cache[tracker_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: detailed staff data
    # ------------------------------------------------------------------
    def get_staff_detailed(self, tracker_id: "int | None") -> list[dict]:
        """
        Returns detailed staff entries for a manga:
            [{al_id, mal_id=None, name, raw_name, image_url, positions,
              about=None}, ...]

        ``positions`` holds the edge's role as a one-element list, or [] when
        the edge has no role.  Entries without a full name are skipped.
        Returns [] if tracker_id is None or on network failure.  Empty
        results are not cached.
        """
        if tracker_id is None:
            return []
        if tracker_id in self._staff_detailed_cache:
            return self._staff_detailed_cache[tracker_id]

        try:
            pairs = self._fetch_connection(_MANGA_STAFF, tracker_id, "staff")
        except requests.RequestException:
            return []

        results = []
        for node, edge in pairs:
            name = (node.get("name") or {}).get("full") or ""
            if not name:
                continue
            role = edge.get("role")
            results.append({
                "al_id": node.get("id"),
                "mal_id": None,
                "name": name,
                "raw_name": name,
                "image_url": (node.get("image") or {}).get("large"),
                "positions": [role] if role else [],
                "about": None,
            })

        if results:
            self._staff_detailed_cache[tracker_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: individual character / person details
    # ------------------------------------------------------------------
    def get_character_details(self, char_id: "int | None") -> "dict | None":
        """
        Returns full details for a single AniList character:
            {al_id, mal_id=None, name, image_url, about, favorites, url}

        Returns None if char_id is None, on network failure, or when the
        response contains no Character entry. None results are not cached.
        """
        if char_id is None:
            return None
        if char_id in self._char_info_cache:
            return self._char_info_cache[char_id]

        try:
            data = self._gql(_CHARACTER_DETAILS, {"id": char_id})
        except requests.RequestException:
            return None

        entry = (data.get("data") or {}).get("Character") or {}
        if not entry:
            # Do not cache an empty placeholder for an unknown character.
            return None

        result = {
            "al_id": entry.get("id"),
            "mal_id": None,
            "name": (entry.get("name") or {}).get("full") or "",
            "image_url": (entry.get("image") or {}).get("large"),
            "about": entry.get("description"),
            "favorites": entry.get("favourites"),
            "url": entry.get("siteUrl") or f"https://anilist.co/character/{char_id}",
        }
        self._char_info_cache[char_id] = result
        return result

    def get_person_details(self, person_id: "int | None") -> "dict | None":
        """
        Returns full details for a single AniList staff person:
            {al_id, mal_id=None, name, given_name=None, family_name=None,
             birthday, image_url, about, favorites, website_url=None, url}

        ``birthday`` is a "YYYY-MM-DD" string when the year is known (an
        unknown month or day is filled with 1), otherwise None.

        Returns None if person_id is None, on network failure, or when the
        response contains no Staff entry. None results are not cached.
        """
        if person_id is None:
            return None
        if person_id in self._person_info_cache:
            return self._person_info_cache[person_id]

        try:
            data = self._gql(_PERSON_DETAILS, {"id": person_id})
        except requests.RequestException:
            return None

        entry = (data.get("data") or {}).get("Staff") or {}
        if not entry:
            # Do not cache an empty placeholder for an unknown person.
            return None

        # dateOfBirth: {year, month, day} → ISO string for _format_birthday
        dob = entry.get("dateOfBirth") or {}
        birthday: "str | None" = None
        if dob.get("year"):
            month = dob.get("month") or 1
            day = dob.get("day") or 1
            birthday = f"{dob['year']}-{month:02d}-{day:02d}"

        name_obj = entry.get("name") or {}
        result = {
            "al_id": entry.get("id"),
            "mal_id": None,
            "name": name_obj.get("full") or "",
            "given_name": None,  # AniList does not break names into given/family
            "family_name": None,
            "birthday": birthday,
            "image_url": (entry.get("image") or {}).get("large"),
            "about": entry.get("description"),
            "favorites": entry.get("favourites"),
            "website_url": None,  # not exposed by AniList public API
            "url": entry.get("siteUrl") or f"https://anilist.co/staff/{person_id}",
        }
        self._person_info_cache[person_id] = result
        return result

    # ------------------------------------------------------------------
    # Public: cache management
    # ------------------------------------------------------------------
    def clear_cache(self) -> None:
        """Clears all internal caches (the Singleton instance is retained)."""
        for cache in (
            self._id_cache,
            self._stats_cache,
            self._char_names_cache,
            self._char_detailed_cache,
            self._staff_detailed_cache,
            self._char_info_cache,
            self._person_info_cache,
        ):
            cache.clear()

    # ------------------------------------------------------------------
    # Internal: shared fetch helper for characters / staff
    # ------------------------------------------------------------------
    def _fetch_connection(self, query: str, tracker_id: int,
                          field: str) -> "list[tuple[dict, dict]]":
        """
        Runs ``query`` for ``tracker_id`` and returns the (node, edge) pairs
        found under ``data.Media.<field>``.

        Nodes and edges are paired positionally; a null at any level of the
        response is treated as empty.  Propagates requests.RequestException.
        """
        data = self._gql(query, {"id": tracker_id})
        media = (data.get("data") or {}).get("Media") or {}
        connection = media.get(field) or {}
        nodes = connection.get("nodes") or []
        edges = connection.get("edges") or []
        return [(node or {}, edge or {}) for node, edge in zip(nodes, edges)]

    # ------------------------------------------------------------------
    # Internal: rate-limited GraphQL POST
    # ------------------------------------------------------------------
    def _gql(self, query: str, variables: "dict | None" = None) -> dict:
        """
        Rate-limited GraphQL POST request (respects AniList's 90 req/min limit).

        On HTTP 429 the Retry-After header is honoured and the request is
        retried once.

        Returns the decoded JSON body, or {} if the body is not a JSON
        object.  Raises requests.RequestException subclasses on transport
        errors and on non-2xx status codes (via raise_for_status).
        """
        wait = self._MIN_REQUEST_INTERVAL - (time.monotonic() - self._last_request_at)
        if wait > 0:
            time.sleep(wait)

        payload: dict = {"query": query}
        if variables:
            payload["variables"] = variables

        resp = self._post(payload)
        if resp.status_code == 429:
            time.sleep(self._retry_delay(resp.headers.get("Retry-After")))
            resp = self._post(payload)

        resp.raise_for_status()
        body = resp.json()
        # Callers use .get() on the result, so never hand back a non-dict.
        return body if isinstance(body, dict) else {}

    def _post(self, payload: dict):
        """POSTs ``payload`` to the endpoint and records the attempt time."""
        try:
            return self._session.post(
                _ANILIST_GQL, json=payload, timeout=self.request_timeout)
        finally:
            # Stamp failed attempts too, so the throttle also applies after
            # a timeout or connection error.
            self._last_request_at = time.monotonic()

    def _retry_delay(self, header_value: "str | None") -> int:
        """
        Converts a Retry-After header value into a non-negative number of
        seconds; falls back to _DEFAULT_RETRY_AFTER when the header is
        missing or not an integer (e.g. the HTTP-date form).
        """
        try:
            delay = int(header_value)
        except (TypeError, ValueError):
            return self._DEFAULT_RETRY_AFTER
        return max(delay, 0)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Module helpers
|
||||
# --------------------------------------------------------------------------
|
||||
def _score_title(query: str, entry: dict) -> float:
|
||||
"""Returns the best title-similarity score for an AniList media entry."""
|
||||
title_obj = entry.get("title") or {}
|
||||
candidates = [
|
||||
title_obj.get("romaji") or "",
|
||||
title_obj.get("english") or "",
|
||||
title_obj.get("native") or "",
|
||||
]
|
||||
best = 0.0
|
||||
q = query.lower()
|
||||
for t in candidates:
|
||||
if t:
|
||||
ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
|
||||
best = max(best, ratio)
|
||||
return best
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Usage example
|
||||
# --------------------------------------------------------------------------
|
||||
def _demo() -> None:
    """
    Usage example: checks the singleton property, then looks up one title
    and prints its statistics, first characters and staff.
    """
    r1 = AniListResolver()
    r2 = AniListResolver()
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if r1 is not r2:
        raise RuntimeError("AniListResolver must be a Singleton")

    al_id = r1.find_id("Yofukashi no Uta")
    print("AniList ID :", al_id)
    if al_id is None:
        # Every further lookup would just return None / [] for a None ID.
        return

    stats = r1.get_stats(al_id)
    if stats:
        print("Score   :", stats["score"])
        print("Rank    :", stats["rank"])
        print("Members :", stats["members"])

    chars = r1.get_characters_detailed(al_id)
    print("Characters (first 3):", [c["name"] for c in chars[:3]])

    staff = r1.get_staff_detailed(al_id)
    print("Staff :", [s["name"] for s in staff])


if __name__ == "__main__":
    _demo()
|
||||
Reference in New Issue
Block a user