523 lines
18 KiB
Python
523 lines
18 KiB
Python
"""
anilist_resolver.py
===================

Fetches and caches AniList manga metadata (statistics, characters, staff)
using the public AniList GraphQL API.

AniList API: https://graphql.anilist.co (no authentication required)
Rate limit: 90 req/min -> a 700 ms guard between calls is applied.
On HTTP 429 (rate-limit exceeded) the response Retry-After header is
honoured; the request is retried once automatically.

Singleton
---------
Only one instance of this class exists per process. Subsequent calls to
AniListResolver() return the same object with its warm caches intact.

Provided features
-----------------
- Title-based AniList ID lookup with best-match scoring
- Manga statistics: score (0–10), rank, popularity, members, favorites
- Character list for a manga (names only — for <Characters> XML tag)
- Detailed character list: name, AniList character ID, image URL, role
- Detailed staff list: name, AniList person ID, image URL, positions
- Lazy full-detail fetches per character / person (for descriptions)

Dependencies
------------
requests -> pip install requests
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import datetime
|
||
import time
|
||
|
||
import requests
|
||
|
||
from MediaResolver import MediaResolver
|
||
from TextUtils import best_similarity
|
||
|
||
|
||
# --------------------------------------------------------------------------
# GraphQL query strings
# --------------------------------------------------------------------------
# AniList models both manga and light novels as type MANGA; the format
# clause decides which of the two a search returns.  The placeholder is
# substituted at construction time (see `media_format`).
_SEARCH_MANGA_TEMPLATE = """
query ($search: String) {
  Page(page: 1, perPage: 5) {
    media(search: $search, type: MANGA, __FORMAT_CLAUSE__) {
      id title { romaji english native } siteUrl
    }
  }
}
"""

# media_format -> GraphQL argument spliced into _SEARCH_MANGA_TEMPLATE.
_FORMAT_CLAUSES = {
    "manga": "format_not_in: [NOVEL]",
    "novel": "format_in: [NOVEL]",
}

# Score, popularity, favourites and rankings for one media entry.
_MANGA_STATS = """
query ($id: Int) {
  Media(id: $id, type: MANGA) {
    id title { romaji english native }
    meanScore popularity favourites
    rankings { rank type allTime }
    siteUrl
  }
}
"""

# First 25 characters of a media entry; `nodes` and `edges` are requested
# side by side and paired positionally by the consumer.
_MANGA_CHARACTERS = """
query ($id: Int) {
  Media(id: $id, type: MANGA) {
    characters(sort: [ROLE, RELEVANCE], perPage: 25) {
      nodes { id name { full } image { large } siteUrl }
      edges { role }
    }
  }
}
"""

# First 25 staff members of a media entry (same nodes/edges layout).
_MANGA_STAFF = """
query ($id: Int) {
  Media(id: $id, type: MANGA) {
    staff(perPage: 25) {
      nodes { id name { full } image { large } siteUrl }
      edges { role }
    }
  }
}
"""

# Full record for a single character.
_CHARACTER_DETAILS = """
query ($id: Int) {
  Character(id: $id) {
    id name { full } image { large }
    description(asHtml: false)
    favourites siteUrl
  }
}
"""

# Full record for a single staff member.
_PERSON_DETAILS = """
query ($id: Int) {
  Staff(id: $id) {
    id name { full native } image { large }
    description(asHtml: false)
    favourites siteUrl
    dateOfBirth { year month day }
    primaryOccupations
    homeTown
  }
}
"""

# Single GraphQL endpoint used for every request.
_ANILIST_GQL = "https://graphql.anilist.co"
|
||
|
||
|
||
class AniListResolver(MediaResolver):
    """
    Singleton: fetches and caches AniList manga data via GraphQL API.

    The first call to AniListResolver() creates and initialises the instance;
    all subsequent calls return the same object.

    Every public getter swallows ``requests.RequestException`` and reports
    the failure through its "empty" return value (``None`` or ``[]``);
    failed lookups are never cached, so a later call can retry.
    """

    _instance: "AniListResolver | None" = None

    # Minimum spacing between two requests in seconds (90 req/min limit).
    _MIN_REQUEST_INTERVAL = 0.7
    # Seconds to wait on HTTP 429 when Retry-After is missing or unusable.
    _DEFAULT_RETRY_AFTER = 60

    # ------------------------------------------------------------------
    # Singleton machinery
    # ------------------------------------------------------------------
    def __new__(cls, **kwargs):
        """Return the process-wide instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # __init__ runs on every construction; this flag lets it
            # recognise the already-initialised singleton and bail out.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self, *, request_timeout: int = 30,
                 media_format: str = "manga"):
        """
        request_timeout : timeout in seconds passed to every HTTP request.
        media_format    : "manga" (excludes novels) or "novel" (novels only).
                          Only the FIRST construction in the process sets it
                          (singleton); construct the resolver with the correct
                          format in the entry point / orchestrator.

        Raises ValueError when media_format is not a known format (only
        checked on the first construction).
        """
        if self._initialized:
            return

        if media_format not in _FORMAT_CLAUSES:
            raise ValueError(f"media_format must be one of "
                             f"{sorted(_FORMAT_CLAUSES)}, got {media_format!r}")
        self.media_format = media_format
        self._search_query = _SEARCH_MANGA_TEMPLATE.replace(
            "__FORMAT_CLAUSE__", _FORMAT_CLAUSES[media_format])

        self.request_timeout = request_timeout

        self._session = requests.Session()
        self._session.headers.update({
            "User-Agent": "AniListResolver/1.0",
            "Content-Type": "application/json",
            "Accept": "application/json",
        })

        # title_lower -> al_id (None = searched, nothing found)
        self._id_cache: dict[str, "int | None"] = {}
        # al_id -> stats dict
        self._stats_cache: dict[int, dict] = {}
        # manga_al_id -> [name_str, ...]
        self._char_names_cache: dict[int, list[str]] = {}
        # manga_al_id -> [{al_id, mal_id, name, raw_name, image_url, role, about}]
        self._char_detailed_cache: dict[int, list[dict]] = {}
        # manga_al_id -> [{al_id, mal_id, name, raw_name, image_url, positions, about}]
        self._staff_detailed_cache: dict[int, list[dict]] = {}
        # char_al_id -> {al_id, mal_id, name, image_url, about, favorites, url}
        self._char_info_cache: dict[int, dict] = {}
        # person_al_id -> {al_id, mal_id, name, birthday, image_url, about, ...}
        self._person_info_cache: dict[int, dict] = {}

        # time.monotonic() stamp of the last completed HTTP request.
        self._last_request_at: float = 0.0
        self._initialized = True

    # ------------------------------------------------------------------
    # Public: ID lookup
    # ------------------------------------------------------------------
    def find_id(self, title: str) -> "int | None":
        """
        Searches AniList for a manga by title and returns the best-matching
        AniList ID.  Returns None on failure or when no result is found.

        "Nothing found" is cached per lower-cased title; a network failure
        is not, so the same title can be retried later.
        """
        if not title or not title.strip():
            return None

        key = title.strip().lower()
        if key in self._id_cache:
            return self._id_cache[key]

        try:
            payload = self._gql(self._search_query, {"search": title})
        except requests.RequestException:
            return None

        # `or {}` (not a .get default) so an explicit JSON null is handled.
        page = self._data_root(payload).get("Page") or {}
        candidates = page.get("media") or []
        if not candidates:
            self._id_cache[key] = None
            return None

        # max() keeps the first of equally scored entries, i.e. the one the
        # API itself ranked higher.
        best = max(candidates, key=lambda e: _score_title(title, e))
        al_id = best.get("id")
        self._id_cache[key] = al_id
        return al_id

    # ------------------------------------------------------------------
    # Public: statistics
    # ------------------------------------------------------------------
    def get_stats(self, tracker_id: "int | None") -> "dict | None":
        """
        Returns a statistics dict for the given AniList manga ID:

            {score, rank, scored_by, popularity, members, favorites,
             url, title, as_of (DD-MM-YYYY)}

        Returns None if tracker_id is None, on network failure, or when the
        API answers without a media entry (nothing is cached in those cases).
        """
        if tracker_id is None:
            return None
        if tracker_id in self._stats_cache:
            return self._stats_cache[tracker_id]

        try:
            payload = self._gql(_MANGA_STATS, {"id": tracker_id})
        except requests.RequestException:
            return None

        entry = self._data_root(payload).get("Media") or {}
        if not entry:
            # Do not cache a dict made only of None values.
            return None

        title_obj = entry.get("title") or {}
        title = (title_obj.get("romaji")
                 or title_obj.get("english")
                 or title_obj.get("native") or "")

        # AniList meanScore is 0–100; normalise to 0.0–10.0 for consistency
        # with the MALResolver stats dict shape.
        raw_score = entry.get("meanScore")
        score = round(raw_score / 10, 1) if raw_score is not None else None

        # Rated and popularity ranks live in the rankings array; only the
        # first all-time entry of each type is used.
        rated_rank = None
        popular_rank = None
        for ranking in (entry.get("rankings") or []):
            if not ranking or not ranking.get("allTime"):
                continue
            kind = ranking.get("type")
            if kind == "RATED" and rated_rank is None:
                rated_rank = ranking.get("rank")
            elif kind == "POPULAR" and popular_rank is None:
                popular_rank = ranking.get("rank")

        stats: dict = {
            "score": score,
            "rank": rated_rank,
            "scored_by": None,                   # not exposed by AniList API
            "popularity": popular_rank,
            "members": entry.get("popularity"),  # AniList's popularity = member count
            "favorites": entry.get("favourites"),
            "url": entry.get("siteUrl") or f"https://anilist.co/manga/{tracker_id}",
            "title": title,
            "as_of": datetime.date.today().strftime("%d-%m-%Y"),
        }
        self._stats_cache[tracker_id] = stats
        return stats

    # ------------------------------------------------------------------
    # Public: character names (for ComicInfo <Characters> tag)
    # ------------------------------------------------------------------
    def get_characters(self, tracker_id: "int | None") -> list[str]:
        """Returns a flat list of character names for the manga."""
        if tracker_id is None:
            return []
        if tracker_id in self._char_names_cache:
            return self._char_names_cache[tracker_id]

        detailed = self.get_characters_detailed(tracker_id)
        names = [e["name"] for e in detailed if e.get("name")]
        if names:
            # Empty results stay uncached so a later call can retry.
            self._char_names_cache[tracker_id] = names
        return names

    # ------------------------------------------------------------------
    # Public: detailed character data
    # ------------------------------------------------------------------
    def get_characters_detailed(self, tracker_id: "int | None") -> list[dict]:
        """
        Returns detailed character entries for a manga:
            [{al_id, mal_id=None, name, raw_name, image_url, role,
              about=None}, ...]

        Returns [] if tracker_id is None, on network failure, or when the
        manga has no named characters (empty results are not cached).
        """
        if tracker_id is None:
            return []
        if tracker_id in self._char_detailed_cache:
            return self._char_detailed_cache[tracker_id]

        try:
            payload = self._gql(_MANGA_CHARACTERS, {"id": tracker_id})
        except requests.RequestException:
            return []

        results = []
        for node, edge in self._connection_pairs(payload, "characters"):
            item = self._base_entry(node)
            if item is None:
                continue
            item["role"] = edge.get("role") or "SUPPORTING"
            item["about"] = None
            results.append(item)

        if results:
            self._char_detailed_cache[tracker_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: detailed staff data
    # ------------------------------------------------------------------
    def get_staff_detailed(self, tracker_id: "int | None") -> list[dict]:
        """
        Returns detailed staff entries for a manga:
            [{al_id, mal_id=None, name, raw_name, image_url, positions,
              about=None}, ...]

        Returns [] if tracker_id is None, on network failure, or when the
        manga has no named staff (empty results are not cached).
        """
        if tracker_id is None:
            return []
        if tracker_id in self._staff_detailed_cache:
            return self._staff_detailed_cache[tracker_id]

        try:
            payload = self._gql(_MANGA_STAFF, {"id": tracker_id})
        except requests.RequestException:
            return []

        results = []
        for node, edge in self._connection_pairs(payload, "staff"):
            item = self._base_entry(node)
            if item is None:
                continue
            role = edge.get("role")
            item["positions"] = [role] if role else []
            item["about"] = None
            results.append(item)

        if results:
            self._staff_detailed_cache[tracker_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: individual character / person details
    # ------------------------------------------------------------------
    def get_character_details(self, char_id: "int | None") -> "dict | None":
        """
        Returns full details for a single AniList character:
            {al_id, mal_id=None, name, image_url, about, favorites, url}

        Returns None if char_id is None, on network failure, or when the
        API answers without a character entry.
        """
        if char_id is None:
            return None
        if char_id in self._char_info_cache:
            return self._char_info_cache[char_id]

        try:
            payload = self._gql(_CHARACTER_DETAILS, {"id": char_id})
        except requests.RequestException:
            return None

        entry = self._data_root(payload).get("Character") or {}
        if not entry:
            return None

        result = {
            "al_id": entry.get("id"),
            "mal_id": None,
            "name": (entry.get("name") or {}).get("full") or "",
            "image_url": (entry.get("image") or {}).get("large"),
            "about": entry.get("description"),
            "favorites": entry.get("favourites"),
            "url": entry.get("siteUrl") or f"https://anilist.co/character/{char_id}",
        }
        self._char_info_cache[char_id] = result
        return result

    def get_person_details(self, person_id: "int | None") -> "dict | None":
        """
        Returns full details for a single AniList staff person:
            {al_id, mal_id=None, name, given_name=None, family_name=None,
             birthday (YYYY-MM-DD or None), image_url, about, favorites,
             website_url=None, url}

        Returns None if person_id is None, on network failure, or when the
        API answers without a staff entry.
        """
        if person_id is None:
            return None
        if person_id in self._person_info_cache:
            return self._person_info_cache[person_id]

        try:
            payload = self._gql(_PERSON_DETAILS, {"id": person_id})
        except requests.RequestException:
            return None

        entry = self._data_root(payload).get("Staff") or {}
        if not entry:
            return None

        # dateOfBirth: {year, month, day} -> ISO string.  A birthday needs at
        # least a year; a missing month or day falls back to 1.
        dob = entry.get("dateOfBirth") or {}
        birthday: "str | None" = None
        if dob.get("year"):
            month = dob.get("month") or 1
            day = dob.get("day") or 1
            birthday = f"{dob['year']}-{month:02d}-{day:02d}"

        name_obj = entry.get("name") or {}
        result = {
            "al_id": entry.get("id"),
            "mal_id": None,
            "name": name_obj.get("full") or "",
            "given_name": None,   # AniList does not break names into given/family
            "family_name": None,
            "birthday": birthday,
            "image_url": (entry.get("image") or {}).get("large"),
            "about": entry.get("description"),
            "favorites": entry.get("favourites"),
            "website_url": None,  # not exposed by AniList public API
            "url": entry.get("siteUrl") or f"https://anilist.co/staff/{person_id}",
        }
        self._person_info_cache[person_id] = result
        return result

    # ------------------------------------------------------------------
    # Public: cache management
    # ------------------------------------------------------------------
    def clear_cache(self) -> None:
        """Clears all internal caches (the Singleton instance is retained)."""
        self._id_cache.clear()
        self._stats_cache.clear()
        self._char_names_cache.clear()
        self._char_detailed_cache.clear()
        self._staff_detailed_cache.clear()
        self._char_info_cache.clear()
        self._person_info_cache.clear()

    # ------------------------------------------------------------------
    # Internal: response helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _data_root(payload) -> dict:
        """Returns the `data` object of a GraphQL response, or {} if absent/null."""
        root = payload.get("data") if isinstance(payload, dict) else None
        return root if isinstance(root, dict) else {}

    @classmethod
    def _connection_pairs(cls, payload, field: str) -> "list[tuple[dict, dict]]":
        """
        Returns (node, edge) pairs of the `Media.<field>` connection.

        `nodes` and `edges` are paired positionally; null nodes are dropped
        and a null edge is replaced by {} so callers can use .get() freely.
        """
        media = cls._data_root(payload).get("Media") or {}
        connection = media.get(field) or {}
        nodes = connection.get("nodes") or []
        edges = connection.get("edges") or []
        return [(node, edge or {}) for node, edge in zip(nodes, edges) if node]

    @staticmethod
    def _base_entry(node: dict) -> "dict | None":
        """Builds the fields shared by character and staff entries; None if unnamed."""
        name = (node.get("name") or {}).get("full") or ""
        if not name:
            return None
        return {
            "al_id": node.get("id"),
            "mal_id": None,
            "name": name,
            "raw_name": name,
            "image_url": (node.get("image") or {}).get("large"),
        }

    @classmethod
    def _retry_after_seconds(cls, resp) -> int:
        """
        Returns the number of seconds to wait after an HTTP 429 response.

        Falls back to _DEFAULT_RETRY_AFTER when the Retry-After header is
        missing or not an integer (e.g. an HTTP-date); never negative.
        """
        try:
            delay = int(resp.headers.get("Retry-After"))
        except (TypeError, ValueError):
            return cls._DEFAULT_RETRY_AFTER
        return max(delay, 0)

    # ------------------------------------------------------------------
    # Internal: rate-limited GraphQL POST
    # ------------------------------------------------------------------
    def _post(self, payload: dict):
        """Sends one POST to the AniList endpoint and stamps the request time."""
        resp = self._session.post(
            _ANILIST_GQL, json=payload, timeout=self.request_timeout)
        self._last_request_at = time.monotonic()
        return resp

    def _gql(self, query: str, variables: "dict | None" = None) -> dict:
        """
        Rate-limited GraphQL POST request (respects AniList's 90 req/min limit).

        On HTTP 429 the Retry-After header is honoured and the request is
        retried once.

        Raises requests.RequestException on transport errors, on a non-2xx
        final status, and when the response body is not valid JSON.
        """
        wait = self._MIN_REQUEST_INTERVAL - (time.monotonic() - self._last_request_at)
        if wait > 0:
            time.sleep(wait)

        payload: dict = {"query": query}
        if variables:
            payload["variables"] = variables

        resp = self._post(payload)
        if resp.status_code == 429:
            time.sleep(self._retry_after_seconds(resp))
            resp = self._post(payload)

        resp.raise_for_status()
        try:
            return resp.json()
        except requests.RequestException:
            # Newer requests already raises a RequestException subclass here.
            raise
        except ValueError as exc:
            # Older requests raises a plain ValueError; convert it so callers'
            # `except requests.RequestException` still covers a bad body.
            raise requests.RequestException(
                "AniList returned a non-JSON response body",
                response=resp) from exc
|
||
|
||
|
||
# --------------------------------------------------------------------------
|
||
# Module helpers
|
||
# --------------------------------------------------------------------------
|
||
def _score_title(query: str, entry: dict) -> float:
    """
    Rates how well an AniList media entry matches *query*.

    The romaji, english and native titles of the entry (each possibly None)
    are handed to ``best_similarity`` in that order and its result is
    returned unchanged.
    """
    titles = entry.get("title") or {}
    candidates = tuple(titles.get(lang) for lang in ("romaji", "english", "native"))
    return best_similarity(query, candidates)
|
||
|
||
|
||
# --------------------------------------------------------------------------
|
||
# Usage example
|
||
# --------------------------------------------------------------------------
|
||
if __name__ == "__main__":
    # Two constructions must hand back the very same object.
    first = AniListResolver()
    second = AniListResolver()
    assert first is second, "AniListResolver must be a Singleton"

    # Resolve a title, then walk through every kind of lookup with its ID.
    manga_id = first.find_id("Yofukashi no Uta")
    print("AniList ID :", manga_id)

    manga_stats = first.get_stats(manga_id)
    if manga_stats:
        for label, field in (("Score :", "score"),
                             ("Rank :", "rank"),
                             ("Members :", "members")):
            print(label, manga_stats[field])

    characters = first.get_characters_detailed(manga_id)
    leading_names = [entry["name"] for entry in characters[:3]]
    print("Characters (first 3):", leading_names)

    staff_members = first.get_staff_detailed(manga_id)
    staff_names = [entry["name"] for entry in staff_members]
    print("Staff :", staff_names)
|