Files
manga-mover-and-metadata-co…/src/AniListResolver.py
T
johannesbot 216771f709
Build and Deploy / build (push) Successful in 59s
Build and Deploy / deploy (push) Successful in 24s
merged ln metadata into manga mover
2026-06-14 10:47:47 +02:00

523 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
AniListResolver.py
==================
Fetches and caches AniList manga metadata (statistics, characters, staff)
using the public AniList GraphQL API.
AniList API: https://graphql.anilist.co (no authentication required)
Rate limit: 90 req/min -> a 700 ms guard between calls is applied.
On HTTP 429 (rate-limit exceeded) the response Retry-After header is
honoured; the request is retried once automatically.
Singleton
---------
Only one instance of this class exists per process. Subsequent calls to
AniListResolver() return the same object with its warm caches intact.
Provided features
-----------------
- Title-based AniList ID lookup with best-match scoring
- Manga statistics: score (0-10), rank, popularity, members, favorites
- Character list for a manga (names only — for <Characters> XML tag)
- Detailed character list: name, AniList character ID, image URL, role
- Detailed staff list: name, AniList person ID, image URL, positions
- Lazy full-detail fetches per character / person (for descriptions)
Dependencies
------------
requests -> pip install requests
"""
from __future__ import annotations
import datetime
import time
import requests
from MediaResolver import MediaResolver
from TextUtils import best_similarity
# --------------------------------------------------------------------------
# GraphQL query strings
# --------------------------------------------------------------------------
# AniList models both manga and light novels as type MANGA; the format
# clause decides which of the two a search returns. The placeholder is
# substituted at construction time (see `media_format`).
# Title search: asks for page 1 with at most five hits and returns, per hit,
# the ID, the three title variants and the site URL.
_SEARCH_MANGA_TEMPLATE = """
query ($search: String) {
Page(page: 1, perPage: 5) {
media(search: $search, type: MANGA, __FORMAT_CLAUSE__) {
id title { romaji english native } siteUrl
}
}
}
"""
# media_format value -> GraphQL filter that replaces __FORMAT_CLAUSE__ in the
# search template above.
_FORMAT_CLAUSES = {
"manga": "format_not_in: [NOVEL]",
"novel": "format_in: [NOVEL]",
}
# Statistics for one media ID: titles, meanScore, popularity, favourites,
# the rankings list (rank / type / allTime) and the site URL.
_MANGA_STATS = """
query ($id: Int) {
Media(id: $id, type: MANGA) {
id title { romaji english native }
meanScore popularity favourites
rankings { rank type allTime }
siteUrl
}
}
"""
# Characters of one media ID (perPage: 25, sort: [ROLE, RELEVANCE]).
# `nodes` carries the character data and `edges` the role; the two lists are
# requested side by side and paired up by position in the resolver.
_MANGA_CHARACTERS = """
query ($id: Int) {
Media(id: $id, type: MANGA) {
characters(sort: [ROLE, RELEVANCE], perPage: 25) {
nodes { id name { full } image { large } siteUrl }
edges { role }
}
}
}
"""
# Staff of one media ID (perPage: 25); same nodes/edges layout as the
# character query, with the edge `role` holding the staff position.
_MANGA_STAFF = """
query ($id: Int) {
Media(id: $id, type: MANGA) {
staff(perPage: 25) {
nodes { id name { full } image { large } siteUrl }
edges { role }
}
}
}
"""
# Full record for a single character; the description is requested with
# asHtml: false.
_CHARACTER_DETAILS = """
query ($id: Int) {
Character(id: $id) {
id name { full } image { large }
description(asHtml: false)
favourites siteUrl
}
}
"""
# Full record for a single staff member.  primaryOccupations and homeTown are
# requested here but get_person_details does not copy them into its result.
_PERSON_DETAILS = """
query ($id: Int) {
Staff(id: $id) {
id name { full native } image { large }
description(asHtml: false)
favourites siteUrl
dateOfBirth { year month day }
primaryOccupations
homeTown
}
}
"""
# Endpoint that every GraphQL request is POSTed to.
_ANILIST_GQL = "https://graphql.anilist.co"
class AniListResolver(MediaResolver):
    """
    Singleton: fetches and caches AniList manga data via GraphQL API.

    The first call to AniListResolver() creates and initialises the instance;
    all subsequent calls return the same object, and the constructor
    arguments of those later calls are ignored.  Every lookup is memoised in
    an in-memory dict until clear_cache() is called.
    """

    _instance: "AniListResolver | None" = None

    # Minimum spacing between two consecutive requests, in seconds.
    _MIN_REQUEST_INTERVAL = 0.7
    # Seconds to wait after an HTTP 429 whose Retry-After header is missing
    # or cannot be read as a whole number of seconds.
    _DEFAULT_RETRY_AFTER = 60

    # ------------------------------------------------------------------
    # Singleton machinery
    # ------------------------------------------------------------------
    def __new__(cls, **kwargs):
        """
        Returns the one shared instance, creating it on first use.

        kwargs are accepted only so that keyword arguments meant for
        __init__ do not make object creation fail; they are not used here.
        """
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # __init__ runs on every AniListResolver() call; this flag lets
            # it tell the first call apart from the later ones.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self, *, request_timeout: int = 30,
                 media_format: str = "manga"):
        """
        request_timeout : timeout passed to every HTTP POST.
        media_format    : "manga" (excludes novels) or "novel" (novels only).
                          Only the FIRST construction in the process sets it
                          (singleton); construct the resolver with the correct
                          format in the entry point / orchestrator.

        Raises ValueError when media_format is not a key of _FORMAT_CLAUSES
        (checked on the first construction only).
        """
        if self._initialized:
            return
        if media_format not in _FORMAT_CLAUSES:
            raise ValueError(f"media_format must be one of "
                             f"{sorted(_FORMAT_CLAUSES)}, got {media_format!r}")
        self.media_format = media_format
        self._search_query = _SEARCH_MANGA_TEMPLATE.replace(
            "__FORMAT_CLAUSE__", _FORMAT_CLAUSES[media_format])
        self.request_timeout = request_timeout
        self._session = requests.Session()
        self._session.headers.update({
            "User-Agent": "AniListResolver/1.0",
            "Content-Type": "application/json",
            "Accept": "application/json",
        })
        # title_lower -> al_id
        self._id_cache: dict[str, "int | None"] = {}
        # al_id -> stats dict
        self._stats_cache: dict[int, dict] = {}
        # manga_al_id -> [name_str, ...]
        self._char_names_cache: dict[int, list[str]] = {}
        # manga_al_id -> [{al_id, name, image_url, role}]
        self._char_detailed_cache: dict[int, list[dict]] = {}
        # manga_al_id -> [{al_id, name, image_url, positions}]
        self._staff_detailed_cache: dict[int, list[dict]] = {}
        # char_al_id -> {al_id, name, image_url, about, favorites, url}
        self._char_info_cache: dict[int, dict] = {}
        # person_al_id -> {al_id, name, image_url, about, favorites, url, ...}
        self._person_info_cache: dict[int, dict] = {}
        # time.monotonic() reading taken right after the latest POST.
        self._last_request_at: float = 0.0
        self._initialized = True

    # ------------------------------------------------------------------
    # Public: ID lookup
    # ------------------------------------------------------------------
    def find_id(self, title: str) -> "int | None":
        """
        Searches AniList for a manga by title and returns the best-matching
        AniList ID. Returns None on failure or when no result is found.

        "No result" is cached under the stripped, lower-cased title; a
        request failure is not cached, so the next call tries again.
        """
        if not title or not title.strip():
            return None
        key = title.strip().lower()
        if key in self._id_cache:
            return self._id_cache[key]
        try:
            data = self._gql(self._search_query, {"search": title})
        except requests.RequestException:
            return None
        # "Page" may be present but null; `or {}` covers that as well as a
        # missing key.
        page = (data.get("data") or {}).get("Page") or {}
        results = page.get("media") or []
        if not results:
            self._id_cache[key] = None
            return None
        # max() keeps the first of several equally scored entries, which is
        # the entry a stable descending sort would put in front.
        best = max(results, key=lambda entry: _score_title(title, entry))
        al_id = best.get("id")
        self._id_cache[key] = al_id
        return al_id

    # ------------------------------------------------------------------
    # Public: statistics
    # ------------------------------------------------------------------
    def get_stats(self, tracker_id: "int | None") -> "dict | None":
        """
        Returns a statistics dict for the given AniList manga ID:
        {score, rank, scored_by, popularity, members, favorites,
        url, title, as_of (DD-MM-YYYY)}
        Returns None if tracker_id is None or on network failure.

        A response without a Media object still yields (and caches) a dict;
        its data fields are None / "" and `url` is built from tracker_id.
        """
        if tracker_id is None:
            return None
        if tracker_id in self._stats_cache:
            return self._stats_cache[tracker_id]
        try:
            data = self._gql(_MANGA_STATS, {"id": tracker_id})
        except requests.RequestException:
            return None
        entry = (data.get("data") or {}).get("Media") or {}
        title_obj = entry.get("title") or {}
        title = (title_obj.get("romaji")
                 or title_obj.get("english")
                 or title_obj.get("native") or "")
        # AniList meanScore is 0-100; normalise to 0.0-10.0 for consistency
        # with the MALResolver stats dict shape.
        raw_score = entry.get("meanScore")
        score = round(raw_score / 10, 1) if raw_score is not None else None
        # Rated and popularity ranks are in the rankings array; only the
        # first all-time entry of each type is used.
        rated_rank = None
        popular_rank = None
        for ranking in (entry.get("rankings") or []):
            if not ranking.get("allTime"):
                continue
            kind = ranking.get("type")
            if kind == "RATED" and rated_rank is None:
                rated_rank = ranking.get("rank")
            if kind == "POPULAR" and popular_rank is None:
                popular_rank = ranking.get("rank")
        stats: dict = {
            "score": score,
            "rank": rated_rank,
            "scored_by": None,  # not exposed by AniList API
            "popularity": popular_rank,
            "members": entry.get("popularity"),  # AniList's popularity = member count
            "favorites": entry.get("favourites"),
            "url": entry.get("siteUrl") or f"https://anilist.co/manga/{tracker_id}",
            "title": title,
            "as_of": datetime.date.today().strftime("%d-%m-%Y"),
        }
        self._stats_cache[tracker_id] = stats
        return stats

    # ------------------------------------------------------------------
    # Public: character names (for ComicInfo <Characters> tag)
    # ------------------------------------------------------------------
    def get_characters(self, tracker_id: "int | None") -> list[str]:
        """
        Returns a flat list of character names for the manga.

        The names come from get_characters_detailed(); an empty list is
        returned as-is and not cached.
        """
        if tracker_id is None:
            return []
        if tracker_id in self._char_names_cache:
            return self._char_names_cache[tracker_id]
        detailed = self.get_characters_detailed(tracker_id)
        names = [e["name"] for e in detailed if e.get("name")]
        if names:
            self._char_names_cache[tracker_id] = names
        return names

    # ------------------------------------------------------------------
    # Public: detailed character data
    # ------------------------------------------------------------------
    def get_characters_detailed(self, tracker_id: "int | None") -> list[dict]:
        """
        Returns detailed character entries for a manga:
        [{al_id, mal_id, name, raw_name, image_url, role, about=None}, ...]

        mal_id is always None, raw_name equals name, and role falls back to
        "SUPPORTING" when the edge carries none.  Entries without a name are
        skipped.  Returns [] if tracker_id is None, on request failure or
        when nothing usable came back; an empty result is not cached.
        """
        if tracker_id is None:
            return []
        if tracker_id in self._char_detailed_cache:
            return self._char_detailed_cache[tracker_id]
        results = []
        for node, edge in self._fetch_connection(
                _MANGA_CHARACTERS, tracker_id, "characters"):
            entry = self._entry_base(node)
            if entry is None:
                continue
            entry["role"] = (edge or {}).get("role") or "SUPPORTING"
            entry["about"] = None
            results.append(entry)
        if results:
            self._char_detailed_cache[tracker_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: detailed staff data
    # ------------------------------------------------------------------
    def get_staff_detailed(self, tracker_id: "int | None") -> list[dict]:
        """
        Returns detailed staff entries for a manga:
        [{al_id, mal_id, name, raw_name, image_url, positions, about=None}, ...]

        mal_id is always None, raw_name equals name, and positions is a list
        holding the edge role, or [] when the edge carries none.  Entries
        without a name are skipped.  Returns [] if tracker_id is None, on
        request failure or when nothing usable came back; an empty result is
        not cached.
        """
        if tracker_id is None:
            return []
        if tracker_id in self._staff_detailed_cache:
            return self._staff_detailed_cache[tracker_id]
        results = []
        for node, edge in self._fetch_connection(
                _MANGA_STAFF, tracker_id, "staff"):
            entry = self._entry_base(node)
            if entry is None:
                continue
            role = (edge or {}).get("role")
            entry["positions"] = [role] if role else []
            entry["about"] = None
            results.append(entry)
        if results:
            self._staff_detailed_cache[tracker_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: individual character / person details
    # ------------------------------------------------------------------
    def get_character_details(self, char_id: "int | None") -> "dict | None":
        """
        Returns full details for a single AniList character:
        {al_id, mal_id, name, image_url, about, favorites, url}

        Returns None if char_id is None or on request failure.  A response
        without a Character object still yields (and caches) a dict with
        empty fields and a `url` built from char_id.
        """
        if char_id is None:
            return None
        if char_id in self._char_info_cache:
            return self._char_info_cache[char_id]
        try:
            data = self._gql(_CHARACTER_DETAILS, {"id": char_id})
        except requests.RequestException:
            return None
        entry = (data.get("data") or {}).get("Character") or {}
        result = {
            "al_id": entry.get("id"),
            "mal_id": None,
            "name": (entry.get("name") or {}).get("full") or "",
            "image_url": (entry.get("image") or {}).get("large"),
            "about": entry.get("description"),
            "favorites": entry.get("favourites"),
            "url": entry.get("siteUrl") or f"https://anilist.co/character/{char_id}",
        }
        self._char_info_cache[char_id] = result
        return result

    def get_person_details(self, person_id: "int | None") -> "dict | None":
        """
        Returns full details for a single AniList staff person:
        {al_id, mal_id, name, given_name, family_name, birthday, image_url,
        about, favorites, website_url, url}

        birthday is a "YYYY-MM-DD" string, or None when the record has no
        birth year.  Returns None if person_id is None or on request failure.
        A response without a Staff object still yields (and caches) a dict
        with empty fields and a `url` built from person_id.
        """
        if person_id is None:
            return None
        if person_id in self._person_info_cache:
            return self._person_info_cache[person_id]
        try:
            data = self._gql(_PERSON_DETAILS, {"id": person_id})
        except requests.RequestException:
            return None
        entry = (data.get("data") or {}).get("Staff") or {}
        # dateOfBirth: {year, month, day} -> ISO-style string.  A missing
        # month or day is filled in with 1.
        dob = entry.get("dateOfBirth") or {}
        birthday: "str | None" = None
        if dob.get("year"):
            month = dob.get("month") or 1
            day = dob.get("day") or 1
            birthday = f"{dob['year']}-{month:02d}-{day:02d}"
        name_obj = entry.get("name") or {}
        result = {
            "al_id": entry.get("id"),
            "mal_id": None,
            "name": name_obj.get("full") or "",
            "given_name": None,  # AniList does not break names into given/family
            "family_name": None,
            "birthday": birthday,
            "image_url": (entry.get("image") or {}).get("large"),
            "about": entry.get("description"),
            "favorites": entry.get("favourites"),
            "website_url": None,  # not exposed by AniList public API
            "url": entry.get("siteUrl") or f"https://anilist.co/staff/{person_id}",
        }
        self._person_info_cache[person_id] = result
        return result

    # ------------------------------------------------------------------
    # Public: cache management
    # ------------------------------------------------------------------
    def clear_cache(self) -> None:
        """Clears all internal caches (the Singleton instance is retained)."""
        self._id_cache.clear()
        self._stats_cache.clear()
        self._char_names_cache.clear()
        self._char_detailed_cache.clear()
        self._staff_detailed_cache.clear()
        self._char_info_cache.clear()
        self._person_info_cache.clear()

    # ------------------------------------------------------------------
    # Internal: shared parsing helpers
    # ------------------------------------------------------------------
    def _fetch_connection(self, query: str, tracker_id: int,
                          field: str) -> "list[tuple]":
        """
        Runs `query` for a media ID and returns the (node, edge) pairs of
        the connection stored under `field` ("characters" or "staff").

        Nodes and edges are paired by position.  Returns [] on request
        failure or when any level of the response is missing or null.
        """
        try:
            data = self._gql(query, {"id": tracker_id})
        except requests.RequestException:
            return []
        media = (data.get("data") or {}).get("Media") or {}
        connection = media.get(field) or {}
        nodes = connection.get("nodes") or []
        edges = connection.get("edges") or []
        return list(zip(nodes, edges))

    @staticmethod
    def _entry_base(node: "dict | None") -> "dict | None":
        """
        Builds the fields shared by character and staff entries from one
        connection node.  Returns None when the node has no full name.
        """
        node = node or {}
        name = (node.get("name") or {}).get("full") or ""
        if not name:
            return None
        return {
            "al_id": node.get("id"),
            "mal_id": None,
            "name": name,
            "raw_name": name,
            "image_url": (node.get("image") or {}).get("large"),
        }

    @classmethod
    def _retry_delay(cls, header_value: "str | None") -> int:
        """
        Turns a Retry-After header value into a number of seconds to sleep.

        Falls back to _DEFAULT_RETRY_AFTER when the header is absent or is
        not a whole number (for instance an HTTP-date), and never returns a
        negative value, which time.sleep() would reject.
        """
        try:
            delay = int(header_value)
        except (TypeError, ValueError):
            return cls._DEFAULT_RETRY_AFTER
        return max(delay, 0)

    # ------------------------------------------------------------------
    # Internal: rate-limited GraphQL POST
    # ------------------------------------------------------------------
    def _gql(self, query: str, variables: "dict | None" = None) -> dict:
        """
        Rate-limited GraphQL POST request (respects AniList's 90 req/min limit).
        On HTTP 429 the Retry-After header is honoured and the request is
        retried once.

        Returns the decoded JSON body.  Raises requests.RequestException (or
        a subclass) on transport errors, on a non-2xx final status and when
        the body is not valid JSON.
        """
        elapsed = time.monotonic() - self._last_request_at
        if elapsed < self._MIN_REQUEST_INTERVAL:
            time.sleep(self._MIN_REQUEST_INTERVAL - elapsed)
        payload: dict = {"query": query}
        if variables:
            payload["variables"] = variables
        resp = self._session.post(
            _ANILIST_GQL, json=payload, timeout=self.request_timeout)
        self._last_request_at = time.monotonic()
        if resp.status_code == 429:
            time.sleep(self._retry_delay(resp.headers.get("Retry-After")))
            resp = self._session.post(
                _ANILIST_GQL, json=payload, timeout=self.request_timeout)
            self._last_request_at = time.monotonic()
        resp.raise_for_status()
        try:
            return resp.json()
        except requests.RequestException:
            # Already the exception family the public methods handle.
            raise
        except ValueError as exc:
            # Some requests versions report an undecodable body as a plain
            # ValueError; re-raise it as a RequestException so the public
            # methods' handlers still apply.
            raise requests.RequestException(
                "AniList returned a response body that is not valid JSON",
                response=resp) from exc
# --------------------------------------------------------------------------
# Module helpers
# --------------------------------------------------------------------------
def _score_title(query: str, entry: dict) -> float:
    """
    Scores one AniList media entry against the search text.

    The romaji, english and native titles of `entry` (each may be None, and
    the whole title object may be absent) are handed to best_similarity()
    together with `query`; its result is returned unchanged.
    """
    titles = entry.get("title") or {}
    candidates = tuple(
        titles.get(variant) for variant in ("romaji", "english", "native"))
    return best_similarity(query, candidates)
# --------------------------------------------------------------------------
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
    # Constructing the resolver twice must hand back one and the same object.
    first = AniListResolver()
    second = AniListResolver()
    assert first is second, "AniListResolver must be a Singleton"

    # Resolve a title to its AniList ID, then walk through the lookups.
    manga_id = first.find_id("Yofukashi no Uta")
    print("AniList ID :", manga_id)

    manga_stats = first.get_stats(manga_id)
    if manga_stats:
        for label, field in (("Score :", "score"),
                             ("Rank :", "rank"),
                             ("Members :", "members")):
            print(label, manga_stats[field])

    characters = first.get_characters_detailed(manga_id)
    leading_names = [entry["name"] for entry in characters[:3]]
    print("Characters (first 3):", leading_names)

    staff_members = first.get_staff_detailed(manga_id)
    staff_names = [entry["name"] for entry in staff_members]
    print("Staff :", staff_names)