""" anilist_resolver.py =================== Fetches and caches AniList manga metadata (statistics, characters, staff) using the public AniList GraphQL API. AniList API: https://graphql.anilist.co (no authentication required) Rate limit: 90 req/min -> a 700 ms guard between calls is applied. On HTTP 429 (rate-limit exceeded) the response Retry-After header is honoured; the request is retried once automatically. Singleton --------- Only one instance of this class exists per process. Subsequent calls to AniListResolver() return the same object with its warm caches intact. Provided features ----------------- - Title-based AniList ID lookup with best-match scoring - Manga statistics: score (0–10), rank, popularity, members, favorites - Character list for a manga (names only — for XML tag) - Detailed character list: name, AniList character ID, image URL, role - Detailed staff list: name, AniList person ID, image URL, positions - Lazy full-detail fetches per character / person (for descriptions) Dependencies ------------ requests -> pip install requests """ from __future__ import annotations import datetime import time import requests from MediaResolver import MediaResolver from TextUtils import best_similarity # -------------------------------------------------------------------------- # GraphQL query strings # -------------------------------------------------------------------------- # AniList models both manga and light novels as type MANGA; the format # clause decides which of the two a search returns. The placeholder is # substituted at construction time (see `media_format`). 
_SEARCH_MANGA_TEMPLATE = """
query ($search: String) {
  Page(page: 1, perPage: 5) {
    media(search: $search, type: MANGA, __FORMAT_CLAUSE__) {
      id
      title { romaji english native }
      siteUrl
    }
  }
}
"""

# media_format value -> GraphQL filter argument spliced into the search query
# in place of the __FORMAT_CLAUSE__ placeholder.
_FORMAT_CLAUSES = {
    "manga": "format_not_in: [NOVEL]",
    "novel": "format_in: [NOVEL]",
}

_MANGA_STATS = """
query ($id: Int) {
  Media(id: $id, type: MANGA) {
    id
    title { romaji english native }
    meanScore
    popularity
    favourites
    rankings { rank type allTime }
    siteUrl
  }
}
"""

_MANGA_CHARACTERS = """
query ($id: Int) {
  Media(id: $id, type: MANGA) {
    characters(sort: [ROLE, RELEVANCE], perPage: 25) {
      nodes { id name { full } image { large } siteUrl }
      edges { role }
    }
  }
}
"""

_MANGA_STAFF = """
query ($id: Int) {
  Media(id: $id, type: MANGA) {
    staff(perPage: 25) {
      nodes { id name { full } image { large } siteUrl }
      edges { role }
    }
  }
}
"""

_CHARACTER_DETAILS = """
query ($id: Int) {
  Character(id: $id) {
    id
    name { full }
    image { large }
    description(asHtml: false)
    favourites
    siteUrl
  }
}
"""

_PERSON_DETAILS = """
query ($id: Int) {
  Staff(id: $id) {
    id
    name { full native }
    image { large }
    description(asHtml: false)
    favourites
    siteUrl
    dateOfBirth { year month day }
    primaryOccupations
    homeTown
  }
}
"""

_ANILIST_GQL = "https://graphql.anilist.co"

# Minimum spacing between two consecutive requests, in seconds. Keeps the
# client below AniList's 90 req/min limit.
_MIN_REQUEST_INTERVAL = 0.7

# Seconds to wait after an HTTP 429 when the Retry-After header is missing
# or cannot be interpreted as a number of seconds.
_DEFAULT_RETRY_AFTER = 60.0


class AniListResolver(MediaResolver):
    """
    Singleton: fetches and caches AniList manga data via GraphQL API.

    The first call to AniListResolver() creates and initialises the instance;
    all subsequent calls return the same object. Constructor arguments passed
    to later calls are ignored.
    """

    _instance: "AniListResolver | None" = None

    # ------------------------------------------------------------------
    # Singleton machinery
    # ------------------------------------------------------------------

    def __new__(cls, **kwargs):
        # Create the single instance lazily. `_initialized` lets __init__
        # tell the very first construction apart from later ones.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self, *, request_timeout: int = 30,
                 media_format: str = "manga"):
        """
        request_timeout : value passed as `timeout` to every HTTP POST.
        media_format    : "manga" (excludes novels) or "novel" (novels only).
                          Only the FIRST construction in the process sets it
                          (singleton); construct the resolver with the correct
                          format in the entry point / orchestrator.

        Raises ValueError if media_format is not a known format (first
        construction only).
        """
        if self._initialized:
            return
        if media_format not in _FORMAT_CLAUSES:
            raise ValueError(f"media_format must be one of "
                             f"{sorted(_FORMAT_CLAUSES)}, got {media_format!r}")

        self.media_format = media_format
        self._search_query = _SEARCH_MANGA_TEMPLATE.replace(
            "__FORMAT_CLAUSE__", _FORMAT_CLAUSES[media_format])
        self.request_timeout = request_timeout

        self._session = requests.Session()
        self._session.headers.update({
            "User-Agent": "AniListResolver/1.0",
            "Content-Type": "application/json",
            "Accept": "application/json",
        })

        # title_lower -> al_id
        self._id_cache: dict[str, "int | None"] = {}
        # al_id -> stats dict
        self._stats_cache: dict[int, dict] = {}
        # manga_al_id -> [name_str, ...]
        self._char_names_cache: dict[int, list[str]] = {}
        # manga_al_id -> [{al_id, name, image_url, role}]
        self._char_detailed_cache: dict[int, list[dict]] = {}
        # manga_al_id -> [{al_id, name, image_url, positions}]
        self._staff_detailed_cache: dict[int, list[dict]] = {}
        # char_al_id -> {al_id, name, image_url, about, favorites, url}
        self._char_info_cache: dict[int, dict] = {}
        # person_al_id -> {al_id, name, image_url, about, favorites, url, ...}
        self._person_info_cache: dict[int, dict] = {}

        # time.monotonic() reading taken after the most recent POST attempt.
        self._last_request_at: float = 0.0
        self._initialized = True

    # ------------------------------------------------------------------
    # Public: ID lookup
    # ------------------------------------------------------------------

    def find_id(self, title: str) -> "int | None":
        """
        Searches AniList for a manga by title and returns the best-matching
        AniList ID.

        Results (including "nothing found") are cached under the stripped,
        lower-cased title. Returns None on failure or when no result is
        found; request failures are not cached.
        """
        if not title or not title.strip():
            return None

        key = title.strip().lower()
        if key in self._id_cache:
            return self._id_cache[key]

        try:
            data = self._gql(self._search_query, {"search": title})
        except requests.RequestException:
            return None

        # "Page" / "media" may be absent or null; null list items are skipped.
        media = _data_field(data, "Page").get("media") or []
        candidates = [entry for entry in media if isinstance(entry, dict)]
        if not candidates:
            self._id_cache[key] = None
            return None

        # max() returns the first of several equally good candidates, which
        # is the entry a stable descending sort would put first.
        best = max(candidates, key=lambda entry: _score_title(title, entry))
        al_id = best.get("id")
        self._id_cache[key] = al_id
        return al_id

    # ------------------------------------------------------------------
    # Public: statistics
    # ------------------------------------------------------------------

    def get_stats(self, tracker_id: "int | None") -> "dict | None":
        """
        Returns a statistics dict for the given AniList manga ID:
            {score, rank, scored_by, popularity, members, favorites,
             url, title, as_of (DD-MM-YYYY)}

        Returns None if tracker_id is None, on network failure, or when the
        response carries no Media entry (nothing is cached in those cases).
        """
        if tracker_id is None:
            return None
        if tracker_id in self._stats_cache:
            return self._stats_cache[tracker_id]

        try:
            data = self._gql(_MANGA_STATS, {"id": tracker_id})
        except requests.RequestException:
            return None

        entry = _data_field(data, "Media")
        if not entry:
            # Do not cache a placeholder made only of None values.
            return None

        title_obj = entry.get("title") or {}
        title = (title_obj.get("romaji")
                 or title_obj.get("english")
                 or title_obj.get("native")
                 or "")

        # AniList meanScore is 0–100; normalise to 0.0–10.0 for consistency
        # with the MALResolver stats dict shape.
        raw_score = entry.get("meanScore")
        score = round(raw_score / 10, 1) if raw_score is not None else None

        # Ranked and popularity ranks are in the rankings array; the first
        # all-time entry of each type wins.
        rated_rank = None
        popular_rank = None
        for ranking in (entry.get("rankings") or []):
            if not ranking or not ranking.get("allTime"):
                continue
            kind = ranking.get("type")
            if kind == "RATED" and rated_rank is None:
                rated_rank = ranking.get("rank")
            elif kind == "POPULAR" and popular_rank is None:
                popular_rank = ranking.get("rank")

        stats: dict = {
            "score": score,
            "rank": rated_rank,
            "scored_by": None,                   # not exposed by AniList API
            "popularity": popular_rank,
            "members": entry.get("popularity"),  # AniList's popularity = member count
            "favorites": entry.get("favourites"),
            "url": (entry.get("siteUrl")
                    or f"https://anilist.co/manga/{tracker_id}"),
            "title": title,
            "as_of": datetime.date.today().strftime("%d-%m-%Y"),
        }
        self._stats_cache[tracker_id] = stats
        return stats

    # ------------------------------------------------------------------
    # Public: character names (for ComicInfo tag)
    # ------------------------------------------------------------------

    def get_characters(self, tracker_id: "int | None") -> list[str]:
        """
        Returns a flat list of character names for the manga.

        Built from get_characters_detailed(); only non-empty lists are
        cached, so an empty result is looked up again on the next call.
        """
        if tracker_id is None:
            return []
        if tracker_id in self._char_names_cache:
            return self._char_names_cache[tracker_id]

        detailed = self.get_characters_detailed(tracker_id)
        names = [entry["name"] for entry in detailed if entry.get("name")]
        if names:
            self._char_names_cache[tracker_id] = names
        return names

    # ------------------------------------------------------------------
    # Public: detailed character data
    # ------------------------------------------------------------------

    def get_characters_detailed(self, tracker_id: "int | None") -> list[dict]:
        """
        Returns detailed character entries for a manga:
            [{al_id, mal_id=None, name, raw_name, image_url, role,
              about=None}, ...]

        Characters without a full name are skipped; a missing role defaults
        to "SUPPORTING". Returns [] if tracker_id is None or on network
        failure. Only non-empty results are cached.
        """
        if tracker_id is None:
            return []
        if tracker_id in self._char_detailed_cache:
            return self._char_detailed_cache[tracker_id]

        results = []
        for node, edge in self._media_connection(
                _MANGA_CHARACTERS, tracker_id, "characters"):
            name = (node.get("name") or {}).get("full") or ""
            if not name:
                continue
            results.append({
                "al_id": node.get("id"),
                "mal_id": None,
                "name": name,
                "raw_name": name,
                "image_url": (node.get("image") or {}).get("large"),
                "role": edge.get("role") or "SUPPORTING",
                "about": None,
            })

        if results:
            self._char_detailed_cache[tracker_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: detailed staff data
    # ------------------------------------------------------------------

    def get_staff_detailed(self, tracker_id: "int | None") -> list[dict]:
        """
        Returns detailed staff entries for a manga:
            [{al_id, mal_id=None, name, raw_name, image_url, positions,
              about=None}, ...]

        `positions` holds the edge role as a one-element list, or [] when the
        edge has no role. Staff without a full name are skipped. Returns []
        if tracker_id is None or on network failure. Only non-empty results
        are cached.
        """
        if tracker_id is None:
            return []
        if tracker_id in self._staff_detailed_cache:
            return self._staff_detailed_cache[tracker_id]

        results = []
        for node, edge in self._media_connection(
                _MANGA_STAFF, tracker_id, "staff"):
            name = (node.get("name") or {}).get("full") or ""
            if not name:
                continue
            role = edge.get("role")
            results.append({
                "al_id": node.get("id"),
                "mal_id": None,
                "name": name,
                "raw_name": name,
                "image_url": (node.get("image") or {}).get("large"),
                "positions": [role] if role else [],
                "about": None,
            })

        if results:
            self._staff_detailed_cache[tracker_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: individual character / person details
    # ------------------------------------------------------------------

    def get_character_details(self, char_id: "int | None") -> "dict | None":
        """
        Returns full details for a single AniList character:
            {al_id, mal_id=None, name, image_url, about, favorites, url}

        Returns None if char_id is None, on network failure, or when the
        response carries no Character entry.
        """
        if char_id is None:
            return None
        if char_id in self._char_info_cache:
            return self._char_info_cache[char_id]

        try:
            data = self._gql(_CHARACTER_DETAILS, {"id": char_id})
        except requests.RequestException:
            return None

        entry = _data_field(data, "Character")
        if not entry:
            # Do not cache a placeholder made only of None values.
            return None

        result = {
            "al_id": entry.get("id"),
            "mal_id": None,
            "name": (entry.get("name") or {}).get("full") or "",
            "image_url": (entry.get("image") or {}).get("large"),
            "about": entry.get("description"),
            "favorites": entry.get("favourites"),
            "url": (entry.get("siteUrl")
                    or f"https://anilist.co/character/{char_id}"),
        }
        self._char_info_cache[char_id] = result
        return result

    def get_person_details(self, person_id: "int | None") -> "dict | None":
        """
        Returns full details for a single AniList staff person:
            {al_id, mal_id=None, name, given_name=None, family_name=None,
             birthday, image_url, about, favorites, website_url=None, url}

        `birthday` is a "YYYY-MM-DD" string, or None when no birth year is
        known. Returns None if person_id is None, on network failure, or when
        the response carries no Staff entry.
        """
        if person_id is None:
            return None
        if person_id in self._person_info_cache:
            return self._person_info_cache[person_id]

        try:
            data = self._gql(_PERSON_DETAILS, {"id": person_id})
        except requests.RequestException:
            return None

        entry = _data_field(data, "Staff")
        if not entry:
            # Do not cache a placeholder made only of None values.
            return None

        # dateOfBirth: {year, month, day} → ISO string for _format_birthday.
        # A missing month or day is filled with 1 so the string stays a
        # complete date.
        dob = entry.get("dateOfBirth") or {}
        birthday: "str | None" = None
        if dob.get("year"):
            month = dob.get("month") or 1
            day = dob.get("day") or 1
            birthday = f"{dob['year']}-{month:02d}-{day:02d}"

        name_obj = entry.get("name") or {}
        result = {
            "al_id": entry.get("id"),
            "mal_id": None,
            "name": name_obj.get("full") or "",
            "given_name": None,    # AniList does not break names into given/family
            "family_name": None,
            "birthday": birthday,
            "image_url": (entry.get("image") or {}).get("large"),
            "about": entry.get("description"),
            "favorites": entry.get("favourites"),
            "website_url": None,   # not exposed by AniList public API
            "url": (entry.get("siteUrl")
                    or f"https://anilist.co/staff/{person_id}"),
        }
        self._person_info_cache[person_id] = result
        return result

    # ------------------------------------------------------------------
    # Public: cache management
    # ------------------------------------------------------------------

    def clear_cache(self) -> None:
        """Clears all internal caches (the Singleton instance is retained)."""
        for cache in (
            self._id_cache,
            self._stats_cache,
            self._char_names_cache,
            self._char_detailed_cache,
            self._staff_detailed_cache,
            self._char_info_cache,
            self._person_info_cache,
        ):
            cache.clear()

    # ------------------------------------------------------------------
    # Internal: shared fetch helpers
    # ------------------------------------------------------------------

    def _media_connection(self, query: str, tracker_id: int,
                          field: str) -> "list[tuple[dict, dict]]":
        """
        Runs *query* for *tracker_id* and pairs up the `nodes` and `edges`
        lists found under Media.<field>.

        Returns a list of (node, edge) tuples in response order; null items
        are replaced by empty dicts. Returns [] on network failure or when
        the field is absent or null.
        """
        try:
            data = self._gql(query, {"id": tracker_id})
        except requests.RequestException:
            return []

        connection = _data_field(data, "Media").get(field) or {}
        nodes = connection.get("nodes") or []
        edges = connection.get("edges") or []
        return [(node or {}, edge or {}) for node, edge in zip(nodes, edges)]

    # ------------------------------------------------------------------
    # Internal: rate-limited GraphQL POST
    # ------------------------------------------------------------------

    def _post(self, payload: dict) -> "requests.Response":
        """POSTs *payload* as JSON and records when the attempt finished."""
        try:
            return self._session.post(
                _ANILIST_GQL, json=payload, timeout=self.request_timeout)
        finally:
            # Stamp failed attempts too, so a retry after a timeout is still
            # spaced by the rate guard.
            self._last_request_at = time.monotonic()

    def _gql(self, query: str, variables: "dict | None" = None) -> dict:
        """
        Rate-limited GraphQL POST request (respects AniList's 90 req/min
        limit).

        On HTTP 429 the Retry-After header is honoured and the request is
        retried once. If the header is missing or not a usable number of
        seconds, a default wait is applied instead.

        Returns the decoded JSON body ({} if the body is not a JSON object).
        Raises requests.RequestException on transport errors, HTTP error
        statuses and undecodable response bodies.
        """
        wait = _MIN_REQUEST_INTERVAL - (time.monotonic() - self._last_request_at)
        if wait > 0:
            time.sleep(wait)

        payload: dict = {"query": query}
        if variables:
            payload["variables"] = variables

        resp = self._post(payload)
        if resp.status_code == 429:
            time.sleep(_retry_delay(resp.headers.get("Retry-After")))
            resp = self._post(payload)

        resp.raise_for_status()

        try:
            body = resp.json()
        except requests.RequestException:
            raise
        except ValueError as err:
            # Some requests versions raise a plain ValueError here; convert
            # it so callers' `except requests.RequestException` handles it.
            raise requests.RequestException(
                f"AniList returned a non-JSON response: {err}") from err
        return body if isinstance(body, dict) else {}


# --------------------------------------------------------------------------
# Module helpers
# --------------------------------------------------------------------------

def _data_field(response: object, name: str) -> dict:
    """
    Null-safe lookup of response["data"][name].

    Returns {} when the response, its "data" member or the named field is
    missing, null or not a dict.
    """
    if not isinstance(response, dict):
        return {}
    root = response.get("data")
    if not isinstance(root, dict):
        return {}
    value = root.get(name)
    return value if isinstance(value, dict) else {}


def _retry_delay(header_value: "str | None",
                 default: float = _DEFAULT_RETRY_AFTER) -> float:
    """
    Converts a Retry-After header value into a number of seconds to sleep.

    Returns *default* when the value is missing, not numeric (for example an
    HTTP-date), negative or not finite.
    """
    try:
        seconds = float(header_value)
    except (TypeError, ValueError):
        return default
    # NaN fails both comparisons and infinity fails the upper bound, so both
    # fall back to the default.
    if not 0 <= seconds < float("inf"):
        return default
    return seconds


def _score_title(query: str, entry: dict) -> float:
    """Returns the best title-similarity score for an AniList media entry."""
    title_obj = entry.get("title") or {}
    return best_similarity(query, (
        title_obj.get("romaji"),
        title_obj.get("english"),
        title_obj.get("native"),
    ))


# --------------------------------------------------------------------------
# Usage example
# --------------------------------------------------------------------------

if __name__ == "__main__":
    r1 = AniListResolver()
    r2 = AniListResolver()
    assert r1 is r2, "AniListResolver must be a Singleton"

    al_id = r1.find_id("Yofukashi no Uta")
    print("AniList ID :", al_id)

    stats = r1.get_stats(al_id)
    if stats:
        print("Score      :", stats["score"])
        print("Rank       :", stats["rank"])
        print("Members    :", stats["members"])

    chars = r1.get_characters_detailed(al_id)
    print("Characters (first 3):", [c["name"] for c in chars[:3]])

    staff = r1.get_staff_detailed(al_id)
    print("Staff      :", [s["name"] for s in staff])