""" mal_resolver.py =============== Fetches and caches MyAnimeList manga metadata (statistics, characters, staff) using the public Jikan REST API v4. Jikan API: https://api.jikan.moe/v4 (no authentication required) Rate limit: 3 req/s, 60 req/min -> a 400 ms guard between calls is applied. Singleton --------- Only one instance of this class exists per process. Subsequent calls to MALResolver() return the same object with its warm caches intact. Provided features ----------------- - Title-based MAL ID lookup with best-match scoring - MAL statistics: score, rank, scored_by, popularity, members, favorites - Character list for a manga (names only — for XML tag) - Detailed character list: name, MAL character ID, image URL, role - Detailed staff list: name, MAL person ID, image URL, positions - Lazy full-detail fetches per character / person (for descriptions) Dependencies ------------ requests -> pip install requests """ from __future__ import annotations import datetime import difflib import time import requests class MALResolver: """ Singleton: fetches and caches MAL manga data via Jikan API v4. The first call to MALResolver() creates and initialises the instance; all subsequent calls return the same object. """ _instance: "MALResolver | None" = None # ------------------------------------------------------------------ # Singleton machinery # ------------------------------------------------------------------ def __new__(cls, **kwargs): if cls._instance is None: cls._instance = super().__new__(cls) cls._instance._initialized = False return cls._instance def __init__(self, *, request_timeout: int = 30): if self._initialized: return self.JIKAN_BASE = "https://api.jikan.moe/v4" self.request_timeout = request_timeout self._session = requests.Session() self._session.headers.setdefault("User-Agent", "MALResolver/1.0") # title_lower -> mal_id self._id_cache: dict[str, "int | None"] = {} # mal_id -> stats dict self._stats_cache: dict[int, dict] = {} # manga_mal_id -> [name_str, ...] (for ComicInfo ) self._char_names_cache: dict[int, list[str]] = {} # manga_mal_id -> [{mal_id, name, image_url, role}] self._char_detailed_cache: dict[int, list[dict]] = {} # manga_mal_id -> [{mal_id, name, image_url, positions}] self._staff_detailed_cache: dict[int, list[dict]] = {} # char_mal_id -> {mal_id, name, image_url, about} self._char_info_cache: dict[int, dict] = {} # person_mal_id -> {mal_id, name, image_url, about, website_url} self._person_info_cache: dict[int, dict] = {} self._last_request_at: float = 0.0 self._initialized = True # ------------------------------------------------------------------ # Public: ID lookup # ------------------------------------------------------------------ def find_mal_id(self, title: str) -> "int | None": """ Searches MAL for a manga by title and returns the best-matching MAL ID. Returns None on failure or when no result is found. """ if not title or not title.strip(): return None key = title.strip().lower() if key in self._id_cache: return self._id_cache[key] try: data = self._get(f"{self.JIKAN_BASE}/manga", {"q": title, "limit": 5, "type": "manga"}) results = data.get("data") or [] except requests.RequestException: return None if not results: self._id_cache[key] = None return None results.sort(key=lambda e: _score_title(title, e), reverse=True) mal_id = results[0].get("mal_id") self._id_cache[key] = mal_id return mal_id # ------------------------------------------------------------------ # Public: statistics # ------------------------------------------------------------------ def get_stats(self, mal_id: "int | None") -> "dict | None": """ Returns a statistics dict for the given MAL manga ID: {score, rank, scored_by, popularity, members, favorites, url, title, as_of (DD-MM-YYYY)} Returns None if mal_id is None or on network failure. """ if mal_id is None: return None if mal_id in self._stats_cache: return self._stats_cache[mal_id] try: data = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}") entry = data.get("data") or {} except requests.RequestException: return None stats: dict = { "score": entry.get("score"), "rank": entry.get("rank"), "scored_by": entry.get("scored_by"), "popularity": entry.get("popularity"), "members": entry.get("members"), "favorites": entry.get("favorites"), "url": (entry.get("url") or f"https://myanimelist.net/manga/{mal_id}"), "title": entry.get("title") or "", "as_of": datetime.date.today().strftime("%d-%m-%Y"), } self._stats_cache[mal_id] = stats return stats def get_stats_for_manga(self, title: str) -> "dict | None": """Convenience: find MAL ID by title, then return stats.""" return self.get_stats(self.find_mal_id(title)) # ------------------------------------------------------------------ # Public: character names (for ComicInfo tag) # ------------------------------------------------------------------ def get_characters(self, mal_id: "int | None") -> list[str]: """ Returns a flat list of character names for the manga. Used by ComicInfoBuilder to populate the XML element. """ if mal_id is None: return [] if mal_id in self._char_names_cache: return self._char_names_cache[mal_id] detailed = self.get_characters_detailed(mal_id) names = [e["name"] for e in detailed if e.get("name")] if names: # Only cache a successful result — empty could be a transient # API failure and we want the next call to retry. self._char_names_cache[mal_id] = names return names def get_characters_for_manga(self, title: str) -> list[str]: """Convenience: search by title, then return character names.""" return self.get_characters(self.find_mal_id(title)) # ------------------------------------------------------------------ # Public: detailed character data (for KavitaPersonUpdater) # ------------------------------------------------------------------ def get_characters_detailed(self, mal_id: "int | None") -> list[dict]: """ Returns detailed character entries for a manga: [{mal_id, name, image_url, role, about=None}, ...] `about` is not populated here; call get_character_details(char_mal_id) to fetch it lazily when needed. """ if mal_id is None: return [] if mal_id in self._char_detailed_cache: return self._char_detailed_cache[mal_id] try: data = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}/characters") entries = data.get("data") or [] except requests.RequestException: return [] results = [] for entry in entries: char = entry.get("character") or {} raw_name = char.get("name") or "" if not raw_name: continue jpg = (char.get("images") or {}).get("jpg") or {} results.append({ "mal_id": char.get("mal_id"), # Cleaned name: "Hibino, Susuki" -> "Susuki Hibino". ComicInfo # is comma-separated, so commas in names would # cause Kavita to split a single character into two persons. "name": _clean_mal_name(raw_name), "raw_name": raw_name, "image_url": jpg.get("image_url") or jpg.get("small_image_url"), "role": entry.get("role") or "Supporting", "about": None, }) self._char_detailed_cache[mal_id] = results return results # ------------------------------------------------------------------ # Public: detailed staff data (for KavitaPersonUpdater) # ------------------------------------------------------------------ def get_staff_detailed(self, mal_id: "int | None") -> list[dict]: """ Returns detailed staff (author) entries for a manga: [{mal_id, name, image_url, positions, about=None}, ...] Jikan has no `/manga/{id}/staff` endpoint — that route only exists for anime. For manga the authors are listed on `/manga/{id}` under `data.authors`, but each entry only has {mal_id, name, url}; the image URL is fetched lazily via get_person_details (cached, so the later description fetch is free). """ if mal_id is None: return [] if mal_id in self._staff_detailed_cache: return self._staff_detailed_cache[mal_id] try: data = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}") entry = data.get("data") or {} except requests.RequestException: return [] results = [] for author in (entry.get("authors") or []): raw_name = author.get("name") or "" person_mal_id = author.get("mal_id") if not raw_name or person_mal_id is None: continue details = self.get_person_details(person_mal_id) or {} results.append({ "mal_id": person_mal_id, "name": _clean_mal_name(raw_name), "raw_name": raw_name, "image_url": details.get("image_url"), "positions": [], "about": None, }) if results: self._staff_detailed_cache[mal_id] = results return results # ------------------------------------------------------------------ # Public: individual character / person details (lazy, with description) # ------------------------------------------------------------------ def get_character_details(self, char_mal_id: "int | None") -> "dict | None": """ Returns full details for a single MAL character, including `about`. Result is cached. """ if char_mal_id is None: return None if char_mal_id in self._char_info_cache: return self._char_info_cache[char_mal_id] try: data = self._get(f"{self.JIKAN_BASE}/characters/{char_mal_id}") entry = data.get("data") or {} except requests.RequestException: return None jpg = (entry.get("images") or {}).get("jpg") or {} result = { "mal_id": entry.get("mal_id"), "name": entry.get("name") or "", "image_url": jpg.get("image_url") or jpg.get("small_image_url"), "about": entry.get("about"), "favorites": entry.get("favorites"), "url": (entry.get("url") or f"https://myanimelist.net/character/{char_mal_id}"), } self._char_info_cache[char_mal_id] = result return result def get_person_details(self, person_mal_id: "int | None") -> "dict | None": """ Returns full details for a single MAL person (staff), including `about`. Result is cached. """ if person_mal_id is None: return None if person_mal_id in self._person_info_cache: return self._person_info_cache[person_mal_id] try: data = self._get(f"{self.JIKAN_BASE}/people/{person_mal_id}") entry = data.get("data") or {} except requests.RequestException: return None jpg = (entry.get("images") or {}).get("jpg") or {} result = { "mal_id": entry.get("mal_id"), "name": entry.get("name") or "", "given_name": entry.get("given_name"), "family_name": entry.get("family_name"), "birthday": entry.get("birthday"), "image_url": jpg.get("image_url") or jpg.get("small_image_url"), "about": entry.get("about"), "favorites": entry.get("favorites"), "website_url": entry.get("website_url"), "url": (entry.get("url") or f"https://myanimelist.net/people/{person_mal_id}"), } self._person_info_cache[person_mal_id] = result return result # ------------------------------------------------------------------ # Public: cache management # ------------------------------------------------------------------ def clear_cache(self) -> None: """Clears all internal caches (the Singleton instance is retained).""" self._id_cache.clear() self._stats_cache.clear() self._char_names_cache.clear() self._char_detailed_cache.clear() self._staff_detailed_cache.clear() self._char_info_cache.clear() self._person_info_cache.clear() # ------------------------------------------------------------------ # Internal: rate-limited HTTP # ------------------------------------------------------------------ def _get(self, url: str, params: "dict | None" = None) -> dict: """Rate-limited GET request (respects Jikan's ~3 req/s limit).""" elapsed = time.monotonic() - self._last_request_at if elapsed < 0.4: time.sleep(0.4 - elapsed) resp = self._session.get(url, params=params, timeout=self.request_timeout) self._last_request_at = time.monotonic() resp.raise_for_status() return resp.json() # -------------------------------------------------------------------------- # Module helper # -------------------------------------------------------------------------- def _clean_mal_name(name: str) -> str: """ Converts an MAL name into a comma-free, ComicInfo-safe form. The ComicInfo tag is comma-separated, so a single MAL character "Hibino, Susuki" written into the XML would be parsed by Kavita as two persons ("Hibino" and "Susuki"). Conversion: "Hibino, Susuki" -> "Susuki Hibino" (Western: First Last) "Yamori, Kou" -> "Kou Yamori" "Kotoyama" -> "Kotoyama" (unchanged) Trailing/leading commas and stray whitespace are stripped defensively. """ if not name: return "" name = name.strip() if "," in name: last, _, first = name.partition(",") first = first.strip() last = last.strip() if first and last: return f"{first} {last}" # Fallback: strip any remaining commas return name.replace(",", " ").strip() return name def _score_title(query: str, entry: dict) -> float: """Returns the best title-similarity score for a Jikan manga entry.""" candidates = [ entry.get("title") or "", entry.get("title_english") or "", entry.get("title_japanese") or "", ] for alt in (entry.get("titles") or []): candidates.append(alt.get("title") or "") best = 0.0 q = query.lower() for t in candidates: if t: ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio() best = max(best, ratio) return best # -------------------------------------------------------------------------- # Usage example # -------------------------------------------------------------------------- if __name__ == "__main__": r1 = MALResolver() r2 = MALResolver() assert r1 is r2, "MALResolver must be a Singleton" mal_id = r1.find_mal_id("Yofukashi no Uta") print("MAL ID :", mal_id) stats = r1.get_stats(mal_id) if stats: print("Score :", stats["score"]) print("Rank :", stats["rank"]) chars = r1.get_characters_detailed(mal_id) print("Characters (first 3):", [c["name"] for c in chars[:3]]) staff = r1.get_staff_detailed(mal_id) print("Staff :", [s["name"] for s in staff])