436 lines
16 KiB
Python
436 lines
16 KiB
Python
"""
mal_resolver.py
===============

Fetches and caches MyAnimeList manga metadata (statistics, characters, staff)
using the public Jikan REST API v4.

Jikan API:  https://api.jikan.moe/v4  (no authentication required)
Rate limit: 3 req/s, 60 req/min -> a 400 ms guard between calls is applied.

Singleton
---------
Only one MALResolver instance exists per process. Subsequent calls to
MALResolver() return the same object with its warm caches intact.

Provided features
-----------------
- Title-based MAL ID lookup with best-match scoring
- MAL statistics: score, rank, scored_by, popularity, members, favorites
- Character list for a manga (names only — for the <Characters> XML tag)
- Detailed character list: name, MAL character ID, image URL, role
- Detailed staff list: name, MAL person ID, image URL, positions
- Lazy full-detail fetches per character / person (for descriptions)

Dependencies
------------
requests  ->  pip install requests
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import datetime
|
|
import difflib
|
|
import time
|
|
|
|
import requests
|
|
|
|
|
|
class MALResolver:
    """
    Singleton: fetches and caches MAL manga data via Jikan API v4.

    The first call to MALResolver() creates and initialises the instance;
    all subsequent calls return the same object. Keyword arguments passed on
    those later calls are ignored, because __init__ returns early once the
    instance has been initialised.
    """

    _instance: "MALResolver | None" = None

    # Root of the Jikan v4 REST API; every endpoint URL is built from it.
    JIKAN_BASE = "https://api.jikan.moe/v4"
    # Value sent in the User-Agent header of every request.
    USER_AGENT = "MALResolver/1.0"
    # Minimum spacing between two HTTP requests, in seconds.
    MIN_REQUEST_INTERVAL = 0.4

    # ------------------------------------------------------------------
    # Singleton machinery
    # ------------------------------------------------------------------
    def __new__(cls, **kwargs):
        """Create the shared instance on first use, then keep returning it."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self, *, request_timeout: int = 30):
        """
        Initialise the HTTP session and the caches (first construction only).

        Args:
            request_timeout: Timeout in seconds passed to every HTTP request.
        """
        if self._initialized:
            return

        self.request_timeout = request_timeout

        self._session = requests.Session()
        # A requests.Session already carries a default "User-Agent" header,
        # so setdefault() would leave it untouched; assign explicitly.
        self._session.headers["User-Agent"] = self.USER_AGENT

        # title_lower -> mal_id
        self._id_cache: dict[str, "int | None"] = {}
        # mal_id -> stats dict
        self._stats_cache: dict[int, dict] = {}
        # manga_mal_id -> [name_str, ...]  (for ComicInfo <Characters>)
        self._char_names_cache: dict[int, list[str]] = {}
        # manga_mal_id -> [{mal_id, name, raw_name, image_url, role, about}]
        self._char_detailed_cache: dict[int, list[dict]] = {}
        # manga_mal_id -> [{mal_id, name, raw_name, image_url, positions, about}]
        self._staff_detailed_cache: dict[int, list[dict]] = {}
        # char_mal_id -> {mal_id, name, image_url, about, favorites, url}
        self._char_info_cache: dict[int, dict] = {}
        # person_mal_id -> {mal_id, name, image_url, about, website_url, ...}
        self._person_info_cache: dict[int, dict] = {}

        self._last_request_at: float = 0.0
        self._initialized = True

    # ------------------------------------------------------------------
    # Public: ID lookup
    # ------------------------------------------------------------------
    def find_mal_id(self, title: str) -> "int | None":
        """
        Searches MAL for a manga by title and returns the best-matching MAL ID.

        The lookup is cached under the stripped, lower-cased title. A search
        that succeeds but yields no results is cached as None; a network
        failure is not cached, so the next call retries.

        Returns None on failure or when no result is found.
        """
        if not title or not title.strip():
            return None

        # Query and score with the same normalised text the cache key is
        # derived from, so the cached answer does not depend on stray
        # whitespace in whichever spelling happened to arrive first.
        query = title.strip()
        key = query.lower()
        if key in self._id_cache:
            return self._id_cache[key]

        try:
            data = self._get(f"{self.JIKAN_BASE}/manga",
                             {"q": query, "limit": 5, "type": "manga"})
            results = data.get("data") or []
        except requests.RequestException:
            return None

        if not results:
            self._id_cache[key] = None
            return None

        # max() returns the first entry with the highest score, which is the
        # same entry a stable descending sort would put first, without
        # reordering the API payload.
        best = max(results, key=lambda e: _score_title(query, e))
        mal_id = best.get("mal_id")
        self._id_cache[key] = mal_id
        return mal_id

    # ------------------------------------------------------------------
    # Public: statistics
    # ------------------------------------------------------------------
    def get_stats(self, mal_id: "int | None") -> "dict | None":
        """
        Returns a statistics dict for the given MAL manga ID:

            {score, rank, scored_by, popularity, members, favorites,
             url, title, as_of (DD-MM-YYYY)}

        `as_of` is the local date on which the entry was fetched; it stays
        unchanged for as long as the entry is served from the cache.

        Returns None if mal_id is None or on network failure.
        """
        if mal_id is None:
            return None
        if mal_id in self._stats_cache:
            return self._stats_cache[mal_id]

        try:
            data = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}")
            entry = data.get("data") or {}
        except requests.RequestException:
            return None

        stats: dict = {
            "score": entry.get("score"),
            "rank": entry.get("rank"),
            "scored_by": entry.get("scored_by"),
            "popularity": entry.get("popularity"),
            "members": entry.get("members"),
            "favorites": entry.get("favorites"),
            "url": (entry.get("url")
                    or f"https://myanimelist.net/manga/{mal_id}"),
            "title": entry.get("title") or "",
            "as_of": datetime.date.today().strftime("%d-%m-%Y"),
        }
        self._stats_cache[mal_id] = stats
        return stats

    def get_stats_for_manga(self, title: str) -> "dict | None":
        """Convenience: find MAL ID by title, then return stats."""
        return self.get_stats(self.find_mal_id(title))

    # ------------------------------------------------------------------
    # Public: character names (for ComicInfo <Characters> tag)
    # ------------------------------------------------------------------
    def get_characters(self, mal_id: "int | None") -> list[str]:
        """
        Returns a flat list of (cleaned) character names for the manga.

        Used by ComicInfoBuilder to populate the <Characters> XML element.
        Returns an empty list if mal_id is None or nothing could be fetched.
        """
        if mal_id is None:
            return []
        if mal_id in self._char_names_cache:
            return self._char_names_cache[mal_id]

        detailed = self.get_characters_detailed(mal_id)
        names = [e["name"] for e in detailed if e.get("name")]
        if names:
            # Only cache a successful result — empty could be a transient
            # API failure and we want the next call to retry.
            self._char_names_cache[mal_id] = names
        return names

    def get_characters_for_manga(self, title: str) -> list[str]:
        """Convenience: search by title, then return character names."""
        return self.get_characters(self.find_mal_id(title))

    # ------------------------------------------------------------------
    # Public: detailed character data (for KavitaPersonUpdater)
    # ------------------------------------------------------------------
    def get_characters_detailed(self, mal_id: "int | None") -> list[dict]:
        """
        Returns detailed character entries for a manga:
            [{mal_id, name, raw_name, image_url, role, about=None}, ...]

        `name` is the comma-free form of `raw_name`. `about` is not populated
        here; call get_character_details(char_mal_id) to fetch it lazily
        when needed.

        Returns an empty list if mal_id is None, on network failure, or when
        the API lists no named characters. Empty results are not cached.
        """
        if mal_id is None:
            return []
        if mal_id in self._char_detailed_cache:
            return self._char_detailed_cache[mal_id]

        try:
            data = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}/characters")
            entries = data.get("data") or []
        except requests.RequestException:
            return []

        results = []
        for entry in entries:
            char = entry.get("character") or {}
            raw_name = char.get("name") or ""
            if not raw_name:
                continue
            jpg = (char.get("images") or {}).get("jpg") or {}
            results.append({
                "mal_id": char.get("mal_id"),
                # Cleaned name: "Hibino, Susuki" -> "Susuki Hibino". ComicInfo
                # <Characters> is comma-separated, so commas in names would
                # cause Kavita to split a single character into two persons.
                "name": _clean_mal_name(raw_name),
                "raw_name": raw_name,
                "image_url": jpg.get("image_url") or jpg.get("small_image_url"),
                "role": entry.get("role") or "Supporting",
                "about": None,
            })

        # Same policy as get_staff_detailed and get_characters: an empty list
        # may be a transient API hiccup, so leave it uncached and let the
        # next call retry.
        if results:
            self._char_detailed_cache[mal_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: detailed staff data (for KavitaPersonUpdater)
    # ------------------------------------------------------------------
    def get_staff_detailed(self, mal_id: "int | None") -> list[dict]:
        """
        Returns detailed staff (author) entries for a manga:
            [{mal_id, name, raw_name, image_url, positions, about=None}, ...]

        Jikan has no `/manga/{id}/staff` endpoint — that route only exists for
        anime. For manga the authors are listed on `/manga/{id}` under
        `data.authors`, but each entry only has {mal_id, name, url}; the image
        URL is fetched via get_person_details (cached, so the later
        description fetch is free). `positions` is always an empty list here.

        Returns an empty list if mal_id is None, on network failure, or when
        no usable author is listed. Empty results are not cached.
        """
        if mal_id is None:
            return []
        if mal_id in self._staff_detailed_cache:
            return self._staff_detailed_cache[mal_id]

        try:
            data = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}")
            entry = data.get("data") or {}
        except requests.RequestException:
            return []

        results = []
        for author in (entry.get("authors") or []):
            raw_name = author.get("name") or ""
            person_mal_id = author.get("mal_id")
            if not raw_name or person_mal_id is None:
                continue
            # A failed person lookup yields None; fall back to no image.
            details = self.get_person_details(person_mal_id) or {}
            results.append({
                "mal_id": person_mal_id,
                "name": _clean_mal_name(raw_name),
                "raw_name": raw_name,
                "image_url": details.get("image_url"),
                "positions": [],
                "about": None,
            })

        if results:
            self._staff_detailed_cache[mal_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: individual character / person details (lazy, with description)
    # ------------------------------------------------------------------
    def get_character_details(self, char_mal_id: "int | None") -> "dict | None":
        """
        Returns full details for a single MAL character, including `about`:
            {mal_id, name, image_url, about, favorites, url}

        Result is cached. Returns None if char_mal_id is None or on network
        failure.
        """
        if char_mal_id is None:
            return None
        if char_mal_id in self._char_info_cache:
            return self._char_info_cache[char_mal_id]

        try:
            data = self._get(f"{self.JIKAN_BASE}/characters/{char_mal_id}")
            entry = data.get("data") or {}
        except requests.RequestException:
            return None

        jpg = (entry.get("images") or {}).get("jpg") or {}
        result = {
            "mal_id": entry.get("mal_id"),
            "name": entry.get("name") or "",
            "image_url": jpg.get("image_url") or jpg.get("small_image_url"),
            "about": entry.get("about"),
            "favorites": entry.get("favorites"),
            "url": (entry.get("url")
                    or f"https://myanimelist.net/character/{char_mal_id}"),
        }
        self._char_info_cache[char_mal_id] = result
        return result

    def get_person_details(self, person_mal_id: "int | None") -> "dict | None":
        """
        Returns full details for a single MAL person (staff), including `about`:
            {mal_id, name, given_name, family_name, birthday, image_url,
             about, favorites, website_url, url}

        Result is cached. Returns None if person_mal_id is None or on network
        failure.
        """
        if person_mal_id is None:
            return None
        if person_mal_id in self._person_info_cache:
            return self._person_info_cache[person_mal_id]

        try:
            data = self._get(f"{self.JIKAN_BASE}/people/{person_mal_id}")
            entry = data.get("data") or {}
        except requests.RequestException:
            return None

        jpg = (entry.get("images") or {}).get("jpg") or {}
        result = {
            "mal_id": entry.get("mal_id"),
            "name": entry.get("name") or "",
            "given_name": entry.get("given_name"),
            "family_name": entry.get("family_name"),
            "birthday": entry.get("birthday"),
            "image_url": jpg.get("image_url") or jpg.get("small_image_url"),
            "about": entry.get("about"),
            "favorites": entry.get("favorites"),
            "website_url": entry.get("website_url"),
            "url": (entry.get("url")
                    or f"https://myanimelist.net/people/{person_mal_id}"),
        }
        self._person_info_cache[person_mal_id] = result
        return result

    # ------------------------------------------------------------------
    # Public: cache management
    # ------------------------------------------------------------------
    def clear_cache(self) -> None:
        """Clears all internal caches (the Singleton instance is retained)."""
        self._id_cache.clear()
        self._stats_cache.clear()
        self._char_names_cache.clear()
        self._char_detailed_cache.clear()
        self._staff_detailed_cache.clear()
        self._char_info_cache.clear()
        self._person_info_cache.clear()

    # ------------------------------------------------------------------
    # Internal: rate-limited HTTP
    # ------------------------------------------------------------------
    def _get(self, url: str, params: "dict | None" = None) -> dict:
        """
        Rate-limited GET request returning the decoded JSON object.

        Sleeps as needed so that at least MIN_REQUEST_INTERVAL seconds pass
        between consecutive requests (Jikan's ~3 req/s limit). With the
        default 0.4 s spacing a sustained burst can still reach 150 requests
        per minute, so this guard alone does not enforce a per-minute quota.

        Raises:
            requests.RequestException: on transport errors, non-2xx status
                codes, or a body that is not a JSON object.
        """
        wait = self.MIN_REQUEST_INTERVAL - (time.monotonic() - self._last_request_at)
        if wait > 0:
            time.sleep(wait)
        try:
            resp = self._session.get(url, params=params,
                                     timeout=self.request_timeout)
        finally:
            # Stamp the attempt even when it fails (timeout, connection
            # error) so a failing request cannot bypass the guard on retry.
            self._last_request_at = time.monotonic()
        resp.raise_for_status()
        try:
            payload = resp.json()
        except ValueError as exc:
            # Callers only handle RequestException; do not let a plain
            # ValueError from an undecodable body escape past them.
            raise requests.RequestException(
                f"Jikan returned a non-JSON body for {url}") from exc
        if not isinstance(payload, dict):
            raise requests.RequestException(
                f"Jikan returned an unexpected JSON payload for {url}")
        return payload
|
|
|
|
|
|
# --------------------------------------------------------------------------
# Module helpers
# --------------------------------------------------------------------------
|
|
def _clean_mal_name(name: str) -> str:
|
|
"""
|
|
Converts an MAL name into a comma-free, ComicInfo-safe form.
|
|
|
|
The ComicInfo <Characters> tag is comma-separated, so a single MAL
|
|
character "Hibino, Susuki" written into the XML would be parsed by
|
|
Kavita as two persons ("Hibino" and "Susuki").
|
|
|
|
Conversion:
|
|
"Hibino, Susuki" -> "Susuki Hibino" (Western: First Last)
|
|
"Yamori, Kou" -> "Kou Yamori"
|
|
"Kotoyama" -> "Kotoyama" (unchanged)
|
|
|
|
Trailing/leading commas and stray whitespace are stripped defensively.
|
|
"""
|
|
if not name:
|
|
return ""
|
|
name = name.strip()
|
|
if "," in name:
|
|
last, _, first = name.partition(",")
|
|
first = first.strip()
|
|
last = last.strip()
|
|
if first and last:
|
|
return f"{first} {last}"
|
|
# Fallback: strip any remaining commas
|
|
return name.replace(",", " ").strip()
|
|
return name
|
|
|
|
|
|
def _score_title(query: str, entry: dict) -> float:
|
|
"""Returns the best title-similarity score for a Jikan manga entry."""
|
|
candidates = [
|
|
entry.get("title") or "",
|
|
entry.get("title_english") or "",
|
|
entry.get("title_japanese") or "",
|
|
]
|
|
for alt in (entry.get("titles") or []):
|
|
candidates.append(alt.get("title") or "")
|
|
best = 0.0
|
|
q = query.lower()
|
|
for t in candidates:
|
|
if t:
|
|
ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
|
|
best = max(best, ratio)
|
|
return best
|
|
|
|
|
|
# --------------------------------------------------------------------------
|
|
# Usage example
|
|
# --------------------------------------------------------------------------
|
|
if __name__ == "__main__":
    # Demo: constructing the resolver twice must hand back the same object.
    r1 = MALResolver()
    r2 = MALResolver()
    assert r1 is r2, "MALResolver must be a Singleton"

    # Resolve a title to its MAL ID; everything below hangs off that ID.
    mal_id = r1.find_mal_id("Yofukashi no Uta")
    print("MAL ID :", mal_id)

    # Statistics are only printed when the lookup produced something.
    stats = r1.get_stats(mal_id)
    if stats:
        for label, field in (("Score :", "score"), ("Rank :", "rank")):
            print(label, stats[field])

    # Show the cleaned names of the first three characters.
    chars = r1.get_characters_detailed(mal_id)
    first_three = [c["name"] for c in chars[:3]]
    print("Characters (first 3):", first_three)

    # Show the cleaned names of all listed authors.
    staff = r1.get_staff_detailed(mal_id)
    staff_names = [s["name"] for s in staff]
    print("Staff :", staff_names)
|