MAL and Kavita update

This commit is contained in:
2026-05-23 10:21:09 +02:00
parent d5817e908a
commit 852f6b84ef
4 changed files with 675 additions and 57 deletions
+240 -52
View File
@@ -2,18 +2,25 @@
mal_resolver.py
===============
Fetches and caches MyAnimeList manga metadata (statistics and characters)
Fetches and caches MyAnimeList manga metadata (statistics, characters, staff)
using the public Jikan REST API v4.
Jikan API: https://api.jikan.moe/v4 (no authentication required)
Rate limit: 3 req/s, 60 req/min -> a 400 ms delay between calls is applied.
Rate limit: 3 req/s, 60 req/min -> a 400 ms guard between calls is applied.
Singleton
---------
Only one instance of this class exists per process. Subsequent calls to
MALResolver() return the same object with its warm caches intact.
Provided features
-----------------
- Title-based MAL ID lookup with best-match scoring (cached)
- Title-based MAL ID lookup with best-match scoring
- MAL statistics: score, rank, scored_by, popularity, members, favorites
- Character list for a manga (names only, cached)
- Convenience: get_characters_for_manga(title) -> list[str]
- Character list for a manga (names only — for <Characters> XML tag)
- Detailed character list: name, MAL character ID, image URL, role
- Detailed staff list: name, MAL person ID, image URL, positions
- Lazy full-detail fetches per character / person (for descriptions)
Dependencies
------------
@@ -31,23 +38,50 @@ import requests
class MALResolver:
"""
Fetches and caches MyAnimeList manga data via the Jikan API v4.
Singleton: fetches and caches MAL manga data via Jikan API v4.
The first call to MALResolver() creates and initialises the instance;
all subsequent calls return the same object.
"""
JIKAN_BASE = "https://api.jikan.moe/v4"
_instance: "MALResolver | None" = None
def __init__(self, *,
request_timeout: int = 30,
session: "requests.Session | None" = None):
# ------------------------------------------------------------------
# Singleton machinery
# ------------------------------------------------------------------
def __new__(cls, **kwargs):
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._initialized = False
return cls._instance
def __init__(self, *, request_timeout: int = 30):
if self._initialized:
return
self.JIKAN_BASE = "https://api.jikan.moe/v4"
self.request_timeout = request_timeout
self._session = session or requests.Session()
self._session = requests.Session()
self._session.headers.setdefault("User-Agent", "MALResolver/1.0")
self._id_cache: dict[str, "int | None"] = {} # title_lower -> mal_id
self._stats_cache: dict[int, dict] = {} # mal_id -> stats dict
self._char_cache: dict[int, list[str]] = {} # mal_id -> [name, ...]
# title_lower -> mal_id
self._id_cache: dict[str, "int | None"] = {}
# mal_id -> stats dict
self._stats_cache: dict[int, dict] = {}
# manga_mal_id -> [name_str, ...] (for ComicInfo <Characters>)
self._char_names_cache: dict[int, list[str]] = {}
# manga_mal_id -> [{mal_id, name, image_url, role}]
self._char_detailed_cache: dict[int, list[dict]] = {}
# manga_mal_id -> [{mal_id, name, image_url, positions}]
self._staff_detailed_cache: dict[int, list[dict]] = {}
# char_mal_id -> {mal_id, name, image_url, about}
self._char_info_cache: dict[int, dict] = {}
# person_mal_id -> {mal_id, name, image_url, about, website_url}
self._person_info_cache: dict[int, dict] = {}
self._last_request_at: float = 0.0
self._initialized = True
# ------------------------------------------------------------------
# Public: ID lookup
@@ -87,23 +121,13 @@ class MALResolver:
"""
Returns a statistics dict for the given MAL manga ID:
{
"score": float | None,
"rank": int | None,
"scored_by": int | None,
"popularity": int | None,
"members": int | None,
"favorites": int | None,
"url": str,
"title": str,
"as_of": str (DD-MM-YYYY),
}
{score, rank, scored_by, popularity, members, favorites,
url, title, as_of (DD-MM-YYYY)}
Returns None if mal_id is None or on network failure.
"""
if mal_id is None:
return None
if mal_id in self._stats_cache:
return self._stats_cache[mal_id]
@@ -133,18 +157,42 @@ class MALResolver:
return self.get_stats(self.find_mal_id(title))
# ------------------------------------------------------------------
# Public: characters
# Public: character names (for ComicInfo <Characters> tag)
# ------------------------------------------------------------------
def get_characters(self, mal_id: "int | None") -> list[str]:
    """Return a flat list of character names for the manga.

    Used by ComicInfoBuilder to populate the <Characters> XML element.
    The names are taken from ``get_characters_detailed(mal_id)``; entries
    without a name are left out.

    Args:
        mal_id: MAL manga ID, or ``None``.

    Returns:
        The list of names. Empty when ``mal_id`` is ``None`` or when the
        detailed lookup yields no entries.
    """
    if mal_id is None:
        return []
    if mal_id in self._char_names_cache:
        return self._char_names_cache[mal_id]

    detailed = self.get_characters_detailed(mal_id)
    names = [entry["name"] for entry in detailed if entry.get("name")]

    # get_characters_detailed() stores its result only after a successful
    # fetch and returns a bare [] on a request failure. Mirror that here:
    # remembering the [] of a failed request would make every later call
    # answer from this cache and never retry.
    if mal_id in self._char_detailed_cache:
        self._char_names_cache[mal_id] = names
    return names
def get_characters_for_manga(self, title: str) -> list[str]:
    """Resolve the title to a MAL ID, then return that manga's character names."""
    mal_id = self.find_mal_id(title)
    return self.get_characters(mal_id)
# ------------------------------------------------------------------
# Public: detailed character data (for KavitaPersonUpdater)
# ------------------------------------------------------------------
def get_characters_detailed(self, mal_id: "int | None") -> list[dict]:
"""
Returns detailed character entries for a manga:
[{mal_id, name, image_url, role, about=None}, ...]
`about` is not populated here; call get_character_details(char_mal_id)
to fetch it lazily when needed.
"""
if mal_id is None:
return []
if mal_id in self._char_detailed_cache:
return self._char_detailed_cache[mal_id]
try:
data = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}/characters")
@@ -152,36 +200,143 @@ class MALResolver:
except requests.RequestException:
return []
names = []
results = []
for entry in entries:
char = entry.get("character") or {}
name = char.get("name")
if name:
names.append(name)
raw_name = char.get("name") or ""
if not raw_name:
continue
jpg = (char.get("images") or {}).get("jpg") or {}
results.append({
"mal_id": char.get("mal_id"),
# Cleaned name: "Hibino, Susuki" -> "Susuki Hibino". ComicInfo
# <Characters> is comma-separated, so commas in names would
# cause Kavita to split a single character into two persons.
"name": _clean_mal_name(raw_name),
"raw_name": raw_name,
"image_url": jpg.get("image_url") or jpg.get("small_image_url"),
"role": entry.get("role") or "Supporting",
"about": None,
})
self._char_cache[mal_id] = names
return names
self._char_detailed_cache[mal_id] = results
return results
def get_characters_for_manga(self, title: str) -> list[str]:
# ------------------------------------------------------------------
# Public: detailed staff data (for KavitaPersonUpdater)
# ------------------------------------------------------------------
def get_staff_detailed(self, mal_id: "int | None") -> list[dict]:
    """Return detailed staff entries for a manga.

    Each entry has the shape
    ``{mal_id, name, raw_name, image_url, positions, about}``.
    ``name`` is ``raw_name`` passed through ``_clean_mal_name``.
    ``about`` is always ``None`` here; call
    ``get_person_details(person_mal_id)`` to fetch it lazily when needed.

    Args:
        mal_id: MAL manga ID, or ``None``.

    Returns:
        The list of staff entries. Empty when ``mal_id`` is ``None`` or
        when the request fails; a failed request is not cached.
    """
    if mal_id is None:
        return []
    if mal_id in self._staff_detailed_cache:
        return self._staff_detailed_cache[mal_id]

    try:
        data = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}/staff")
        entries = data.get("data") or []
    except requests.RequestException:
        return []

    results = []
    for entry in entries:
        person = entry.get("person") or {}
        raw_name = person.get("name") or ""
        if not raw_name:
            # Entries without a name cannot be matched to anything; skip.
            continue
        jpg = (person.get("images") or {}).get("jpg") or {}
        results.append({
            "mal_id": person.get("mal_id"),
            "name": _clean_mal_name(raw_name),
            "raw_name": raw_name,
            "image_url": jpg.get("image_url") or jpg.get("small_image_url"),
            "positions": entry.get("positions") or [],
            "about": None,
        })

    self._staff_detailed_cache[mal_id] = results
    return results
# ------------------------------------------------------------------
# Public: individual character / person details (lazy, with description)
# ------------------------------------------------------------------
def get_character_details(self, char_mal_id: "int | None") -> "dict | None":
    """Fetch the full record of one MAL character, including ``about``.

    The record ``{mal_id, name, image_url, about}`` is stored in
    ``_char_info_cache`` and served from there on later calls.
    Returns ``None`` when ``char_mal_id`` is ``None`` or the request fails.
    """
    if char_mal_id is None:
        return None

    cache = self._char_info_cache
    if char_mal_id in cache:
        return cache[char_mal_id]

    try:
        payload = self._get(f"{self.JIKAN_BASE}/characters/{char_mal_id}")
        record = payload.get("data") or {}
    except requests.RequestException:
        return None

    images = record.get("images") or {}
    jpg = images.get("jpg") or {}
    # Prefer the regular image; fall back to the small variant.
    picture = jpg.get("image_url") or jpg.get("small_image_url")

    details = {
        "mal_id": record.get("mal_id"),
        "name": record.get("name") or "",
        "image_url": picture,
        "about": record.get("about"),
    }
    cache[char_mal_id] = details
    return details
def get_person_details(self, person_mal_id: "int | None") -> "dict | None":
    """Fetch the full record of one MAL person (staff), including ``about``.

    The record ``{mal_id, name, image_url, about, website_url}`` is stored
    in ``_person_info_cache`` and served from there on later calls.
    Returns ``None`` when ``person_mal_id`` is ``None`` or the request fails.
    """
    if person_mal_id is None:
        return None

    cache = self._person_info_cache
    if person_mal_id in cache:
        return cache[person_mal_id]

    try:
        payload = self._get(f"{self.JIKAN_BASE}/people/{person_mal_id}")
        record = payload.get("data") or {}
    except requests.RequestException:
        return None

    images = record.get("images") or {}
    jpg = images.get("jpg") or {}
    # Prefer the regular image; fall back to the small variant.
    picture = jpg.get("image_url") or jpg.get("small_image_url")

    details = {
        "mal_id": record.get("mal_id"),
        "name": record.get("name") or "",
        "image_url": picture,
        "about": record.get("about"),
        "website_url": record.get("website_url"),
    }
    cache[person_mal_id] = details
    return details
# ------------------------------------------------------------------
# Public: cache management
# ------------------------------------------------------------------
def clear_cache(self) -> None:
    """Clear all internal caches (the Singleton instance is retained).

    The dictionaries are emptied in place, so the cache objects themselves
    stay the same. The HTTP session and the request timestamp are not
    touched.
    """
    for cache in (
        self._id_cache,
        self._stats_cache,
        self._char_names_cache,
        self._char_detailed_cache,
        self._staff_detailed_cache,
        self._char_info_cache,
        self._person_info_cache,
    ):
        cache.clear()
# ------------------------------------------------------------------
# Internal: rate-limited HTTP
# ------------------------------------------------------------------
def _get(self, url: str, params: "dict | None" = None) -> dict:
"""Rate-limited GET request (respects Jikan's 3 req/s limit)."""
"""Rate-limited GET request (respects Jikan's ~3 req/s limit)."""
elapsed = time.monotonic() - self._last_request_at
if elapsed < 0.4:
time.sleep(0.4 - elapsed)
@@ -194,6 +349,35 @@ class MALResolver:
# --------------------------------------------------------------------------
# Module helper
# --------------------------------------------------------------------------
def _clean_mal_name(name: str) -> str:
"""
Converts an MAL name into a comma-free, ComicInfo-safe form.
The ComicInfo <Characters> tag is comma-separated, so a single MAL
character "Hibino, Susuki" written into the XML would be parsed by
Kavita as two persons ("Hibino" and "Susuki").
Conversion:
"Hibino, Susuki" -> "Susuki Hibino" (Western: First Last)
"Yamori, Kou" -> "Kou Yamori"
"Kotoyama" -> "Kotoyama" (unchanged)
Trailing/leading commas and stray whitespace are stripped defensively.
"""
if not name:
return ""
name = name.strip()
if "," in name:
last, _, first = name.partition(",")
first = first.strip()
last = last.strip()
if first and last:
return f"{first} {last}"
# Fallback: strip any remaining commas
return name.replace(",", " ").strip()
return name
def _score_title(query: str, entry: dict) -> float:
"""Returns the best title-similarity score for a Jikan manga entry."""
candidates = [
@@ -203,7 +387,6 @@ def _score_title(query: str, entry: dict) -> float:
]
for alt in (entry.get("titles") or []):
candidates.append(alt.get("title") or "")
best = 0.0
q = query.lower()
for t in candidates:
@@ -217,15 +400,20 @@ def _score_title(query: str, entry: dict) -> float:
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
    # Demo: two constructions must hand back the very same object.
    r1 = MALResolver()
    r2 = MALResolver()
    assert r1 is r2, "MALResolver must be a Singleton"

    # Resolve a title to its MAL ID, then query everything through r1.
    mal_id = r1.find_mal_id("Yofukashi no Uta")
    print("MAL ID :", mal_id)

    stats = r1.get_stats(mal_id)
    if stats:  # get_stats() returns None on failure
        print("Score :", stats["score"])
        print("Rank :", stats["rank"])

    chars = r1.get_characters_detailed(mal_id)
    print("Characters (first 3):", [c["name"] for c in chars[:3]])

    staff = r1.get_staff_detailed(mal_id)
    print("Staff :", [s["name"] for s in staff])