init
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
KAVITA_URL=http://192.168.1.100:5000
|
||||
KAVITA_API_KEY=your-api-key-here
|
||||
LIBRARY_IDS=3,5
|
||||
LANGUAGE=en
|
||||
MATCH_PATH=matches.json
|
||||
WEB_PORT=8080
|
||||
@@ -267,3 +267,10 @@ pyvenv.cfg
|
||||
.venv
|
||||
pip-selfcheck.json
|
||||
|
||||
manga-mover-and-metadata-collector/
|
||||
|
||||
# Project-local state
|
||||
matches.json
|
||||
config/
|
||||
output/
|
||||
|
||||
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY src/ /app/src/
|
||||
COPY main.py /app/main.py
|
||||
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1
|
||||
|
||||
VOLUME ["/config"]
|
||||
|
||||
EXPOSE 8080
|
||||
|
||||
CMD ["python", "/app/main.py"]
|
||||
@@ -1,2 +1,56 @@
|
||||
# kavita-lightnovel-metadata-fetcher
|
||||
|
||||
Pulls metadata (summary, tags, genres, characters, staff, score,
|
||||
cover, links, related series) for light novels from **MangaBaka**,
|
||||
enriched with **MyAnimeList** and **AniList** data, and writes it
|
||||
back to a **Kavita** server through its REST API.
|
||||
|
||||
No file mover, no ComicInfo.xml — the source of truth is Kavita
|
||||
itself. Series are discovered via the Kavita library API.
|
||||
|
||||
## Features
|
||||
|
||||
- Match every series in one or more Kavita libraries against
|
||||
MangaBaka and persist the match in `matches.json` (editable via
|
||||
the web UI).
|
||||
- Update metadata for a single series or all matched series at
|
||||
once. Updates are diff-based:
|
||||
- Locked fields in Kavita are never overwritten.
|
||||
- List fields (tags, genres, characters, writers, …) are merged:
|
||||
new items are added, removed items are dropped.
|
||||
- Cover images are only re-uploaded when MangaBaka's cover URL
|
||||
actually changed.
|
||||
- Characters and authors are synced to Kavita Person records
|
||||
(image, description, MAL/AniList id) via Kavita's `/api/Person`
|
||||
endpoints.
|
||||
- MangaBaka relationships (sequel / prequel / spin-off / …) are
|
||||
mirrored as Kavita series relationships, and every related
|
||||
series that exists in Kavita is added to a shared collection.
|
||||
|
||||
## Environment
|
||||
|
||||
| Variable          | Default                | Description                                              |
| ----------------- | ---------------------- | -------------------------------------------------------- |
| `KAVITA_URL`      | —                      | Base URL of the Kavita server, e.g. `http://kavita:5000` |
| `KAVITA_API_KEY`  | —                      | API key from Kavita user settings                        |
| `LIBRARY_IDS`     | _(empty)_              | Default libraries (CSV of ids). Empty = pick in WebUI.   |
| `LANGUAGE`        | `en`                   | Series language ISO code (used for `language` field)     |
| `REQUEST_TIMEOUT` | `30`                   | HTTP timeout in seconds                                  |
| `MATCH_PATH`      | `/config/matches.json` | Where to persist the match cache                         |
| `WEB_HOST`        | `0.0.0.0`              | Bind host for the Flask UI                               |
| `WEB_PORT`        | `8080`                 | Bind port for the Flask UI                               |
|
||||
|
||||
## Running locally
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
KAVITA_URL=http://localhost:5000 KAVITA_API_KEY=... python main.py
|
||||
```
|
||||
|
||||
Then open <http://localhost:8080/>.
|
||||
|
||||
## Docker
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.prod.yml up -d
|
||||
```
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
services:
|
||||
kavita-lightnovel-metadata-fetcher:
|
||||
image: gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:latest
|
||||
container_name: kavita-lightnovel-metadata-fetcher
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
KAVITA_URL: "${KAVITA_URL}"
|
||||
KAVITA_API_KEY: "${KAVITA_API_KEY}"
|
||||
LIBRARY_IDS: "${LIBRARY_IDS}"
|
||||
LANGUAGE: "${LANGUAGE:-en}"
|
||||
MATCH_PATH: "${MATCH_PATH:-/config/matches.json}"
|
||||
WEB_PORT: "${WEB_PORT:-8080}"
|
||||
ports:
|
||||
- "${WEB_PORT:-8080}:${WEB_PORT:-8080}"
|
||||
volumes:
|
||||
- "${HOST_CONFIG_PATH}:/config"
|
||||
@@ -0,0 +1,122 @@
|
||||
"""
|
||||
main.py
|
||||
=======
|
||||
|
||||
Container entry point for the Kavita light-novel metadata fetcher.
|
||||
|
||||
Reads configuration from environment variables, starts the orchestrator
|
||||
and exposes the Flask WebApp on WEB_HOST:WEB_PORT. Everything happens
|
||||
through HTTP — there is no folder watcher and no file mover (Kavita is
|
||||
the source of truth for the library content; this service only writes
|
||||
metadata back to it).
|
||||
|
||||
Environment variables
|
||||
---------------------
|
||||
Required:
|
||||
KAVITA_URL base URL of the Kavita server, e.g. http://kavita:5000
|
||||
KAVITA_API_KEY Kavita API key (Settings -> User -> API key)
|
||||
|
||||
Optional:
|
||||
LIBRARY_IDS comma-separated default library ids (e.g. "3,5").
|
||||
Empty = user picks in the WebUI each time.
|
||||
LANGUAGE default "en"
|
||||
REQUEST_TIMEOUT default 30
|
||||
MATCH_PATH default /config/matches.json
|
||||
WEB_PORT default 8080
|
||||
WEB_HOST default 0.0.0.0
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# Make src/ importable when running as `python main.py`.
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
|
||||
|
||||
from src.MatchesCache import MatchesCache # noqa: E402
|
||||
from src.LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402
|
||||
from src.MatchesWebApp import MatchesWebApp # noqa: E402
|
||||
|
||||
|
||||
def _env_str(name: str, default: "str | None" = None,
|
||||
required: bool = False) -> "str | None":
|
||||
value = os.environ.get(name, default)
|
||||
if required and not value:
|
||||
print(f"[main] missing required env var: {name}", flush=True)
|
||||
sys.exit(2)
|
||||
return value
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
raw = os.environ.get(name)
|
||||
if raw is None or raw == "":
|
||||
return default
|
||||
try:
|
||||
return int(raw)
|
||||
except ValueError:
|
||||
print(f"[main] {name}={raw!r} is not a valid integer; "
|
||||
f"falling back to {default}", flush=True)
|
||||
return default
|
||||
|
||||
|
||||
def _env_int_list(name: str) -> list[int]:
|
||||
raw = os.environ.get(name) or ""
|
||||
out: list[int] = []
|
||||
for part in raw.split(","):
|
||||
part = part.strip()
|
||||
if not part:
|
||||
continue
|
||||
try:
|
||||
out.append(int(part))
|
||||
except ValueError:
|
||||
print(f"[main] {name}: ignoring non-integer value {part!r}",
|
||||
flush=True)
|
||||
return out
|
||||
|
||||
|
||||
def main() -> int:
    """
    Wire the service together from environment variables and run it.

    Reads the configuration, prints a short summary, builds the match
    cache, the orchestrator and the web app, then calls `start()` followed
    by `wait()` on the web app.  Returns 0 once `wait()` returns.
    """
    # Required settings are read first; _env_str exits the process when
    # one of them is missing.
    kavita_url = _env_str("KAVITA_URL", required=True)
    kavita_api_key = _env_str("KAVITA_API_KEY", required=True)

    # Optional settings with their defaults.
    language = _env_str("LANGUAGE", "en") or "en"
    request_timeout = _env_int("REQUEST_TIMEOUT", 30)
    match_path = _env_str("MATCH_PATH", "/config/matches.json")
    web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
    web_port = _env_int("WEB_PORT", 8080)
    library_ids = _env_int_list("LIBRARY_IDS")

    # Startup summary (the API key is deliberately not printed).
    summary = (
        f"[main] kavita url = {kavita_url}",
        f"[main] language = {language}",
        f"[main] match path = {match_path}",
        f"[main] libraries = {library_ids or '(picked in WebUI)'}",
        f"[main] web = {web_host}:{web_port}",
    )
    for line in summary:
        print(line, flush=True)

    cache = MatchesCache(match_path)
    orchestrator = LightNovelOrchestrator(
        kavita_url=kavita_url,
        kavita_api_key=kavita_api_key,
        matches_cache=cache,
        language=language,
        request_timeout=request_timeout,
    )
    web_app = MatchesWebApp(
        cache,
        orchestrator=orchestrator,
        default_library_ids=library_ids,
        host=web_host,
        port=web_port,
    )

    web_app.start()
    web_app.wait()
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -0,0 +1,3 @@
|
||||
requests>=2.31
|
||||
Flask>=3.0
|
||||
python-dotenv>=1.0
|
||||
@@ -0,0 +1,507 @@
|
||||
"""
|
||||
anilist_resolver.py
|
||||
===================
|
||||
|
||||
Fetches and caches AniList manga metadata (statistics, characters, staff)
|
||||
using the public AniList GraphQL API.
|
||||
|
||||
AniList API: https://graphql.anilist.co (no authentication required)
|
||||
Rate limit: 90 req/min -> a 700 ms guard between calls is applied.
|
||||
On HTTP 429 (rate-limit exceeded) the response Retry-After header is
|
||||
honoured; the request is retried once automatically.
|
||||
|
||||
Singleton
|
||||
---------
|
||||
Only one instance of this class exists per process. Subsequent calls to
|
||||
AniListResolver() return the same object with its warm caches intact.
|
||||
|
||||
Provided features
|
||||
-----------------
|
||||
- Title-based AniList ID lookup with best-match scoring
|
||||
- Manga statistics: score (0–10), rank, popularity, members, favorites
|
||||
- Character list for a manga (names only — for <Characters> XML tag)
|
||||
- Detailed character list: name, AniList character ID, image URL, role
|
||||
- Detailed staff list: name, AniList person ID, image URL, positions
|
||||
- Lazy full-detail fetches per character / person (for descriptions)
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
requests -> pip install requests
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import difflib
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
from MediaResolver import MediaResolver
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# GraphQL query strings
|
||||
# --------------------------------------------------------------------------
|
||||
_SEARCH_MANGA = """
|
||||
query ($search: String) {
|
||||
Page(page: 1, perPage: 5) {
|
||||
media(search: $search, type: MANGA, format_in: [NOVEL]) {
|
||||
id title { romaji english native } siteUrl
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
_MANGA_STATS = """
|
||||
query ($id: Int) {
|
||||
Media(id: $id, type: MANGA) {
|
||||
id title { romaji english native }
|
||||
meanScore popularity favourites
|
||||
rankings { rank type allTime }
|
||||
siteUrl
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
_MANGA_CHARACTERS = """
|
||||
query ($id: Int) {
|
||||
Media(id: $id, type: MANGA) {
|
||||
characters(sort: [ROLE, RELEVANCE], perPage: 25) {
|
||||
nodes { id name { full } image { large } siteUrl }
|
||||
edges { role }
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
_MANGA_STAFF = """
|
||||
query ($id: Int) {
|
||||
Media(id: $id, type: MANGA) {
|
||||
staff(perPage: 25) {
|
||||
nodes { id name { full } image { large } siteUrl }
|
||||
edges { role }
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
_CHARACTER_DETAILS = """
|
||||
query ($id: Int) {
|
||||
Character(id: $id) {
|
||||
id name { full } image { large }
|
||||
description(asHtml: false)
|
||||
favourites siteUrl
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
_PERSON_DETAILS = """
|
||||
query ($id: Int) {
|
||||
Staff(id: $id) {
|
||||
id name { full native } image { large }
|
||||
description(asHtml: false)
|
||||
favourites siteUrl
|
||||
dateOfBirth { year month day }
|
||||
primaryOccupations
|
||||
homeTown
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
_ANILIST_GQL = "https://graphql.anilist.co"
|
||||
|
||||
|
||||
class AniListResolver(MediaResolver):
|
||||
"""
|
||||
Singleton: fetches and caches AniList manga data via GraphQL API.
|
||||
|
||||
The first call to AniListResolver() creates and initialises the instance;
|
||||
all subsequent calls return the same object.
|
||||
"""
|
||||
|
||||
_instance: "AniListResolver | None" = None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Singleton machinery
|
||||
# ------------------------------------------------------------------
|
||||
def __new__(cls, **kwargs):
    """
    Return the process-wide instance, creating it on first use.

    A freshly created instance is flagged `_initialized = False` so that
    `__init__` knows it still has to set the object up.
    """
    instance = cls._instance
    if instance is None:
        instance = super().__new__(cls)
        instance._initialized = False
        cls._instance = instance
    return instance
|
||||
|
||||
def __init__(self, *, request_timeout: int = 30):
    """
    Set up the HTTP session and the in-memory caches.

    Runs only once: when the singleton has already been initialised the
    call returns immediately and `request_timeout` is ignored.
    """
    if self._initialized:
        return

    self.request_timeout = request_timeout

    session = requests.Session()
    session.headers.update({
        "User-Agent": "AniListResolver/1.0",
        "Content-Type": "application/json",
        "Accept": "application/json",
    })
    self._session = session

    # Cache layout:
    #   _id_cache             title_lower  -> al_id (or None)
    #   _stats_cache          al_id        -> stats dict
    #   _char_names_cache     manga_al_id  -> [name_str, ...]
    #   _char_detailed_cache  manga_al_id  -> [{al_id, name, image_url, role}]
    #   _staff_detailed_cache manga_al_id  -> [{al_id, name, image_url, positions}]
    #   _char_info_cache      char_al_id   -> {al_id, name, image_url, about, favorites, url}
    #   _person_info_cache    person_al_id -> {al_id, name, image_url, about, favorites, url, ...}
    for cache_attr in (
        "_id_cache",
        "_stats_cache",
        "_char_names_cache",
        "_char_detailed_cache",
        "_staff_detailed_cache",
        "_char_info_cache",
        "_person_info_cache",
    ):
        setattr(self, cache_attr, {})

    # time.monotonic() value of the last request; used by the rate guard.
    self._last_request_at: float = 0.0
    self._initialized = True
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: ID lookup
|
||||
# ------------------------------------------------------------------
|
||||
def find_id(self, title: str) -> "int | None":
    """
    Search AniList by title and return the best-matching media id.

    The lookup is cached per lower-cased, stripped title; "no result" is
    cached as None.  A request failure returns None without caching, so
    a later call will try again.

    Args:
        title: Title to search for.  Empty / blank titles return None.

    Returns:
        The AniList id of the closest title match, or None.
    """
    if not title or not title.strip():
        return None

    key = title.strip().lower()
    if key in self._id_cache:
        return self._id_cache[key]

    try:
        data = self._gql(_SEARCH_MANGA, {"search": title})
    except requests.RequestException:
        return None

    # Every level may be JSON null, so use `or {}` rather than a .get()
    # default (a default is not applied when the key exists with None).
    page = ((data or {}).get("data") or {}).get("Page") or {}
    results = page.get("media") or []

    if not results:
        self._id_cache[key] = None
        return None

    # max() returns the first of several equally scored entries, the same
    # entry a stable descending sort would put first.
    best = max(results, key=lambda entry: _score_title(title, entry))
    al_id = best.get("id")
    self._id_cache[key] = al_id
    return al_id
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: statistics
|
||||
# ------------------------------------------------------------------
|
||||
def get_stats(self, tracker_id: "int | None") -> "dict | None":
    """
    Return (and cache) the statistics dict for an AniList media id.

    Keys: score, rank, scored_by, popularity, members, favorites, url,
    title, as_of (today's date as DD-MM-YYYY).

    Returns None when `tracker_id` is None or the request fails.
    """
    if tracker_id is None:
        return None
    try:
        return self._stats_cache[tracker_id]
    except KeyError:
        pass

    try:
        response = self._gql(_MANGA_STATS, {"id": tracker_id})
        media = (response.get("data") or {}).get("Media") or {}
    except requests.RequestException:
        return None

    # First non-empty title in the order romaji, english, native.
    titles = media.get("title") or {}
    display_title = ""
    for variant in ("romaji", "english", "native"):
        if titles.get(variant):
            display_title = titles[variant]
            break

    # meanScore is on a 0-100 scale; divide by 10 for a 0.0-10.0 value.
    mean_score = media.get("meanScore")
    score = None if mean_score is None else round(mean_score / 10, 1)

    # Take the first non-null all-time rank for each ranking type.
    all_time = {"RATED": None, "POPULAR": None}
    for ranking in media.get("rankings") or []:
        if not ranking.get("allTime"):
            continue
        kind = ranking.get("type")
        if kind in all_time and all_time[kind] is None:
            all_time[kind] = ranking.get("rank")

    stats: dict = {
        "score": score,
        "rank": all_time["RATED"],
        "scored_by": None,  # not exposed by AniList API
        "popularity": all_time["POPULAR"],
        "members": media.get("popularity"),  # AniList's popularity = member count
        "favorites": media.get("favourites"),
        "url": media.get("siteUrl") or f"https://anilist.co/manga/{tracker_id}",
        "title": display_title,
        "as_of": datetime.date.today().strftime("%d-%m-%Y"),
    }
    self._stats_cache[tracker_id] = stats
    return stats
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: character names (for ComicInfo <Characters> tag)
|
||||
# ------------------------------------------------------------------
|
||||
def get_characters(self, tracker_id: "int | None") -> list[str]:
    """
    Return the character names for a media id as a flat list.

    Names are derived from `get_characters_detailed`; a non-empty result
    is cached, an empty one is not.
    """
    if tracker_id is None:
        return []
    try:
        return self._char_names_cache[tracker_id]
    except KeyError:
        pass

    names = [
        entry["name"]
        for entry in self.get_characters_detailed(tracker_id)
        if entry.get("name")
    ]
    if not names:
        return names
    self._char_names_cache[tracker_id] = names
    return names
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: detailed character data
|
||||
# ------------------------------------------------------------------
|
||||
def get_characters_detailed(self, tracker_id: "int | None") -> list[dict]:
    """
    Return detailed character entries for a media id.

    Each entry has the shape
    {al_id, mal_id=None, name, raw_name, image_url, role, about=None}.
    Entries without a name are skipped; `role` falls back to
    "SUPPORTING".  A non-empty result is cached.

    Returns an empty list when `tracker_id` is None, the request fails,
    or the response carries no characters.
    """
    if tracker_id is None:
        return []
    if tracker_id in self._char_detailed_cache:
        return self._char_detailed_cache[tracker_id]

    try:
        data = self._gql(_MANGA_CHARACTERS, {"id": tracker_id})
    except requests.RequestException:
        return []

    # `Media` (and anything below it) can be JSON null; a .get() default
    # would not apply in that case, so fall back with `or`.
    media = ((data or {}).get("data") or {}).get("Media") or {}
    chars = media.get("characters") or {}
    nodes = chars.get("nodes") or []
    edges = chars.get("edges") or []

    results = []
    # nodes[i] and edges[i] are paired by position.
    for node, edge in zip(nodes, edges):
        node = node or {}
        edge = edge or {}
        name = (node.get("name") or {}).get("full") or ""
        if not name:
            continue
        results.append({
            "al_id": node.get("id"),
            "mal_id": None,
            "name": name,
            "raw_name": name,
            "image_url": (node.get("image") or {}).get("large"),
            "role": edge.get("role") or "SUPPORTING",
            "about": None,
        })

    if results:
        self._char_detailed_cache[tracker_id] = results
    return results
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: detailed staff data
|
||||
# ------------------------------------------------------------------
|
||||
def get_staff_detailed(self, tracker_id: "int | None") -> list[dict]:
    """
    Return detailed staff entries for a media id.

    Each entry has the shape
    {al_id, mal_id=None, name, raw_name, image_url, positions, about=None}
    where `positions` is a list holding the edge's role (empty when the
    role is missing).  Entries without a name are skipped.  A non-empty
    result is cached.

    Returns an empty list when `tracker_id` is None, the request fails,
    or the response carries no staff.
    """
    if tracker_id is None:
        return []
    if tracker_id in self._staff_detailed_cache:
        return self._staff_detailed_cache[tracker_id]

    try:
        data = self._gql(_MANGA_STAFF, {"id": tracker_id})
    except requests.RequestException:
        return []

    # `Media` (and anything below it) can be JSON null; a .get() default
    # would not apply in that case, so fall back with `or`.
    media = ((data or {}).get("data") or {}).get("Media") or {}
    staff = media.get("staff") or {}
    nodes = staff.get("nodes") or []
    edges = staff.get("edges") or []

    results = []
    # nodes[i] and edges[i] are paired by position.
    for node, edge in zip(nodes, edges):
        node = node or {}
        edge = edge or {}
        name = (node.get("name") or {}).get("full") or ""
        if not name:
            continue
        role = edge.get("role")
        results.append({
            "al_id": node.get("id"),
            "mal_id": None,
            "name": name,
            "raw_name": name,
            "image_url": (node.get("image") or {}).get("large"),
            "positions": [role] if role else [],
            "about": None,
        })

    if results:
        self._staff_detailed_cache[tracker_id] = results
    return results
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: individual character / person details
|
||||
# ------------------------------------------------------------------
|
||||
def get_character_details(self, char_id: "int | None") -> "dict | None":
    """
    Return (and cache) the full record for one AniList character.

    Result keys: al_id, mal_id (always None), name, image_url, about,
    favorites, url.  Returns None when `char_id` is None or the request
    fails.
    """
    if char_id is None:
        return None
    try:
        return self._char_info_cache[char_id]
    except KeyError:
        pass

    try:
        response = self._gql(_CHARACTER_DETAILS, {"id": char_id})
        character = (response.get("data") or {}).get("Character") or {}
    except requests.RequestException:
        return None

    names = character.get("name") or {}
    image = character.get("image") or {}
    details = {
        "al_id": character.get("id"),
        "mal_id": None,
        "name": names.get("full") or "",
        "image_url": image.get("large"),
        "about": character.get("description"),
        "favorites": character.get("favourites"),
        "url": character.get("siteUrl") or f"https://anilist.co/character/{char_id}",
    }
    self._char_info_cache[char_id] = details
    return details
|
||||
|
||||
def get_person_details(self, person_id: "int | None") -> "dict | None":
    """
    Return (and cache) the full record for one AniList staff member.

    Result keys: al_id, mal_id, name, given_name, family_name, birthday
    (YYYY-MM-DD, or None when no birth year is known), image_url, about,
    favorites, website_url, url.  Returns None when `person_id` is None
    or the request fails.
    """
    if person_id is None:
        return None
    try:
        return self._person_info_cache[person_id]
    except KeyError:
        pass

    try:
        response = self._gql(_PERSON_DETAILS, {"id": person_id})
        person = (response.get("data") or {}).get("Staff") or {}
    except requests.RequestException:
        return None

    # dateOfBirth is {year, month, day}; a missing month or day is
    # rendered as 01 so the string stays a complete ISO date.
    birth = person.get("dateOfBirth") or {}
    birth_year = birth.get("year")
    if birth_year:
        birthday: "str | None" = "{}-{:02d}-{:02d}".format(
            birth_year, birth.get("month") or 1, birth.get("day") or 1)
    else:
        birthday = None

    names = person.get("name") or {}
    image = person.get("image") or {}
    details = {
        "al_id": person.get("id"),
        "mal_id": None,
        "name": names.get("full") or "",
        "given_name": None,  # AniList does not break names into given/family
        "family_name": None,
        "birthday": birthday,
        "image_url": image.get("large"),
        "about": person.get("description"),
        "favorites": person.get("favourites"),
        "website_url": None,  # not exposed by AniList public API
        "url": person.get("siteUrl") or f"https://anilist.co/staff/{person_id}",
    }
    self._person_info_cache[person_id] = details
    return details
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: cache management
|
||||
# ------------------------------------------------------------------
|
||||
def clear_cache(self) -> None:
    """Empty every internal cache in place; the singleton itself is kept."""
    caches = (
        self._id_cache,
        self._stats_cache,
        self._char_names_cache,
        self._char_detailed_cache,
        self._staff_detailed_cache,
        self._char_info_cache,
        self._person_info_cache,
    )
    for cache in caches:
        cache.clear()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal: rate-limited GraphQL POST
|
||||
# ------------------------------------------------------------------
|
||||
def _gql(self, query: str, variables: "dict | None" = None) -> dict:
    """
    POST a GraphQL query to AniList with a client-side rate guard.

    At least 0.7 s is kept between consecutive requests.  On HTTP 429
    the call sleeps for the number of seconds given in the Retry-After
    header and retries once.  When the header is missing or is not a
    plain integer (e.g. an HTTP-date), 60 s is used.

    Args:
        query: GraphQL query string.
        variables: Optional GraphQL variables; omitted from the payload
            when empty.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.RequestException: on transport errors or a non-2xx
            status (via `raise_for_status`).
    """
    elapsed = time.monotonic() - self._last_request_at
    if elapsed < 0.7:
        time.sleep(0.7 - elapsed)

    payload: dict = {"query": query}
    if variables:
        payload["variables"] = variables

    resp = self._session.post(
        _ANILIST_GQL, json=payload, timeout=self.request_timeout)
    self._last_request_at = time.monotonic()

    if resp.status_code == 429:
        # Callers only catch requests.RequestException, so a ValueError
        # from a malformed header must not escape from here.
        try:
            retry_after = int(resp.headers.get("Retry-After", 60))
        except (TypeError, ValueError):
            retry_after = 60
        time.sleep(max(retry_after, 0))
        resp = self._session.post(
            _ANILIST_GQL, json=payload, timeout=self.request_timeout)
        self._last_request_at = time.monotonic()

    resp.raise_for_status()
    return resp.json()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Module helpers
|
||||
# --------------------------------------------------------------------------
|
||||
def _score_title(query: str, entry: dict) -> float:
|
||||
"""Returns the best title-similarity score for an AniList media entry."""
|
||||
title_obj = entry.get("title") or {}
|
||||
candidates = [
|
||||
title_obj.get("romaji") or "",
|
||||
title_obj.get("english") or "",
|
||||
title_obj.get("native") or "",
|
||||
]
|
||||
best = 0.0
|
||||
q = query.lower()
|
||||
for t in candidates:
|
||||
if t:
|
||||
ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
|
||||
best = max(best, ratio)
|
||||
return best
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Usage example
|
||||
# --------------------------------------------------------------------------
|
||||
if __name__ == "__main__":
    # Manual smoke test: performs live requests against the AniList API.
    resolver = AniListResolver()
    second = AniListResolver()
    assert resolver is second, "AniListResolver must be a Singleton"

    al_id = resolver.find_id("Yofukashi no Uta")
    print("AniList ID :", al_id)

    stats = resolver.get_stats(al_id)
    if stats:
        for label, field in (("Score :", "score"),
                             ("Rank :", "rank"),
                             ("Members :", "members")):
            print(label, stats[field])

    chars = resolver.get_characters_detailed(al_id)
    print("Characters (first 3):", [c["name"] for c in chars[:3]])

    staff = resolver.get_staff_detailed(al_id)
    print("Staff :", [s["name"] for s in staff])
|
||||
@@ -0,0 +1,229 @@
|
||||
"""
|
||||
kavita_client.py
|
||||
================
|
||||
|
||||
Thin HTTP client for the Kavita server REST API (v0.9.x).
|
||||
|
||||
Authenticates via the ``x-api-key`` header. All series / library /
|
||||
collection / metadata reads and writes used by the light-novel updater
|
||||
go through this single client so request shaping (paging, content types,
|
||||
timeouts, retries) is consistent.
|
||||
|
||||
The class is intentionally state-light: no caching layer, just one
|
||||
``requests.Session``. Higher-level diff / update logic lives in
|
||||
KavitaSeriesUpdater, KavitaPersonUpdater and RelationshipSync.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from typing import Iterable
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class KavitaClient:
|
||||
def __init__(self, base_url: str, api_key: str, *,
             request_timeout: int = 30):
    """
    Create the client.

    Args:
        base_url: Kavita base URL; trailing slashes are removed.
        api_key: Sent as the ``x-api-key`` header on every API request.
        request_timeout: Timeout passed to every request made through
            this client.
    """
    self._base = base_url.rstrip("/")
    self._timeout = request_timeout

    api_headers = {
        "x-api-key": api_key,
        "Accept": "application/json",
        "Content-Type": "application/json",
    }
    image_headers = {
        "User-Agent": "KavitaLightNovelUpdater/1.0",
    }

    # JSON session used for all Kavita API calls.
    self._session = requests.Session()
    self._session.headers.update(api_headers)

    # Separate session for fetching external images (covers).  It must
    # not carry the API headers: some CDNs refuse to return image bytes
    # when the client sends Accept: application/json.
    self._image_session = requests.Session()
    self._image_session.headers.update(image_headers)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Libraries
|
||||
# ------------------------------------------------------------------
|
||||
def list_libraries(self) -> list[dict]:
    """
    Fetch the libraries from GET /api/Library/libraries.

    Returns the decoded JSON list, or an empty list when the body is
    empty/null.  Raises on a non-success HTTP status.
    """
    url = f"{self._base}/api/Library/libraries"
    response = self._session.get(url, timeout=self._timeout)
    response.raise_for_status()
    libraries = response.json()
    return libraries or []
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Series
|
||||
# ------------------------------------------------------------------
|
||||
def list_series_in_library(self, library_id: int, *,
                           page_size: int = 200) -> list[dict]:
    """
    Collect every series entry of one library.

    Issues POST /api/Series/all-v2 with a filter that scopes by library
    id, requesting page 1, 2, ... until a page comes back empty or holds
    fewer than `page_size` entries.

    Args:
        library_id: Library to list (sent as a string filter value).
        page_size: Entries requested per page.

    Returns:
        All entries of all fetched pages, in response order.
    """
    url = f"{self._base}/api/Series/all-v2"
    # The filter does not change between pages, so it is built once.
    library_filter = {
        "statements": [
            {
                "comparison": 0,  # Equal
                "field": 19,      # Libraries field id (Kavita v0.9.x)
                "value": str(library_id),
            }
        ],
        "combination": 1,  # And
        "sortOptions": {"isAscending": True, "sortField": 1},
        "limitTo": 0,
    }

    collected: list[dict] = []
    page_number = 0
    while True:
        page_number += 1
        response = self._session.post(
            url,
            params={"PageNumber": page_number, "PageSize": page_size},
            json=library_filter,
            timeout=self._timeout,
        )
        response.raise_for_status()
        batch = response.json() or []
        collected.extend(batch)
        # An empty or short page means there is nothing further to fetch.
        if len(batch) < page_size or not batch:
            return collected
|
||||
|
||||
def get_series(self, series_id: int) -> dict:
    """
    Fetch one series from GET /api/Series/{series_id}.

    Returns the decoded JSON object, or an empty dict when the body is
    empty/null.  Raises on a non-success HTTP status.
    """
    url = f"{self._base}/api/Series/{series_id}"
    response = self._session.get(url, timeout=self._timeout)
    response.raise_for_status()
    series = response.json()
    return series or {}
|
||||
|
||||
def update_series(self, series: dict) -> None:
    """
    Send series-level data (name, sortName, malId, …) to
    POST /api/Series/update.

    `series` is posted as the JSON body unchanged.  Raises on a
    non-success HTTP status.
    """
    url = f"{self._base}/api/Series/update"
    response = self._session.post(url, json=series, timeout=self._timeout)
    response.raise_for_status()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Series metadata
|
||||
# ------------------------------------------------------------------
|
||||
def get_series_metadata(self, series_id: int) -> dict:
    """
    Fetch a series' metadata from GET /api/Series/metadata?seriesId=….

    Returns the decoded JSON object, or an empty dict when the body is
    empty/null.  Raises on a non-success HTTP status.
    """
    url = f"{self._base}/api/Series/metadata"
    query = {"seriesId": series_id}
    response = self._session.get(url, params=query, timeout=self._timeout)
    response.raise_for_status()
    metadata = response.json()
    return metadata or {}
|
||||
|
||||
def update_series_metadata(self, metadata: dict) -> None:
    """
    Write series metadata back via POST /api/Series/metadata.

    The payload is sent wrapped as {"seriesMetadata": metadata}, which is
    the envelope Kavita expects.  Raises on a non-success HTTP status.
    """
    url = f"{self._base}/api/Series/metadata"
    envelope = {"seriesMetadata": metadata}
    response = self._session.post(url, json=envelope, timeout=self._timeout)
    response.raise_for_status()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Related series
|
||||
# ------------------------------------------------------------------
|
||||
def get_related(self, series_id: int) -> dict:
    """
    Fetch the related series of a series, grouped by relation type, from
    GET /api/Series/all-related?seriesId=….

    Returns the decoded JSON object, or an empty dict when the body is
    empty/null.  Raises on a non-success HTTP status.
    """
    url = f"{self._base}/api/Series/all-related"
    query = {"seriesId": series_id}
    response = self._session.get(url, params=query, timeout=self._timeout)
    response.raise_for_status()
    related = response.json()
    return related or {}
|
||||
|
||||
def update_related(self, payload: dict) -> None:
    """
    Replaces the related-series relationships of one series.

    `payload` is posted unchanged to ``POST /api/Series/update-related``
    and follows the UpdateRelatedSeriesDto shape:
        {seriesId, prequels, sequels, sideStories, spinOffs,
         adaptations, characters, contains, others,
         alternativeSettings, alternativeVersions, doujinshis,
         editions, annuals}
    Every relation key holds a list of target series ids (ints).
    HTTP error statuses raise via ``raise_for_status()``.
    """
    endpoint = f"{self._base}/api/Series/update-related"
    response = self._session.post(endpoint, json=payload,
                                  timeout=self._timeout)
    response.raise_for_status()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Collections
|
||||
# ------------------------------------------------------------------
|
||||
def list_collections(self) -> list[dict]:
    """
    Lists the collection tags visible to the authenticated user.

    Sends ``GET /api/Collection`` with ``ownedOnly=false`` and
    ``sortByLastModified=false``. A falsy JSON body is returned as an
    empty list; HTTP error statuses raise.
    """
    query = {"ownedOnly": "false", "sortByLastModified": "false"}
    response = self._session.get(f"{self._base}/api/Collection",
                                 params=query, timeout=self._timeout)
    response.raise_for_status()
    collections = response.json()
    return collections if collections else []
|
||||
|
||||
def add_series_to_collection(self, *, collection_id: int,
                             title: str,
                             series_ids: Iterable[int]) -> dict:
    """
    Attaches series to a collection, creating the collection if needed.

    collection_id : 0 creates a new collection named `title`; otherwise
                    the id of an existing collection
    title         : always sent (the API requires it); acts as a no-op
                    when `collection_id` matches an existing collection
    series_ids    : ids to attach; each one is coerced with ``int()``

    Posts to ``/api/Collection/update-for-series``. Returns the decoded
    JSON answer, or an empty dict when the body is empty or not JSON.
    HTTP error statuses raise via ``raise_for_status()``.
    """
    request_body = {
        "collectionTagId": int(collection_id),
        "collectionTagTitle": title,
        "seriesIds": list(map(int, series_ids)),
    }
    response = self._session.post(
        f"{self._base}/api/Collection/update-for-series",
        json=request_body, timeout=self._timeout)
    response.raise_for_status()
    try:
        decoded = response.json()
    except ValueError:
        # Body was not JSON; treated the same as an empty answer.
        return {}
    return decoded or {}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Series cover upload
|
||||
# ------------------------------------------------------------------
|
||||
def upload_series_cover(self, series_id: int, image_url: str, *,
                        lock: bool = False) -> None:
    """
    Fetches `image_url` and installs the image as the series cover.

    The bytes are downloaded through the separate image session,
    base64-encoded, and posted to ``/api/Upload/series`` as a raw
    base64 blob (no ``data:`` prefix) in the ``url`` field — the same
    trick KavitaPersonUpdater uses for person covers. `lock` is passed
    through as ``lockCover``. Either HTTP call raises on an error status.
    """
    download = self._image_session.get(image_url, timeout=self._timeout)
    download.raise_for_status()

    encoded = base64.b64encode(download.content).decode()
    body = {"id": series_id, "url": encoded, "lockCover": lock}

    upload = self._session.post(f"{self._base}/api/Upload/series",
                                json=body, timeout=self._timeout)
    upload.raise_for_status()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Generic GET helper (used by callers that need a response object)
|
||||
# ------------------------------------------------------------------
|
||||
def get(self, path: str, params: "dict | None" = None) -> requests.Response:
    """
    Generic GET against the Kavita base URL.

    `path` is appended verbatim to the base URL. The response object is
    returned as-is: no status check and no JSON decoding happen here.
    """
    target = f"{self._base}{path}"
    return self._session.get(target, params=params, timeout=self._timeout)
|
||||
|
||||
def post(self, path: str, *,
         json: "dict | list | None" = None,
         params: "dict | None" = None) -> requests.Response:
    """
    Generic POST against the Kavita base URL.

    `path` is appended verbatim to the base URL; `json` and `params`
    are forwarded to the session. The response object is returned
    as-is: no status check and no JSON decoding happen here.
    """
    target = f"{self._base}{path}"
    return self._session.post(target, json=json, params=params,
                              timeout=self._timeout)
|
||||
@@ -0,0 +1,545 @@
|
||||
"""
|
||||
kavita_person_updater.py
|
||||
========================
|
||||
|
||||
Synchronises Kavita person / character records with MyAnimeList data.
|
||||
|
||||
For every character and staff member that MAL knows about for a given manga
|
||||
the updater:
|
||||
1. Searches Kavita for a matching Person record (by name similarity /
|
||||
alias match, configurable threshold).
|
||||
2. Sets the MAL ID on the Kavita person if it is not yet linked.
|
||||
3. Uploads the MAL profile image when the cover is not locked and has
|
||||
not been set in a previous sync run.
|
||||
4. Populates the description field when Kavita has none and MAL provides
|
||||
an 'about' text (requires an extra Jikan request per character; only
|
||||
performed when update_descriptions=True).
|
||||
|
||||
Kavita API version
|
||||
------------------
|
||||
Tested against Kavita 0.9.0.2.
|
||||
|
||||
Authentication
|
||||
--------------
|
||||
Uses the `x-api-key` header (API key from Kavita user settings).
|
||||
No JWT login is required.
|
||||
|
||||
Relevant endpoints (Kavita 0.9.0.2)
|
||||
-------------------------------------
|
||||
GET /api/Person/search find persons by name / alias
|
||||
POST /api/Person/update write metadata (malId, description, …)
|
||||
POST /api/Upload/person       set cover image (raw base64 blob, no data: prefix)
|
||||
POST /api/Upload/upload-by-url download an external URL to temp storage
|
||||
(used as an alternative upload path)
|
||||
|
||||
Cover upload flow
|
||||
-----------------
|
||||
The image is downloaded locally, base64-encoded, and sent as a raw base64
string (no ``data:`` prefix) in the ``url`` field of POST /api/Upload/person.
This is more reliable than the upload-by-url → upload/person two-step because
it avoids Kavita's temp file handling (which had known issues in 0.8.x – 0.9.x,
GitHub #3900).
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
requests -> pip install requests
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
import datetime
import difflib
import html
import os
import re

import requests

from MALResolver import MALResolver
from AniListResolver import AniListResolver
|
||||
|
||||
|
||||
class KavitaPersonUpdater:
|
||||
"""
|
||||
Syncs Kavita Person records with MyAnimeList data.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
kavita_base_url : Base URL of the Kavita server, e.g. "http://192.168.2.2:5000"
|
||||
api_key : Kavita API key (Settings → User → API key)
|
||||
mal_resolver : Shared MALResolver singleton (created automatically if omitted)
|
||||
request_timeout : HTTP timeout in seconds for both Kavita and image requests
|
||||
min_name_score : Minimum difflib similarity ratio (0–1) required to accept a
|
||||
Kavita person as a match for a MAL name. Default 0.80.
|
||||
"""
|
||||
|
||||
def __init__(self, kavita_base_url: str, api_key: str, *,
             mal_resolver: "MALResolver | None" = None,
             al_resolver: "AniListResolver | None" = None,
             request_timeout: int = 30,
             min_name_score: float = 0.80):
    """
    Stores the configuration and prepares the two HTTP sessions.

    kavita_base_url : Kavita base URL; trailing slashes are stripped
    api_key         : sent as the `x-api-key` header on Kavita calls
    mal_resolver    : MALResolver to use; a new one is created if falsy
    al_resolver     : AniListResolver to use; a new one is created if falsy
    request_timeout : timeout in seconds stored for later HTTP calls
    min_name_score  : minimum similarity ratio for accepting a name match
    """
    self._base = kavita_base_url.rstrip("/")
    self._timeout = request_timeout
    self._min_score = min_name_score
    self._mal = mal_resolver if mal_resolver else MALResolver()
    self._al = al_resolver if al_resolver else AniListResolver()

    # Kavita API session: authenticated, JSON in and out.
    kavita_headers = {
        "x-api-key": api_key,
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    kavita_session = requests.Session()
    kavita_session.headers.update(kavita_headers)
    self._session = kavita_session

    # Separate plain session for external image downloads (MAL CDN etc.).
    # It must NOT carry the Kavita headers: `Accept: application/json`
    # would prevent the MAL CDN from returning the image bytes.
    image_session = requests.Session()
    image_session.headers.update({
        "User-Agent": "KavitaPersonUpdater/1.0",
    })
    self._image_session = image_session

    # Cache: normalised name -> list of PersonDto dicts (best match first)
    self._person_search_cache: dict[str, list[dict]] = {}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: combined update
|
||||
# ------------------------------------------------------------------
|
||||
def update_for_manga(self, mal_manga_id: "int | None", *,
|
||||
al_manga_id: "int | None" = None,
|
||||
update_covers: bool = True,
|
||||
update_descriptions: bool = True) -> dict:
|
||||
"""
|
||||
Runs a full update pass for both characters and staff of the manga.
|
||||
MAL is tried first; AniList is used as fallback when MAL returns nothing.
|
||||
|
||||
Returns
|
||||
-------
|
||||
{
|
||||
"characters": {"updated": n, "skipped": n, "not_found": n},
|
||||
"staff": {"updated": n, "skipped": n, "not_found": n},
|
||||
}
|
||||
"""
|
||||
return {
|
||||
"characters": self.update_characters(
|
||||
mal_manga_id, al_manga_id=al_manga_id,
|
||||
update_covers=update_covers,
|
||||
update_descriptions=update_descriptions),
|
||||
"staff": self.update_staff(
|
||||
mal_manga_id, al_manga_id=al_manga_id,
|
||||
update_covers=update_covers,
|
||||
update_descriptions=update_descriptions),
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: character update
|
||||
# ------------------------------------------------------------------
|
||||
def update_characters(self, mal_manga_id: "int | None", *,
|
||||
al_manga_id: "int | None" = None,
|
||||
update_covers: bool = True,
|
||||
update_descriptions: bool = True) -> dict:
|
||||
"""
|
||||
Updates Kavita persons that match MAL/AniList characters for the manga.
|
||||
MAL is tried first; AniList is the fallback when MAL returns nothing.
|
||||
|
||||
Returns {"updated": n, "skipped": n, "not_found": n}.
|
||||
"""
|
||||
entries = self._mal.get_characters_detailed(mal_manga_id) if mal_manga_id else []
|
||||
resolver = self._mal
|
||||
if not entries and al_manga_id:
|
||||
entries = self._al.get_characters_detailed(al_manga_id)
|
||||
resolver = self._al
|
||||
return self._sync_entries(entries, "character", resolver,
|
||||
update_covers=update_covers,
|
||||
update_descriptions=update_descriptions)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: staff update
|
||||
# ------------------------------------------------------------------
|
||||
def update_staff(self, mal_manga_id: "int | None", *,
|
||||
al_manga_id: "int | None" = None,
|
||||
update_covers: bool = True,
|
||||
update_descriptions: bool = True) -> dict:
|
||||
"""
|
||||
Updates Kavita persons that match MAL/AniList staff for the manga.
|
||||
MAL is tried first; AniList is the fallback when MAL returns nothing.
|
||||
|
||||
Returns {"updated": n, "skipped": n, "not_found": n}.
|
||||
"""
|
||||
entries = self._mal.get_staff_detailed(mal_manga_id) if mal_manga_id else []
|
||||
resolver = self._mal
|
||||
if not entries and al_manga_id:
|
||||
entries = self._al.get_staff_detailed(al_manga_id)
|
||||
resolver = self._al
|
||||
return self._sync_entries(entries, "staff", resolver,
|
||||
update_covers=update_covers,
|
||||
update_descriptions=update_descriptions)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: cache management
|
||||
# ------------------------------------------------------------------
|
||||
def clear_cache(self) -> None:
|
||||
"""Clears the Kavita person search cache."""
|
||||
self._person_search_cache.clear()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal: main sync loop
|
||||
# ------------------------------------------------------------------
|
||||
def _sync_entries(self, entries: list[dict], kind: str, resolver, *,
|
||||
update_covers: bool,
|
||||
update_descriptions: bool) -> dict:
|
||||
result: dict = {"updated": 0, "skipped": 0, "not_found": 0,
|
||||
"errors": []}
|
||||
for entry in entries:
|
||||
name = (entry.get("name") or "").strip()
|
||||
raw_name = (entry.get("raw_name") or "").strip()
|
||||
if not name and not raw_name:
|
||||
continue
|
||||
|
||||
# Search by the cleaned (XML-safe) name first; if Kavita stores
|
||||
# the legacy comma form, retry with the raw MAL name.
|
||||
matches = self._find_kavita_person(name) if name else []
|
||||
if not matches and raw_name and raw_name != name:
|
||||
matches = self._find_kavita_person(raw_name)
|
||||
|
||||
if not matches:
|
||||
result["not_found"] += 1
|
||||
continue
|
||||
|
||||
changed = self._apply_mal_data(
|
||||
matches[0], entry, kind, resolver,
|
||||
update_cover=update_covers,
|
||||
update_desc=update_descriptions,
|
||||
errors=result["errors"])
|
||||
result["updated" if changed else "skipped"] += 1
|
||||
|
||||
return result
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal: Kavita person search
|
||||
# ------------------------------------------------------------------
|
||||
def _find_kavita_person(self, name: str) -> list[dict]:
|
||||
"""
|
||||
Searches Kavita for persons matching `name`.
|
||||
|
||||
Checks both the main name and any stored aliases.
|
||||
Returns persons sorted by similarity, filtered by min_name_score.
|
||||
Results are cached per (normalised) query name.
|
||||
"""
|
||||
key = name.lower().strip()
|
||||
if key in self._person_search_cache:
|
||||
return self._person_search_cache[key]
|
||||
|
||||
try:
|
||||
resp = self._session.get(
|
||||
f"{self._base}/api/Person/search",
|
||||
params={"queryString": name},
|
||||
timeout=self._timeout,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
persons: list[dict] = resp.json() or []
|
||||
except requests.RequestException:
|
||||
self._person_search_cache[key] = []
|
||||
return []
|
||||
|
||||
def score(p: dict) -> float:
|
||||
candidates = [p.get("name") or ""]
|
||||
candidates += [a for a in (p.get("aliases") or []) if a]
|
||||
best = 0.0
|
||||
q = key
|
||||
for c in candidates:
|
||||
r = difflib.SequenceMatcher(None, q, c.lower()).ratio()
|
||||
best = max(best, r)
|
||||
return best
|
||||
|
||||
ranked = sorted(persons, key=score, reverse=True)
|
||||
filtered = [p for p in ranked if score(p) >= self._min_score]
|
||||
self._person_search_cache[key] = filtered
|
||||
return filtered
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal: apply MAL data to a single Kavita person
|
||||
# ------------------------------------------------------------------
|
||||
def _apply_mal_data(self, person: dict, mal_entry: dict, kind: str,
|
||||
resolver, *,
|
||||
update_cover: bool, update_desc: bool,
|
||||
errors: "list | None" = None) -> bool:
|
||||
"""
|
||||
Applies tracker data (MAL or AniList) to one Kavita person record.
|
||||
|
||||
Fields updated
|
||||
--------------
|
||||
- malId : set when the entry carries a MAL ID and it differs
|
||||
- aniListId : set when the entry carries an AniList ID and it differs
|
||||
- description: set when empty and the tracker provides a description
|
||||
- cover image: uploaded when not locked and no prior sync cover exists
|
||||
|
||||
Returns True if any change was made. Failures are appended to the
|
||||
`errors` list (if provided) instead of being silently swallowed.
|
||||
"""
|
||||
person_id: "int | None" = person.get("id")
|
||||
if not person_id:
|
||||
return False
|
||||
|
||||
person_name = person.get("name") or ""
|
||||
|
||||
# Tracker IDs — a MAL entry has mal_id set; an AniList entry has al_id.
|
||||
mal_id: "int | None" = mal_entry.get("mal_id")
|
||||
al_id: "int | None" = mal_entry.get("al_id")
|
||||
entity_id = mal_id or al_id # used for resolver detail calls
|
||||
|
||||
current_mal_id: int = person.get("malId") or 0
|
||||
current_al_id: int = person.get("aniListId") or 0
|
||||
needs_mal_id = bool(mal_id and current_mal_id != mal_id)
|
||||
needs_al_id = bool(al_id and current_al_id != al_id)
|
||||
|
||||
# ------ Lazy description fetch -----------------------------------
|
||||
description: "str | None" = None
|
||||
if update_desc and not (person.get("description") or "").strip():
|
||||
if entity_id:
|
||||
if kind == "character":
|
||||
details = resolver.get_character_details(entity_id)
|
||||
if details:
|
||||
description = _build_character_description(details) or None
|
||||
else:
|
||||
details = resolver.get_person_details(entity_id)
|
||||
if details:
|
||||
description = _build_person_description(details) or None
|
||||
|
||||
needs_desc = bool(description)
|
||||
|
||||
# ------ Metadata update ------------------------------------------
|
||||
changed = False
|
||||
if needs_mal_id or needs_al_id or needs_desc:
|
||||
payload: dict = {
|
||||
"id": person_id,
|
||||
"name": person_name,
|
||||
# MUST stay a boolean — the cover image itself is uploaded
|
||||
# separately via POST /api/Upload/person (below). Putting a
|
||||
# URL here makes Kavita reject the whole payload with HTTP 400.
|
||||
"coverImageLocked": bool(person.get("coverImageLocked", False)),
|
||||
"aliases": person.get("aliases") or [],
|
||||
"description": description or person.get("description"),
|
||||
"malId": mal_id if needs_mal_id else (current_mal_id or None),
|
||||
"aniListId": al_id if needs_al_id else (current_al_id or None),
|
||||
}
|
||||
try:
|
||||
resp = self._session.post(
|
||||
f"{self._base}/api/Person/update",
|
||||
json=payload,
|
||||
timeout=self._timeout,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
changed = True
|
||||
except requests.RequestException as e:
|
||||
if errors is not None:
|
||||
errors.append(
|
||||
f"Person/update failed for #{person_id} "
|
||||
f"'{person_name}': {e}")
|
||||
|
||||
# ------ Cover image upload ----------------------------------------
|
||||
# Upload whenever:
|
||||
# - caller requested cover updates
|
||||
# - cover is NOT locked (user did not manually pin it)
|
||||
# - we have not already uploaded this exact tracker entity's image
|
||||
# (i.e. the tracked ID differs OR there is no cover yet).
|
||||
if update_cover and not person.get("coverImageLocked"):
|
||||
image_url = mal_entry.get("image_url")
|
||||
already_uploaded = (
|
||||
entity_id is not None
|
||||
and (current_mal_id == mal_id or current_al_id == al_id)
|
||||
and bool(person.get("coverImage"))
|
||||
)
|
||||
if image_url and not already_uploaded:
|
||||
if self._upload_cover(person_id, image_url,
|
||||
person_name=person_name,
|
||||
errors=errors):
|
||||
changed = True
|
||||
|
||||
return changed
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal: cover upload
|
||||
# ------------------------------------------------------------------
|
||||
def _upload_cover(self, person_id: int, image_url: str,
|
||||
lock: bool = False, *,
|
||||
person_name: str = "",
|
||||
errors: "list | None" = None) -> bool:
|
||||
"""
|
||||
Uploads a cover image to a Kavita person.
|
||||
|
||||
The image is downloaded with the plain (header-less) image session
|
||||
and posted to `POST /api/Upload/person` as a raw base64 string in
|
||||
the `url` field.
|
||||
|
||||
Notes on protocol quirks discovered against Kavita 0.9.0.2:
|
||||
- The two-step `upload-by-url` -> `Upload/person` flow returns
|
||||
"Unable to save cover image to Person" (HTTP 400).
|
||||
- A `data:image/jpeg;base64,...` data URI is rejected with the
|
||||
same error.
|
||||
- Only the raw base64 blob (no prefix) is accepted.
|
||||
"""
|
||||
label = (f"#{person_id} '{person_name}'"
|
||||
if person_name else f"#{person_id}")
|
||||
|
||||
# 1) Download the image with a clean session — the Kavita session's
|
||||
# `Accept: application/json` header makes some CDNs refuse to
|
||||
# return image bytes.
|
||||
try:
|
||||
img_resp = self._image_session.get(image_url,
|
||||
timeout=self._timeout)
|
||||
img_resp.raise_for_status()
|
||||
except requests.RequestException as e:
|
||||
if errors is not None:
|
||||
errors.append(
|
||||
f"image download failed for {label} ({image_url}): {e}")
|
||||
return False
|
||||
|
||||
b64 = base64.b64encode(img_resp.content).decode()
|
||||
|
||||
# 2) POST the raw base64 blob.
|
||||
try:
|
||||
resp = self._session.post(
|
||||
f"{self._base}/api/Upload/person",
|
||||
json={"id": person_id, "url": b64, "lockCover": lock},
|
||||
timeout=self._timeout,
|
||||
)
|
||||
if resp.status_code >= 400:
|
||||
if errors is not None:
|
||||
errors.append(
|
||||
f"Upload/person HTTP {resp.status_code} for {label}: "
|
||||
f"{_short_body(resp)}")
|
||||
return False
|
||||
return True
|
||||
except requests.RequestException as e:
|
||||
if errors is not None:
|
||||
errors.append(
|
||||
f"Upload/person failed for {label}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Module helpers: description builders
|
||||
# --------------------------------------------------------------------------
|
||||
def _plain_to_html(text: str) -> str:
|
||||
"""Converts plain text with paragraph breaks to compact HTML (no raw \\n)."""
|
||||
if not text:
|
||||
return ""
|
||||
parts: list[str] = []
|
||||
for para in re.split(r"\n{2,}", text.strip()):
|
||||
para = para.strip()
|
||||
if para:
|
||||
parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
|
||||
return "".join(parts)
|
||||
|
||||
|
||||
def _format_birthday(birthday: str) -> str:
|
||||
"""Converts an ISO 8601 birthday string to "D Month YYYY"."""
|
||||
if not birthday:
|
||||
return ""
|
||||
try:
|
||||
dt = datetime.date.fromisoformat(birthday.split("T")[0])
|
||||
return f"{dt.day} {dt.strftime('%B %Y')}"
|
||||
except (ValueError, AttributeError):
|
||||
return ""
|
||||
|
||||
|
||||
def _build_character_description(details: dict) -> str:
|
||||
"""
|
||||
Builds a Kavita-safe HTML description for a MAL character.
|
||||
|
||||
Top line: "Favorites: N" as a link to the character's MAL page.
|
||||
Remainder: the character's `about` text converted to HTML paragraphs.
|
||||
"""
|
||||
parts: list[str] = []
|
||||
url = details.get("url") or ""
|
||||
favorites = details.get("favorites")
|
||||
if url and favorites is not None:
|
||||
parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>')
|
||||
about = (details.get("about") or "").strip()
|
||||
if about:
|
||||
parts.append(_plain_to_html(about))
|
||||
return "<br>".join(parts)
|
||||
|
||||
|
||||
def _build_person_description(details: dict) -> str:
|
||||
"""
|
||||
Builds a Kavita-safe HTML description for a MAL person (mangaka / staff).
|
||||
|
||||
Renders a summary table (given name, family name, birthday, website,
|
||||
member favorites) followed by the `about` biography as HTML paragraphs.
|
||||
"""
|
||||
_TD = 'style="padding-right:1.5em"'
|
||||
rows: list[str] = []
|
||||
|
||||
given = (details.get("given_name") or "").strip()
|
||||
family = (details.get("family_name") or "").strip()
|
||||
birthday = details.get("birthday") or ""
|
||||
favorites = details.get("favorites")
|
||||
website = (details.get("website_url") or "").strip()
|
||||
url = (details.get("url") or "").strip()
|
||||
|
||||
if given:
|
||||
rows.append(f"<tr><td {_TD}>Given name</td><td>{given}</td></tr>")
|
||||
if family:
|
||||
rows.append(f"<tr><td {_TD}>Family name</td><td>{family}</td></tr>")
|
||||
bday_str = _format_birthday(birthday)
|
||||
if bday_str:
|
||||
rows.append(f"<tr><td {_TD}>Birthday</td><td>{bday_str}</td></tr>")
|
||||
if website:
|
||||
rows.append(
|
||||
f'<tr><td {_TD}>Website</td>'
|
||||
f'<td><a href="{website}">{website}</a></td></tr>'
|
||||
)
|
||||
if favorites is not None:
|
||||
fav_cell = (f'<a href="{url}" target="_blank">{favorites:,}</a>' if url
|
||||
else f"{favorites:,}")
|
||||
rows.append(
|
||||
f"<tr><td {_TD}>Member Favorites</td><td>{fav_cell}</td></tr>")
|
||||
|
||||
parts: list[str] = []
|
||||
if rows:
|
||||
parts.append(f'<table>{"".join(rows)}</table>')
|
||||
about = (details.get("about") or "").strip()
|
||||
if about:
|
||||
parts.append(_plain_to_html(about))
|
||||
return "<br>".join(parts)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Module helper
|
||||
# --------------------------------------------------------------------------
|
||||
def _short_body(resp: requests.Response, limit: int = 400) -> str:
|
||||
"""Returns the response body trimmed to `limit` chars for error logging."""
|
||||
try:
|
||||
text = resp.text or ""
|
||||
except Exception:
|
||||
return "<unreadable response body>"
|
||||
text = text.strip().replace("\n", " ").replace("\r", " ")
|
||||
if len(text) > limit:
|
||||
text = text[:limit] + "…"
|
||||
return text or "<empty body>"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Usage example
|
||||
# --------------------------------------------------------------------------
|
||||
if __name__ == "__main__":
    # Connection settings come from the environment (same variable names
    # as the project's .env example) so that no credential lives in source.
    # NOTE(review): an API key used to be hard-coded here; if it was a real
    # key it should be revoked in Kavita, since it is in the repo history.
    KAVITA_URL = os.environ.get("KAVITA_URL", "http://192.168.2.2:5000")
    KAVITA_KEY = os.environ.get("KAVITA_API_KEY", "")
    if not KAVITA_KEY:
        raise SystemExit(
            "Set KAVITA_API_KEY (and optionally KAVITA_URL) "
            "to run this example.")

    updater = KavitaPersonUpdater(KAVITA_URL, KAVITA_KEY)

    mal = MALResolver()
    mal_id = mal.find_mal_id("よふかしのうた")
    print("MAL ID:", mal_id)

    if mal_id:
        result = updater.update_for_manga(mal_id)
        print("Characters:", {k: v for k, v in result["characters"].items()
                              if k != "errors"})
        print("Staff     :", {k: v for k, v in result["staff"].items()
                              if k != "errors"})
        # Surface any non-fatal upload / API errors for debugging
        for section in ("characters", "staff"):
            for err in result[section].get("errors", []):
                print(f"[{section}] {err}")
|
||||
@@ -0,0 +1,313 @@
|
||||
"""
|
||||
kavita_series_updater.py
|
||||
========================
|
||||
|
||||
Diff-based update of a single Kavita series record from a
|
||||
LightNovelMetadataBuilder output dict.
|
||||
|
||||
Behaviour
|
||||
---------
|
||||
* Locked fields in Kavita (``*Locked`` flags) are never touched, no matter
|
||||
what MangaBaka returns.
|
||||
* Scalar fields (summary, releaseYear, ageRating, publicationStatus,
|
||||
language, score, sortName, localizedName) are overwritten when the
|
||||
newly-built value differs from the value currently stored in Kavita.
|
||||
* List fields (genres, tags, characters, writers, coverArtists,
|
||||
publishers, imprints) are diff-merged: a name appearing in the new
|
||||
set but not in the current one is added (id=0 so Kavita creates the
|
||||
record); a name that is in Kavita but no longer in the new set is
|
||||
dropped. Comparison is case-insensitive on the ``name`` field.
|
||||
* Web links are stored as a comma-separated string in Kavita; this
|
||||
updater treats them as a set and re-joins on write.
|
||||
* Series-level cover image (URL different from last time) is re-uploaded
|
||||
whenever ``coverImageLocked`` is False. The MangaBaka cover URL is
|
||||
stamped onto matches.json as ``imageUrl`` so a subsequent run can skip
|
||||
the upload when nothing changed.
|
||||
|
||||
Returns a small diff report per series
({field: 'changed'/'unchanged'/'skipped'/'locked'}; the cover entry may also
be 'error: …' when the upload failed) so the WebApp can surface what happened.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterable
|
||||
|
||||
from KavitaClient import KavitaClient
|
||||
|
||||
|
||||
# Kavita "list" fields on SeriesMetadataDto, one tuple per field:
#     (field, lock_flag, item_key)
# `field`     is the list's key on the metadata dict,
# `lock_flag` is the key of the boolean that marks the list as locked,
# `item_key`  is the dict key Kavita uses for the display name on each item:
#             GenreTagDto / TagDto use "title", PersonDto uses "name".
_LIST_FIELDS: list[tuple[str, str, str]] = [
    ("genres", "genresLocked", "title"),
    ("tags", "tagsLocked", "title"),
    ("characters", "characterLocked", "name"),
    ("writers", "writerLocked", "name"),
    ("coverArtists", "coverArtistLocked", "name"),
    ("publishers", "publisherLocked", "name"),
    ("imprints", "imprintLocked", "name"),
]
|
||||
|
||||
|
||||
def _norm(name: str) -> str:
|
||||
return (name or "").strip().lower()
|
||||
|
||||
|
||||
def _merge_list(
|
||||
current: list[dict],
|
||||
new_names: Iterable[str],
|
||||
item_key: str,
|
||||
) -> "tuple[list[dict], bool]":
|
||||
"""
|
||||
Diff-merges a Kavita list field with the canonical name list from
|
||||
MangaBaka. Returns (merged_list, changed_flag).
|
||||
|
||||
`item_key` is the dict key Kavita uses for the display name on each
|
||||
item ("title" for GenreTagDto/TagDto, "name" for PersonDto).
|
||||
|
||||
* Items in `current` whose display value appears in `new_names` are
|
||||
kept verbatim so existing ids and ancillary fields survive.
|
||||
* New names (no matching entry in `current`) are appended with
|
||||
``{"id": 0, <item_key>: <name>}`` — Kavita creates the record on save.
|
||||
* Items in `current` whose display value is *not* in `new_names` are
|
||||
dropped.
|
||||
"""
|
||||
new_set = [n for n in new_names if n and n.strip()]
|
||||
new_index = {_norm(n): n.strip() for n in new_set}
|
||||
|
||||
merged: list[dict] = []
|
||||
kept_keys: set[str] = set()
|
||||
for item in (current or []):
|
||||
key = _norm(item.get(item_key))
|
||||
if key in new_index:
|
||||
merged.append(item)
|
||||
kept_keys.add(key)
|
||||
|
||||
added = False
|
||||
for key, display in new_index.items():
|
||||
if key not in kept_keys:
|
||||
merged.append({"id": 0, item_key: display})
|
||||
added = True
|
||||
|
||||
removed = len(current or []) != len(kept_keys)
|
||||
return merged, added or removed
|
||||
|
||||
|
||||
def _parse_web_links(value) -> list[str]:
|
||||
if not value:
|
||||
return []
|
||||
if isinstance(value, list):
|
||||
return [str(v).strip() for v in value if v]
|
||||
return [p.strip() for p in str(value).split(",") if p.strip()]
|
||||
|
||||
|
||||
def _merge_web_links(current_str, new_links: list[str]) -> "tuple[str, bool]":
|
||||
current = _parse_web_links(current_str)
|
||||
new_norm = [l for l in new_links if l]
|
||||
if not new_norm:
|
||||
return ",".join(current), False
|
||||
|
||||
# Mirror MangaBaka's set: keep order from new_norm, then anything from
|
||||
# current that's still in new_norm (already covered above). Anything
|
||||
# in current that's not in new_norm is dropped.
|
||||
new_set = set(new_norm)
|
||||
merged = list(new_norm)
|
||||
changed = sorted(new_set) != sorted(set(current))
|
||||
return ",".join(merged), changed
|
||||
|
||||
|
||||
class KavitaSeriesUpdater:
    """Applies a freshly built metadata dict to one Kavita series.

    The update is diff-based: fields flagged as locked are reported but
    never written, and the client's update calls are only issued for the
    DTOs that actually changed.
    """

    def __init__(self, client: KavitaClient):
        # Every read and write against Kavita goes through this client.
        self._client = client

    # ------------------------------------------------------------------
    # Public
    # ------------------------------------------------------------------
    def update_series(self, series_id: int, built: dict, *,
                      previous_cover_url: "str | None" = None) -> dict:
        """
        Diffs the current state of `series_id` against `built` and pushes
        whatever differs.

        `previous_cover_url` is the cover URL that was applied last time;
        the cover is only uploaded when `built["coverUrl"]` differs from it
        and the cover is not locked.

        Returns a report dict mapping field name -> status
        ("changed", "unchanged", "locked", "skipped" or "error: ...").
        Exceptions raised by the client's get/update calls propagate; only
        the cover upload is guarded.
        """
        client = self._client
        series = client.get_series(series_id)
        metadata = client.get_series_metadata(series_id)
        report: dict = {}

        if self._diff_metadata(metadata, built, report):
            client.update_series_metadata(metadata)

        if self._diff_series(series, built, report):
            client.update_series(series)

        # Cover: only re-upload when not locked AND the URL actually changed.
        cover_url = built.get("coverUrl")
        if series.get("coverImageLocked"):
            report["coverImage"] = "locked"
        elif not cover_url or cover_url == previous_cover_url:
            report["coverImage"] = "skipped"
        else:
            try:
                client.upload_series_cover(series_id, cover_url)
            except Exception as exc:
                # A failed upload is recorded, not raised, so the rest of
                # the report still reaches the caller.
                report["coverImage"] = f"error: {exc}"
            else:
                report["coverImage"] = "changed"

        return report

    # ------------------------------------------------------------------
    # Internal: SeriesMetadataDto
    # ------------------------------------------------------------------
    def _diff_metadata(self, metadata: dict, built: dict,
                       report: dict) -> bool:
        """
        Merges `built` into `metadata` in place, recording one status per
        field in `report`. Returns True when `metadata` was modified.
        """
        dirty = False

        # ----- Scalars ------------------------------------------------
        # (key, lock flag, cast, zero_is_empty). The key is the same in
        # `built` and in the metadata dict. `zero_is_empty` marks fields
        # where 0 means "no data" (releaseYear, ageRating);
        # publicationStatus 0 is a valid "Ongoing" status and is written.
        scalar_fields = (
            ("summary", "summaryLocked", None, False),
            ("releaseYear", "releaseYearLocked", int, True),
            ("ageRating", "ageRatingLocked", int, True),
            ("publicationStatus", "publicationStatusLocked", int, False),
            ("language", "languageLocked", None, False),
        )
        for key, lock_key, cast, zero_is_empty in scalar_fields:
            value = built.get(key)
            usable = not (value is None or value == "")
            if usable and cast is not None:
                try:
                    value = cast(value)
                except (TypeError, ValueError):
                    usable = False
            if usable and zero_is_empty and value == 0:
                usable = False

            if not usable:
                report[key] = "skipped"
            elif metadata.get(lock_key):
                report[key] = "locked"
            elif metadata.get(key) != value:
                metadata[key] = value
                dirty = True
                report[key] = "changed"
            else:
                report[key] = "unchanged"

        # ----- Web links (single comma-separated string) ---------------
        # SeriesMetadataDto has no dedicated lock for webLinks — always update.
        links_str, links_changed = _merge_web_links(
            metadata.get("webLinks"), built.get("webLinks") or [])
        if links_changed:
            metadata["webLinks"] = links_str
            dirty = True
        report["webLinks"] = "changed" if links_changed else "unchanged"

        # ----- List fields --------------------------------------------
        incoming = {
            "genres": built.get("genres"),
            "tags": built.get("tags"),
            "characters": built.get("characters"),
            "writers": built.get("writers"),
            "coverArtists": built.get("coverArtists"),
            "publishers": built.get("publishers"),
            "imprints": [built["imprint"]] if built.get("imprint") else [],
        }
        for meta_key, lock_key, item_key in _LIST_FIELDS:
            wanted = incoming.get(meta_key) or []
            if metadata.get(lock_key):
                report[meta_key] = "locked"
                continue
            current = metadata.get(meta_key) or []
            if not wanted and not current:
                # Nothing on either side — skip the merge entirely.
                report[meta_key] = "unchanged"
                continue
            merged, list_changed = _merge_list(current, wanted, item_key)
            if list_changed:
                metadata[meta_key] = merged
                dirty = True
            report[meta_key] = "changed" if list_changed else "unchanged"

        return dirty

    # ------------------------------------------------------------------
    # Internal: SeriesDto (sortName, userRating, tracker ids)
    # ------------------------------------------------------------------
    def _diff_series(self, series: dict, built: dict, report: dict) -> bool:
        """
        Merges name fields, tracker ids and the score from `built` into
        `series` in place. Returns True when `series` was modified.
        """
        dirty = False

        # sortName / localizedName: an empty incoming value never
        # overwrites the current one.
        for name_key in ("sortName", "localizedName"):
            if series.get(f"{name_key}Locked"):
                report[name_key] = "locked"
                continue
            wanted = built.get(name_key) or ""
            if wanted and series.get(name_key) != wanted:
                series[name_key] = wanted
                dirty = True
                report[name_key] = "changed"
            else:
                report[name_key] = "unchanged"

        # Tracker ids — Kavita exposes malId, aniListId, mangaBakaId.
        # Only actual changes are reported for these.
        tracker_keys = (
            ("malId", "malId"),
            ("anilistId", "aniListId"),
            ("mangabakaId", "mangaBakaId"),
        )
        for built_key, series_key in tracker_keys:
            candidate = built.get(built_key)
            if candidate in (None, "", 0):
                continue
            try:
                tracker_id = int(candidate)
            except (TypeError, ValueError):
                continue
            if int(series.get(series_key) or 0) == tracker_id:
                continue
            series[series_key] = tracker_id
            dirty = True
            report[series_key] = "changed"

        def as_float(value):
            # None and unparsable values both collapse to None.
            if value is None:
                return None
            try:
                return float(value)
            except (TypeError, ValueError):
                return None

        # userRating from MangaBaka (0..5)
        wanted_score = as_float(built.get("score"))
        if wanted_score is not None:
            if as_float(series.get("userRating")) != wanted_score:
                series["userRating"] = wanted_score
                series["hasUserRated"] = True
                dirty = True
                report["userRating"] = "changed"
            else:
                report["userRating"] = "unchanged"

        return dirty
|
||||
@@ -0,0 +1,560 @@
|
||||
"""
|
||||
light_novel_metadata_builder.py
|
||||
===============================
|
||||
|
||||
Fetches series-level metadata for a light novel from MangaBaka, enriches
|
||||
it with MyAnimeList / AniList tracker statistics and character data, and
|
||||
returns a structured dict ready to be diffed against Kavita's
|
||||
SeriesMetadataDto.
|
||||
|
||||
Differences vs. the manga project's ComicInfoBuilder:
|
||||
- No chapter / page handling — Kavita reads volumes from the files.
|
||||
- No XML output — produces a plain dict.
|
||||
- No MangaDex resolver — light novels don't have a chapter→volume
|
||||
mapping problem.
|
||||
- MangaBaka search type is fixed to ``novel`` so only light/web novels
|
||||
are returned.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
import requests
|
||||
|
||||
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||
from MALResolver import MALResolver
|
||||
from AniListResolver import AniListResolver
|
||||
from MatchesCache import MatchesCache
|
||||
|
||||
|
||||
# MangaBaka series type for the search endpoint.
|
||||
_SEARCH_TYPES = ["novel"]
|
||||
|
||||
# MangaBaka content_rating -> Kavita AgeRating enum
|
||||
# Kavita AgeRating values (from openapi.json):
|
||||
# 0=Unknown, 3=Everyone, 8=Teen, 10=Mature17Plus, 13=AdultsOnly
|
||||
_AGE_RATING_MAP = {
|
||||
"safe": 3, # Everyone
|
||||
"suggestive": 8, # Teen
|
||||
"erotica": 10, # Mature17Plus
|
||||
"pornographic": 13, # AdultsOnly
|
||||
}
|
||||
|
||||
# MangaBaka status -> Kavita PublicationStatus enum
|
||||
# Kavita PublicationStatus (from openapi.json):
|
||||
# 0=OnGoing, 1=Hiatus, 2=Completed, 3=Cancelled, 4=Ended
|
||||
_PUB_STATUS_MAP = {
|
||||
"ongoing": 0,
|
||||
"hiatus": 1,
|
||||
"completed": 2,
|
||||
"cancelled": 3,
|
||||
"ended": 4,
|
||||
}
|
||||
|
||||
# External-tracker URL templates used to enrich the web-links list.
|
||||
_TRACKER_URL_TEMPLATES = {
|
||||
"anilist": "https://anilist.co/manga/{id}",
|
||||
"myanimelist": "https://myanimelist.net/manga/{id}",
|
||||
"mal": "https://myanimelist.net/manga/{id}",
|
||||
"mangaupdates": "https://www.mangaupdates.com/series.html?id={id}",
|
||||
"kitsu": "https://kitsu.app/manga/{id}",
|
||||
"animenewsnetwork": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
|
||||
"ann": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
|
||||
"animeplanet": "https://www.anime-planet.com/manga/{id}",
|
||||
"shikimori": "https://shikimori.one/mangas/{id}",
|
||||
"bookwalker": "https://bookwalker.jp/{id}",
|
||||
}
|
||||
|
||||
_MD_ESCAPE_RE = re.compile(r'\\([\\`*_{}\[\]()\#+\-.!|~])')
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# --------------------------------------------------------------------------
|
||||
def _normalise_key(key) -> str:
    """Lower-cases `key` and drops every character outside a-z / 0-9."""
    lowered = str(key).lower()
    return re.sub(r"[^a-z0-9]", "", lowered)
|
||||
|
||||
|
||||
def _format_term(value: str) -> str:
    """Turns a slug such as ``slice_of_life`` into ``Slice Of Life``.

    Falsy input yields an empty string.
    """
    if not value:
        return ""
    spaced = str(value).replace("_", " ")
    return spaced.strip().title()
|
||||
|
||||
|
||||
def _md_to_html(text: str) -> str:
    """Converts the subset of Markdown produced by MangaBaka to compact HTML.

    Handled syntax: backslash escapes, ``[text](url)`` links, ``**bold**``,
    ``*italic*``; blank lines separate ``<p>`` paragraphs and single
    newlines become ``<br>``. Falsy input yields an empty string.

    Backslash-escaped characters are stashed behind placeholders while the
    markup is converted and restored afterwards, so an escaped ``\\*`` or
    ``\\[`` stays a literal character instead of being picked up as
    emphasis or link syntax.
    """
    if not text:
        return ""

    # NUL delimits the placeholders below, so it must not occur in the input.
    text = text.replace("\x00", "")

    stash: list[str] = []

    def _protect(match: "re.Match[str]") -> str:
        # Remember the escaped character and leave an index marker behind.
        stash.append(match.group(1))
        return f"\x00{len(stash) - 1}\x00"

    text = _MD_ESCAPE_RE.sub(_protect, text)
    text = re.sub(
        r'\[([^\]]+)\]\(([^)]+)\)',
        lambda m: f'<a href="{m.group(2)}">{m.group(1)}</a>',
        text,
    )
    # Bold must run before italic so "**" is not consumed as two "*".
    text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text, flags=re.DOTALL)
    text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text, flags=re.DOTALL)

    parts: list[str] = []
    for para in re.split(r'\n{2,}', text.strip()):
        para = para.strip()
        if para:
            parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")

    # Put the literal characters back now that no markup pass can see them.
    return re.sub(r'\x00(\d+)\x00',
                  lambda m: stash[int(m.group(1))],
                  "".join(parts))
|
||||
|
||||
|
||||
def pick_cover_url(cover) -> "str | None":
    """Selects the best cover URL from a MangaBaka cover object.

    A string is returned as-is (empty -> None). For a dict the lookup
    order is: ``raw`` (``raw["url"]`` or a plain string), then the sized
    variants x350 > x250 > x150 (densities x3 > x2 > x1), and finally the
    first value, one level deep, that starts with "http". Anything else
    yields None.
    """
    if isinstance(cover, str):
        return cover or None
    if not isinstance(cover, dict):
        return None

    def first_text(candidates):
        # First non-empty string among the candidates, else None.
        for candidate in candidates:
            if isinstance(candidate, str) and candidate:
                return candidate
        return None

    def variant_candidates(variant, inner_keys):
        # A variant is either a dict of URLs or a bare URL string.
        if isinstance(variant, dict):
            return [variant.get(inner) for inner in inner_keys]
        return [variant]

    best = first_text(variant_candidates(cover.get("raw"), ("url",)))
    if best is not None:
        return best

    for size_key in ("x350", "x250", "x150"):
        best = first_text(
            variant_candidates(cover.get(size_key), ("x3", "x2", "x1")))
        if best is not None:
            return best

    # Last resort: anything that looks like a URL, at most one level deep.
    for value in cover.values():
        nested = value.values() if isinstance(value, dict) else ()
        for candidate in (value, *nested):
            if isinstance(candidate, str) and candidate.startswith("http"):
                return candidate
    return None
|
||||
|
||||
|
||||
def pick_thumbnail_url(cover) -> "str | None":
    """Picks a small cover variant suitable for a UI thumbnail.

    Sized variants are tried smallest first (x150, x250, x350), each with
    densities in the order x2, x1, x3. When none of them yields a URL the
    choice is delegated to `pick_cover_url`.
    """
    if not cover:
        return None
    if isinstance(cover, str):
        return cover
    if not isinstance(cover, dict):
        return None

    for size_key in ("x150", "x250", "x350"):
        variant = cover.get(size_key)
        if isinstance(variant, dict):
            candidates = [variant.get(d) for d in ("x2", "x1", "x3")]
        else:
            candidates = [variant]
        for candidate in candidates:
            if isinstance(candidate, str) and candidate:
                return candidate

    return pick_cover_url(cover)
|
||||
|
||||
|
||||
def _id_from_source(md: dict, *names: str) -> "int | None":
    """Returns the integer id stored under one of `names` in ``md["source"]``.

    Source keys and `names` are compared after `_normalise_key`. Entries
    that are not dicts, have no id, or carry a non-numeric id are passed
    over; None is returned when nothing usable is found.
    """
    wanted = {_normalise_key(name) for name in names}
    sources = md.get("source") or {}
    for source_key, entry in sources.items():
        if not (_normalise_key(source_key) in wanted
                and isinstance(entry, dict)):
            continue
        raw_id = entry.get("id")
        if raw_id is None:
            continue
        try:
            return int(raw_id)
        except (TypeError, ValueError):
            # Non-numeric id: keep looking at the remaining sources.
            continue
    return None
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Builder
|
||||
# --------------------------------------------------------------------------
|
||||
class LightNovelMetadataBuilder:
    """
    Resolves a light-novel series on MangaBaka and produces a structured
    metadata dict ready to be merged into Kavita.
    """

    def __init__(self, *,
                 api_base_url: str = "https://api.mangabaka.dev/v1",
                 language: str = "en",
                 request_timeout: int = 30,
                 session: "requests.Session | None" = None,
                 mal_resolver: "MALResolver | None" = None,
                 al_resolver: "AniListResolver | None" = None,
                 matches_cache: "MatchesCache | None" = None):
        """
        api_base_url:    MangaBaka API root; a trailing slash is stripped.
        language:        code used to pick the sort title; also emitted as
                         the ``language`` field of the built dict.
        request_timeout: timeout passed to every MangaBaka request and to
                         resolvers created here.
        session:         optional shared HTTP session; a new one is created
                         when omitted. The rate limiter is applied to it
                         either way.
        mal_resolver / al_resolver: optional shared resolvers; fresh ones
                         are created when omitted.
        matches_cache:   optional cache used by `resolve`; may be None.
        """
        self.api_base_url = api_base_url.rstrip("/")
        self.language = language
        self.request_timeout = request_timeout

        self._session = session or requests.Session()
        self._session.headers.setdefault("User-Agent",
                                         "LightNovelMetadataBuilder/1.0")
        _apply_mangabaka_rate_limit(self._session)

        self._mal = mal_resolver or MALResolver(request_timeout=request_timeout)
        self._al = al_resolver or AniListResolver(request_timeout=request_timeout)
        self._matches_cache = matches_cache

    # ------------------------------------------------------------------
    # MangaBaka search / fetch
    # ------------------------------------------------------------------
    def search_series(self, title: str) -> "dict | None":
        """Returns the top MangaBaka novel hit for `title`, or None.

        None is also returned for a blank title, a failed request or a
        response body that is not the expected JSON object.
        """
        if not title or not title.strip():
            return None
        url = f"{self.api_base_url}/series/search"
        try:
            resp = self._session.get(
                url, params={"q": title, "type": _SEARCH_TYPES,
                             "page": 1, "limit": 1},
                timeout=self.request_timeout)
            resp.raise_for_status()
            # Parsed inside the try: a non-JSON body must not escape as an
            # exception from a method documented to return None on failure.
            payload = resp.json()
        except (requests.RequestException, ValueError):
            return None
        data = payload.get("data") if isinstance(payload, dict) else None
        if isinstance(data, list) and data:
            return data[0]
        return None

    def fetch_series(self, series_id) -> "dict | None":
        """Returns the full MangaBaka series dict for the given id.

        Records whose ``state`` is "merged" are followed through
        ``merged_with``. A chain that loops back onto an id already
        visited stops at the last record fetched instead of recursing
        without bound. HTTP errors from the request propagate.
        """
        visited: set[str] = set()
        while True:
            if series_id is None or str(series_id).strip() == "":
                return None
            visited.add(str(series_id).strip())

            url = f"{self.api_base_url}/series/{series_id}"
            resp = self._session.get(url, timeout=self.request_timeout)
            resp.raise_for_status()
            data = resp.json().get("data")

            is_merged = bool(data and data.get("state") == "merged"
                             and data.get("merged_with"))
            if not is_merged:
                return data
            target = data["merged_with"]
            if str(target).strip() in visited:
                # Merge cycle — hand back what we have rather than loop.
                return data
            series_id = target

    # ------------------------------------------------------------------
    # Resolve title -> MangaBaka series (caches the match)
    # ------------------------------------------------------------------
    def resolve(self, title: str) -> "dict | None":
        """
        Returns the MangaBaka series for `title`.

        Lookup order:
          1. MatchesCache (uses stored mangabakaId, skips the search).
          2. Fresh MangaBaka search — top hit. Result is persisted to the
             cache so it survives a crash.
        """
        cache = self._matches_cache
        if cache is not None:
            cached = cache.get(title)
            if cached and cached.get("mangabakaId"):
                try:
                    series = self.fetch_series(cached["mangabakaId"])
                except Exception:
                    # Deliberate best-effort: any failure on the cached id
                    # falls through to a fresh search below.
                    series = None
                if series:
                    return series

        series = self.search_series(title)
        if series and cache is not None:
            cache.upsert(
                title,
                mangabaka_id=series.get("id"),
                mangabaka_name=series.get("title") or "",
                image_url=pick_thumbnail_url(series.get("cover")),
            )
        return series

    # ------------------------------------------------------------------
    # Main entry point
    # ------------------------------------------------------------------
    def build(self, *, title: str = "",
              mangabaka_id=None) -> "dict | None":
        """
        Fetches and enriches metadata for one series, returning the
        normalised dict assembled by `_assemble`, or None when the series
        cannot be found.

        Pass either `title` (will resolve via cache/search) or
        `mangabaka_id` (direct fetch). A non-blank `mangabaka_id` wins.
        """
        if mangabaka_id is not None and str(mangabaka_id).strip():
            md = self.fetch_series(mangabaka_id)
        else:
            md = self.resolve(title)
        if not md:
            return None
        return self._assemble(md)

    # ------------------------------------------------------------------
    # Internal: assemble the result dict
    # ------------------------------------------------------------------
    def _assemble(self, md: dict) -> dict:
        """Builds the normalised result dict from a MangaBaka series `md`."""
        mal_id = _id_from_source(md, "myanimelist", "mal")
        al_id = _id_from_source(md, "anilist")

        # Fall back to a title-based MAL lookup when the source map does
        # not carry an id — Jikan is the only tracker that ships staff
        # data we can use to enrich author / artist person records.
        if mal_id is None:
            mal_id = self._mal.find_mal_id(md.get("title") or "")

        mal_stats = self._mal.get_stats(mal_id) if mal_id else None

        # MAL first, AniList only when MAL returned nothing.
        characters_detailed = self._mal.get_characters_detailed(mal_id) if mal_id else []
        if not characters_detailed and al_id:
            characters_detailed = self._al.get_characters_detailed(al_id)

        staff_detailed = self._mal.get_staff_detailed(mal_id) if mal_id else []
        if not staff_detailed and al_id:
            staff_detailed = self._al.get_staff_detailed(al_id)

        # Character / writer name lists for SeriesMetadata
        character_names = [c["name"] for c in characters_detailed
                           if c.get("name")]
        # Writers come from MangaBaka first (authoritative for novels)
        writers = list(md.get("authors") or [])
        # Illustrators / artists -> CoverArtists (Kavita has no dedicated
        # illustrator field, and Pencillers is the wrong semantic for
        # text-only novels).
        cover_artists = list(md.get("artists") or [])

        # Publisher: prefer English licence, else original. When both
        # exist, the first original publisher is kept as the imprint.
        english_publishers = self._publishers_by_type(md, "English")
        original_publishers = self._publishers_by_type(md, "Original")
        publishers = english_publishers or original_publishers
        imprint = (original_publishers[0]
                   if english_publishers and original_publishers else None)

        # Release year
        release_year = None
        try:
            if md.get("year") is not None:
                release_year = int(md["year"])
        except (TypeError, ValueError):
            pass  # unparsable year -> leave unset

        # Score: MangaBaka rating is 0..100 -> Kavita userRating is 0..5
        score = None
        if md.get("rating") is not None:
            try:
                score = round(float(md["rating"]) / 20.0, 1)
            except (TypeError, ValueError):
                pass  # unparsable rating -> leave unset

        # Tags / genres come back as snake_case slugs.
        genres = [_format_term(g) for g in (md.get("genres") or []) if g]
        tags = [_format_term(t) for t in (md.get("tags") or []) if t]

        # Looked up case-insensitively, the same way as the status below.
        content_rating = str(md.get("content_rating") or "").lower()

        return {
            "mangabakaId": str(md.get("id") or ""),
            "mangabakaTitle": md.get("title") or "",
            "originalName": md.get("native_title") or "",
            "localizedName": md.get("romanized_title") or "",
            "sortName": self._sort_title(md),
            "altTitles": self._collect_all_alt_titles(md),
            "summary": self._build_summary(md, mal_stats),
            "genres": genres,
            "tags": tags,
            "characters": character_names,
            "writers": writers,
            "coverArtists": cover_artists,
            "publishers": publishers,
            "imprint": imprint,
            "releaseYear": release_year,
            "ageRating": _AGE_RATING_MAP.get(content_rating, 0),
            # NOTE(review): statuses missing from _PUB_STATUS_MAP fall back
            # to 0, which that table's comment lists as OnGoing — confirm
            # that is the intended default for unknown statuses.
            "publicationStatus": _PUB_STATUS_MAP.get(
                (md.get("status") or "").lower(), 0),
            "language": self.language,
            "webLinks": self._collect_web_links(md),
            "score": score,
            "coverUrl": pick_cover_url(md.get("cover")),
            "malId": mal_id,
            "anilistId": al_id,
            "relationships": list(md.get("relationships_v2") or []),
            "charactersDetailed": characters_detailed,
            "staffDetailed": staff_detailed,
            "raw": md,
        }

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _publishers_by_type(md: dict, ptype: str) -> list[str]:
        """Names of the publishers in `md` whose ``type`` equals `ptype`.

        Entries that are not dicts or have no name are ignored.
        """
        return [p.get("name") for p in (md.get("publishers") or [])
                if isinstance(p, dict)
                and p.get("type") == ptype and p.get("name")]

    def _sort_title(self, md: dict) -> str:
        """Title in the configured language, else the main MangaBaka title."""
        lang = self.language.lower()
        alts = self._collect_alt_titles(md)
        # NOTE(review): the alt-title keys are en/de/jp/romaji, so a
        # configured language of "ja" finds no entry and falls back to
        # the main title — confirm that is intended.
        return alts.get(lang) or md.get("title") or ""

    def _collect_alt_titles(self, md: dict) -> "dict[str, str]":
        """Returns one best title per language code (en/de/jp/romaji)."""
        titles = md.get("titles") or md.get("alt_titles") or []

        def pick(language_codes: tuple, prefer_trait: "str | None" = None
                 ) -> "str | None":
            # Score: preferred trait (+4), "official" (+2), primary (+1);
            # the first entry with the highest score wins.
            best_score = -1
            best_title: "str | None" = None
            for entry in titles:
                if not isinstance(entry, dict):
                    continue
                lang = (entry.get("language") or entry.get("lang") or "").lower()
                if lang not in language_codes:
                    continue
                title = entry.get("title")
                if not title:
                    continue
                traits = entry.get("traits") or []
                score = 0
                if prefer_trait and prefer_trait in traits:
                    score += 4
                if "official" in traits:
                    score += 2
                if entry.get("is_primary"):
                    score += 1
                if score > best_score:
                    best_score, best_title = score, title
            return best_title

        result: dict[str, str] = {}
        kanji = pick(("ja",), prefer_trait="native") or md.get("native_title")
        if kanji:
            result["jp"] = kanji
        romaji = pick(("ja-latn", "ja-romaji"))
        if not romaji:
            # Only accept the romanized title when it is pure ASCII.
            rt = md.get("romanized_title") or ""
            if rt and all(ord(c) < 128 for c in rt):
                romaji = rt
        if romaji:
            result["romaji"] = romaji
        en = pick(("en",)) or md.get("title")
        if en:
            result["en"] = en
        de = pick(("de",))
        if de:
            result["de"] = de
        return result

    @staticmethod
    def _collect_all_alt_titles(md: dict) -> "dict[str, list[str]]":
        """Groups every alternative title by language family.

        Returns ``{group: [titles...]}`` with titles stripped and
        de-duplicated in first-seen order; languages outside the known
        groups are ignored.
        """
        _GROUPS = {
            "en": ("en",),
            "de": ("de",),
            "ja": ("ja",),
            "ja-romaji": ("ja-latn", "ja-romaji"),
            "ko": ("ko",),
            "ko-romaji": ("ko-latn", "ko-romaji"),
            "zh": ("zh", "zh-hk", "zh-tw", "zh-hans", "zh-hant"),
            "zh-romaji": ("zh-latn",),
        }
        lang_to_group = {l: g for g, ls in _GROUPS.items() for l in ls}
        result: dict[str, list[str]] = {}
        for entry in (md.get("titles") or md.get("alt_titles") or []):
            if not isinstance(entry, dict):
                continue
            lang = (entry.get("language") or entry.get("lang") or "").lower()
            group = lang_to_group.get(lang)
            if not group:
                continue
            title = (entry.get("title") or "").strip()
            if not title:
                continue
            bucket = result.setdefault(group, [])
            if title not in bucket:
                bucket.append(title)
        return result

    def _collect_web_links(self, md: dict) -> list[str]:
        """MangaBaka's own links plus one URL per known tracker id.

        Duplicates are dropped while the first-seen order is kept.
        """
        links: list[str] = [l for l in (md.get("links") or []) if l]
        for raw_key, info in (md.get("source") or {}).items():
            template = _TRACKER_URL_TEMPLATES.get(_normalise_key(raw_key))
            if not template or not isinstance(info, dict):
                continue
            source_id = info.get("id")
            if source_id is not None:
                links.append(template.format(id=source_id))
        # dict preserves insertion order, so this de-duplicates stably.
        return list(dict.fromkeys(links))

    def _build_summary(self, md: dict,
                       mal_stats: "dict | None") -> str:
        """Builds the HTML summary with stats table + description + alt titles.

        Values taken from the APIs (stat values, the MAL URL and date, alt
        titles) are HTML-escaped before being placed in the markup, so
        characters such as ``&`` or ``<`` in a title cannot break it. The
        description is converted by `_md_to_html` and is not escaped here.
        """
        from html import escape  # only this method needs it

        _TD = 'style="padding-right:1.5em"'
        parts: list[str] = []

        if mal_stats:
            url = mal_stats.get("url", "")
            as_of = mal_stats.get("as_of", "")
            rows: list[str] = []
            for label, key, fmt in (
                ("Score", "score", "{}"),
                ("Ranked", "rank", "#{}"),
                ("Scored by", "scored_by", "{:,} users"),
                ("Popularity", "popularity", "#{}"),
                ("Members", "members", "{:,}"),
                ("Favorites", "favorites", "{:,}"),
            ):
                v = mal_stats.get(key)
                if v is None:
                    continue
                try:
                    formatted = fmt.format(v)
                except (TypeError, ValueError):
                    # e.g. a non-numeric value for a "{:,}" format
                    formatted = str(v)
                rows.append(f"<tr><td {_TD}>{label}</td>"
                            f"<td>{escape(formatted)}</td></tr>")
            if rows:
                link = (f'<a href="{escape(str(url))}" target="_blank">MyAnimeList</a>'
                        if url else "MyAnimeList")
                parts.append(f"<p>{link} stats as of {escape(str(as_of))}:</p>"
                             f"<table>{''.join(rows)}</table>")

        desc_raw = (md.get("description") or "").strip()
        if desc_raw:
            parts.append(_md_to_html(desc_raw))

        all_alt = self._collect_all_alt_titles(md)
        if all_alt:
            label_map = {
                "en": "EN",
                "de": "DE",
                "ja": "JA",
                "ja-romaji": "JA Romaji",
                "ko": "KO",
                "ko-romaji": "KO Romaji",
                "zh": "ZH",
                "zh-romaji": "ZH Romaji",
            }
            alt_rows: list[str] = []
            # label_map's insertion order is the display order.
            for group, label in label_map.items():
                titles = all_alt.get(group)
                if not titles:
                    continue
                cell = "<br>".join(escape(str(t), quote=False) for t in titles)
                alt_rows.append(
                    f"<tr><td {_TD}>{label}</td><td>{cell}</td></tr>")
            if alt_rows:
                parts.append(f"<table>{''.join(alt_rows)}</table>")

        return "<br>".join(parts)
|
||||
@@ -0,0 +1,257 @@
|
||||
"""
|
||||
light_novel_orchestrator.py
|
||||
===========================
|
||||
|
||||
High-level workflow on top of the resolvers, the Kavita client and the
|
||||
diff-based updaters. Exposes three operations to the WebApp:
|
||||
|
||||
- build_matches(library_ids):
|
||||
Scan one or more Kavita libraries, resolve every series against
|
||||
MangaBaka and persist the match in matches.json.
|
||||
- update_series(kavita_series_id):
|
||||
Re-fetch MangaBaka, MAL and AniList data for a single Kavita
|
||||
series and apply the diff (metadata + persons + relationships).
|
||||
- update_all(library_ids):
|
||||
Run update_series for every series that has a match in the
|
||||
cache and lives in the given libraries.
|
||||
|
||||
A single shared HTTP session (rate-limited for MangaBaka) and shared
|
||||
resolver singletons are used across the whole run to maximise cache
|
||||
hits.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import requests
|
||||
|
||||
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||
from MALResolver import MALResolver
|
||||
from AniListResolver import AniListResolver
|
||||
from MatchesCache import MatchesCache
|
||||
from KavitaClient import KavitaClient
|
||||
from KavitaPersonUpdater import KavitaPersonUpdater
|
||||
from KavitaSeriesUpdater import KavitaSeriesUpdater
|
||||
from LightNovelMetadataBuilder import (
|
||||
LightNovelMetadataBuilder,
|
||||
pick_thumbnail_url,
|
||||
)
|
||||
from RelationshipSync import RelationshipSync
|
||||
|
||||
|
||||
class LightNovelOrchestrator:
|
||||
def __init__(self, *,
             kavita_url: str,
             kavita_api_key: str,
             matches_cache: MatchesCache,
             language: str = "en",
             request_timeout: int = 30,
             api_base_url: str = "https://api.mangabaka.dev/v1"):
    """
    Wires up the shared session, the resolvers, the Kavita client and
    the updaters.

    kavita_url / kavita_api_key: passed to the Kavita client and to the
                     person updater.
    matches_cache:   cache shared with the builder and relationship sync.
    language:        forwarded to the metadata builder.
    request_timeout: timeout handed to every component created here.
    api_base_url:    MangaBaka API root, forwarded to the builder.
    """
    timeout = request_timeout
    self._cache = matches_cache
    self._timeout = timeout

    # One rate-limited HTTP session for the whole run.
    self._session = requests.Session()
    self._session.headers.setdefault("User-Agent",
                                     "KavitaLightNovelOrchestrator/1.0")
    _apply_mangabaka_rate_limit(self._session)

    # The same resolver instances are handed to the builder and the
    # person updater below.
    self._mal = MALResolver(request_timeout=timeout)
    self._al = AniListResolver(request_timeout=timeout)
    shared_resolvers = {"mal_resolver": self._mal, "al_resolver": self._al}

    self._client = KavitaClient(kavita_url, kavita_api_key,
                                request_timeout=timeout)
    self._builder = LightNovelMetadataBuilder(
        api_base_url=api_base_url,
        language=language,
        request_timeout=timeout,
        session=self._session,
        matches_cache=matches_cache,
        **shared_resolvers,
    )
    self._series_updater = KavitaSeriesUpdater(self._client)
    self._person_updater = KavitaPersonUpdater(
        kavita_url, kavita_api_key,
        request_timeout=timeout,
        **shared_resolvers,
    )
    self._relation_sync = RelationshipSync(
        self._client, matches_cache, builder=self._builder)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Library listings
|
||||
# ------------------------------------------------------------------
|
||||
def list_libraries(self) -> list[dict]:
    """Returns whatever the Kavita client's `list_libraries` reports."""
    libraries = self._client.list_libraries()
    return libraries
|
||||
|
||||
def list_series_in_libraries(self, library_ids: list[int]) -> list[dict]:
    """Concatenates the series listings of all given libraries.

    Each id is converted with ``int`` before the lookup. A library whose
    listing fails is logged to stdout and skipped, so one failing library
    does not abort the scan.
    """
    collected: list[dict] = []
    for lib_id in library_ids:
        try:
            batch = self._client.list_series_in_library(int(lib_id))
            collected.extend(batch)
        except Exception as exc:
            print(f"[orchestrator] library {lib_id} list failed: {exc}",
                  flush=True)
    return collected
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Matching
|
||||
# ------------------------------------------------------------------
|
||||
def build_matches(self, library_ids: list[int]) -> dict:
    """
    Resolves every series in the given libraries against MangaBaka.

    Series already present in the cache with a mangabakaId keep it; only
    their kavitaSeriesId + libraryId are refreshed (in case the user
    moved a series between libraries). Series without a MangaBaka hit
    are still stored with their Kavita ids.

    Returns counters: "checked", "matched", "skipped" (already matched)
    and "missing" (no MangaBaka hit). Series with a blank name are
    ignored and not counted.
    """
    stats = {"checked": 0, "matched": 0, "skipped": 0, "missing": 0}

    for series in self.list_series_in_libraries(library_ids):
        title = (series.get("name") or "").strip()
        if not title:
            continue
        stats["checked"] += 1

        # Kavita-side location, written on every path below.
        location = {
            "kavita_series_id": int(series.get("id") or 0),
            "library_id": int(series.get("libraryId") or 0),
        }

        cached = self._cache.get(title)
        if cached and cached.get("mangabakaId"):
            self._cache.upsert(title, **location)
            stats["skipped"] += 1
            continue

        hit = self._builder.search_series(title)
        if hit:
            self._cache.upsert(
                title,
                mangabaka_id=hit.get("id"),
                mangabaka_name=hit.get("title") or "",
                image_url=pick_thumbnail_url(hit.get("cover")),
                **location,
            )
            stats["matched"] += 1
            print(f"[match] {title!r} -> {hit.get('title')!r} "
                  f"(id={hit.get('id')})", flush=True)
        else:
            self._cache.upsert(title, **location)
            stats["missing"] += 1
            print(f"[match] {title!r}: no MangaBaka hit", flush=True)

    return stats
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Updating
|
||||
# ------------------------------------------------------------------
|
||||
def update_series(self, kavita_series_id: int) -> dict:
    """
    Runs a full metadata update for a single Kavita series.

    If the series is not in the matches cache yet, it is resolved on the
    fly through its Kavita name and stored in the cache. The series
    metadata is then updated, followed by persons and relationships.

    Returns ``{"ok": False, "error": ...}`` when the series cannot be
    resolved or the series update itself raises. Otherwise returns
    ``{"ok": True, ...}`` with the three sub-reports; a failure in the
    person or relationship step is reported as ``{"error": ...}`` inside
    the corresponding sub-report and does not fail the whole update.
    """
    series_id = int(kavita_series_id)
    hit = self._cache.get_by_kavita_id(series_id)
    if not hit:
        # Try to resolve via the Kavita series name on the fly.
        series = self._client.get_series(series_id) or {}
        title = (series.get("name") or "").strip()
        if not title:
            return {"ok": False, "error": "series not in matches.json"}
        built = self._builder.build(title=title)
        if not built:
            return {"ok": False, "error": "no MangaBaka match"}
        # Read the previously stored cover BEFORE the upsert below writes
        # the new one. Reading it afterwards would make "previous" equal
        # to "new", so a freshly resolved series would look unchanged.
        prev_cover = (self._cache.get(title) or {}).get("imageUrl") or ""
        self._cache.upsert(
            title,
            mangabaka_id=built.get("mangabakaId"),
            mangabaka_name=built.get("mangabakaTitle"),
            image_url=built.get("coverUrl"),
            kavita_series_id=series_id,
            library_id=int(series.get("libraryId") or 0),
        )
        cached_title = title
    else:
        cached_title, cached_entry = hit
        built = self._builder.build(mangabaka_id=cached_entry.get("mangabakaId"))
        if not built:
            return {"ok": False, "error": "mangabaka id no longer resolvable"}
        prev_cover = cached_entry.get("imageUrl") or ""

    try:
        series_report = self._series_updater.update_series(
            series_id, built,
            previous_cover_url=prev_cover,
        )
    except Exception as exc:
        return {"ok": False, "error": f"series update failed: {exc}"}

    # Persons (best effort: an error is reported, not raised)
    try:
        person_report = self._person_updater.update_for_manga(
            built.get("malId"),
            al_manga_id=built.get("anilistId"),
        )
    except Exception as exc:
        person_report = {"error": str(exc)}

    # Relationships + collection (best effort as well)
    try:
        relation_report = self._relation_sync.sync(series_id, built)
    except Exception as exc:
        relation_report = {"error": str(exc)}

    # Stamp the new cover URL on the cache so the next run knows when
    # to re-upload.
    self._cache.upsert(
        cached_title,
        image_url=built.get("coverUrl") or prev_cover,
    )
    self._cache.mark_updated(cached_title)

    return {
        "ok": True,
        "title": cached_title,
        "mangabakaId": built.get("mangabakaId"),
        "series": series_report,
        "persons": person_report,
        "relationships": relation_report,
    }
|
||||
|
||||
def update_all(self, library_ids: "list[int] | None") -> dict:
    """
    Run ``update_series`` for every usable cache entry.

    With ``library_ids=None`` all cached matches are considered,
    otherwise only those returned by ``all_in_libraries``. Entries
    lacking a Kavita series id or a MangaBaka id are skipped silently.

    Returns ``{"ok": <count>, "failed": <count>, "results": [...]}``
    where every result dict carries the cache title under ``"title"``.
    """
    if library_ids is not None:
        snapshot = self._cache.all_in_libraries(library_ids)
    else:
        snapshot = self._cache.all()

    reports: list[dict] = []
    succeeded = 0
    failed = 0
    for title, entry in snapshot["matches"].items():
        series_id = int(entry.get("kavitaSeriesId") or 0)
        if not (series_id and entry.get("mangabakaId")):
            continue
        try:
            report = self.update_series(series_id)
        except Exception as exc:
            report = {"ok": False, "error": str(exc)}
        # The cache key wins over whatever title the update reported.
        report["title"] = title
        reports.append(report)
        if report.get("ok"):
            succeeded += 1
            verdict = "ok"
        else:
            failed += 1
            verdict = "FAIL " + str(report.get("error"))
        print(f"[update] {title!r}: {verdict}", flush=True)
    return {"ok": succeeded, "failed": failed, "results": reports}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Direct helpers exposed to the WebApp
|
||||
# ------------------------------------------------------------------
|
||||
def fetch_series(self, mangabaka_id) -> "dict | None":
    """Pass-through to the builder's ``fetch_series`` for the given id."""
    series = self._builder.fetch_series(mangabaka_id)
    return series
|
||||
@@ -0,0 +1,442 @@
|
||||
"""
|
||||
mal_resolver.py
|
||||
===============
|
||||
|
||||
Fetches and caches MyAnimeList manga metadata (statistics, characters, staff)
|
||||
using the public Jikan REST API v4.
|
||||
|
||||
Jikan API: https://api.jikan.moe/v4 (no authentication required)
|
||||
Rate limit: 3 req/s, 60 req/min -> a 400 ms guard between calls is applied.
|
||||
|
||||
Singleton
|
||||
---------
|
||||
Only one instance of this class exists per process. Subsequent calls to
|
||||
MALResolver() return the same object with its warm caches intact.
|
||||
|
||||
Provided features
|
||||
-----------------
|
||||
- Title-based MAL ID lookup with best-match scoring
|
||||
- MAL statistics: score, rank, scored_by, popularity, members, favorites
|
||||
- Character list for a manga (names only — for <Characters> XML tag)
|
||||
- Detailed character list: name, MAL character ID, image URL, role
|
||||
- Detailed staff list: name, MAL person ID, image URL, positions
|
||||
- Lazy full-detail fetches per character / person (for descriptions)
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
requests -> pip install requests
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import difflib
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
from MediaResolver import MediaResolver
|
||||
|
||||
|
||||
class MALResolver(MediaResolver):
|
||||
"""
|
||||
Singleton: fetches and caches MAL manga data via Jikan API v4.
|
||||
|
||||
The first call to MALResolver() creates and initialises the instance;
|
||||
all subsequent calls return the same object.
|
||||
"""
|
||||
|
||||
_instance: "MALResolver | None" = None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Singleton machinery
|
||||
# ------------------------------------------------------------------
|
||||
def __new__(cls, **kwargs):
    """Return the shared instance, creating it (uninitialised) on first use."""
    instance = cls._instance
    if instance is None:
        instance = super().__new__(cls)
        # __init__ checks this flag to run its setup exactly once.
        instance._initialized = False
        cls._instance = instance
    return instance
|
||||
|
||||
def __init__(self, *, request_timeout: int = 30):
    """
    Initialise the singleton on first construction; later calls are no-ops.

    request_timeout: timeout in seconds passed to every HTTP request.
    """
    if self._initialized:
        return

    self.JIKAN_BASE = "https://api.jikan.moe/v4"
    self.request_timeout = request_timeout

    self._session = requests.Session()
    # A fresh requests.Session already carries a default "User-Agent"
    # header ("python-requests/x.y"), so setdefault() would never apply
    # ours. The session is private to this object: assign it directly.
    self._session.headers["User-Agent"] = "MALResolver/1.0"

    # title_lower -> mal_id
    self._id_cache: dict[str, "int | None"] = {}
    # mal_id -> stats dict
    self._stats_cache: dict[int, dict] = {}
    # manga_mal_id -> [name_str, ...] (for ComicInfo <Characters>)
    self._char_names_cache: dict[int, list[str]] = {}
    # manga_mal_id -> [{mal_id, name, image_url, role}]
    self._char_detailed_cache: dict[int, list[dict]] = {}
    # manga_mal_id -> [{mal_id, name, image_url, positions}]
    self._staff_detailed_cache: dict[int, list[dict]] = {}
    # char_mal_id -> {mal_id, name, image_url, about}
    self._char_info_cache: dict[int, dict] = {}
    # person_mal_id -> {mal_id, name, image_url, about, website_url}
    self._person_info_cache: dict[int, dict] = {}

    # time.monotonic() stamp of the last request, read by _get().
    self._last_request_at: float = 0.0
    self._initialized = True
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: ID lookup
|
||||
# ------------------------------------------------------------------
|
||||
def find_id(self, title: str) -> "int | None":
    """MediaResolver interface entry point; forwards to ``find_mal_id``."""
    mal_id = self.find_mal_id(title)
    return mal_id
|
||||
|
||||
def find_mal_id(self, title: str) -> "int | None":
    """
    Look up a manga on MAL by title and return the best-scoring MAL ID.

    Lookups are cached per stripped, lower-cased title; an empty result
    set is cached as None. A request failure returns None without
    caching, so the next call tries again.
    """
    cache_key = (title or "").strip().lower()
    if not cache_key:
        return None
    if cache_key in self._id_cache:
        return self._id_cache[cache_key]

    query = {"q": title, "limit": 5, "type": "lightnovel"}
    try:
        payload = self._get(f"{self.JIKAN_BASE}/manga", query)
        candidates = payload.get("data") or []
    except requests.RequestException:
        return None

    best_id = None
    if candidates:
        # Highest title similarity first; ties keep the API's order.
        candidates.sort(key=lambda item: _score_title(title, item), reverse=True)
        best_id = candidates[0].get("mal_id")
    self._id_cache[cache_key] = best_id
    return best_id
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: statistics
|
||||
# ------------------------------------------------------------------
|
||||
def get_stats(self, mal_id: "int | None") -> "dict | None":
    """
    Fetch (or return cached) statistics for a MAL manga ID.

    The result has the keys score, rank, scored_by, popularity, members,
    favorites, url, title and as_of (today's date as DD-MM-YYYY).

    Returns None when mal_id is None or the request fails.
    """
    if mal_id is None:
        return None
    if mal_id in self._stats_cache:
        return self._stats_cache[mal_id]

    try:
        payload = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}")
        entry = payload.get("data") or {}
    except requests.RequestException:
        return None

    numeric_fields = ("score", "rank", "scored_by",
                      "popularity", "members", "favorites")
    stats: dict = {field: entry.get(field) for field in numeric_fields}
    # Fall back to the canonical MAL page when the API gives no URL.
    stats["url"] = entry.get("url") or f"https://myanimelist.net/manga/{mal_id}"
    stats["title"] = entry.get("title") or ""
    stats["as_of"] = datetime.date.today().strftime("%d-%m-%Y")

    self._stats_cache[mal_id] = stats
    return stats
|
||||
|
||||
def get_stats_for_manga(self, title: str) -> "dict | None":
    """Shortcut: resolve the title to a MAL ID, then fetch its stats."""
    mal_id = self.find_mal_id(title)
    return self.get_stats(mal_id)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: character names (for ComicInfo <Characters> tag)
|
||||
# ------------------------------------------------------------------
|
||||
def get_characters(self, mal_id: "int | None") -> list[str]:
    """
    Return the character names of a manga as a flat list of strings.

    Names are taken from ``get_characters_detailed``. Only a non-empty
    list is cached, so an empty result is looked up again next time.
    """
    if mal_id is None:
        return []
    try:
        return self._char_names_cache[mal_id]
    except KeyError:
        pass

    names = [item["name"]
             for item in self.get_characters_detailed(mal_id)
             if item.get("name")]
    if names:
        # Empty could be a transient API failure; do not pin it.
        self._char_names_cache[mal_id] = names
    return names
|
||||
|
||||
def get_characters_for_manga(self, title: str) -> list[str]:
    """Shortcut: resolve the title to a MAL ID, then list character names."""
    mal_id = self.find_mal_id(title)
    return self.get_characters(mal_id)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: detailed character data (for KavitaPersonUpdater)
|
||||
# ------------------------------------------------------------------
|
||||
def get_characters_detailed(self, mal_id: "int | None") -> list[dict]:
    """
    Return one dict per character of the manga:

        {mal_id, name, raw_name, image_url, role, about}

    ``name`` is the comma-free form produced by ``_clean_mal_name``,
    ``raw_name`` is the name as delivered by the API, ``role`` defaults
    to "Supporting" and ``about`` is always None here (fetch it with
    ``get_character_details``). Entries without a name are dropped.

    Returns [] when mal_id is None or the request fails; only non-empty
    results are cached.
    """
    if mal_id is None:
        return []
    try:
        return self._char_detailed_cache[mal_id]
    except KeyError:
        pass

    try:
        payload = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}/characters")
        rows = payload.get("data") or []
    except requests.RequestException:
        return []

    characters = []
    for row in rows:
        character = row.get("character") or {}
        raw_name = character.get("name") or ""
        if raw_name:
            jpg = (character.get("images") or {}).get("jpg") or {}
            characters.append({
                "mal_id": character.get("mal_id"),
                # "Hibino, Susuki" -> "Susuki Hibino": ComicInfo
                # <Characters> is comma-separated, so a comma inside a
                # name would split one character into two persons.
                "name": _clean_mal_name(raw_name),
                "raw_name": raw_name,
                "image_url": jpg.get("image_url") or jpg.get("small_image_url"),
                "role": row.get("role") or "Supporting",
                "about": None,
            })

    if characters:
        self._char_detailed_cache[mal_id] = characters
    return characters
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: detailed staff data (for KavitaPersonUpdater)
|
||||
# ------------------------------------------------------------------
|
||||
def get_staff_detailed(self, mal_id: "int | None") -> list[dict]:
    """
    Return one dict per author of the manga:

        {mal_id, name, raw_name, image_url, positions, about}

    Authors are read from ``data.authors`` of ``/manga/{id}``. The image
    URL comes from ``get_person_details`` for each author; ``positions``
    is always an empty list and ``about`` always None here. Authors
    without a name or MAL id are dropped.

    Returns [] when mal_id is None or the request fails; only non-empty
    results are cached.
    """
    if mal_id is None:
        return []
    try:
        return self._staff_detailed_cache[mal_id]
    except KeyError:
        pass

    try:
        payload = self._get(f"{self.JIKAN_BASE}/manga/{mal_id}")
        entry = payload.get("data") or {}
    except requests.RequestException:
        return []

    staff = []
    for author in (entry.get("authors") or []):
        person_id = author.get("mal_id")
        raw_name = author.get("name") or ""
        if person_id is None or not raw_name:
            continue
        # The author stub carries no image; look the person up.
        person = self.get_person_details(person_id) or {}
        staff.append({
            "mal_id": person_id,
            "name": _clean_mal_name(raw_name),
            "raw_name": raw_name,
            "image_url": person.get("image_url"),
            "positions": [],
            "about": None,
        })

    if staff:
        self._staff_detailed_cache[mal_id] = staff
    return staff
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: individual character / person details (lazy, with description)
|
||||
# ------------------------------------------------------------------
|
||||
def get_character_details(self, char_mal_id: "int | None") -> "dict | None":
    """
    Fetch (or return cached) details of one MAL character.

    The result has the keys mal_id, name, image_url, about, favorites
    and url. Returns None when char_mal_id is None or the request fails.
    """
    if char_mal_id is None:
        return None
    if char_mal_id in self._char_info_cache:
        return self._char_info_cache[char_mal_id]

    try:
        payload = self._get(f"{self.JIKAN_BASE}/characters/{char_mal_id}")
        entry = payload.get("data") or {}
    except requests.RequestException:
        return None

    jpg = (entry.get("images") or {}).get("jpg") or {}
    picture = jpg.get("image_url") or jpg.get("small_image_url")
    # Fall back to the canonical MAL page when the API gives no URL.
    page = entry.get("url") or f"https://myanimelist.net/character/{char_mal_id}"
    details = {
        "mal_id": entry.get("mal_id"),
        "name": entry.get("name") or "",
        "image_url": picture,
        "about": entry.get("about"),
        "favorites": entry.get("favorites"),
        "url": page,
    }
    self._char_info_cache[char_mal_id] = details
    return details
|
||||
|
||||
def get_person_details(self, person_mal_id: "int | None") -> "dict | None":
    """
    Fetch (or return cached) details of one MAL person (staff member).

    The result has the keys mal_id, name, given_name, family_name,
    birthday, image_url, about, favorites, website_url and url.
    Returns None when person_mal_id is None or the request fails.
    """
    if person_mal_id is None:
        return None
    if person_mal_id in self._person_info_cache:
        return self._person_info_cache[person_mal_id]

    try:
        payload = self._get(f"{self.JIKAN_BASE}/people/{person_mal_id}")
        entry = payload.get("data") or {}
    except requests.RequestException:
        return None

    jpg = (entry.get("images") or {}).get("jpg") or {}
    picture = jpg.get("image_url") or jpg.get("small_image_url")
    # Fall back to the canonical MAL page when the API gives no URL.
    page = entry.get("url") or f"https://myanimelist.net/people/{person_mal_id}"
    details = {
        "mal_id": entry.get("mal_id"),
        "name": entry.get("name") or "",
        "given_name": entry.get("given_name"),
        "family_name": entry.get("family_name"),
        "birthday": entry.get("birthday"),
        "image_url": picture,
        "about": entry.get("about"),
        "favorites": entry.get("favorites"),
        "website_url": entry.get("website_url"),
        "url": page,
    }
    self._person_info_cache[person_mal_id] = details
    return details
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public: cache management
|
||||
# ------------------------------------------------------------------
|
||||
def clear_cache(self) -> None:
    """Empty every internal cache in place; the singleton itself stays."""
    caches = (
        self._id_cache,
        self._stats_cache,
        self._char_names_cache,
        self._char_detailed_cache,
        self._staff_detailed_cache,
        self._char_info_cache,
        self._person_info_cache,
    )
    for cache in caches:
        cache.clear()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal: rate-limited HTTP
|
||||
# ------------------------------------------------------------------
|
||||
def _get(self, url: str, params: "dict | None" = None) -> dict:
    """
    Rate-limited GET request (respects Jikan's ~3 req/s limit).

    Sleeps so that at least 0.4 s lie between the end of the previous
    request and the start of this one, then returns the decoded JSON
    body. Raises whatever the session raises, plus the error raised by
    ``raise_for_status`` for a non-2xx answer.
    """
    min_gap = 0.4
    elapsed = time.monotonic() - self._last_request_at
    if elapsed < min_gap:
        time.sleep(min_gap - elapsed)
    try:
        resp = self._session.get(url, params=params, timeout=self.request_timeout)
    finally:
        # Stamp the attempt even when the request raised: a failed call
        # still counts against the guard, otherwise the next call would
        # be sent without any pause.
        self._last_request_at = time.monotonic()
    resp.raise_for_status()
    return resp.json()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Module helper
|
||||
# --------------------------------------------------------------------------
|
||||
def _clean_mal_name(name: str) -> str:
    """
    Converts an MAL name into a comma-free, ComicInfo-safe form.

    The ComicInfo <Characters> tag is comma-separated, so a single MAL
    character "Hibino, Susuki" written into the XML would be parsed by
    Kavita as two persons ("Hibino" and "Susuki").

    Conversion:
        "Hibino, Susuki" -> "Susuki Hibino"   (Western: First Last)
        "Yamori, Kou"    -> "Kou Yamori"
        "Kotoyama"       -> "Kotoyama"        (unchanged)
        "A, B, C"        -> "B C A"           (extra commas become spaces)

    The text before the first comma is treated as the family name. The
    result never contains a comma. An empty or None name yields "".
    """
    if not name:
        return ""
    name = name.strip()
    if "," not in name:
        return name

    last, _, first = name.partition(",")
    last = last.strip()
    first = first.strip()
    if "," in first:
        # More than one comma: drop the remaining ones so the result
        # stays comma-free, and collapse the whitespace they leave.
        first = " ".join(first.replace(",", " ").split())
    if first and last:
        return f"{first} {last}"
    # Leading/trailing comma only: strip the commas, keep the rest.
    return " ".join(name.replace(",", " ").split())
|
||||
|
||||
|
||||
def _score_title(query: str, entry: dict) -> float:
    """
    Return the highest similarity between ``query`` and any title of a
    Jikan manga entry (title, title_english, title_japanese and every
    item of ``titles``), compared case-insensitively with
    difflib.SequenceMatcher. Returns 0.0 when the entry has no title.
    """
    needle = query.lower()
    names = [entry.get(key) or ""
             for key in ("title", "title_english", "title_japanese")]
    names.extend(alt.get("title") or "" for alt in (entry.get("titles") or []))
    return max(
        (difflib.SequenceMatcher(None, needle, name.lower()).ratio()
         for name in names if name),
        default=0.0,
    )
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Usage example
|
||||
# --------------------------------------------------------------------------
|
||||
if __name__ == "__main__":
    # Constructing twice must hand back the very same object.
    r1, r2 = MALResolver(), MALResolver()
    assert r1 is r2, "MALResolver must be a Singleton"

    mal_id = r1.find_mal_id("Yofukashi no Uta")
    print("MAL ID :", mal_id)

    stats = r1.get_stats(mal_id)
    if stats:
        for label, field in (("Score :", "score"), ("Rank :", "rank")):
            print(label, stats[field])

    chars = r1.get_characters_detailed(mal_id)
    first_names = [c["name"] for c in chars[:3]]
    print("Characters (first 3):", first_names)

    staff = r1.get_staff_detailed(mal_id)
    staff_names = [s["name"] for s in staff]
    print("Staff :", staff_names)
|
||||
@@ -0,0 +1,92 @@
|
||||
"""
|
||||
mangabaka_rate_limit.py
|
||||
=======================
|
||||
|
||||
Process-wide rate limiter for the MangaBaka API.
|
||||
|
||||
Apply via:
|
||||
|
||||
from MangaBakaRateLimit import apply_to_session
|
||||
apply_to_session(session)
|
||||
|
||||
This mounts a custom ``requests.adapters.HTTPAdapter`` on the given
|
||||
``requests.Session`` for the ``api.mangabaka.dev`` host. Every request
|
||||
going through that adapter is:
|
||||
|
||||
* throttled so that no two requests are dispatched within
|
||||
``_MIN_INTERVAL`` seconds of one another, and
|
||||
* retried on HTTP 429, honouring the ``Retry-After`` header when
|
||||
present, otherwise exponential backoff capped at ``_MAX_BACKOFF``.
|
||||
|
||||
Throttle state is module-global, so even if several sessions exist in
|
||||
the same process they share one budget — important because they all hit
|
||||
the same upstream IP-based limit.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
|
||||
from requests.adapters import HTTPAdapter
|
||||
|
||||
|
||||
# Tune these if MangaBaka tightens or loosens limits.
|
||||
_MIN_INTERVAL = 1.1 # seconds between consecutive requests
|
||||
_MAX_RETRIES = 6 # retries on 429 before giving up
|
||||
_MAX_BACKOFF = 60.0 # cap on per-attempt backoff sleep
|
||||
|
||||
|
||||
# --- shared throttle state --------------------------------------------------
|
||||
_state_lock = threading.Lock()
|
||||
_last_request_time = 0.0
|
||||
|
||||
|
||||
def _wait_for_slot() -> None:
    """
    Block until ``_MIN_INTERVAL`` seconds have passed since the last
    reserved slot, then reserve the current moment as the new slot.

    The check and the reservation happen under ``_state_lock``; the
    sleep happens outside of it, and the check is repeated afterwards.
    """
    global _last_request_time
    while True:
        with _state_lock:
            now = time.monotonic()
            delay = _MIN_INTERVAL - (now - _last_request_time)
            slot_free = delay <= 0
            if slot_free:
                _last_request_time = now
        if slot_free:
            return
        # Lock released: other callers can check while we wait.
        time.sleep(delay)
|
||||
|
||||
|
||||
class _MangaBakaRateLimitAdapter(HTTPAdapter):
    """HTTPAdapter that throttles every request and retries on HTTP 429."""

    def send(self, request, **kwargs):
        """
        Send ``request`` after waiting for a free rate-limit slot.

        A 429 answer is retried up to ``_MAX_RETRIES`` times. The pause
        before a retry is the numeric ``Retry-After`` header when it is
        present and usable, otherwise an exponential backoff capped at
        ``_MAX_BACKOFF``. Once the retries are used up, the last 429
        response is returned untouched so the caller can inspect it.
        """
        response = None
        for attempt in range(_MAX_RETRIES + 1):
            _wait_for_slot()
            response = super().send(request, **kwargs)
            if response.status_code != 429:
                return response
            if attempt >= _MAX_RETRIES:
                # Retries exhausted: return the response as received.
                # Closing it or sleeping here would only hand the caller
                # a released response after a pointless delay.
                return response

            backoff = min(_MAX_BACKOFF, 2.0 * (2 ** attempt))
            retry_after = response.headers.get("Retry-After")
            try:
                wait = float(retry_after) if retry_after else backoff
            except ValueError:
                # Retry-After may also be an HTTP-date; use the backoff.
                wait = backoff
            if not (0.0 <= wait < float("inf")):
                # Negative, NaN or infinite values would make
                # time.sleep() raise; use the backoff instead.
                wait = backoff

            print(f"[MangaBaka] 429 — backing off {wait:.1f}s "
                  f"(attempt {attempt + 1}/{_MAX_RETRIES})",
                  flush=True)
            response.close()
            time.sleep(wait)

        # Only reachable if the loop body never ran (_MAX_RETRIES < 0).
        return response
|
||||
|
||||
|
||||
def apply_to_session(session) -> None:
    """
    Mount one shared rate-limit adapter on ``session`` for both the
    https and the http prefix of ``api.mangabaka.dev``.

    Calling this again replaces the adapter mounted earlier for the
    same prefixes.
    """
    adapter = _MangaBakaRateLimitAdapter()
    for scheme in ("https", "http"):
        session.mount(f"{scheme}://api.mangabaka.dev/", adapter)
|
||||
@@ -0,0 +1,195 @@
|
||||
"""
|
||||
mangabaka_works_resolver.py
|
||||
===========================
|
||||
|
||||
Fetches volume-level (work) data from the MangaBaka API.
|
||||
|
||||
Each "work" is a physical tankobon volume and may carry:
|
||||
- volume number
|
||||
- ISBN / GTIN
|
||||
- page count (used for chapter-to-volume estimation)
|
||||
- release date
|
||||
- cover image (raw / default / small variants)
|
||||
|
||||
Only works that have a usable cover are kept in the cache.
|
||||
Works without a cover are discarded at fetch time.
|
||||
If no volume is assigned for a chapter, callers fall back to the
|
||||
default series cover from the series object itself.
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
requests -> pip install requests
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class MangaBakaWorksResolver:
|
||||
"""
|
||||
Fetches and caches MangaBaka volume (work) data for a series.
|
||||
Only works that have a cover image are retained in the cache.
|
||||
"""
|
||||
|
||||
def __init__(self, api_base_url: str = "https://api.mangabaka.dev/v1",
|
||||
request_timeout: int = 30,
|
||||
session: "requests.Session | None" = None):
|
||||
self.api_base_url = api_base_url.rstrip("/")
|
||||
self.request_timeout = request_timeout
|
||||
self._session = session or requests.Session()
|
||||
self._session.headers.setdefault("User-Agent", "MangaBakaWorksResolver/1.0")
|
||||
|
||||
# Cache: series_id (str) -> list of work dicts (only those with covers)
|
||||
self._cache: dict[str, list[dict]] = {}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public API
|
||||
# ------------------------------------------------------------------
|
||||
def get_works(self, series_id: str) -> list[dict]:
|
||||
"""
|
||||
Returns volume-level works for a series, filtered to those that have
|
||||
a usable cover image. Results are cached per series.
|
||||
|
||||
Pages through the API (limit=50) until the response returns an empty
|
||||
page, collecting all works before applying the cover filter.
|
||||
"""
|
||||
if not series_id:
|
||||
return []
|
||||
|
||||
if series_id in self._cache:
|
||||
return self._cache[series_id]
|
||||
|
||||
all_works: list[dict] = []
|
||||
page = 1
|
||||
try:
|
||||
while True:
|
||||
resp = self._session.get(
|
||||
f"{self.api_base_url}/series/{series_id}/works",
|
||||
params={"limit": 50, "page": page},
|
||||
timeout=self.request_timeout,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
page_data = resp.json().get("data") or []
|
||||
if not page_data:
|
||||
break
|
||||
all_works.extend(page_data)
|
||||
if len(page_data) < 50:
|
||||
break
|
||||
page += 1
|
||||
except requests.RequestException:
|
||||
if not all_works:
|
||||
return []
|
||||
|
||||
# Discard works that carry no usable cover
|
||||
works_with_cover = [w for w in all_works if w.get("images")]
|
||||
self._cache[series_id] = works_with_cover
|
||||
return works_with_cover
|
||||
|
||||
def get_work_for_volume(self, series_id: str, volume) -> "dict | None":
    """
    Return the first work whose ``sequence_string`` equals ``volume``
    after both are normalised with ``_norm_vol`` ("1.0" == "1"), or
    None when the series has no works or none of them matches.
    """
    works = self.get_works(series_id)
    if not works:
        return None

    wanted = _norm_vol(volume)
    return next(
        (work for work in works
         if _norm_vol(work.get("sequence_string")) == wanted),
        None,
    )
|
||||
|
||||
def get_cover_for_volume(self, series_id: str, volume) -> "str | None":
|
||||
"""Returns the cover URL for a specific volume, or None if not found."""
|
||||
work = self.get_work_for_volume(series_id, volume)
|
||||
if not work:
|
||||
return None
|
||||
return self._pick_cover_url(work.get("images")[0].get("image"))
|
||||
|
||||
def get_page_counts(self, series_id: str) -> "dict[str, int]":
    """
    Build ``{volume_str: page_count}`` from the works of a series.

    The volume key is the normalised ``volume`` field; works without a
    volume, without ``pages``, or with a non-integer ``pages`` value
    are left out.
    """
    # NOTE(review): get_work_for_volume matches on "sequence_string"
    # while this reads "volume" — confirm both fields exist on a work.
    counts: dict[str, int] = {}
    for work in self.get_works(series_id):
        volume_key = _norm_vol(work.get("volume"))
        pages = work.get("pages")
        if not volume_key or pages is None:
            continue
        try:
            counts[volume_key] = int(pages)
        except (TypeError, ValueError):
            continue
    return counts
|
||||
|
||||
def clear_cache(self) -> None:
|
||||
"""Clears the internal works cache."""
|
||||
self._cache.clear()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ------------------------------------------------------------------
|
||||
@staticmethod
|
||||
def _pick_cover_url(cover) -> "str | None":
|
||||
"""
|
||||
Selects the best cover URL from a MangaBaka cover object.
|
||||
|
||||
Real API shape:
|
||||
"raw": {"url": "...", "size": ..., "height": ..., "width": ...}
|
||||
"x150": {"x1": "...", "x2": "...", "x3": "..."}
|
||||
"x250": {...}
|
||||
"x350": {...}
|
||||
|
||||
Order: raw original > x350@x3 > x250@x3 > x150@x3 ...
|
||||
"""
|
||||
if not cover:
|
||||
return None
|
||||
if isinstance(cover, str):
|
||||
return cover
|
||||
if not isinstance(cover, dict):
|
||||
return None
|
||||
|
||||
raw = cover.get("raw")
|
||||
if isinstance(raw, dict):
|
||||
url = raw.get("url")
|
||||
if isinstance(url, str) and url:
|
||||
return url
|
||||
elif isinstance(raw, str) and raw:
|
||||
return raw
|
||||
|
||||
for size_key in ("x350", "x250", "x150"):
|
||||
variant = cover.get(size_key)
|
||||
if isinstance(variant, dict):
|
||||
for density in ("x3", "x2", "x1"):
|
||||
url = variant.get(density)
|
||||
if isinstance(url, str) and url:
|
||||
return url
|
||||
elif isinstance(variant, str) and variant:
|
||||
return variant
|
||||
|
||||
# Last-ditch: any HTTP URL anywhere in the structure
|
||||
for val in cover.values():
|
||||
if isinstance(val, str) and val.startswith("http"):
|
||||
return val
|
||||
if isinstance(val, dict):
|
||||
for sub_val in val.values():
|
||||
if isinstance(sub_val, str) and sub_val.startswith("http"):
|
||||
return sub_val
|
||||
return None
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Module helper
|
||||
# --------------------------------------------------------------------------
|
||||
def _norm_vol(value) -> str:
    """
    Normalises a volume identifier: strips whitespace, removes trailing .0.

    "1.0", 1 and 1.0 all become "1"; "1.5" stays "1.5"; text that is not
    a number is returned stripped. None (and any other falsy non-number)
    becomes "". The number zero is a real volume and becomes "0" — it
    must not collapse to the empty string the way None does.
    """
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        # Numbers are converted directly so that 0 / 0.0 survive.
        text = str(value)
    else:
        text = str(value or "").strip()
    try:
        number = float(text)
    except ValueError:
        return text
    return str(int(number)) if number.is_integer() else text
|
||||
@@ -0,0 +1,191 @@
|
||||
"""
|
||||
matches_cache.py
|
||||
================
|
||||
|
||||
Persistent JSON cache that maps a Kavita series title to the MangaBaka
|
||||
series it was matched against, plus enough context to update the right
|
||||
Kavita record later.
|
||||
|
||||
Structure on disk::
|
||||
|
||||
{
|
||||
"matches": {
|
||||
"<kavita series name>": {
|
||||
"mangabakaId": "12345",
|
||||
"mangabakaName": "Re:Zero",
|
||||
"imageUrl": "https://.../cover.jpg",
|
||||
"kavitaSeriesId": 42,
|
||||
"libraryId": 3,
|
||||
"firstMatchTime": 1700000000,
|
||||
"lastUpdateTime": 1700100000
|
||||
},
|
||||
...
|
||||
}
|
||||
}
|
||||
|
||||
The cache is the source of truth for the WebUI's matches table and is
|
||||
written back on every mutation so a crash mid-batch does not lose
|
||||
matches that were resolved in the current run.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class MatchesCache:
|
||||
def __init__(self, path):
    """Remember the cache file path, start empty, then call ``_load``."""
    # Re-entrant lock guarding every access to self._data.
    self._lock = threading.RLock()
    self._data: dict = {"matches": {}}
    self._path = Path(path)
    self._load()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public lookup / mutation API
|
||||
# ------------------------------------------------------------------
|
||||
def get(self, title: str) -> "dict | None":
|
||||
with self._lock:
|
||||
entry = self._data["matches"].get(title)
|
||||
return dict(entry) if entry else None
|
||||
|
||||
def get_by_kavita_id(self, kavita_series_id: int) -> "tuple[str, dict] | None":
|
||||
with self._lock:
|
||||
for title, entry in self._data["matches"].items():
|
||||
if entry.get("kavitaSeriesId") == kavita_series_id:
|
||||
return title, dict(entry)
|
||||
return None
|
||||
|
||||
def get_by_mangabaka_id(self, mangabaka_id) -> "tuple[str, dict] | None":
|
||||
target = str(mangabaka_id) if mangabaka_id is not None else ""
|
||||
if not target:
|
||||
return None
|
||||
with self._lock:
|
||||
for title, entry in self._data["matches"].items():
|
||||
if str(entry.get("mangabakaId") or "") == target:
|
||||
return title, dict(entry)
|
||||
return None
|
||||
|
||||
def upsert(self, title: str, *,
|
||||
mangabaka_id=None,
|
||||
mangabaka_name=None,
|
||||
image_url=None,
|
||||
kavita_series_id=None,
|
||||
library_id=None,
|
||||
first_match_time=None,
|
||||
last_update_time=None) -> dict:
|
||||
"""
|
||||
Inserts or updates an entry. Only fields passed explicitly are
|
||||
modified; the rest are preserved.
|
||||
"""
|
||||
with self._lock:
|
||||
entry = self._data["matches"].get(title)
|
||||
if entry is None:
|
||||
entry = {
|
||||
"mangabakaId": "",
|
||||
"mangabakaName": "",
|
||||
"imageUrl": "",
|
||||
"kavitaSeriesId": 0,
|
||||
"libraryId": 0,
|
||||
"firstMatchTime": int(time.time()),
|
||||
"lastUpdateTime": 0,
|
||||
}
|
||||
self._data["matches"][title] = entry
|
||||
if mangabaka_id is not None:
|
||||
entry["mangabakaId"] = str(mangabaka_id)
|
||||
if mangabaka_name is not None:
|
||||
entry["mangabakaName"] = mangabaka_name
|
||||
if image_url is not None:
|
||||
entry["imageUrl"] = image_url
|
||||
if kavita_series_id is not None:
|
||||
try:
|
||||
entry["kavitaSeriesId"] = int(kavita_series_id)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
if library_id is not None:
|
||||
try:
|
||||
entry["libraryId"] = int(library_id)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
if first_match_time is not None:
|
||||
try:
|
||||
entry["firstMatchTime"] = int(first_match_time)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
if last_update_time is not None:
|
||||
try:
|
||||
entry["lastUpdateTime"] = int(last_update_time)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
self._save_unlocked()
|
||||
return dict(entry)
|
||||
|
||||
def mark_updated(self, title: str) -> None:
|
||||
with self._lock:
|
||||
entry = self._data["matches"].get(title)
|
||||
if entry is not None:
|
||||
entry["lastUpdateTime"] = int(time.time())
|
||||
self._save_unlocked()
|
||||
|
||||
def rename(self, old_title: str, new_title: str) -> bool:
|
||||
if not new_title or old_title == new_title:
|
||||
return False
|
||||
with self._lock:
|
||||
entry = self._data["matches"].pop(old_title, None)
|
||||
if entry is None:
|
||||
return False
|
||||
self._data["matches"][new_title] = entry
|
||||
self._save_unlocked()
|
||||
return True
|
||||
|
||||
def remove(self, title: str) -> bool:
|
||||
with self._lock:
|
||||
existed = title in self._data["matches"]
|
||||
if existed:
|
||||
del self._data["matches"][title]
|
||||
self._save_unlocked()
|
||||
return existed
|
||||
|
||||
def all(self) -> dict:
|
||||
with self._lock:
|
||||
return {"matches": {k: dict(v)
|
||||
for k, v in self._data["matches"].items()}}
|
||||
|
||||
def all_in_libraries(self, library_ids: "list[int] | None") -> dict:
|
||||
"""
|
||||
Returns the cache filtered to entries whose libraryId is in
|
||||
`library_ids`. Pass None to return everything.
|
||||
"""
|
||||
if library_ids is None:
|
||||
return self.all()
|
||||
ids = {int(i) for i in library_ids}
|
||||
with self._lock:
|
||||
return {"matches": {
|
||||
k: dict(v) for k, v in self._data["matches"].items()
|
||||
if int(v.get("libraryId") or 0) in ids
|
||||
}}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal IO
|
||||
# ------------------------------------------------------------------
|
||||
def _load(self) -> None:
|
||||
if not self._path.is_file():
|
||||
return
|
||||
try:
|
||||
with self._path.open("r", encoding="utf-8") as f:
|
||||
loaded = json.load(f)
|
||||
except (OSError, json.JSONDecodeError) as exc:
|
||||
print(f"[MatchesCache] failed to load {self._path}: {exc}",
|
||||
flush=True)
|
||||
return
|
||||
if isinstance(loaded, dict) and isinstance(loaded.get("matches"), dict):
|
||||
self._data = loaded
|
||||
|
||||
def _save_unlocked(self) -> None:
|
||||
self._path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = self._path.with_suffix(self._path.suffix + ".tmp")
|
||||
with tmp.open("w", encoding="utf-8") as f:
|
||||
json.dump(self._data, f, ensure_ascii=False, indent=2)
|
||||
tmp.replace(self._path)
|
||||
@@ -0,0 +1,757 @@
|
||||
"""
|
||||
matches_web_app.py
|
||||
==================
|
||||
|
||||
Flask web UI for the Kavita light-novel metadata fetcher.
|
||||
|
||||
Pages
|
||||
-----
|
||||
GET / HTML UI (matches table + actions)
|
||||
|
||||
Match cache (JSON)
|
||||
------------------
|
||||
GET /api/libraries Lists Kavita libraries
|
||||
GET /api/matches Full cache, optionally filtered by libraryIds=
|
||||
POST /api/matches Upsert a single match
|
||||
body: {title, mangabakaId}
|
||||
POST /api/matches/delete Remove a match
|
||||
body: {title}
|
||||
|
||||
Background jobs
|
||||
---------------
|
||||
POST /api/build Build matches for libraries
|
||||
body: {libraryIds: [int, ...]}
|
||||
POST /api/update Update a single series
|
||||
body: {kavitaSeriesId}
|
||||
POST /api/update-all Update every cached series in libraries
|
||||
body: {libraryIds: [int, ...] | null}
|
||||
GET /api/status Current background job status (status, log)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
|
||||
from flask import Flask, jsonify, request, Response
|
||||
|
||||
from MatchesCache import MatchesCache
|
||||
from LightNovelMetadataBuilder import pick_thumbnail_url
|
||||
|
||||
|
||||
_INDEX_HTML = r"""<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Kavita light-novel metadata fetcher</title>
|
||||
<style>
|
||||
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
|
||||
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
|
||||
.bar { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
|
||||
.bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
|
||||
.bar select[multiple] { background:#222; color:#eee; border:1px solid #444; min-width: 14rem; min-height: 4.2rem; }
|
||||
button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
|
||||
button.primary { background:#2563eb; border-color:#2563eb; color:white; }
|
||||
button.danger { background:#7f1d1d; border-color:#7f1d1d; color:white; }
|
||||
button.success { background:#15803d; border-color:#15803d; color:white; }
|
||||
button:disabled { opacity:.5; cursor:default; }
|
||||
table { border-collapse: collapse; width: 100%; }
|
||||
th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
|
||||
th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
|
||||
th.sortable { cursor: pointer; user-select: none; }
|
||||
th.sortable:hover { background:#252525; }
|
||||
th .arrow { display:inline-block; width:.8em; color:#9ca3af; }
|
||||
tr:nth-child(even) td { background: #161616; }
|
||||
td.image img { max-width: 90px; max-height: 130px; display:block; }
|
||||
td.id input { width: 12rem; padding: .25rem; background:#222; color:#eee; border:1px solid #444; font-family: monospace; }
|
||||
td.title a { color: #60a5fa; text-decoration: none; }
|
||||
td.title a:hover { text-decoration: underline; }
|
||||
td.actions { white-space: nowrap; }
|
||||
.status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
|
||||
.dirty td { background: #1f2937 !important; }
|
||||
.count { color:#9ca3af; font-size:.9rem; margin-left:.5rem; }
|
||||
pre.log { background:#0a0a0a; color:#9ca3af; padding:.5rem .75rem; max-height:18rem; overflow:auto; border:1px solid #333; font-size:.8rem; white-space:pre-wrap; }
|
||||
label { font-size:.9rem; color:#9ca3af; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Kavita light-novel metadata fetcher <span id="count" class="count"></span></h1>
|
||||
|
||||
<div class="bar">
|
||||
<label>Libraries
|
||||
<select id="libraries" multiple size="3"></select>
|
||||
</label>
|
||||
<button id="reload">Reload</button>
|
||||
<button id="build">Match all in libraries</button>
|
||||
<button id="updateAll" class="success">Update all in libraries</button>
|
||||
<button id="batchSave" class="primary">Save dirty (0)</button>
|
||||
<span class="status" id="status"></span>
|
||||
</div>
|
||||
|
||||
<div class="bar">
|
||||
<input id="filter" type="search" placeholder="Filter by title…">
|
||||
<span class="count" id="jobStatus"></span>
|
||||
</div>
|
||||
|
||||
<pre id="jobLog" class="log" hidden></pre>
|
||||
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th class="sortable" data-col="title">Title <span class="arrow" id="arrow-title"></span></th>
|
||||
<th>mangabakaId</th>
|
||||
<th>mangabakaName</th>
|
||||
<th>library</th>
|
||||
<th class="sortable" data-col="lastUpdateTime">Last update <span class="arrow" id="arrow-lastUpdateTime"></span></th>
|
||||
<th>Image</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="rows"></tbody>
|
||||
</table>
|
||||
|
||||
<script>
|
||||
const MB_SEARCH = "https://mangabaka.org/search?q=";
|
||||
let matchesData = {};
|
||||
let librariesById = {};
|
||||
let currentSort = { col: "title", asc: true };
|
||||
let jobPollHandle = null;
|
||||
|
||||
function fmtTime(unix) {
|
||||
if (!unix) return "";
|
||||
const d = new Date(unix * 1000);
|
||||
return d.toLocaleString();
|
||||
}
|
||||
|
||||
function setStatus(msg) { document.getElementById("status").textContent = msg; }
|
||||
|
||||
function selectedLibraryIds() {
|
||||
const sel = document.getElementById("libraries");
|
||||
return Array.from(sel.selectedOptions).map(o => parseInt(o.value, 10));
|
||||
}
|
||||
|
||||
function updateDirtyCount() {
|
||||
const n = document.querySelectorAll("#rows tr.dirty").length;
|
||||
const btn = document.getElementById("batchSave");
|
||||
btn.textContent = "Save dirty (" + n + ")";
|
||||
btn.disabled = n === 0;
|
||||
}
|
||||
|
||||
function makeRow(title, e) {
|
||||
const tr = document.createElement("tr");
|
||||
tr.dataset.title = title;
|
||||
|
||||
// Title — links to MangaBaka search
|
||||
const titleTd = document.createElement("td");
|
||||
titleTd.className = "title";
|
||||
const a = document.createElement("a");
|
||||
a.href = MB_SEARCH + encodeURIComponent(title) + "&type=novel";
|
||||
a.target = "_blank";
|
||||
a.rel = "noopener";
|
||||
a.textContent = title;
|
||||
titleTd.appendChild(a);
|
||||
tr.appendChild(titleTd);
|
||||
|
||||
// mangabakaId (editable)
|
||||
const idTd = document.createElement("td");
|
||||
idTd.className = "id";
|
||||
const idInp = document.createElement("input");
|
||||
idInp.value = e.mangabakaId || "";
|
||||
idInp.dataset.original = e.mangabakaId || "";
|
||||
idInp.addEventListener("input", () => {
|
||||
if (idInp.value !== idInp.dataset.original) tr.classList.add("dirty");
|
||||
else tr.classList.remove("dirty");
|
||||
updateDirtyCount();
|
||||
});
|
||||
idTd.appendChild(idInp);
|
||||
tr.appendChild(idTd);
|
||||
|
||||
// mangabakaName
|
||||
const nameTd = document.createElement("td");
|
||||
nameTd.textContent = e.mangabakaName || "";
|
||||
tr.appendChild(nameTd);
|
||||
|
||||
// library
|
||||
const libTd = document.createElement("td");
|
||||
const libId = e.libraryId || 0;
|
||||
libTd.textContent = librariesById[libId] || (libId ? "#" + libId : "");
|
||||
tr.appendChild(libTd);
|
||||
|
||||
// lastUpdateTime
|
||||
const timeTd = document.createElement("td");
|
||||
timeTd.textContent = e.lastUpdateTime ? fmtTime(e.lastUpdateTime) : "";
|
||||
tr.appendChild(timeTd);
|
||||
|
||||
// Image
|
||||
const imgTd = document.createElement("td");
|
||||
imgTd.className = "image";
|
||||
const img = document.createElement("img");
|
||||
img.src = e.imageUrl || "";
|
||||
img.alt = "";
|
||||
img.loading = "lazy";
|
||||
imgTd.appendChild(img);
|
||||
tr.appendChild(imgTd);
|
||||
|
||||
// Actions
|
||||
const actTd = document.createElement("td");
|
||||
actTd.className = "actions";
|
||||
|
||||
const save = document.createElement("button");
|
||||
save.textContent = "Save";
|
||||
save.className = "primary";
|
||||
save.addEventListener("click", () => saveRow(tr));
|
||||
actTd.appendChild(save);
|
||||
|
||||
const update = document.createElement("button");
|
||||
update.textContent = "Update";
|
||||
update.className = "success";
|
||||
update.style.marginLeft = ".25rem";
|
||||
update.disabled = !e.kavitaSeriesId;
|
||||
update.title = e.kavitaSeriesId
|
||||
? "Push metadata to Kavita series #" + e.kavitaSeriesId
|
||||
: "Run a Match cycle first so we know the Kavita series id";
|
||||
update.addEventListener("click", () => updateRow(tr));
|
||||
actTd.appendChild(update);
|
||||
|
||||
const del = document.createElement("button");
|
||||
del.textContent = "Delete";
|
||||
del.className = "danger";
|
||||
del.style.marginLeft = ".25rem";
|
||||
del.addEventListener("click", () => deleteRow(tr));
|
||||
actTd.appendChild(del);
|
||||
|
||||
tr.appendChild(actTd);
|
||||
|
||||
tr._idInp = idInp;
|
||||
tr._nameTd = nameTd;
|
||||
tr._img = img;
|
||||
tr._timeTd = timeTd;
|
||||
tr._update = update;
|
||||
return tr;
|
||||
}
|
||||
|
||||
async function saveRow(tr) {
|
||||
const title = tr.dataset.title;
|
||||
const newId = tr._idInp.value.trim();
|
||||
setStatus("Saving " + title + "…");
|
||||
try {
|
||||
const r = await fetch("/api/matches", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ title: title, mangabakaId: newId }),
|
||||
});
|
||||
if (!r.ok) throw new Error(await r.text());
|
||||
const data = await r.json();
|
||||
const entry = data.entry || {};
|
||||
matchesData[title] = entry;
|
||||
tr._idInp.value = entry.mangabakaId || "";
|
||||
tr._idInp.dataset.original = entry.mangabakaId || "";
|
||||
tr._nameTd.textContent = entry.mangabakaName || "";
|
||||
tr._img.src = entry.imageUrl || "";
|
||||
tr.classList.remove("dirty");
|
||||
updateDirtyCount();
|
||||
setStatus("Saved " + title);
|
||||
return true;
|
||||
} catch (err) {
|
||||
setStatus("Save failed (" + title + "): " + err.message);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
async function deleteRow(tr) {
|
||||
const title = tr.dataset.title;
|
||||
if (!confirm("Delete " + title + "?")) return;
|
||||
setStatus("Deleting " + title + "…");
|
||||
try {
|
||||
const r = await fetch("/api/matches/delete", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ title: title }),
|
||||
});
|
||||
if (!r.ok) throw new Error(await r.text());
|
||||
delete matchesData[title];
|
||||
tr.remove();
|
||||
document.getElementById("count").textContent =
|
||||
"(" + Object.keys(matchesData).length + " entries)";
|
||||
setStatus("Deleted");
|
||||
} catch (err) {
|
||||
setStatus("Delete failed: " + err.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function updateRow(tr) {
|
||||
const title = tr.dataset.title;
|
||||
const entry = matchesData[title] || {};
|
||||
if (!entry.kavitaSeriesId) {
|
||||
setStatus("No kavitaSeriesId for " + title + " — run match first");
|
||||
return;
|
||||
}
|
||||
setStatus("Updating " + title + "…");
|
||||
tr._update.disabled = true;
|
||||
try {
|
||||
const r = await fetch("/api/update", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ kavitaSeriesId: entry.kavitaSeriesId }),
|
||||
});
|
||||
if (!r.ok) throw new Error(await r.text());
|
||||
const res = await r.json();
|
||||
setStatus(res.ok ? "Updated " + title : "Update failed: " + res.error);
|
||||
if (res.ok) {
|
||||
entry.lastUpdateTime = Math.floor(Date.now() / 1000);
|
||||
tr._timeTd.textContent = fmtTime(entry.lastUpdateTime);
|
||||
}
|
||||
} catch (err) {
|
||||
setStatus("Update failed: " + err.message);
|
||||
} finally {
|
||||
tr._update.disabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
async function batchSave() {
|
||||
const dirty = Array.from(document.querySelectorAll("#rows tr.dirty"));
|
||||
if (dirty.length === 0) return;
|
||||
if (!confirm("Save " + dirty.length + " changed row(s)?")) return;
|
||||
setStatus("Batch saving " + dirty.length + " rows…");
|
||||
let ok = 0, fail = 0;
|
||||
for (const tr of dirty) {
|
||||
const success = await saveRow(tr);
|
||||
if (success) ok++; else fail++;
|
||||
}
|
||||
setStatus("Batch: " + ok + " ok, " + fail + " failed");
|
||||
}
|
||||
|
||||
function sortedTitles() {
|
||||
const titles = Object.keys(matchesData);
|
||||
const dir = currentSort.asc ? 1 : -1;
|
||||
if (currentSort.col === "title") {
|
||||
return titles.sort((a, b) => a.localeCompare(b) * dir);
|
||||
}
|
||||
if (currentSort.col === "lastUpdateTime") {
|
||||
return titles.sort((a, b) => {
|
||||
const av = matchesData[a].lastUpdateTime || 0;
|
||||
const bv = matchesData[b].lastUpdateTime || 0;
|
||||
return (av - bv) * dir;
|
||||
});
|
||||
}
|
||||
return titles;
|
||||
}
|
||||
|
||||
function updateSortArrows() {
|
||||
for (const a of document.querySelectorAll("th .arrow")) a.textContent = "";
|
||||
const id = "arrow-" + currentSort.col;
|
||||
const el = document.getElementById(id);
|
||||
if (el) el.textContent = currentSort.asc ? "▲" : "▼";
|
||||
}
|
||||
|
||||
function applyFilter() {
|
||||
const q = document.getElementById("filter").value.toLowerCase();
|
||||
const libs = new Set(selectedLibraryIds());
|
||||
for (const tr of document.querySelectorAll("#rows tr")) {
|
||||
const title = tr.dataset.title;
|
||||
const entry = matchesData[title] || {};
|
||||
const titleMatch = title.toLowerCase().includes(q);
|
||||
const libMatch = libs.size === 0 || libs.has(entry.libraryId || 0);
|
||||
tr.style.display = (titleMatch && libMatch) ? "" : "none";
|
||||
}
|
||||
}
|
||||
|
||||
function render() {
|
||||
const tbody = document.getElementById("rows");
|
||||
tbody.innerHTML = "";
|
||||
for (const t of sortedTitles()) {
|
||||
tbody.appendChild(makeRow(t, matchesData[t]));
|
||||
}
|
||||
updateSortArrows();
|
||||
applyFilter();
|
||||
updateDirtyCount();
|
||||
document.getElementById("count").textContent =
|
||||
"(" + Object.keys(matchesData).length + " entries)";
|
||||
}
|
||||
|
||||
async function loadLibraries() {
|
||||
try {
|
||||
const r = await fetch("/api/libraries");
|
||||
const data = await r.json();
|
||||
const libs = data.libraries || [];
|
||||
const defaults = new Set(data.defaults || []);
|
||||
librariesById = {};
|
||||
const sel = document.getElementById("libraries");
|
||||
sel.innerHTML = "";
|
||||
for (const lib of libs) {
|
||||
librariesById[lib.id] = lib.name;
|
||||
const opt = document.createElement("option");
|
||||
opt.value = lib.id;
|
||||
opt.textContent = lib.name + " (#" + lib.id + ")";
|
||||
if (defaults.has(lib.id)) opt.selected = true;
|
||||
sel.appendChild(opt);
|
||||
}
|
||||
} catch (err) {
|
||||
setStatus("Failed to load libraries: " + err.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function load() {
|
||||
setStatus("Loading…");
|
||||
try {
|
||||
const r = await fetch("/api/matches");
|
||||
const data = await r.json();
|
||||
matchesData = data.matches || {};
|
||||
render();
|
||||
setStatus(Object.keys(matchesData).length + " entries");
|
||||
} catch (err) {
|
||||
setStatus("Load failed: " + err.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function pollJob() {
|
||||
try {
|
||||
const r = await fetch("/api/status");
|
||||
const s = await r.json();
|
||||
const jobStatus = document.getElementById("jobStatus");
|
||||
const jobLog = document.getElementById("jobLog");
|
||||
if (!s.running && !s.lastFinished) {
|
||||
jobStatus.textContent = "";
|
||||
jobLog.hidden = true;
|
||||
stopPolling();
|
||||
return;
|
||||
}
|
||||
jobLog.hidden = false;
|
||||
jobLog.textContent = (s.log || []).join("\n");
|
||||
jobLog.scrollTop = jobLog.scrollHeight;
|
||||
if (s.running) {
|
||||
jobStatus.textContent = "Running: " + (s.label || "");
|
||||
} else {
|
||||
jobStatus.textContent = "Done: " + (s.label || "");
|
||||
stopPolling();
|
||||
load();
|
||||
}
|
||||
} catch (err) {
|
||||
/* keep polling silently */
|
||||
}
|
||||
}
|
||||
|
||||
function startPolling() {
|
||||
if (jobPollHandle) return;
|
||||
jobPollHandle = setInterval(pollJob, 1000);
|
||||
pollJob();
|
||||
}
|
||||
|
||||
function stopPolling() {
|
||||
if (jobPollHandle) clearInterval(jobPollHandle);
|
||||
jobPollHandle = null;
|
||||
}
|
||||
|
||||
async function startBuild() {
|
||||
const libs = selectedLibraryIds();
|
||||
if (libs.length === 0) {
|
||||
setStatus("Pick at least one library");
|
||||
return;
|
||||
}
|
||||
if (!confirm("Match every series in " + libs.length + " library(ies)?")) return;
|
||||
setStatus("Build started");
|
||||
try {
|
||||
const r = await fetch("/api/build", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ libraryIds: libs }),
|
||||
});
|
||||
if (!r.ok) throw new Error(await r.text());
|
||||
startPolling();
|
||||
} catch (err) {
|
||||
setStatus("Build failed: " + err.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function startUpdateAll() {
|
||||
const libs = selectedLibraryIds();
|
||||
if (libs.length === 0) {
|
||||
if (!confirm("No libraries selected — update every cached series?")) return;
|
||||
} else if (!confirm("Update every cached series in " + libs.length + " library(ies)?")) {
|
||||
return;
|
||||
}
|
||||
setStatus("Update-all started");
|
||||
try {
|
||||
const r = await fetch("/api/update-all", {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ libraryIds: libs.length ? libs : null }),
|
||||
});
|
||||
if (!r.ok) throw new Error(await r.text());
|
||||
startPolling();
|
||||
} catch (err) {
|
||||
setStatus("Update-all failed: " + err.message);
|
||||
}
|
||||
}
|
||||
|
||||
document.getElementById("filter").addEventListener("input", applyFilter);
|
||||
document.getElementById("libraries").addEventListener("change", applyFilter);
|
||||
document.getElementById("reload").addEventListener("click", load);
|
||||
document.getElementById("batchSave").addEventListener("click", batchSave);
|
||||
document.getElementById("build").addEventListener("click", startBuild);
|
||||
document.getElementById("updateAll").addEventListener("click", startUpdateAll);
|
||||
for (const th of document.querySelectorAll("th.sortable")) {
|
||||
th.addEventListener("click", () => {
|
||||
const col = th.dataset.col;
|
||||
if (currentSort.col === col) currentSort.asc = !currentSort.asc;
|
||||
else { currentSort.col = col; currentSort.asc = true; }
|
||||
render();
|
||||
});
|
||||
}
|
||||
|
||||
(async () => {
|
||||
await loadLibraries();
|
||||
await load();
|
||||
// Resume polling if there's a job running from a previous session
|
||||
pollJob();
|
||||
})();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
class _JobState:
    """Thread-safe container for the current background job's progress.

    At most one job runs at a time. The job's target receives this object
    as its first argument and reports progress through ``append``; readers
    poll ``snapshot``.
    """

    # The log is trimmed to the newest _LOG_KEEP lines once it grows past
    # _LOG_MAX, so the status response stays bounded.
    _LOG_MAX = 1000
    _LOG_KEEP = 800

    def __init__(self):
        self._lock = threading.Lock()
        self._running = False
        self._label = ""
        self._log: "list[str]" = []
        self._last_finished_at = 0
        self._thread: "threading.Thread | None" = None

    def start(self, label: str, target, *args, **kwargs) -> bool:
        """Run ``target(self, *args, **kwargs)`` on a daemon thread.

        Args:
            label: Human-readable job name shown in the status output.
            target: Callable invoked with this ``_JobState`` followed by
                ``*args`` / ``**kwargs``.

        Returns:
            True when the job was started, False when another job is still
            running (nothing is changed in that case).

        Raises:
            RuntimeError: propagated from ``Thread.start`` if the thread
                cannot be started; the running flag is released first.
        """
        with self._lock:
            if self._running:
                return False
            self._running = True
            self._label = label
            self._log = [f"[{time.strftime('%H:%M:%S')}] {label} started"]

        def runner():
            try:
                target(self, *args, **kwargs)
            except Exception as exc:
                # Thread boundary: record the failure instead of letting
                # the exception vanish with the thread.
                self.append(f"FATAL: {exc}")
            finally:
                # Log "finished" and clear the running flag in one critical
                # section: a poller that stops on the first non-running
                # snapshot must already see the final log line.
                with self._lock:
                    self._append_unlocked(
                        f"[{time.strftime('%H:%M:%S')}] finished")
                    self._last_finished_at = int(time.time())
                    self._running = False

        thread = threading.Thread(target=runner,
                                  name=f"job:{label}",
                                  daemon=True)
        self._thread = thread
        try:
            thread.start()
        except RuntimeError:
            # Without this the flag would stay set forever and every later
            # job would be refused.
            with self._lock:
                self._running = False
            raise
        return True

    def append(self, line: str) -> None:
        """Add one line to the job log (trimming old lines if needed)."""
        with self._lock:
            self._append_unlocked(line)

    def snapshot(self) -> dict:
        """Return a copy of the state:
        ``{"running", "label", "log", "lastFinished"}``."""
        with self._lock:
            return {
                "running": self._running,
                "label": self._label,
                "log": list(self._log),
                "lastFinished": self._last_finished_at,
            }

    def _append_unlocked(self, line: str) -> None:
        """Append ``line``; the caller must already hold ``self._lock``."""
        self._log.append(line)
        # Cap log length so the response stays bounded.
        if len(self._log) > self._LOG_MAX:
            self._log = self._log[-self._LOG_KEEP:]
|
||||
|
||||
|
||||
class MatchesWebApp:
|
||||
def __init__(self, cache: MatchesCache, *,
|
||||
orchestrator=None,
|
||||
default_library_ids: "list[int] | None" = None,
|
||||
host: str = "0.0.0.0",
|
||||
port: int = 8080):
|
||||
self._cache = cache
|
||||
self._orchestrator = orchestrator
|
||||
self._defaults = list(default_library_ids or [])
|
||||
self._host = host
|
||||
self._port = port
|
||||
self._job = _JobState()
|
||||
self._app = Flask(__name__)
|
||||
self._thread: "threading.Thread | None" = None
|
||||
self._register_routes()
|
||||
|
||||
@property
|
||||
def app(self) -> Flask:
|
||||
return self._app
|
||||
|
||||
def start(self) -> threading.Thread:
|
||||
if self._thread is not None and self._thread.is_alive():
|
||||
return self._thread
|
||||
self._thread = threading.Thread(
|
||||
target=self._app.run,
|
||||
kwargs={"host": self._host, "port": self._port,
|
||||
"debug": False, "use_reloader": False,
|
||||
"threaded": True},
|
||||
name="MatchesWebApp",
|
||||
daemon=False,
|
||||
)
|
||||
self._thread.start()
|
||||
print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
|
||||
flush=True)
|
||||
return self._thread
|
||||
|
||||
def wait(self) -> None:
|
||||
if self._thread is not None:
|
||||
self._thread.join()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Routes
|
||||
# ------------------------------------------------------------------
|
||||
def _register_routes(self) -> None:
|
||||
app = self._app
|
||||
cache = self._cache
|
||||
|
||||
@app.get("/")
|
||||
def index() -> Response:
|
||||
return Response(_INDEX_HTML, mimetype="text/html; charset=utf-8")
|
||||
|
||||
@app.get("/api/libraries")
|
||||
def api_libraries():
|
||||
if self._orchestrator is None:
|
||||
return jsonify([])
|
||||
try:
|
||||
libs = self._orchestrator.list_libraries()
|
||||
except Exception as exc:
|
||||
return Response(f"libraries failed: {exc}", status=502)
|
||||
return jsonify({"libraries": libs, "defaults": self._defaults})
|
||||
|
||||
@app.get("/api/matches")
|
||||
def api_list():
|
||||
raw = request.args.get("libraryIds") or ""
|
||||
lib_ids = [int(p) for p in raw.split(",") if p.strip().isdigit()]
|
||||
if lib_ids:
|
||||
return jsonify(cache.all_in_libraries(lib_ids))
|
||||
return jsonify(cache.all())
|
||||
|
||||
@app.post("/api/matches")
|
||||
def api_upsert():
|
||||
body = request.get_json(silent=True) or {}
|
||||
title = (body.get("title") or "").strip()
|
||||
if not title:
|
||||
return Response("title is required", status=400)
|
||||
new_id_raw = body.get("mangabakaId")
|
||||
new_id = str(new_id_raw).strip() if new_id_raw is not None else ""
|
||||
if not new_id:
|
||||
return Response("mangabakaId is required", status=400)
|
||||
|
||||
new_name: "str | None" = None
|
||||
new_image: "str | None" = None
|
||||
if self._orchestrator is not None:
|
||||
try:
|
||||
series = self._orchestrator.fetch_series(new_id)
|
||||
except Exception as exc:
|
||||
return Response(f"resolve failed: {exc}", status=502)
|
||||
if not series:
|
||||
return Response(
|
||||
f"MangaBaka has no series with id {new_id}",
|
||||
status=404)
|
||||
new_name = series.get("title") or ""
|
||||
new_image = pick_thumbnail_url(series.get("cover")) or ""
|
||||
|
||||
entry = cache.upsert(
|
||||
title,
|
||||
mangabaka_id=new_id,
|
||||
mangabaka_name=new_name,
|
||||
image_url=new_image,
|
||||
)
|
||||
return jsonify({"title": title, "entry": entry})
|
||||
|
||||
@app.post("/api/matches/delete")
|
||||
def api_delete():
|
||||
body = request.get_json(silent=True) or {}
|
||||
title = (body.get("title") or "").strip()
|
||||
if not title:
|
||||
return Response("title is required", status=400)
|
||||
removed = cache.remove(title)
|
||||
return jsonify({"removed": removed, "title": title})
|
||||
|
||||
@app.post("/api/build")
|
||||
def api_build():
|
||||
if self._orchestrator is None:
|
||||
return Response("no orchestrator configured", status=503)
|
||||
body = request.get_json(silent=True) or {}
|
||||
library_ids = [int(i) for i in (body.get("libraryIds") or [])
|
||||
if str(i).strip().lstrip("-").isdigit()]
|
||||
if not library_ids:
|
||||
return Response("libraryIds required", status=400)
|
||||
|
||||
label = f"match libraries {library_ids}"
|
||||
|
||||
def task(job: _JobState, lib_ids):
|
||||
stats = self._orchestrator.build_matches(lib_ids)
|
||||
job.append(f"matched={stats.get('matched')} "
|
||||
f"skipped={stats.get('skipped')} "
|
||||
f"missing={stats.get('missing')} "
|
||||
f"checked={stats.get('checked')}")
|
||||
|
||||
if not self._job.start(label, task, library_ids):
|
||||
return Response("a job is already running", status=409)
|
||||
return jsonify({"started": label})
|
||||
|
||||
@app.post("/api/update")
|
||||
def api_update():
|
||||
if self._orchestrator is None:
|
||||
return Response("no orchestrator configured", status=503)
|
||||
body = request.get_json(silent=True) or {}
|
||||
ksid = body.get("kavitaSeriesId")
|
||||
try:
|
||||
ksid_int = int(ksid)
|
||||
except (TypeError, ValueError):
|
||||
return Response("kavitaSeriesId required", status=400)
|
||||
try:
|
||||
res = self._orchestrator.update_series(ksid_int)
|
||||
except Exception as exc:
|
||||
return Response(f"update failed: {exc}", status=500)
|
||||
return jsonify(res)
|
||||
|
||||
@app.post("/api/update-all")
|
||||
def api_update_all():
|
||||
if self._orchestrator is None:
|
||||
return Response("no orchestrator configured", status=503)
|
||||
body = request.get_json(silent=True) or {}
|
||||
raw = body.get("libraryIds")
|
||||
library_ids: "list[int] | None"
|
||||
if raw is None:
|
||||
library_ids = None
|
||||
else:
|
||||
library_ids = [int(i) for i in raw
|
||||
if str(i).strip().lstrip("-").isdigit()]
|
||||
|
||||
label = ("update all (every library)" if library_ids is None
|
||||
else f"update all in libraries {library_ids}")
|
||||
|
||||
def task(job: _JobState, lib_ids):
|
||||
summary = self._orchestrator.update_all(lib_ids)
|
||||
job.append(f"ok={summary.get('ok')} failed={summary.get('failed')}")
|
||||
for res in summary.get("results", []):
|
||||
title = res.get("title", "?")
|
||||
if res.get("ok"):
|
||||
flags = []
|
||||
sr = res.get("series") or {}
|
||||
for k, v in sr.items():
|
||||
if v == "changed":
|
||||
flags.append(k)
|
||||
job.append(
|
||||
f" {title}: changed=[{', '.join(flags) or '-'}]")
|
||||
else:
|
||||
job.append(f" {title}: FAIL {res.get('error')}")
|
||||
|
||||
if not self._job.start(label, task, library_ids):
|
||||
return Response("a job is already running", status=409)
|
||||
return jsonify({"started": label})
|
||||
|
||||
@app.get("/api/status")
def api_status():
    """
    Reports the current job state as JSON.

    The payload is the job snapshot with the configured defaults added
    under the ``defaults`` key.
    """
    payload = self._job.snapshot()
    payload["defaults"] = self._defaults
    return jsonify(payload)
|
||||
@@ -0,0 +1,91 @@
|
||||
"""
|
||||
media_resolver.py
|
||||
=================
|
||||
|
||||
Abstract base class for tracker-specific manga metadata resolvers.
|
||||
|
||||
Concrete implementations (MALResolver, AniListResolver) must implement
|
||||
every abstract method, ensuring a uniform interface regardless of the
|
||||
underlying data source (Jikan/MAL, AniList GraphQL, …).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class MediaResolver(ABC):
    """
    Common interface every tracker-specific metadata resolver implements.

    A concrete resolver talks to one tracker API and offers:
      - title search -> tracker-specific numeric ID
      - summary statistics (score, rank, popularity, ...)
      - character and staff listings (name-only and detailed forms)
      - full details for one character or one person

    Contract for ID-taking methods: ``None`` means "unknown"; the method
    returns a safe empty value instead of raising.
    """

    @abstractmethod
    def find_id(self, title: str) -> int | None:
        """
        Look up a manga on the tracker by its title.

        Returns the ID of the best match, or None when the search fails.
        """

    @abstractmethod
    def get_stats(self, tracker_id: int | None) -> dict | None:
        """
        Fetch the statistics dict for ``tracker_id``.

        Keys:
            score, rank, scored_by, popularity, members, favorites,
            url, title, as_of (DD-MM-YYYY)

        Returns None when ``tracker_id`` is None or the network call fails.
        """

    @abstractmethod
    def get_characters(self, tracker_id: int | None) -> list[str]:
        """
        Return the manga's characters as a flat list of name strings.

        NOTE(review): the earlier wording tied this list to the ComicInfo
        <Characters> XML element, while the project README states that
        ComicInfo.xml is not written - confirm the current consumer.
        """

    @abstractmethod
    def get_characters_detailed(self, tracker_id: int | None) -> list[dict]:
        """
        Return one dict per character of the manga:
            [{id, name, image_url, role, about=None, ...}, ...]

        ``about`` stays unset here; fetch it on demand through
        get_character_details().
        """

    @abstractmethod
    def get_staff_detailed(self, tracker_id: int | None) -> list[dict]:
        """
        Return one dict per staff member / author of the manga:
            [{id, name, image_url, positions, about=None, ...}, ...]

        ``about`` stays unset here; fetch it on demand through
        get_person_details().
        """

    @abstractmethod
    def get_character_details(self, char_id: int | None) -> dict | None:
        """
        Return the full record of one character, description included.

        Implementations are expected to cache the result.
        """

    @abstractmethod
    def get_person_details(self, person_id: int | None) -> dict | None:
        """
        Return the full record of one person (staff), description included.

        Implementations are expected to cache the result.
        """

    @abstractmethod
    def clear_cache(self) -> None:
        """Drop everything held in the resolver's internal caches."""
|
||||
@@ -0,0 +1,174 @@
|
||||
"""
|
||||
relationship_sync.py
|
||||
====================
|
||||
|
||||
Mirrors MangaBaka's ``relationships_v2`` graph into Kavita:
|
||||
|
||||
1. Every related MangaBaka series that is *also* present in Kavita
|
||||
(resolved via MatchesCache) is added to a shared Kavita collection
|
||||
so the whole franchise can be browsed in one place.
|
||||
2. Series-level relationships (prequel / sequel / spin-off / …) are
|
||||
written via ``POST /api/Series/update-related`` so navigating
|
||||
between entries surfaces the right neighbours.
|
||||
|
||||
Only relationships where both endpoints exist in Kavita are written.
|
||||
Relationships pointing to series that have not been imported yet are
|
||||
silently skipped (the next match run picks them up).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from KavitaClient import KavitaClient
|
||||
from MatchesCache import MatchesCache
|
||||
|
||||
|
||||
# Translation table: MangaBaka relation_type (lower-case) -> name of the
# Kavita UpdateRelatedSeriesDto list ("bucket") the related series goes in.
_RELATION_MAP = {
    "prequel": "prequels",
    "sequel": "sequels",
    "side_story": "sideStories",
    # Both spellings of "spin-off" land in the same bucket.
    "spin_off": "spinOffs",
    "spinoff": "spinOffs",
    "alternative_version": "alternativeVersions",
    "alternative_story": "alternativeVersions",
    "alternative_setting": "alternativeSettings",
    "adapted_from": "adaptations",
    "adaptation": "adaptations",
    "doujinshi": "doujinshis",
    # the parent "contains" the child
    # NOTE(review): the bucket is filled on the series whose relation
    # points at the parent, so the parent ends up in the child's
    # "contains" list - confirm this direction is intended.
    "parent": "contains",
}

# Every bucket name, in the order the keys are written into the payload.
_ALL_BUCKETS = (
    "adaptations",
    "characters",
    "contains",
    "others",
    "prequels",
    "sequels",
    "sideStories",
    "spinOffs",
    "alternativeSettings",
    "alternativeVersions",
    "doujinshis",
    "editions",
    "annuals",
)
|
||||
|
||||
|
||||
class RelationshipSync:
    """
    Writes a series' MangaBaka relationships into Kavita as series
    relations and as membership in a shared collection.

    Only related series that the matches cache can resolve to a Kavita
    series id are written; unresolved ones are reported as missing.
    """

    def __init__(self, client: KavitaClient, cache: MatchesCache, *,
                 builder=None):
        """
        Parameters
        ----------
        client  : KavitaClient for collection / relation writes.
        cache   : MatchesCache to resolve mangabakaId -> kavitaSeriesId.
        builder : optional LightNovelMetadataBuilder used to fetch parent
                  series titles when picking the collection name.
        """
        self._client = client
        self._cache = cache
        self._builder = builder

    # ------------------------------------------------------------------
    # Public
    # ------------------------------------------------------------------
    def sync(self, kavita_series_id: int, built: dict) -> dict:
        """
        Applies the relationship and collection links described by
        `built["relationships"]` (raw MangaBaka relationships_v2 list)
        for the given Kavita series.

        Returns
        -------
        dict with:
          relations      : {bucket: [kavitaSeriesId, ...]} for non-empty
                           buckets, {"error": text} if the write failed,
                           {} if nothing was written.
          collection     : collection name on success, "error: ..." on
                           failure, None if no collection was touched.
          missing_series : MangaBaka ids that could not be resolved to a
                           Kavita series.
        """
        report: dict = {"relations": {}, "collection": None,
                        "missing_series": []}

        relationships = built.get("relationships") or []
        if not relationships:
            return report

        current_id = int(kavita_series_id)

        # Resolve mangabakaId -> kavitaSeriesId for every related entry.
        related: dict[str, list[int]] = {b: [] for b in _ALL_BUCKETS}
        all_kavita_ids: set[int] = set()
        for rel in relationships:
            mb_id = rel.get("to_series_id")
            if mb_id is None:
                continue
            hit = self._cache.get_by_mangabaka_id(mb_id)
            ksid = 0
            if hit:
                _title, entry = hit
                ksid = int(entry.get("kavitaSeriesId") or 0)
            if not ksid:
                report["missing_series"].append(int(mb_id))
                continue
            if ksid == current_id:
                # The related entry resolves to the series being synced;
                # a series must not be written as its own relation.
                continue
            bucket = _RELATION_MAP.get(
                (rel.get("relation_type") or "").lower(), "others")
            if ksid not in related[bucket]:
                related[bucket].append(ksid)
            all_kavita_ids.add(ksid)

        # ----- Relationships ------------------------------------------
        if any(related.values()):
            # Every bucket is sent, including the empty ones.
            payload = {"seriesId": current_id}
            for bucket in _ALL_BUCKETS:
                payload[bucket] = related[bucket]
            # Best effort: a failed write is reported, not raised, so the
            # collection step below still runs.
            try:
                self._client.update_related(payload)
                report["relations"] = {k: v for k, v in related.items() if v}
            except Exception as exc:
                report["relations"] = {"error": str(exc)}

        # ----- Collection ---------------------------------------------
        # Include the current series in the collection so it shows up too.
        all_kavita_ids.add(current_id)
        if len(all_kavita_ids) >= 2:
            collection_name = self._collection_name(built, relationships)
            collection_id = self._find_collection_id(collection_name)
            try:
                self._client.add_series_to_collection(
                    collection_id=collection_id,
                    title=collection_name,
                    series_ids=sorted(all_kavita_ids),
                )
                report["collection"] = collection_name
            except Exception as exc:
                report["collection"] = f"error: {exc}"

        return report

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------
    def _find_collection_id(self, name: str) -> int:
        """
        Returns the id of an existing collection by title, or 0 to create.

        Titles are compared case-insensitively with surrounding whitespace
        ignored. Any failure while listing collections also yields 0.
        """
        if not name:
            return 0
        target = name.strip().lower()
        try:
            for col in self._client.list_collections():
                if (col.get("title") or "").strip().lower() == target:
                    try:
                        return int(col.get("id") or 0)
                    except (TypeError, ValueError):
                        return 0
        except Exception:
            # Deliberate best effort: if the lookup fails, fall through
            # to 0 ("create").
            pass
        return 0

    def _collection_name(self, built: dict,
                         relationships: list[dict]) -> str:
        """
        Picks the collection name. Uses the parent series title from
        MangaBaka if the current series has one; otherwise falls back to
        the current series' own title (`built["mangabakaTitle"]`, or ""
        when that is missing).
        """
        for rel in relationships:
            if (rel.get("relation_type") or "").lower() != "parent":
                continue
            parent_id = rel.get("to_series_id")
            if parent_id is None:
                # Nothing to look up; sync() skips None ids the same way.
                continue
            if self._builder is not None:
                try:
                    parent_md = self._builder.fetch_series(parent_id)
                    if parent_md and parent_md.get("title"):
                        return parent_md["title"]
                except Exception:
                    # Best effort: fall back to the cache below.
                    pass
            # Even without a builder, the cache may know the parent.
            hit = self._cache.get_by_mangabaka_id(parent_id)
            if hit:
                _title, entry = hit
                name = entry.get("mangabakaName")
                if name:
                    return name
        return built.get("mangabakaTitle") or ""
|
||||
Reference in New Issue
Block a user