improvements
This commit is contained in:
@@ -0,0 +1,34 @@
|
|||||||
|
name: Release
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
tags:
|
||||||
|
- "v[0-9]+.[0-9]+.[0-9]+"
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
run: |
|
||||||
|
git clone ${{ github.server_url }}/${{ github.repository }}.git .
|
||||||
|
git checkout ${{ github.sha }}
|
||||||
|
|
||||||
|
- name: Login to Gitea Registry
|
||||||
|
run: |
|
||||||
|
echo "${{ secrets.REGISTRY_PASSWORD }}" | \
|
||||||
|
docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
|
||||||
|
|
||||||
|
- name: Build Image
|
||||||
|
run: |
|
||||||
|
VERSION="${GITHUB_REF_NAME#v}"
|
||||||
|
docker build \
|
||||||
|
-t gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:${VERSION} \
|
||||||
|
-t gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:${GITHUB_REF_NAME} \
|
||||||
|
.
|
||||||
|
|
||||||
|
- name: Push Image
|
||||||
|
run: |
|
||||||
|
VERSION="${GITHUB_REF_NAME#v}"
|
||||||
|
docker push gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:${VERSION}
|
||||||
|
docker push gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:${GITHUB_REF_NAME}
|
||||||
@@ -38,12 +38,15 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Make src/ importable when running as `python main.py`.
|
# Make src/ importable when running as `python main.py`. Import the
|
||||||
|
# modules by their plain names (not `src.X`) so they are the same module
|
||||||
|
# objects the src-internal imports resolve to — `src.X` would load every
|
||||||
|
# module twice under two names.
|
||||||
sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
|
sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
|
||||||
|
|
||||||
from src.MatchesCache import MatchesCache # noqa: E402
|
from MatchesCache import MatchesCache # noqa: E402
|
||||||
from src.LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402
|
from LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402
|
||||||
from src.MatchesWebApp import MatchesWebApp # noqa: E402
|
from MatchesWebApp import MatchesWebApp # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
def _env_str(name: str, default: "str | None" = None,
|
def _env_str(name: str, default: "str | None" = None,
|
||||||
|
|||||||
+6
-13
@@ -32,12 +32,12 @@ Dependencies
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import difflib
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from MediaResolver import MediaResolver
|
from MediaResolver import MediaResolver
|
||||||
|
from TextUtils import best_similarity
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
@@ -469,18 +469,11 @@ class AniListResolver(MediaResolver):
|
|||||||
def _score_title(query: str, entry: dict) -> float:
|
def _score_title(query: str, entry: dict) -> float:
|
||||||
"""Returns the best title-similarity score for an AniList media entry."""
|
"""Returns the best title-similarity score for an AniList media entry."""
|
||||||
title_obj = entry.get("title") or {}
|
title_obj = entry.get("title") or {}
|
||||||
candidates = [
|
return best_similarity(query, (
|
||||||
title_obj.get("romaji") or "",
|
title_obj.get("romaji"),
|
||||||
title_obj.get("english") or "",
|
title_obj.get("english"),
|
||||||
title_obj.get("native") or "",
|
title_obj.get("native"),
|
||||||
]
|
))
|
||||||
best = 0.0
|
|
||||||
q = query.lower()
|
|
||||||
for t in candidates:
|
|
||||||
if t:
|
|
||||||
ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
|
|
||||||
best = max(best, ratio)
|
|
||||||
return best
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
|
|||||||
+52
-9
@@ -194,25 +194,56 @@ class KavitaClient:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Series cover upload
|
# Persons
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def search_persons(self, name: str) -> list[dict]:
|
||||||
|
"""Returns PersonDto entries matching `name` (Kavita's own search)."""
|
||||||
|
r = self._session.get(
|
||||||
|
f"{self._base}/api/Person/search",
|
||||||
|
params={"queryString": name}, timeout=self._timeout)
|
||||||
|
r.raise_for_status()
|
||||||
|
return r.json() or []
|
||||||
|
|
||||||
|
def update_person(self, payload: dict) -> None:
|
||||||
|
"""Writes a person record (malId, aniListId, description, …)."""
|
||||||
|
r = self._session.post(f"{self._base}/api/Person/update",
|
||||||
|
json=payload, timeout=self._timeout)
|
||||||
|
r.raise_for_status()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Cover uploads
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def upload_series_cover(self, series_id: int, image_url: str, *,
|
def upload_series_cover(self, series_id: int, image_url: str, *,
|
||||||
lock: bool = False) -> None:
|
lock: bool = False) -> None:
|
||||||
"""
|
"""Downloads an external image and uploads it as the series cover."""
|
||||||
Downloads an external image and uploads it as the series cover.
|
self._upload_cover("/api/Upload/series", series_id, image_url, lock)
|
||||||
|
|
||||||
Mirrors the cover-upload trick used in KavitaPersonUpdater:
|
def upload_person_cover(self, person_id: int, image_url: str, *,
|
||||||
Kavita's `/api/Upload/series` accepts a raw base64 blob (no
|
lock: bool = False) -> None:
|
||||||
``data:`` prefix) in the ``url`` field.
|
"""Downloads an external image and uploads it as a person cover."""
|
||||||
|
self._upload_cover("/api/Upload/person", person_id, image_url, lock)
|
||||||
|
|
||||||
|
def _upload_cover(self, endpoint: str, entity_id: int,
|
||||||
|
image_url: str, lock: bool) -> None:
|
||||||
|
"""
|
||||||
|
Shared cover-upload path. Kavita's upload endpoints accept a raw
|
||||||
|
base64 blob (no ``data:`` prefix) in the ``url`` field — a data
|
||||||
|
URI or the two-step upload-by-url flow are rejected with HTTP 400
|
||||||
|
(verified against Kavita 0.9.0.2).
|
||||||
"""
|
"""
|
||||||
img = self._image_session.get(image_url, timeout=self._timeout)
|
img = self._image_session.get(image_url, timeout=self._timeout)
|
||||||
img.raise_for_status()
|
img.raise_for_status()
|
||||||
b64 = base64.b64encode(img.content).decode()
|
b64 = base64.b64encode(img.content).decode()
|
||||||
r = self._session.post(
|
r = self._session.post(
|
||||||
f"{self._base}/api/Upload/series",
|
f"{self._base}{endpoint}",
|
||||||
json={"id": series_id, "url": b64, "lockCover": lock},
|
json={"id": entity_id, "url": b64, "lockCover": lock},
|
||||||
timeout=self._timeout)
|
timeout=self._timeout)
|
||||||
r.raise_for_status()
|
if r.status_code >= 400:
|
||||||
|
# Include the body excerpt — Kavita's upload errors carry the
|
||||||
|
# actual reason there, not in the status line.
|
||||||
|
raise requests.HTTPError(
|
||||||
|
f"{endpoint} HTTP {r.status_code}: {_short_body(r)}",
|
||||||
|
response=r)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Generic GET helper (used by callers that need a response object)
|
# Generic GET helper (used by callers that need a response object)
|
||||||
@@ -227,3 +258,15 @@ class KavitaClient:
|
|||||||
return self._session.post(f"{self._base}{path}",
|
return self._session.post(f"{self._base}{path}",
|
||||||
json=json, params=params,
|
json=json, params=params,
|
||||||
timeout=self._timeout)
|
timeout=self._timeout)
|
||||||
|
|
||||||
|
|
||||||
|
def _short_body(resp: requests.Response, limit: int = 400) -> str:
|
||||||
|
"""Returns the response body trimmed to `limit` chars for error messages."""
|
||||||
|
try:
|
||||||
|
text = resp.text or ""
|
||||||
|
except Exception:
|
||||||
|
return "<unreadable response body>"
|
||||||
|
text = text.strip().replace("\n", " ").replace("\r", " ")
|
||||||
|
if len(text) > limit:
|
||||||
|
text = text[:limit] + "…"
|
||||||
|
return text or "<empty body>"
|
||||||
|
|||||||
+30
-171
@@ -15,46 +15,22 @@ the updater:
|
|||||||
an 'about' text (requires an extra Jikan request per character; only
|
an 'about' text (requires an extra Jikan request per character; only
|
||||||
performed when update_descriptions=True).
|
performed when update_descriptions=True).
|
||||||
|
|
||||||
Kavita API version
|
All HTTP traffic to Kavita goes through the shared :class:`KavitaClient`
|
||||||
------------------
|
(`/api/Person/search`, `/api/Person/update`, `/api/Upload/person`).
|
||||||
|
|
||||||
Tested against Kavita 0.9.0.2.
|
Tested against Kavita 0.9.0.2.
|
||||||
|
|
||||||
Authentication
|
|
||||||
--------------
|
|
||||||
Uses the `x-api-key` header (API key from Kavita user settings).
|
|
||||||
No JWT login is required.
|
|
||||||
|
|
||||||
Relevant endpoints (Kavita 0.9.0.2)
|
|
||||||
-------------------------------------
|
|
||||||
GET /api/Person/search find persons by name / alias
|
|
||||||
POST /api/Person/update write metadata (malId, description, …)
|
|
||||||
POST /api/Upload/person set cover image (base64 data URI)
|
|
||||||
POST /api/Upload/upload-by-url download an external URL to temp storage
|
|
||||||
(used as an alternative upload path)
|
|
||||||
|
|
||||||
Cover upload flow
|
|
||||||
-----------------
|
|
||||||
The image is downloaded locally, base64-encoded, and sent as a data URI
|
|
||||||
to POST /api/Upload/person. This is more reliable than the
|
|
||||||
upload-by-url → upload/person two-step because it avoids Kavita's temp
|
|
||||||
file handling (which had known issues in 0.8.x – 0.9.x, GitHub #3900).
|
|
||||||
|
|
||||||
Dependencies
|
|
||||||
------------
|
|
||||||
requests -> pip install requests
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
|
||||||
import datetime
|
import datetime
|
||||||
import difflib
|
|
||||||
import re
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from KavitaClient import KavitaClient
|
||||||
from MALResolver import MALResolver
|
from MALResolver import MALResolver
|
||||||
from AniListResolver import AniListResolver
|
from AniListResolver import AniListResolver
|
||||||
|
from TextUtils import best_similarity, paragraphs_to_html
|
||||||
|
|
||||||
|
|
||||||
class KavitaPersonUpdater:
|
class KavitaPersonUpdater:
|
||||||
@@ -63,41 +39,22 @@ class KavitaPersonUpdater:
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
kavita_base_url : Base URL of the Kavita server, e.g. "http://192.168.2.2:5000"
|
client : Shared KavitaClient (session, auth, cover uploads)
|
||||||
api_key : Kavita API key (Settings → User → API key)
|
|
||||||
mal_resolver : Shared MALResolver singleton (created automatically if omitted)
|
mal_resolver : Shared MALResolver singleton (created automatically if omitted)
|
||||||
request_timeout : HTTP timeout in seconds for both Kavita and image requests
|
al_resolver : Shared AniListResolver singleton (created automatically if omitted)
|
||||||
min_name_score : Minimum difflib similarity ratio (0–1) required to accept a
|
min_name_score : Minimum difflib similarity ratio (0–1) required to accept a
|
||||||
Kavita person as a match for a MAL name. Default 0.80.
|
Kavita person as a match for a MAL name. Default 0.80.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, kavita_base_url: str, api_key: str, *,
|
def __init__(self, client: KavitaClient, *,
|
||||||
mal_resolver: "MALResolver | None" = None,
|
mal_resolver: "MALResolver | None" = None,
|
||||||
al_resolver: "AniListResolver | None" = None,
|
al_resolver: "AniListResolver | None" = None,
|
||||||
request_timeout: int = 30,
|
|
||||||
min_name_score: float = 0.80):
|
min_name_score: float = 0.80):
|
||||||
self._base = kavita_base_url.rstrip("/")
|
self._client = client
|
||||||
self._timeout = request_timeout
|
|
||||||
self._min_score = min_name_score
|
self._min_score = min_name_score
|
||||||
self._mal = mal_resolver or MALResolver()
|
self._mal = mal_resolver or MALResolver()
|
||||||
self._al = al_resolver or AniListResolver()
|
self._al = al_resolver or AniListResolver()
|
||||||
|
|
||||||
# Session used for Kavita API calls.
|
|
||||||
self._session = requests.Session()
|
|
||||||
self._session.headers.update({
|
|
||||||
"x-api-key": api_key,
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
"Accept": "application/json",
|
|
||||||
})
|
|
||||||
|
|
||||||
# Plain session used to download external images (MAL CDN etc.).
|
|
||||||
# Must NOT carry the Kavita API headers — Accept: application/json
|
|
||||||
# would prevent MAL CDN from returning the image bytes.
|
|
||||||
self._image_session = requests.Session()
|
|
||||||
self._image_session.headers.update({
|
|
||||||
"User-Agent": "KavitaPersonUpdater/1.0",
|
|
||||||
})
|
|
||||||
|
|
||||||
# Cache: normalised name -> list of PersonDto dicts (best matches first)
|
# Cache: normalised name -> list of PersonDto dicts (best matches first)
|
||||||
self._person_search_cache: dict[str, list[dict]] = {}
|
self._person_search_cache: dict[str, list[dict]] = {}
|
||||||
|
|
||||||
@@ -230,29 +187,17 @@ class KavitaPersonUpdater:
|
|||||||
return self._person_search_cache[key]
|
return self._person_search_cache[key]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
resp = self._session.get(
|
persons = self._client.search_persons(name)
|
||||||
f"{self._base}/api/Person/search",
|
|
||||||
params={"queryString": name},
|
|
||||||
timeout=self._timeout,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
persons: list[dict] = resp.json() or []
|
|
||||||
except requests.RequestException:
|
except requests.RequestException:
|
||||||
self._person_search_cache[key] = []
|
self._person_search_cache[key] = []
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def score(p: dict) -> float:
|
scored = []
|
||||||
candidates = [p.get("name") or ""]
|
for p in persons:
|
||||||
candidates += [a for a in (p.get("aliases") or []) if a]
|
candidates = [p.get("name")] + list(p.get("aliases") or [])
|
||||||
best = 0.0
|
scored.append((best_similarity(key, candidates), p))
|
||||||
q = key
|
scored.sort(key=lambda pair: pair[0], reverse=True)
|
||||||
for c in candidates:
|
filtered = [p for score, p in scored if score >= self._min_score]
|
||||||
r = difflib.SequenceMatcher(None, q, c.lower()).ratio()
|
|
||||||
best = max(best, r)
|
|
||||||
return best
|
|
||||||
|
|
||||||
ranked = sorted(persons, key=score, reverse=True)
|
|
||||||
filtered = [p for p in ranked if score(p) >= self._min_score]
|
|
||||||
self._person_search_cache[key] = filtered
|
self._person_search_cache[key] = filtered
|
||||||
return filtered
|
return filtered
|
||||||
|
|
||||||
@@ -323,12 +268,7 @@ class KavitaPersonUpdater:
|
|||||||
"aniListId": al_id if needs_al_id else (current_al_id or None),
|
"aniListId": al_id if needs_al_id else (current_al_id or None),
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
resp = self._session.post(
|
self._client.update_person(payload)
|
||||||
f"{self._base}/api/Person/update",
|
|
||||||
json=payload,
|
|
||||||
timeout=self._timeout,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
changed = True
|
changed = True
|
||||||
except requests.RequestException as e:
|
except requests.RequestException as e:
|
||||||
if errors is not None:
|
if errors is not None:
|
||||||
@@ -350,88 +290,21 @@ class KavitaPersonUpdater:
|
|||||||
and bool(person.get("coverImage"))
|
and bool(person.get("coverImage"))
|
||||||
)
|
)
|
||||||
if image_url and not already_uploaded:
|
if image_url and not already_uploaded:
|
||||||
if self._upload_cover(person_id, image_url,
|
try:
|
||||||
person_name=person_name,
|
self._client.upload_person_cover(person_id, image_url)
|
||||||
errors=errors):
|
|
||||||
changed = True
|
changed = True
|
||||||
|
except requests.RequestException as e:
|
||||||
|
if errors is not None:
|
||||||
|
errors.append(
|
||||||
|
f"cover upload failed for #{person_id} "
|
||||||
|
f"'{person_name}' ({image_url}): {e}")
|
||||||
|
|
||||||
return changed
|
return changed
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Internal: cover upload
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
def _upload_cover(self, person_id: int, image_url: str,
|
|
||||||
lock: bool = False, *,
|
|
||||||
person_name: str = "",
|
|
||||||
errors: "list | None" = None) -> bool:
|
|
||||||
"""
|
|
||||||
Uploads a cover image to a Kavita person.
|
|
||||||
|
|
||||||
The image is downloaded with the plain (header-less) image session
|
|
||||||
and posted to `POST /api/Upload/person` as a raw base64 string in
|
|
||||||
the `url` field.
|
|
||||||
|
|
||||||
Notes on protocol quirks discovered against Kavita 0.9.0.2:
|
|
||||||
- The two-step `upload-by-url` -> `Upload/person` flow returns
|
|
||||||
"Unable to save cover image to Person" (HTTP 400).
|
|
||||||
- A `data:image/jpeg;base64,...` data URI is rejected with the
|
|
||||||
same error.
|
|
||||||
- Only the raw base64 blob (no prefix) is accepted.
|
|
||||||
"""
|
|
||||||
label = (f"#{person_id} '{person_name}'"
|
|
||||||
if person_name else f"#{person_id}")
|
|
||||||
|
|
||||||
# 1) Download the image with a clean session — the Kavita session's
|
|
||||||
# `Accept: application/json` header makes some CDNs refuse to
|
|
||||||
# return image bytes.
|
|
||||||
try:
|
|
||||||
img_resp = self._image_session.get(image_url,
|
|
||||||
timeout=self._timeout)
|
|
||||||
img_resp.raise_for_status()
|
|
||||||
except requests.RequestException as e:
|
|
||||||
if errors is not None:
|
|
||||||
errors.append(
|
|
||||||
f"image download failed for {label} ({image_url}): {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
b64 = base64.b64encode(img_resp.content).decode()
|
|
||||||
|
|
||||||
# 2) POST the raw base64 blob.
|
|
||||||
try:
|
|
||||||
resp = self._session.post(
|
|
||||||
f"{self._base}/api/Upload/person",
|
|
||||||
json={"id": person_id, "url": b64, "lockCover": lock},
|
|
||||||
timeout=self._timeout,
|
|
||||||
)
|
|
||||||
if resp.status_code >= 400:
|
|
||||||
if errors is not None:
|
|
||||||
errors.append(
|
|
||||||
f"Upload/person HTTP {resp.status_code} for {label}: "
|
|
||||||
f"{_short_body(resp)}")
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
except requests.RequestException as e:
|
|
||||||
if errors is not None:
|
|
||||||
errors.append(
|
|
||||||
f"Upload/person failed for {label}: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Module helpers: description builders
|
# Module helpers: description builders
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
def _plain_to_html(text: str) -> str:
|
|
||||||
"""Converts plain text with paragraph breaks to compact HTML (no raw \\n)."""
|
|
||||||
if not text:
|
|
||||||
return ""
|
|
||||||
parts: list[str] = []
|
|
||||||
for para in re.split(r"\n{2,}", text.strip()):
|
|
||||||
para = para.strip()
|
|
||||||
if para:
|
|
||||||
parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
|
|
||||||
return "".join(parts)
|
|
||||||
|
|
||||||
|
|
||||||
def _format_birthday(birthday: str) -> str:
|
def _format_birthday(birthday: str) -> str:
|
||||||
"""Converts an ISO 8601 birthday string to "D Month YYYY"."""
|
"""Converts an ISO 8601 birthday string to "D Month YYYY"."""
|
||||||
if not birthday:
|
if not birthday:
|
||||||
@@ -457,7 +330,7 @@ def _build_character_description(details: dict) -> str:
|
|||||||
parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>')
|
parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>')
|
||||||
about = (details.get("about") or "").strip()
|
about = (details.get("about") or "").strip()
|
||||||
if about:
|
if about:
|
||||||
parts.append(_plain_to_html(about))
|
parts.append(paragraphs_to_html(about))
|
||||||
return "<br>".join(parts)
|
return "<br>".join(parts)
|
||||||
|
|
||||||
|
|
||||||
@@ -501,33 +374,19 @@ def _build_person_description(details: dict) -> str:
|
|||||||
parts.append(f'<table>{"".join(rows)}</table>')
|
parts.append(f'<table>{"".join(rows)}</table>')
|
||||||
about = (details.get("about") or "").strip()
|
about = (details.get("about") or "").strip()
|
||||||
if about:
|
if about:
|
||||||
parts.append(_plain_to_html(about))
|
parts.append(paragraphs_to_html(about))
|
||||||
return "<br>".join(parts)
|
return "<br>".join(parts)
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
|
||||||
# Module helper
|
|
||||||
# --------------------------------------------------------------------------
|
|
||||||
def _short_body(resp: requests.Response, limit: int = 400) -> str:
|
|
||||||
"""Returns the response body trimmed to `limit` chars for error logging."""
|
|
||||||
try:
|
|
||||||
text = resp.text or ""
|
|
||||||
except Exception:
|
|
||||||
return "<unreadable response body>"
|
|
||||||
text = text.strip().replace("\n", " ").replace("\r", " ")
|
|
||||||
if len(text) > limit:
|
|
||||||
text = text[:limit] + "…"
|
|
||||||
return text or "<empty body>"
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Usage example
|
# Usage example
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
KAVITA_URL = "http://192.168.2.2:5000"
|
import os
|
||||||
KAVITA_KEY = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA"
|
|
||||||
|
|
||||||
updater = KavitaPersonUpdater(KAVITA_URL, KAVITA_KEY)
|
client = KavitaClient(os.environ["KAVITA_URL"],
|
||||||
|
os.environ["KAVITA_API_KEY"])
|
||||||
|
updater = KavitaPersonUpdater(client)
|
||||||
|
|
||||||
mal = MALResolver()
|
mal = MALResolver()
|
||||||
mal_id = mal.find_mal_id("よふかしのうた")
|
mal_id = mal.find_mal_id("よふかしのうた")
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
|||||||
from MALResolver import MALResolver
|
from MALResolver import MALResolver
|
||||||
from AniListResolver import AniListResolver
|
from AniListResolver import AniListResolver
|
||||||
from MatchesCache import MatchesCache
|
from MatchesCache import MatchesCache
|
||||||
|
from TextUtils import paragraphs_to_html
|
||||||
|
|
||||||
|
|
||||||
# MangaBaka series type for the search endpoint.
|
# MangaBaka series type for the search endpoint.
|
||||||
@@ -92,12 +93,7 @@ def _md_to_html(text: str) -> str:
|
|||||||
)
|
)
|
||||||
text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text, flags=re.DOTALL)
|
text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text, flags=re.DOTALL)
|
||||||
text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text, flags=re.DOTALL)
|
text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text, flags=re.DOTALL)
|
||||||
parts: list[str] = []
|
return paragraphs_to_html(text)
|
||||||
for para in re.split(r'\n{2,}', text.strip()):
|
|
||||||
para = para.strip()
|
|
||||||
if para:
|
|
||||||
parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
|
|
||||||
return "".join(parts)
|
|
||||||
|
|
||||||
|
|
||||||
def pick_cover_url(cover) -> "str | None":
|
def pick_cover_url(cover) -> "str | None":
|
||||||
@@ -220,16 +216,25 @@ class LightNovelMetadataBuilder:
|
|||||||
return data[0] if data else None
|
return data[0] if data else None
|
||||||
|
|
||||||
def fetch_series(self, series_id) -> "dict | None":
|
def fetch_series(self, series_id) -> "dict | None":
|
||||||
"""Returns the full MangaBaka series dict for the given id."""
|
"""
|
||||||
|
Returns the full MangaBaka series dict for the given id, following
|
||||||
|
``merged_with`` redirects. A seen-set guards against merge cycles.
|
||||||
|
"""
|
||||||
if series_id is None or str(series_id).strip() == "":
|
if series_id is None or str(series_id).strip() == "":
|
||||||
return None
|
return None
|
||||||
url = f"{self.api_base_url}/series/{series_id}"
|
seen: set[str] = set()
|
||||||
resp = self._session.get(url, timeout=self.request_timeout)
|
current = series_id
|
||||||
resp.raise_for_status()
|
while str(current) not in seen:
|
||||||
data = resp.json().get("data")
|
seen.add(str(current))
|
||||||
if data and data.get("state") == "merged" and data.get("merged_with"):
|
url = f"{self.api_base_url}/series/{current}"
|
||||||
return self.fetch_series(data["merged_with"])
|
resp = self._session.get(url, timeout=self.request_timeout)
|
||||||
return data
|
resp.raise_for_status()
|
||||||
|
data = resp.json().get("data")
|
||||||
|
if data and data.get("state") == "merged" and data.get("merged_with"):
|
||||||
|
current = data["merged_with"]
|
||||||
|
continue
|
||||||
|
return data
|
||||||
|
return None
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Resolve title -> MangaBaka series (caches the match)
|
# Resolve title -> MangaBaka series (caches the match)
|
||||||
@@ -316,14 +321,12 @@ class LightNovelMetadataBuilder:
|
|||||||
# text-only novels).
|
# text-only novels).
|
||||||
cover_artists = list(md.get("artists") or [])
|
cover_artists = list(md.get("artists") or [])
|
||||||
|
|
||||||
# Publisher: prefer English licence, else original
|
# Publisher: prefer English licence, else original. When both
|
||||||
publishers = self._publishers_by_type(md, "English") \
|
# exist, the original publisher becomes the imprint.
|
||||||
or self._publishers_by_type(md, "Original")
|
english_pubs = self._publishers_by_type(md, "English")
|
||||||
imprint = None
|
original_pubs = self._publishers_by_type(md, "Original")
|
||||||
if self._publishers_by_type(md, "English") and \
|
publishers = english_pubs or original_pubs
|
||||||
self._publishers_by_type(md, "Original"):
|
imprint = original_pubs[0] if english_pubs and original_pubs else None
|
||||||
imprint = self._publishers_by_type(md, "Original")[0] if \
|
|
||||||
self._publishers_by_type(md, "Original") else None
|
|
||||||
|
|
||||||
# Release year
|
# Release year
|
||||||
release_year = None
|
release_year = None
|
||||||
|
|||||||
@@ -71,10 +71,9 @@ class LightNovelOrchestrator:
|
|||||||
)
|
)
|
||||||
self._series_updater = KavitaSeriesUpdater(self._client)
|
self._series_updater = KavitaSeriesUpdater(self._client)
|
||||||
self._person_updater = KavitaPersonUpdater(
|
self._person_updater = KavitaPersonUpdater(
|
||||||
kavita_url, kavita_api_key,
|
self._client,
|
||||||
mal_resolver=self._mal,
|
mal_resolver=self._mal,
|
||||||
al_resolver=self._al,
|
al_resolver=self._al,
|
||||||
request_timeout=request_timeout,
|
|
||||||
)
|
)
|
||||||
self._relation_sync = RelationshipSync(
|
self._relation_sync = RelationshipSync(
|
||||||
self._client, matches_cache, builder=self._builder)
|
self._client, matches_cache, builder=self._builder)
|
||||||
|
|||||||
+6
-13
@@ -30,12 +30,12 @@ Dependencies
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import difflib
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from MediaResolver import MediaResolver
|
from MediaResolver import MediaResolver
|
||||||
|
from TextUtils import best_similarity
|
||||||
|
|
||||||
|
|
||||||
class MALResolver(MediaResolver):
|
class MALResolver(MediaResolver):
|
||||||
@@ -404,19 +404,12 @@ def _clean_mal_name(name: str) -> str:
|
|||||||
def _score_title(query: str, entry: dict) -> float:
|
def _score_title(query: str, entry: dict) -> float:
|
||||||
"""Returns the best title-similarity score for a Jikan manga entry."""
|
"""Returns the best title-similarity score for a Jikan manga entry."""
|
||||||
candidates = [
|
candidates = [
|
||||||
entry.get("title") or "",
|
entry.get("title"),
|
||||||
entry.get("title_english") or "",
|
entry.get("title_english"),
|
||||||
entry.get("title_japanese") or "",
|
entry.get("title_japanese"),
|
||||||
]
|
]
|
||||||
for alt in (entry.get("titles") or []):
|
candidates += [alt.get("title") for alt in (entry.get("titles") or [])]
|
||||||
candidates.append(alt.get("title") or "")
|
return best_similarity(query, candidates)
|
||||||
best = 0.0
|
|
||||||
q = query.lower()
|
|
||||||
for t in candidates:
|
|
||||||
if t:
|
|
||||||
ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
|
|
||||||
best = max(best, ratio)
|
|
||||||
return best
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
|
|||||||
+12
-16
@@ -36,6 +36,14 @@ import time
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def _set_int(entry: dict, key: str, value) -> None:
|
||||||
|
"""Sets entry[key] = int(value); ignores values that don't coerce."""
|
||||||
|
try:
|
||||||
|
entry[key] = int(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class MatchesCache:
|
class MatchesCache:
|
||||||
def __init__(self, path):
|
def __init__(self, path):
|
||||||
self._path = Path(path)
|
self._path = Path(path)
|
||||||
@@ -100,25 +108,13 @@ class MatchesCache:
|
|||||||
if image_url is not None:
|
if image_url is not None:
|
||||||
entry["imageUrl"] = image_url
|
entry["imageUrl"] = image_url
|
||||||
if kavita_series_id is not None:
|
if kavita_series_id is not None:
|
||||||
try:
|
_set_int(entry, "kavitaSeriesId", kavita_series_id)
|
||||||
entry["kavitaSeriesId"] = int(kavita_series_id)
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
pass
|
|
||||||
if library_id is not None:
|
if library_id is not None:
|
||||||
try:
|
_set_int(entry, "libraryId", library_id)
|
||||||
entry["libraryId"] = int(library_id)
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
pass
|
|
||||||
if first_match_time is not None:
|
if first_match_time is not None:
|
||||||
try:
|
_set_int(entry, "firstMatchTime", first_match_time)
|
||||||
entry["firstMatchTime"] = int(first_match_time)
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
pass
|
|
||||||
if last_update_time is not None:
|
if last_update_time is not None:
|
||||||
try:
|
_set_int(entry, "lastUpdateTime", last_update_time)
|
||||||
entry["lastUpdateTime"] = int(last_update_time)
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
pass
|
|
||||||
self._save_unlocked()
|
self._save_unlocked()
|
||||||
return dict(entry)
|
return dict(entry)
|
||||||
|
|
||||||
|
|||||||
+16
-9
@@ -39,6 +39,19 @@ from MatchesCache import MatchesCache
|
|||||||
from LightNovelMetadataBuilder import pick_thumbnail_url
|
from LightNovelMetadataBuilder import pick_thumbnail_url
|
||||||
|
|
||||||
|
|
||||||
|
def _int_list(values) -> list[int]:
|
||||||
|
"""Coerces an iterable of mixed values to a list of positive ints."""
|
||||||
|
out: list[int] = []
|
||||||
|
for v in (values or []):
|
||||||
|
try:
|
||||||
|
n = int(v)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
continue
|
||||||
|
if n > 0:
|
||||||
|
out.append(n)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
_INDEX_HTML = r"""<!doctype html>
|
_INDEX_HTML = r"""<!doctype html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
@@ -628,7 +641,7 @@ class MatchesWebApp:
|
|||||||
@app.get("/api/matches")
|
@app.get("/api/matches")
|
||||||
def api_list():
|
def api_list():
|
||||||
raw = request.args.get("libraryIds") or ""
|
raw = request.args.get("libraryIds") or ""
|
||||||
lib_ids = [int(p) for p in raw.split(",") if p.strip().isdigit()]
|
lib_ids = _int_list(raw.split(","))
|
||||||
if lib_ids:
|
if lib_ids:
|
||||||
return jsonify(cache.all_in_libraries(lib_ids))
|
return jsonify(cache.all_in_libraries(lib_ids))
|
||||||
return jsonify(cache.all())
|
return jsonify(cache.all())
|
||||||
@@ -680,8 +693,7 @@ class MatchesWebApp:
|
|||||||
if self._orchestrator is None:
|
if self._orchestrator is None:
|
||||||
return Response("no orchestrator configured", status=503)
|
return Response("no orchestrator configured", status=503)
|
||||||
body = request.get_json(silent=True) or {}
|
body = request.get_json(silent=True) or {}
|
||||||
library_ids = [int(i) for i in (body.get("libraryIds") or [])
|
library_ids = _int_list(body.get("libraryIds"))
|
||||||
if str(i).strip().lstrip("-").isdigit()]
|
|
||||||
if not library_ids:
|
if not library_ids:
|
||||||
return Response("libraryIds required", status=400)
|
return Response("libraryIds required", status=400)
|
||||||
|
|
||||||
@@ -720,12 +732,7 @@ class MatchesWebApp:
|
|||||||
return Response("no orchestrator configured", status=503)
|
return Response("no orchestrator configured", status=503)
|
||||||
body = request.get_json(silent=True) or {}
|
body = request.get_json(silent=True) or {}
|
||||||
raw = body.get("libraryIds")
|
raw = body.get("libraryIds")
|
||||||
library_ids: "list[int] | None"
|
library_ids = None if raw is None else _int_list(raw)
|
||||||
if raw is None:
|
|
||||||
library_ids = None
|
|
||||||
else:
|
|
||||||
library_ids = [int(i) for i in raw
|
|
||||||
if str(i).strip().lstrip("-").isdigit()]
|
|
||||||
|
|
||||||
label = ("update all (every library)" if library_ids is None
|
label = ("update all (every library)" if library_ids is None
|
||||||
else f"update all in libraries {library_ids}")
|
else f"update all in libraries {library_ids}")
|
||||||
|
|||||||
@@ -0,0 +1,45 @@
|
|||||||
|
"""
|
||||||
|
text_utils.py
|
||||||
|
=============
|
||||||
|
|
||||||
|
Small text helpers shared across modules:
|
||||||
|
|
||||||
|
* ``paragraphs_to_html`` — converts plain text with blank-line paragraph
|
||||||
|
breaks into compact HTML (used for Kavita summary / description fields,
|
||||||
|
which must not contain raw newlines).
|
||||||
|
* ``best_similarity`` — best difflib ratio between a query string and a
|
||||||
|
list of candidate strings (used for title / person-name matching).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import difflib
|
||||||
|
import re
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
|
||||||
|
def paragraphs_to_html(text: str) -> str:
    """Convert plain text with blank-line paragraph breaks to compact HTML.

    Each paragraph is wrapped in ``<p>...</p>`` and single line breaks inside
    a paragraph become ``<br>``. The result contains no raw ``\\n`` or ``\\r``.

    Args:
        text: Plain text; may be empty or ``None``.

    Returns:
        The concatenated ``<p>`` elements, or ``""`` for empty input.

    Note:
        The text is inserted as-is and is not HTML-escaped.
    """
    if not text:
        return ""
    # Normalise Windows / old-Mac line endings first; otherwise "\r\n\r\n"
    # never counts as a paragraph break and raw "\r" leaks into the output.
    normalised = text.replace("\r\n", "\n").replace("\r", "\n").strip()
    # A "blank line" may still hold spaces or tabs, so allow whitespace
    # between the two newlines that delimit a paragraph.
    paragraphs = (chunk.strip() for chunk in re.split(r"\n\s*\n", normalised))
    return "".join(
        "<p>" + para.replace("\n", "<br>") + "</p>"
        for para in paragraphs
        if para
    )
|
||||||
|
|
||||||
|
|
||||||
|
def best_similarity(query: str, candidates: Iterable[str]) -> float:
    """Return the best case-insensitive difflib ratio for ``query``.

    Args:
        query: String to compare. ``None`` or empty is treated as ``""``;
            other values are coerced with ``str()``, as candidates are.
        candidates: Strings to compare against. Falsy entries (``None``,
            ``""``) are skipped. ``None`` is treated as no candidates, and a
            single bare string is treated as one candidate rather than being
            compared character by character.

    Returns:
        The highest ``difflib.SequenceMatcher`` ratio (0..1) between the
        lower-cased query and any lower-cased candidate, or ``0.0`` when
        there is nothing to compare against.
    """
    if isinstance(candidates, str):
        candidates = [candidates]
    needle = str(query or "").lower()
    # The query stays the first sequence and each candidate the second,
    # because ratio() can depend on argument order.
    return max(
        (
            difflib.SequenceMatcher(None, needle, str(candidate).lower()).ratio()
            for candidate in (candidates or ())
            if candidate
        ),
        default=0.0,
    )
|
||||||
Reference in New Issue
Block a user