From 216771f7091ebdc86df6a16d93bfdf0f5548f885 Mon Sep 17 00:00:00 2001 From: JohannesBOT Date: Sun, 14 Jun 2026 10:47:47 +0200 Subject: [PATCH] merged ln metadata into manga mover --- .env.example | 22 + .gitea/workflows/build.yml | 24 +- .gitea/workflows/release.yml | 18 +- Dockerfile | 26 +- docker-compose.prod.yml | 33 +- main_ln.py | 129 ++++ main.py => main_manga.py | 46 +- requirements.txt | 1 + src/AniListResolver.py | 49 +- src/CoverCache.py | 18 +- src/KavitaClient.py | 272 +++++++ src/KavitaPersonUpdater.py | 242 ++----- src/MALResolver.py | 32 +- src/TextUtils.py | 72 ++ src/ln/KavitaSeriesUpdater.py | 313 ++++++++ src/ln/LightNovelMetadataBuilder.py | 571 +++++++++++++++ src/ln/LightNovelOrchestrator.py | 260 +++++++ src/ln/MatchesCache.py | 187 +++++ src/ln/MatchesWebApp.py | 764 ++++++++++++++++++++ src/ln/RelationshipSync.py | 174 +++++ src/{ => manga}/ComicInfoBuilder.py | 38 +- src/{ => manga}/KavitaVolumeCoverUpdater.py | 10 +- src/{ => manga}/MangadexVolumeResolver.py | 0 src/{ => manga}/MatchesCache.py | 0 src/{ => manga}/MatchesWebApp.py | 0 src/{ => manga}/SuwayomiFolderWatcher.py | 0 src/{ => manga}/SuwayomiMover.py | 19 +- 27 files changed, 3040 insertions(+), 280 deletions(-) create mode 100644 .env.example create mode 100644 main_ln.py rename main.py => main_manga.py (80%) create mode 100644 src/KavitaClient.py create mode 100644 src/TextUtils.py create mode 100644 src/ln/KavitaSeriesUpdater.py create mode 100644 src/ln/LightNovelMetadataBuilder.py create mode 100644 src/ln/LightNovelOrchestrator.py create mode 100644 src/ln/MatchesCache.py create mode 100644 src/ln/MatchesWebApp.py create mode 100644 src/ln/RelationshipSync.py rename src/{ => manga}/ComicInfoBuilder.py (97%) rename src/{ => manga}/KavitaVolumeCoverUpdater.py (98%) rename src/{ => manga}/MangadexVolumeResolver.py (100%) rename src/{ => manga}/MatchesCache.py (100%) rename src/{ => manga}/MatchesWebApp.py (100%) rename src/{ => manga}/SuwayomiFolderWatcher.py (100%) rename 
src/{ => manga}/SuwayomiMover.py (98%) diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..e3b464f --- /dev/null +++ b/.env.example @@ -0,0 +1,22 @@ +# Shared +KAVITA_URL=http://192.168.1.100:5000 +KAVITA_API_KEY=your-api-key-here +LANGUAGE=en +TZ=Europe/Berlin + +# Manga container (manga-mover-and-metadata-collector) +HOST_SUWAYOMI_PATH=/path/to/suwayomi/downloads +HOST_KAVITA_PATH=/path/to/kavita/library +HOST_MANGA_CONFIG_PATH=/path/to/manga-config +MANGA_WEB_PORT=8080 +SETTLE_SECONDS=600 +DELETE_SOURCE=true +UPDATER_ENABLED=true +UPDATER_SCHEDULE=0 19 * * 1,4 +COVER_CACHE_PATH=/config/covers + +# Light-novel container (kavita-lightnovel-metadata-fetcher) +HOST_LN_CONFIG_PATH=/path/to/ln-config +LN_WEB_PORT=8081 +LN_LIBRARY_IDS=3,5 + diff --git a/.gitea/workflows/build.yml b/.gitea/workflows/build.yml index 845e445..b2f2719 100644 --- a/.gitea/workflows/build.yml +++ b/.gitea/workflows/build.yml @@ -5,6 +5,11 @@ on: branches: - master +env: + REGISTRY: gitea.johannesbot.de/johannesbot + MANGA_IMAGE: manga-mover-and-metadata-collector + LN_IMAGE: kavita-lightnovel-metadata-fetcher + jobs: build: runs-on: ubuntu-latest @@ -17,11 +22,16 @@ jobs: echo "${{ secrets.REGISTRY_PASSWORD }}" | \ docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin - - name: Build Image - run: docker build -t gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest . + - name: Build Manga Image + run: docker build --build-arg APP=manga -t ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:latest . - - name: Push Image - run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest + - name: Build LN Image + run: docker build --build-arg APP=ln -t ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:latest . 
+ + - name: Push Images + run: | + docker push ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:latest + docker push ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:latest deploy: needs: build @@ -37,7 +47,7 @@ jobs: username: ${{ secrets.SSH_USER }} password: ${{ secrets.SSH_PASSWORD }} port: ${{ secrets.SSH_PORT || 22 }} - script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector + script: mkdir -p /home/${{ secrets.SSH_USER }}/kavita-metadata-collector - name: Copy docker-compose via SCP uses: appleboy/scp-action@v0.1.7 @@ -47,7 +57,7 @@ jobs: password: ${{ secrets.SSH_PASSWORD }} port: ${{ secrets.SSH_PORT || 22 }} source: "docker-compose.prod.yml" - target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector" + target: "/home/${{ secrets.SSH_USER }}/kavita-metadata-collector" - name: Deploy via SSH uses: appleboy/ssh-action@v1.0.3 @@ -57,7 +67,7 @@ jobs: password: ${{ secrets.SSH_PASSWORD }} port: ${{ secrets.SSH_PORT || 22 }} script: | - cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector + cd /home/${{ secrets.SSH_USER }}/kavita-metadata-collector mv docker-compose.prod.yml docker-compose.yml echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin sudo docker compose pull diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml index dc13bd5..6147cb3 100644 --- a/.gitea/workflows/release.yml +++ b/.gitea/workflows/release.yml @@ -5,6 +5,11 @@ on: tags: - 'v*' +env: + REGISTRY: gitea.johannesbot.de/johannesbot + MANGA_IMAGE: manga-mover-and-metadata-collector + LN_IMAGE: kavita-lightnovel-metadata-fetcher + jobs: build: runs-on: ubuntu-latest @@ -21,8 +26,13 @@ jobs: id: tag run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT" - - name: Build Image - run: docker build -t gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:${{ steps.tag.outputs.VERSION }} . 
+ - name: Build Manga Image + run: docker build --build-arg APP=manga -t ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:${{ steps.tag.outputs.VERSION }} . - - name: Push Image - run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:${{ steps.tag.outputs.VERSION }} \ No newline at end of file + - name: Build LN Image + run: docker build --build-arg APP=ln -t ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:${{ steps.tag.outputs.VERSION }} . + + - name: Push Images + run: | + docker push ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:${{ steps.tag.outputs.VERSION }} + docker push ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:${{ steps.tag.outputs.VERSION }} diff --git a/Dockerfile b/Dockerfile index 496600a..a496332 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,18 @@ +# One Dockerfile, two images: the build arg APP selects the entry point. +# +# docker build --build-arg APP=manga -t .../manga-mover-and-metadata-collector . +# docker build --build-arg APP=ln -t .../kavita-lightnovel-metadata-fetcher . +# +# Both variants share src/; the variant-specific code lives in +# src/manga/ resp. src/ln/ and is selected by the entry point. + FROM python:3.12-slim +ARG APP=manga + WORKDIR /app -# System deps for Pillow (image dimensions); kept minimal. +# System deps for Pillow (image dimensions, manga variant); kept minimal. RUN apt-get update \ && apt-get install -y --no-install-recommends \ libjpeg62-turbo \ @@ -11,15 +21,17 @@ RUN apt-get update \ COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt -COPY src/ /app/src/ -COPY main.py /app/main.py +COPY src/ /app/src/ +COPY main_manga.py main_ln.py /app/ ENV PYTHONUNBUFFERED=1 \ - PYTHONDONTWRITEBYTECODE=1 + PYTHONDONTWRITEBYTECODE=1 \ + APP_VARIANT=${APP} -# Mount points used by main.py defaults -VOLUME ["/mnt/suwayomi", "/mnt/kavita", "/config"] +# /config is used by both variants; the manga variant additionally mounts +# /mnt/suwayomi and /mnt/kavita (see docker-compose.prod.yml). 
+VOLUME ["/config"] EXPOSE 8080 -CMD ["python", "/app/main.py"] +CMD python /app/main_${APP_VARIANT}.py diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 9e23fd7..bde59bb 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -1,5 +1,8 @@ services: - manga-mover: + # ------------------------------------------------------------------ + # Manga: Suwayomi -> Kavita mover + metadata enrichment + # ------------------------------------------------------------------ + manga-mover-and-metadata-collector: image: gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest container_name: manga-mover-and-metadata-collector restart: unless-stopped @@ -9,21 +12,39 @@ services: LANGUAGE: "${LANGUAGE:-en}" SETTLE_SECONDS: "${SETTLE_SECONDS:-600}" DELETE_SOURCE: "${DELETE_SOURCE:-true}" - MATCH_PATH: "${MATCH_PATH:-/config/matches.json}" - WEB_PORT: "${WEB_PORT:-8080}" + MATCH_PATH: "/config/matches.json" # Volume/cover back-fill updater UPDATER_ENABLED: "${UPDATER_ENABLED:-true}" # Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday # (local time, see TZ) UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}" - UPDATER_LOG: "${UPDATER_LOG:-/config/volume_updater.log}" + UPDATER_LOG: "/config/volume_updater.log" # Persistent cover cache (empty = temp dir, deleted on container stop) COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}" # Timezone for the cron schedule — without this 19:00 means 19:00 UTC TZ: "${TZ:-Europe/Berlin}" ports: - - "${WEB_PORT:-8080}:${WEB_PORT:-8080}" + - "${MANGA_WEB_PORT:-8080}:8080" volumes: - "${HOST_SUWAYOMI_PATH}:/mnt/suwayomi" - "${HOST_KAVITA_PATH}:/mnt/kavita" - - "${HOST_CONFIG_PATH}:/config" + - "${HOST_MANGA_CONFIG_PATH}:/config" + + # ------------------------------------------------------------------ + # Light novels: Kavita metadata fetcher (HTTP only, no file mover) + # ------------------------------------------------------------------ + kavita-lightnovel-metadata-fetcher: + 
image: gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:latest + container_name: kavita-lightnovel-metadata-fetcher + restart: unless-stopped + environment: + KAVITA_URL: "${KAVITA_URL}" + KAVITA_API_KEY: "${KAVITA_API_KEY}" + LIBRARY_IDS: "${LN_LIBRARY_IDS}" + LANGUAGE: "${LANGUAGE:-en}" + MATCH_PATH: "/config/matches.json" + TZ: "${TZ:-Europe/Berlin}" + ports: + - "${LN_WEB_PORT:-8081}:8080" + volumes: + - "${HOST_LN_CONFIG_PATH}:/config" diff --git a/main_ln.py b/main_ln.py new file mode 100644 index 0000000..792a6af --- /dev/null +++ b/main_ln.py @@ -0,0 +1,129 @@ +""" +main_ln.py +========== + +Container entry point for the **light-novel** variant (Kavita metadata +fetcher). The manga variant has its own entry point (main_manga.py); +both share the modules in src/ and add their variant-specific code from +src/ln/ resp. src/manga/. + +Reads configuration from environment variables, starts the orchestrator +and exposes the Flask WebApp on WEB_HOST:WEB_PORT. Everything happens +through HTTP — there is no folder watcher and no file mover (Kavita is +the source of truth for the library content; this service only writes +metadata back to it). + +Environment variables +--------------------- + Required: + KAVITA_URL base URL of the Kavita server, e.g. http://kavita:5000 + KAVITA_API_KEY Kavita API key (Settings -> User -> API key) + + Optional: + LIBRARY_IDS comma-separated default library ids (e.g. "3,5"). + Empty = user picks in the WebUI each time. + LANGUAGE default "en" + REQUEST_TIMEOUT default 30 + MATCH_PATH default /config/matches.json + WEB_PORT default 8080 + WEB_HOST default 0.0.0.0 +""" + +from __future__ import annotations + +import os +import sys +from pathlib import Path + +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass + +# Shared code in src/, LN-specific code in src/ln/. 
Modules are imported +# by their plain names so src-internal imports resolve to the same module +# objects (a `src.X` import would load everything twice). +_BASE = Path(__file__).resolve().parent +sys.path.insert(0, str(_BASE / "src")) +sys.path.insert(0, str(_BASE / "src" / "ln")) + +from MatchesCache import MatchesCache # noqa: E402 +from LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402 +from MatchesWebApp import MatchesWebApp # noqa: E402 + + +def _env_str(name: str, default: "str | None" = None, + required: bool = False) -> "str | None": + value = os.environ.get(name, default) + if required and not value: + print(f"[main] missing required env var: {name}", flush=True) + sys.exit(2) + return value + + +def _env_int(name: str, default: int) -> int: + raw = os.environ.get(name) + if raw is None or raw == "": + return default + try: + return int(raw) + except ValueError: + print(f"[main] {name}={raw!r} is not a valid integer; " + f"falling back to {default}", flush=True) + return default + + +def _env_int_list(name: str) -> list[int]: + raw = os.environ.get(name) or "" + out: list[int] = [] + for part in raw.split(","): + part = part.strip() + if not part: + continue + try: + out.append(int(part)) + except ValueError: + print(f"[main] {name}: ignoring non-integer value {part!r}", + flush=True) + return out + + +def main() -> int: + kavita_url = _env_str("KAVITA_URL", required=True) + kavita_api_key = _env_str("KAVITA_API_KEY", required=True) + language = _env_str("LANGUAGE", "en") or "en" + request_timeout = _env_int("REQUEST_TIMEOUT", 30) + match_path = _env_str("MATCH_PATH", "/config/matches.json") + web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0" + web_port = _env_int("WEB_PORT", 8080) + library_ids = _env_int_list("LIBRARY_IDS") + + print(f"[main] kavita url = {kavita_url}", flush=True) + print(f"[main] language = {language}", flush=True) + print(f"[main] match path = {match_path}", flush=True) + print(f"[main] libraries = {library_ids 
or '(picked in WebUI)'}", + flush=True) + print(f"[main] web = {web_host}:{web_port}", flush=True) + + cache = MatchesCache(match_path) + orchestrator = LightNovelOrchestrator( + kavita_url=kavita_url, + kavita_api_key=kavita_api_key, + matches_cache=cache, + language=language, + request_timeout=request_timeout, + ) + + app = MatchesWebApp( + cache, orchestrator=orchestrator, + default_library_ids=library_ids, + host=web_host, port=web_port, + ) + app.start() + app.wait() + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/main.py b/main_manga.py similarity index 80% rename from main.py rename to main_manga.py index d996b39..a055700 100644 --- a/main.py +++ b/main_manga.py @@ -1,10 +1,11 @@ """ -main.py -======= +main_manga.py +============= -Container entry point. Watches the mounted Suwayomi download directory -and, after a quiet period, triggers SuwayomiMover (which also runs the -Kavita person sync for every processed series). +Container entry point for the **manga** variant (Suwayomi -> Kavita mover +plus metadata enrichment). The light-novel variant has its own entry +point (main_ln.py); both share the modules in src/ and add their +variant-specific code from src/manga/ resp. src/ln/. Mount points (Docker) --------------------- @@ -15,7 +16,7 @@ Environment variables --------------------- Required: KAVITA_URL base URL of the Kavita server, e.g. http://kavita:5000 - KAVITA_API_KEY Kavita API key (Settings → User → API key) + KAVITA_API_KEY Kavita API key (Settings -> User -> API key) Optional: SUWAYOMI_PATH default /mnt/suwayomi @@ -42,14 +43,24 @@ import os import sys from pathlib import Path -# Make src/ importable when running as `python main.py`. 
-sys.path.insert(0, str(Path(__file__).resolve().parent / "src")) +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass -from src.SuwayomiMover import SuwayomiMover # noqa: E402 -from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402 -from src.MatchesCache import MatchesCache # noqa: E402 -from src.MatchesWebApp import MatchesWebApp # noqa: E402 -from src.KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402 +# Shared code in src/, manga-specific code in src/manga/. Modules are +# imported by their plain names so src-internal imports resolve to the +# same module objects (a `src.X` import would load everything twice). +_BASE = Path(__file__).resolve().parent +sys.path.insert(0, str(_BASE / "src")) +sys.path.insert(0, str(_BASE / "src" / "manga")) + +from SuwayomiMover import SuwayomiMover # noqa: E402 +from SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402,F401 +from MatchesCache import MatchesCache # noqa: E402 +from MatchesWebApp import MatchesWebApp # noqa: E402 +from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402 def _env_str(name: str, default: "str | None" = None, @@ -81,7 +92,7 @@ def _env_bool(name: str, default: bool) -> bool: def main() -> int: - suwayomi_path = _env_str("SUWAYOMI_PATH", r"/mnt/suwayomi") + suwayomi_path = _env_str("SUWAYOMI_PATH", "/mnt/suwayomi") kavita_path = _env_str("KAVITA_PATH", "/mnt/kavita") kavita_url = _env_str("KAVITA_URL", "http://kavita:5000") kavita_api_key = _env_str("KAVITA_API_KEY", "") @@ -142,13 +153,6 @@ def main() -> int: print(f"[main] UPDATER_SCHEDULE invalid ({exc}); " f"volume/cover updater DISABLED", flush=True) - # def shutdown(signum, _frame): - # print(f"[main] received signal {signum}", flush=True) - # watcher.stop() - # - # signal.signal(signal.SIGTERM, shutdown) - # signal.signal(signal.SIGINT, shutdown) - # # watcher.start() # watcher.wait() # blocks until stop() is called via a signal web_app.wait() # keep 
process alive while the watcher is disabled diff --git a/requirements.txt b/requirements.txt index 01b6bf7..30dcf60 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ requests>=2.31 Pillow>=10.0 watchdog>=4.0 Flask>=3.0 +python-dotenv>=1.0 diff --git a/src/AniListResolver.py b/src/AniListResolver.py index 39df248..5a54002 100644 --- a/src/AniListResolver.py +++ b/src/AniListResolver.py @@ -32,27 +32,35 @@ Dependencies from __future__ import annotations import datetime -import difflib import time import requests from MediaResolver import MediaResolver +from TextUtils import best_similarity # -------------------------------------------------------------------------- # GraphQL query strings # -------------------------------------------------------------------------- -_SEARCH_MANGA = """ +# AniList models both manga and light novels as type MANGA; the format +# clause decides which of the two a search returns. The placeholder is +# substituted at construction time (see `media_format`). +_SEARCH_MANGA_TEMPLATE = """ query ($search: String) { Page(page: 1, perPage: 5) { - media(search: $search, type: MANGA, format_not_in: [NOVEL]) { + media(search: $search, type: MANGA, __FORMAT_CLAUSE__) { id title { romaji english native } siteUrl } } } """ +_FORMAT_CLAUSES = { + "manga": "format_not_in: [NOVEL]", + "novel": "format_in: [NOVEL]", +} + _MANGA_STATS = """ query ($id: Int) { Media(id: $id, type: MANGA) { @@ -131,10 +139,24 @@ class AniListResolver(MediaResolver): cls._instance._initialized = False return cls._instance - def __init__(self, *, request_timeout: int = 30): + def __init__(self, *, request_timeout: int = 30, + media_format: str = "manga"): + """ + media_format : "manga" (excludes novels) or "novel" (novels only). + Only the FIRST construction in the process sets it + (singleton); construct the resolver with the correct + format in the entry point / orchestrator. 
+ """ if self._initialized: return + if media_format not in _FORMAT_CLAUSES: + raise ValueError(f"media_format must be one of " + f"{sorted(_FORMAT_CLAUSES)}, got {media_format!r}") + self.media_format = media_format + self._search_query = _SEARCH_MANGA_TEMPLATE.replace( + "__FORMAT_CLAUSE__", _FORMAT_CLAUSES[media_format]) + self.request_timeout = request_timeout self._session = requests.Session() @@ -178,7 +200,7 @@ class AniListResolver(MediaResolver): return self._id_cache[key] try: - data = self._gql(_SEARCH_MANGA, {"search": title}) + data = self._gql(self._search_query, {"search": title}) results = ((data.get("data") or {}) .get("Page", {}) .get("media") or []) @@ -469,18 +491,11 @@ class AniListResolver(MediaResolver): def _score_title(query: str, entry: dict) -> float: """Returns the best title-similarity score for an AniList media entry.""" title_obj = entry.get("title") or {} - candidates = [ - title_obj.get("romaji") or "", - title_obj.get("english") or "", - title_obj.get("native") or "", - ] - best = 0.0 - q = query.lower() - for t in candidates: - if t: - ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio() - best = max(best, ratio) - return best + return best_similarity(query, ( + title_obj.get("romaji"), + title_obj.get("english"), + title_obj.get("native"), + )) # -------------------------------------------------------------------------- diff --git a/src/CoverCache.py b/src/CoverCache.py index ceececd..d1517ec 100644 --- a/src/CoverCache.py +++ b/src/CoverCache.py @@ -41,6 +41,21 @@ from pathlib import Path import requests +_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"} + + +def _guess_extension(url: str, content_type: str) -> str: + """Derives an image file extension from a URL or HTTP Content-Type.""" + url_ext = Path(url.split("?")[0]).suffix.lower() + if url_ext in _IMAGE_EXTS: + return url_ext + ct = (content_type or "").lower() + if "png" in ct: return ".png" + if "webp" in ct: return ".webp" + if "gif" in 
ct: return ".gif" + return ".jpg" + + class CoverCache: """ URL-keyed image cache on disk. @@ -121,9 +136,6 @@ class CoverCache: except requests.RequestException: return None - # Local import avoids a circular module dependency: - # ComicInfoBuilder imports CoverCache at module level. - from ComicInfoBuilder import _guess_extension ext = _guess_extension(url, resp.headers.get("Content-Type", "")) target = self._dir / f"{self._key(url)}{ext}" diff --git a/src/KavitaClient.py b/src/KavitaClient.py new file mode 100644 index 0000000..6a980f6 --- /dev/null +++ b/src/KavitaClient.py @@ -0,0 +1,272 @@ +""" +kavita_client.py +================ + +Thin HTTP client for the Kavita server REST API (v0.9.x). + +Authenticates via the ``x-api-key`` header. All series / library / +collection / metadata reads and writes used by the light-novel updater +go through this single client so request shaping (paging, content types, +timeouts, retries) is consistent. + +The class is intentionally state-light: no caching layer, just one +``requests.Session``. Higher-level diff / update logic lives in +KavitaSeriesUpdater, KavitaPersonUpdater and RelationshipSync. +""" + +from __future__ import annotations + +import base64 +from typing import Iterable + +import requests + + +class KavitaClient: + def __init__(self, base_url: str, api_key: str, *, + request_timeout: int = 30): + self._base = base_url.rstrip("/") + self._timeout = request_timeout + + # API session: sends + receives JSON. + self._session = requests.Session() + self._session.headers.update({ + "x-api-key": api_key, + "Accept": "application/json", + "Content-Type": "application/json", + }) + + # Plain session for downloading external images (covers). Must NOT + # carry the API headers — some CDNs refuse to return image bytes + # when the client sends Accept: application/json. 
+ self._image_session = requests.Session() + self._image_session.headers.update({ + "User-Agent": "KavitaLightNovelUpdater/1.0", + }) + + # ------------------------------------------------------------------ + # Libraries + # ------------------------------------------------------------------ + def list_libraries(self) -> list[dict]: + """Returns all libraries the authenticated user can access.""" + r = self._session.get(f"{self._base}/api/Library/libraries", + timeout=self._timeout) + r.raise_for_status() + return r.json() or [] + + # ------------------------------------------------------------------ + # Series + # ------------------------------------------------------------------ + def list_series_in_library(self, library_id: int, *, + page_size: int = 200) -> list[dict]: + """ + Returns all SeriesDto entries in the given library. + + Uses POST /api/Series/all-v2 with a FilterV2 that scopes by + library id. Pages through until an empty page is returned. + """ + results: list[dict] = [] + page = 1 + while True: + body = { + "statements": [ + { + "comparison": 0, # Equal + "field": 19, # Libraries field id (Kavita v0.9.x) + "value": str(library_id), + } + ], + "combination": 1, # And + "sortOptions": {"isAscending": True, "sortField": 1}, + "limitTo": 0, + } + r = self._session.post( + f"{self._base}/api/Series/all-v2", + params={"PageNumber": page, "PageSize": page_size}, + json=body, timeout=self._timeout) + r.raise_for_status() + chunk = r.json() or [] + if not chunk: + break + results.extend(chunk) + if len(chunk) < page_size: + break + page += 1 + return results + + def get_series(self, series_id: int) -> dict: + """Returns the SeriesDto for the given series id.""" + r = self._session.get(f"{self._base}/api/Series/{series_id}", + timeout=self._timeout) + r.raise_for_status() + return r.json() or {} + + def update_series(self, series: dict) -> None: + """Updates the Series-level data (name, sortName, malId, …).""" + r = 
self._session.post(f"{self._base}/api/Series/update", + json=series, timeout=self._timeout) + r.raise_for_status() + + # ------------------------------------------------------------------ + # Series metadata + # ------------------------------------------------------------------ + def get_series_metadata(self, series_id: int) -> dict: + """Returns the SeriesMetadataDto for a series.""" + r = self._session.get( + f"{self._base}/api/Series/metadata", + params={"seriesId": series_id}, timeout=self._timeout) + r.raise_for_status() + return r.json() or {} + + def update_series_metadata(self, metadata: dict) -> None: + """ + Writes a SeriesMetadataDto back to Kavita. + + Kavita expects the payload wrapped: {seriesMetadata: {...}}. + """ + r = self._session.post( + f"{self._base}/api/Series/metadata", + json={"seriesMetadata": metadata}, + timeout=self._timeout) + r.raise_for_status() + + # ------------------------------------------------------------------ + # Related series + # ------------------------------------------------------------------ + def get_related(self, series_id: int) -> dict: + """Returns all related series grouped by relation type.""" + r = self._session.get( + f"{self._base}/api/Series/all-related", + params={"seriesId": series_id}, timeout=self._timeout) + r.raise_for_status() + return r.json() or {} + + def update_related(self, payload: dict) -> None: + """ + Sets the related-series relationships for a series. + + Payload shape (UpdateRelatedSeriesDto): + {seriesId, prequels, sequels, sideStories, spinOffs, + adaptations, characters, contains, others, + alternativeSettings, alternativeVersions, doujinshis, + editions, annuals} + Each *_ids list contains target series ids (ints). 
+ """ + r = self._session.post( + f"{self._base}/api/Series/update-related", + json=payload, timeout=self._timeout) + r.raise_for_status() + + # ------------------------------------------------------------------ + # Collections + # ------------------------------------------------------------------ + def list_collections(self) -> list[dict]: + """Returns all collection tags visible to the authenticated user.""" + r = self._session.get( + f"{self._base}/api/Collection", + params={"ownedOnly": "false", "sortByLastModified": "false"}, + timeout=self._timeout) + r.raise_for_status() + return r.json() or [] + + def add_series_to_collection(self, *, collection_id: int, + title: str, + series_ids: Iterable[int]) -> dict: + """ + Adds (or creates) a collection and attaches series to it. + + Pass collection_id=0 to create a new collection named `title`. + For an existing collection set collection_id to its id (title is + still required by the API but acts as no-op when the id matches). + """ + body = { + "collectionTagId": int(collection_id), + "collectionTagTitle": title, + "seriesIds": [int(s) for s in series_ids], + } + r = self._session.post( + f"{self._base}/api/Collection/update-for-series", + json=body, timeout=self._timeout) + r.raise_for_status() + try: + return r.json() or {} + except ValueError: + return {} + + # ------------------------------------------------------------------ + # Persons + # ------------------------------------------------------------------ + def search_persons(self, name: str) -> list[dict]: + """Returns PersonDto entries matching `name` (Kavita's own search).""" + r = self._session.get( + f"{self._base}/api/Person/search", + params={"queryString": name}, timeout=self._timeout) + r.raise_for_status() + return r.json() or [] + + def update_person(self, payload: dict) -> None: + """Writes a person record (malId, aniListId, description, …).""" + r = self._session.post(f"{self._base}/api/Person/update", + json=payload, timeout=self._timeout) + 
r.raise_for_status() + + # ------------------------------------------------------------------ + # Cover uploads + # ------------------------------------------------------------------ + def upload_series_cover(self, series_id: int, image_url: str, *, + lock: bool = False) -> None: + """Downloads an external image and uploads it as the series cover.""" + self._upload_cover("/api/Upload/series", series_id, image_url, lock) + + def upload_person_cover(self, person_id: int, image_url: str, *, + lock: bool = False) -> None: + """Downloads an external image and uploads it as a person cover.""" + self._upload_cover("/api/Upload/person", person_id, image_url, lock) + + def _upload_cover(self, endpoint: str, entity_id: int, + image_url: str, lock: bool) -> None: + """ + Shared cover-upload path. Kavita's upload endpoints accept a raw + base64 blob (no ``data:`` prefix) in the ``url`` field — a data + URI or the two-step upload-by-url flow are rejected with HTTP 400 + (verified against Kavita 0.9.0.2). + """ + img = self._image_session.get(image_url, timeout=self._timeout) + img.raise_for_status() + b64 = base64.b64encode(img.content).decode() + r = self._session.post( + f"{self._base}{endpoint}", + json={"id": entity_id, "url": b64, "lockCover": lock}, + timeout=self._timeout) + if r.status_code >= 400: + # Include the body excerpt — Kavita's upload errors carry the + # actual reason there, not in the status line. 
+ raise requests.HTTPError( + f"{endpoint} HTTP {r.status_code}: {_short_body(r)}", + response=r) + + # ------------------------------------------------------------------ + # Generic GET helper (used by callers that need a response object) + # ------------------------------------------------------------------ + def get(self, path: str, params: "dict | None" = None) -> requests.Response: + return self._session.get(f"{self._base}{path}", + params=params, timeout=self._timeout) + + def post(self, path: str, *, + json: "dict | list | None" = None, + params: "dict | None" = None) -> requests.Response: + return self._session.post(f"{self._base}{path}", + json=json, params=params, + timeout=self._timeout) + + +def _short_body(resp: requests.Response, limit: int = 400) -> str: + """Returns the response body trimmed to `limit` chars for error messages.""" + try: + text = resp.text or "" + except Exception: + return "" + text = text.strip().replace("\n", " ").replace("\r", " ") + if len(text) > limit: + text = text[:limit] + "…" + return text or "" diff --git a/src/KavitaPersonUpdater.py b/src/KavitaPersonUpdater.py index c03bf32..ae0036b 100644 --- a/src/KavitaPersonUpdater.py +++ b/src/KavitaPersonUpdater.py @@ -15,46 +15,22 @@ the updater: an 'about' text (requires an extra Jikan request per character; only performed when update_descriptions=True). -Kavita API version ------------------- +All HTTP traffic to Kavita goes through the shared :class:`KavitaClient` +(`/api/Person/search`, `/api/Person/update`, `/api/Upload/person`). + Tested against Kavita 0.9.0.2. - -Authentication --------------- -Uses the `x-api-key` header (API key from Kavita user settings). -No JWT login is required. 
- -Relevant endpoints (Kavita 0.9.0.2) -------------------------------------- - GET /api/Person/search find persons by name / alias - POST /api/Person/update write metadata (malId, description, …) - POST /api/Upload/person set cover image (base64 data URI) - POST /api/Upload/upload-by-url download an external URL to temp storage - (used as an alternative upload path) - -Cover upload flow ------------------ -The image is downloaded locally, base64-encoded, and sent as a data URI -to POST /api/Upload/person. This is more reliable than the -upload-by-url → upload/person two-step because it avoids Kavita's temp -file handling (which had known issues in 0.8.x – 0.9.x, GitHub #3900). - -Dependencies ------------- - requests -> pip install requests """ from __future__ import annotations -import base64 import datetime -import difflib -import re import requests +from KavitaClient import KavitaClient from MALResolver import MALResolver from AniListResolver import AniListResolver +from TextUtils import best_similarity, paragraphs_to_html, person_name_with_id class KavitaPersonUpdater: @@ -63,41 +39,22 @@ class KavitaPersonUpdater: Parameters ---------- - kavita_base_url : Base URL of the Kavita server, e.g. "http://192.168.2.2:5000" - api_key : Kavita API key (Settings → User → API key) + client : Shared KavitaClient (session, auth, cover uploads) mal_resolver : Shared MALResolver singleton (created automatically if omitted) - request_timeout : HTTP timeout in seconds for both Kavita and image requests + al_resolver : Shared AniListResolver singleton (created automatically if omitted) min_name_score : Minimum difflib similarity ratio (0–1) required to accept a Kavita person as a match for a MAL name. Default 0.80. 
""" - def __init__(self, kavita_base_url: str, api_key: str, *, + def __init__(self, client: KavitaClient, *, mal_resolver: "MALResolver | None" = None, al_resolver: "AniListResolver | None" = None, - request_timeout: int = 30, min_name_score: float = 0.80): - self._base = kavita_base_url.rstrip("/") - self._timeout = request_timeout + self._client = client self._min_score = min_name_score self._mal = mal_resolver or MALResolver() self._al = al_resolver or AniListResolver() - # Session used for Kavita API calls. - self._session = requests.Session() - self._session.headers.update({ - "x-api-key": api_key, - "Content-Type": "application/json", - "Accept": "application/json", - }) - - # Plain session used to download external images (MAL CDN etc.). - # Must NOT carry the Kavita API headers — Accept: application/json - # would prevent MAL CDN from returning the image bytes. - self._image_session = requests.Session() - self._image_session.headers.update({ - "User-Agent": "KavitaPersonUpdater/1.0", - }) - # Cache: normalised name -> list of PersonDto dicts (best matches first) self._person_search_cache: dict[str, list[dict]] = {} @@ -195,11 +152,28 @@ class KavitaPersonUpdater: if not name and not raw_name: continue - # Search by the cleaned (XML-safe) name first; if Kavita stores - # the legacy comma form, retry with the raw MAL name. - matches = self._find_kavita_person(name) if name else [] - if not matches and raw_name and raw_name != name: - matches = self._find_kavita_person(raw_name) + if kind == "character": + # Characters are stored under their disambiguated name + # ("Rem (MAL 118737)") — see person_name_with_id. The + # series metadata write creates the person under exactly + # this name, so only that form is searched. + search_names = [person_name_with_id( + name, mal_id=entry.get("mal_id"), + al_id=entry.get("al_id"))] + else: + # Staff: cleaned (XML-safe) name first; if Kavita stores + # the legacy comma form, retry with the raw MAL name. 
+ search_names = [name] + if raw_name and raw_name != name: + search_names.append(raw_name) + + matches: list[dict] = [] + for search_name in search_names: + if not search_name: + continue + matches = self._find_kavita_person(search_name) + if matches: + break if not matches: result["not_found"] += 1 @@ -230,29 +204,17 @@ class KavitaPersonUpdater: return self._person_search_cache[key] try: - resp = self._session.get( - f"{self._base}/api/Person/search", - params={"queryString": name}, - timeout=self._timeout, - ) - resp.raise_for_status() - persons: list[dict] = resp.json() or [] + persons = self._client.search_persons(name) except requests.RequestException: self._person_search_cache[key] = [] return [] - def score(p: dict) -> float: - candidates = [p.get("name") or ""] - candidates += [a for a in (p.get("aliases") or []) if a] - best = 0.0 - q = key - for c in candidates: - r = difflib.SequenceMatcher(None, q, c.lower()).ratio() - best = max(best, r) - return best - - ranked = sorted(persons, key=score, reverse=True) - filtered = [p for p in ranked if score(p) >= self._min_score] + scored = [] + for p in persons: + candidates = [p.get("name")] + list(p.get("aliases") or []) + scored.append((best_similarity(key, candidates), p)) + scored.sort(key=lambda pair: pair[0], reverse=True) + filtered = [p for score, p in scored if score >= self._min_score] self._person_search_cache[key] = filtered return filtered @@ -289,6 +251,20 @@ class KavitaPersonUpdater: current_mal_id: int = person.get("malId") or 0 current_al_id: int = person.get("aniListId") or 0 + + # Collision guard: the Kavita person is already linked to a + # *different* tracker entity — same display name, different + # character/person. Never overwrite; first writer wins. 
+ if ((mal_id and current_mal_id and current_mal_id != mal_id) + or (al_id and current_al_id and current_al_id != al_id)): + if errors is not None: + errors.append( + f"conflict: '{person_name}' (#{person_id}) is linked to " + f"malId={current_mal_id or '-'}/aniListId={current_al_id or '-'} " + f"but this entry has malId={mal_id or '-'}/aniListId={al_id or '-'} " + f"— skipped") + return False + needs_mal_id = bool(mal_id and current_mal_id != mal_id) needs_al_id = bool(al_id and current_al_id != al_id) @@ -323,12 +299,7 @@ class KavitaPersonUpdater: "aniListId": al_id if needs_al_id else (current_al_id or None), } try: - resp = self._session.post( - f"{self._base}/api/Person/update", - json=payload, - timeout=self._timeout, - ) - resp.raise_for_status() + self._client.update_person(payload) changed = True except requests.RequestException as e: if errors is not None: @@ -350,88 +321,21 @@ class KavitaPersonUpdater: and bool(person.get("coverImage")) ) if image_url and not already_uploaded: - if self._upload_cover(person_id, image_url, - person_name=person_name, - errors=errors): + try: + self._client.upload_person_cover(person_id, image_url) changed = True + except requests.RequestException as e: + if errors is not None: + errors.append( + f"cover upload failed for #{person_id} " + f"'{person_name}' ({image_url}): {e}") return changed - # ------------------------------------------------------------------ - # Internal: cover upload - # ------------------------------------------------------------------ - def _upload_cover(self, person_id: int, image_url: str, - lock: bool = False, *, - person_name: str = "", - errors: "list | None" = None) -> bool: - """ - Uploads a cover image to a Kavita person. - - The image is downloaded with the plain (header-less) image session - and posted to `POST /api/Upload/person` as a raw base64 string in - the `url` field. 
- - Notes on protocol quirks discovered against Kavita 0.9.0.2: - - The two-step `upload-by-url` -> `Upload/person` flow returns - "Unable to save cover image to Person" (HTTP 400). - - A `data:image/jpeg;base64,...` data URI is rejected with the - same error. - - Only the raw base64 blob (no prefix) is accepted. - """ - label = (f"#{person_id} '{person_name}'" - if person_name else f"#{person_id}") - - # 1) Download the image with a clean session — the Kavita session's - # `Accept: application/json` header makes some CDNs refuse to - # return image bytes. - try: - img_resp = self._image_session.get(image_url, - timeout=self._timeout) - img_resp.raise_for_status() - except requests.RequestException as e: - if errors is not None: - errors.append( - f"image download failed for {label} ({image_url}): {e}") - return False - - b64 = base64.b64encode(img_resp.content).decode() - - # 2) POST the raw base64 blob. - try: - resp = self._session.post( - f"{self._base}/api/Upload/person", - json={"id": person_id, "url": b64, "lockCover": lock}, - timeout=self._timeout, - ) - if resp.status_code >= 400: - if errors is not None: - errors.append( - f"Upload/person HTTP {resp.status_code} for {label}: " - f"{_short_body(resp)}") - return False - return True - except requests.RequestException as e: - if errors is not None: - errors.append( - f"Upload/person failed for {label}: {e}") - return False - # -------------------------------------------------------------------------- # Module helpers: description builders # -------------------------------------------------------------------------- -def _plain_to_html(text: str) -> str: - """Converts plain text with paragraph breaks to compact HTML (no raw \\n).""" - if not text: - return "" - parts: list[str] = [] - for para in re.split(r"\n{2,}", text.strip()): - para = para.strip() - if para: - parts.append(f"

{para.replace(chr(10), '
')}

") - return "".join(parts) - - def _format_birthday(birthday: str) -> str: """Converts an ISO 8601 birthday string to "D Month YYYY".""" if not birthday: @@ -457,7 +361,7 @@ def _build_character_description(details: dict) -> str: parts.append(f'

Favorites: {favorites:,}

') about = (details.get("about") or "").strip() if about: - parts.append(_plain_to_html(about)) + parts.append(paragraphs_to_html(about)) return "
".join(parts) @@ -501,33 +405,19 @@ def _build_person_description(details: dict) -> str: parts.append(f'{"".join(rows)}
') about = (details.get("about") or "").strip() if about: - parts.append(_plain_to_html(about)) + parts.append(paragraphs_to_html(about)) return "
".join(parts) -# -------------------------------------------------------------------------- -# Module helper -# -------------------------------------------------------------------------- -def _short_body(resp: requests.Response, limit: int = 400) -> str: - """Returns the response body trimmed to `limit` chars for error logging.""" - try: - text = resp.text or "" - except Exception: - return "" - text = text.strip().replace("\n", " ").replace("\r", " ") - if len(text) > limit: - text = text[:limit] + "…" - return text or "" - - # -------------------------------------------------------------------------- # Usage example # -------------------------------------------------------------------------- if __name__ == "__main__": - KAVITA_URL = "http://192.168.2.2:5000" - KAVITA_KEY = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA" + import os - updater = KavitaPersonUpdater(KAVITA_URL, KAVITA_KEY) + client = KavitaClient(os.environ["KAVITA_URL"], + os.environ["KAVITA_API_KEY"]) + updater = KavitaPersonUpdater(client) mal = MALResolver() mal_id = mal.find_mal_id("よふかしのうた") diff --git a/src/MALResolver.py b/src/MALResolver.py index d16f824..ac1c422 100644 --- a/src/MALResolver.py +++ b/src/MALResolver.py @@ -30,12 +30,12 @@ Dependencies from __future__ import annotations import datetime -import difflib import time import requests from MediaResolver import MediaResolver +from TextUtils import best_similarity class MALResolver(MediaResolver): @@ -57,12 +57,21 @@ class MALResolver(MediaResolver): cls._instance._initialized = False return cls._instance - def __init__(self, *, request_timeout: int = 30): + def __init__(self, *, request_timeout: int = 30, + search_type: str = "manga"): + """ + search_type : Jikan `type` filter for title searches — "manga" for + the manga container, "lightnovel" for the LN container. + Only the FIRST construction in the process sets it + (singleton); construct the resolver with the correct + type in the entry point / orchestrator. 
+ """ if self._initialized: return self.JIKAN_BASE = "https://api.jikan.moe/v4" self.request_timeout = request_timeout + self.search_type = search_type self._session = requests.Session() self._session.headers.setdefault("User-Agent", "MALResolver/1.0") @@ -106,7 +115,7 @@ class MALResolver(MediaResolver): try: data = self._get(f"{self.JIKAN_BASE}/manga", - {"q": title, "limit": 5, "type": "manga"}) + {"q": title, "limit": 5, "type": self.search_type}) results = data.get("data") or [] except requests.RequestException: return None @@ -404,19 +413,12 @@ def _clean_mal_name(name: str) -> str: def _score_title(query: str, entry: dict) -> float: """Returns the best title-similarity score for a Jikan manga entry.""" candidates = [ - entry.get("title") or "", - entry.get("title_english") or "", - entry.get("title_japanese") or "", + entry.get("title"), + entry.get("title_english"), + entry.get("title_japanese"), ] - for alt in (entry.get("titles") or []): - candidates.append(alt.get("title") or "") - best = 0.0 - q = query.lower() - for t in candidates: - if t: - ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio() - best = max(best, ratio) - return best + candidates += [alt.get("title") for alt in (entry.get("titles") or [])] + return best_similarity(query, candidates) # -------------------------------------------------------------------------- diff --git a/src/TextUtils.py b/src/TextUtils.py new file mode 100644 index 0000000..2bfe7ec --- /dev/null +++ b/src/TextUtils.py @@ -0,0 +1,72 @@ +""" +text_utils.py +============= + +Small text helpers shared across modules: + +* ``paragraphs_to_html`` — converts plain text with blank-line paragraph + breaks into compact HTML (used for Kavita summary / description fields, + which must not contain raw newlines). +* ``best_similarity`` — best difflib ratio between a query string and a + list of candidate strings (used for title / person-name matching). 
+""" + +from __future__ import annotations + +import difflib +import re +from typing import Iterable + + +def paragraphs_to_html(text: str) -> str: + """Converts plain text with paragraph breaks to compact HTML (no raw \\n).""" + if not text: + return "" + parts: list[str] = [] + for para in re.split(r"\n{2,}", text.strip()): + para = para.strip() + if para: + parts.append(f"

{para.replace(chr(10), '
')}

") + return "".join(parts) + + +def best_similarity(query: str, candidates: Iterable[str]) -> float: + """ + Returns the best case-insensitive difflib similarity ratio (0..1) + between `query` and any non-empty candidate. + """ + q = (query or "").lower() + best = 0.0 + for candidate in candidates: + if candidate: + ratio = difflib.SequenceMatcher( + None, q, str(candidate).lower()).ratio() + best = max(best, ratio) + return best + + +def person_name_with_id(name: str, *, + mal_id: "int | None" = None, + al_id: "int | None" = None) -> str: + """ + Disambiguates a character name with its tracker id: "Rem (MAL 118737)". + + Kavita Person records are global and keyed by name only, so two + different characters who share a name would collapse into one record. + Suffixing the tracker *character* id keeps them apart while still + sharing the record across the manga and light-novel version of the + same series (MAL/AniList character ids are per character, not per + medium). MAL is preferred; AniList ids get an "AL" marker so the two + id spaces cannot collide. Without any id the name is returned as-is. + + The format must stay in sync with the manga project so both tools + address the same Kavita person records. + """ + name = (name or "").strip() + if not name: + return name + if mal_id: + return f"{name} (MAL {mal_id})" + if al_id: + return f"{name} (AL {al_id})" + return name diff --git a/src/ln/KavitaSeriesUpdater.py b/src/ln/KavitaSeriesUpdater.py new file mode 100644 index 0000000..81d2b5e --- /dev/null +++ b/src/ln/KavitaSeriesUpdater.py @@ -0,0 +1,313 @@ +""" +kavita_series_updater.py +======================== + +Diff-based update of a single Kavita series record from a +LightNovelMetadataBuilder output dict. + +Behaviour +--------- +* Locked fields in Kavita (``*Locked`` flags) are never touched, no matter + what MangaBaka returns. 
+* Scalar fields (summary, releaseYear, ageRating, publicationStatus, + language, score, sortName, localizedName) are overwritten when the + newly-built value differs from the value currently stored in Kavita. +* List fields (genres, tags, characters, writers, coverArtists, + publishers, imprints) are diff-merged: a name appearing in the new + set but not in the current one is added (id=0 so Kavita creates the + record); a name that is in Kavita but no longer in the new set is + dropped. Comparison is case-insensitive on the ``name`` field. +* Web links are stored as a comma-separated string in Kavita; this + updater treats them as a set and re-joins on write. +* Series-level cover image (URL different from last time) is re-uploaded + whenever ``coverImageLocked`` is False. The MangaBaka cover URL is + stamped onto matches.json as ``imageUrl`` so a subsequent run can skip + the upload when nothing changed. + +Returns a small diff report ({field: 'changed'/'skipped'/'locked'}) per +series so the WebApp can surface what happened. +""" + +from __future__ import annotations + +from typing import Iterable + +from KavitaClient import KavitaClient + + +# Maps Kavita "list" fields on SeriesMetadataDto to (lock_flag, item_key). +# `item_key` is the dict key Kavita uses for the display name on each item: +# GenreTagDto / TagDto use "title", PersonDto uses "name". +_LIST_FIELDS: list[tuple[str, str, str]] = [ + ("genres", "genresLocked", "title"), + ("tags", "tagsLocked", "title"), + ("characters", "characterLocked", "name"), + ("writers", "writerLocked", "name"), + ("coverArtists", "coverArtistLocked", "name"), + ("publishers", "publisherLocked", "name"), + ("imprints", "imprintLocked", "name"), +] + + +def _norm(name: str) -> str: + return (name or "").strip().lower() + + +def _merge_list( + current: list[dict], + new_names: Iterable[str], + item_key: str, +) -> "tuple[list[dict], bool]": + """ + Diff-merges a Kavita list field with the canonical name list from + MangaBaka. 
Returns (merged_list, changed_flag). + + `item_key` is the dict key Kavita uses for the display name on each + item ("title" for GenreTagDto/TagDto, "name" for PersonDto). + + * Items in `current` whose display value appears in `new_names` are + kept verbatim so existing ids and ancillary fields survive. + * New names (no matching entry in `current`) are appended with + ``{"id": 0, : }`` — Kavita creates the record on save. + * Items in `current` whose display value is *not* in `new_names` are + dropped. + """ + new_set = [n for n in new_names if n and n.strip()] + new_index = {_norm(n): n.strip() for n in new_set} + + merged: list[dict] = [] + kept_keys: set[str] = set() + for item in (current or []): + key = _norm(item.get(item_key)) + if key in new_index: + merged.append(item) + kept_keys.add(key) + + added = False + for key, display in new_index.items(): + if key not in kept_keys: + merged.append({"id": 0, item_key: display}) + added = True + + removed = len(current or []) != len(kept_keys) + return merged, added or removed + + +def _parse_web_links(value) -> list[str]: + if not value: + return [] + if isinstance(value, list): + return [str(v).strip() for v in value if v] + return [p.strip() for p in str(value).split(",") if p.strip()] + + +def _merge_web_links(current_str, new_links: list[str]) -> "tuple[str, bool]": + current = _parse_web_links(current_str) + new_norm = [l for l in new_links if l] + if not new_norm: + return ",".join(current), False + + # Mirror MangaBaka's set: keep order from new_norm, then anything from + # current that's still in new_norm (already covered above). Anything + # in current that's not in new_norm is dropped. 
+ new_set = set(new_norm) + merged = list(new_norm) + changed = sorted(new_set) != sorted(set(current)) + return ",".join(merged), changed + + +class KavitaSeriesUpdater: + def __init__(self, client: KavitaClient): + self._client = client + + # ------------------------------------------------------------------ + # Public + # ------------------------------------------------------------------ + def update_series(self, series_id: int, built: dict, *, + previous_cover_url: "str | None" = None) -> dict: + """ + Applies the diff between Kavita's current state for `series_id` + and the freshly-built MangaBaka dict. Returns a per-field diff + report. + """ + series = self._client.get_series(series_id) + metadata = self._client.get_series_metadata(series_id) + report: dict = {} + + meta_changed = self._diff_metadata(metadata, built, report) + if meta_changed: + self._client.update_series_metadata(metadata) + + series_changed = self._diff_series(series, built, report) + if series_changed: + self._client.update_series(series) + + # Cover: only re-upload when not locked AND URL actually changed. 
+ new_cover = built.get("coverUrl") + if (new_cover + and not series.get("coverImageLocked") + and new_cover != previous_cover_url): + try: + self._client.upload_series_cover(series_id, new_cover) + report["coverImage"] = "changed" + except Exception as exc: + report["coverImage"] = f"error: {exc}" + elif series.get("coverImageLocked"): + report["coverImage"] = "locked" + else: + report["coverImage"] = "skipped" + + return report + + # ------------------------------------------------------------------ + # Internal: SeriesMetadataDto + # ------------------------------------------------------------------ + def _diff_metadata(self, metadata: dict, built: dict, + report: dict) -> bool: + changed = False + + # ----- Scalars ------------------------------------------------ + # (built_key, metadata_key, locked_key, transform, skip_when_zero) + # `skip_when_zero` covers fields where 0 means "no data" rather + # than a real value (releaseYear, ageRating). publicationStatus 0 + # is a valid "Ongoing" status — never skip it. 
+ scalar_map = [ + ("summary", "summary", "summaryLocked", None, False), + ("releaseYear", "releaseYear", "releaseYearLocked", int, True), + ("ageRating", "ageRating", "ageRatingLocked", int, True), + ("publicationStatus", "publicationStatus", "publicationStatusLocked", int, False), + ("language", "language", "languageLocked", None, False), + ] + for built_key, meta_key, locked_key, transform, skip_zero in scalar_map: + new_val = built.get(built_key) + if new_val is None or new_val == "": + report[meta_key] = "skipped" + continue + if transform is not None: + try: + new_val = transform(new_val) + except (TypeError, ValueError): + report[meta_key] = "skipped" + continue + if skip_zero and new_val == 0: + report[meta_key] = "skipped" + continue + if metadata.get(locked_key): + report[meta_key] = "locked" + continue + if metadata.get(meta_key) != new_val: + metadata[meta_key] = new_val + changed = True + report[meta_key] = "changed" + else: + report[meta_key] = "unchanged" + + # ----- Web links (single comma-separated string) --------------- + # SeriesMetadataDto has no dedicated lock for webLinks — always update. 
+ web_str, web_changed = _merge_web_links( + metadata.get("webLinks"), built.get("webLinks") or []) + if web_changed: + metadata["webLinks"] = web_str + changed = True + report["webLinks"] = "changed" + else: + report["webLinks"] = "unchanged" + + # ----- List fields -------------------------------------------- + list_map = { + "genres": built.get("genres"), + "tags": built.get("tags"), + "characters": built.get("characters"), + "writers": built.get("writers"), + "coverArtists": built.get("coverArtists"), + "publishers": built.get("publishers"), + "imprints": [built["imprint"]] if built.get("imprint") else [], + } + for meta_key, locked_key, item_key in _LIST_FIELDS: + new_names = list_map.get(meta_key) or [] + if metadata.get(locked_key): + report[meta_key] = "locked" + continue + if not new_names and not (metadata.get(meta_key) or []): + report[meta_key] = "unchanged" + continue + merged, list_changed = _merge_list( + metadata.get(meta_key) or [], new_names, item_key) + if list_changed: + metadata[meta_key] = merged + changed = True + report[meta_key] = "changed" + else: + report[meta_key] = "unchanged" + + return changed + + # ------------------------------------------------------------------ + # Internal: SeriesDto (sortName, userRating, tracker ids) + # ------------------------------------------------------------------ + def _diff_series(self, series: dict, built: dict, report: dict) -> bool: + changed = False + + # sortName / localizedName + if not series.get("sortNameLocked"): + new_sort = built.get("sortName") or "" + if new_sort and series.get("sortName") != new_sort: + series["sortName"] = new_sort + changed = True + report["sortName"] = "changed" + else: + report["sortName"] = "unchanged" + else: + report["sortName"] = "locked" + + if not series.get("localizedNameLocked"): + new_loc = built.get("localizedName") or "" + if new_loc and series.get("localizedName") != new_loc: + series["localizedName"] = new_loc + changed = True + report["localizedName"] = 
"changed" + else: + report["localizedName"] = "unchanged" + else: + report["localizedName"] = "locked" + + # Tracker ids — Kavita exposes malId, aniListId, mangaBakaId + for built_key, series_key in ( + ("malId", "malId"), + ("anilistId", "aniListId"), + ("mangabakaId", "mangaBakaId"), + ): + new_val = built.get(built_key) + if new_val in (None, "", 0): + continue + try: + new_int = int(new_val) + except (TypeError, ValueError): + continue + if int(series.get(series_key) or 0) != new_int: + series[series_key] = new_int + changed = True + report[series_key] = "changed" + + # userRating from MangaBaka (0..5) + new_score = built.get("score") + if new_score is not None: + try: + new_score = float(new_score) + except (TypeError, ValueError): + new_score = None + if new_score is not None: + current_score = series.get("userRating") + try: + current_score = float(current_score) if current_score is not None else None + except (TypeError, ValueError): + current_score = None + if current_score != new_score: + series["userRating"] = new_score + series["hasUserRated"] = True + changed = True + report["userRating"] = "changed" + else: + report["userRating"] = "unchanged" + + return changed diff --git a/src/ln/LightNovelMetadataBuilder.py b/src/ln/LightNovelMetadataBuilder.py new file mode 100644 index 0000000..793b1bc --- /dev/null +++ b/src/ln/LightNovelMetadataBuilder.py @@ -0,0 +1,571 @@ +""" +light_novel_metadata_builder.py +=============================== + +Fetches series-level metadata for a light novel from MangaBaka, enriches +it with MyAnimeList / AniList tracker statistics and character data, and +returns a structured dict ready to be diffed against Kavita's +SeriesMetadataDto. + +Differences vs. the manga project's ComicInfoBuilder: + - No chapter / page handling — Kavita reads volumes from the files. + - No XML output — produces a plain dict. + - No MangaDex resolver — light novels don't have a chapter→volume + mapping problem. 
+ - MangaBaka search type is fixed to ``novel`` so only light/web novels + are returned. +""" + +from __future__ import annotations + +import re + +import requests + +from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit +from MALResolver import MALResolver +from AniListResolver import AniListResolver +from MatchesCache import MatchesCache +from TextUtils import paragraphs_to_html, person_name_with_id + + +# MangaBaka series type for the search endpoint. +_SEARCH_TYPES = ["novel"] + +# MangaBaka content_rating -> Kavita AgeRating enum +# Kavita AgeRating values (from openapi.json): +# 0=Unknown, 3=Everyone, 8=Teen, 10=Mature17Plus, 13=AdultsOnly +_AGE_RATING_MAP = { + "safe": 3, # Everyone + "suggestive": 8, # Teen + "erotica": 10, # Mature17Plus + "pornographic": 13, # AdultsOnly +} + +# MangaBaka status -> Kavita PublicationStatus enum +# Kavita PublicationStatus (from openapi.json): +# 0=OnGoing, 1=Hiatus, 2=Completed, 3=Cancelled, 4=Ended +_PUB_STATUS_MAP = { + "ongoing": 0, + "hiatus": 1, + "completed": 2, + "cancelled": 3, + "ended": 4, +} + +# External-tracker URL templates used to enrich the web-links list. 
+_TRACKER_URL_TEMPLATES = { + "anilist": "https://anilist.co/manga/{id}", + "myanimelist": "https://myanimelist.net/manga/{id}", + "mal": "https://myanimelist.net/manga/{id}", + "mangaupdates": "https://www.mangaupdates.com/series.html?id={id}", + "kitsu": "https://kitsu.app/manga/{id}", + "animenewsnetwork": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}", + "ann": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}", + "animeplanet": "https://www.anime-planet.com/manga/{id}", + "shikimori": "https://shikimori.one/mangas/{id}", + "bookwalker": "https://bookwalker.jp/{id}", +} + +_MD_ESCAPE_RE = re.compile(r'\\([\\`*_{}\[\]()\#+\-.!|~])') + + +# -------------------------------------------------------------------------- +# Helpers +# -------------------------------------------------------------------------- +def _normalise_key(key) -> str: + return re.sub(r"[^a-z0-9]", "", str(key).lower()) + + +def _format_term(value: str) -> str: + return str(value).replace("_", " ").strip().title() if value else "" + + +def _md_to_html(text: str) -> str: + """Converts the subset of Markdown produced by MangaBaka to compact HTML.""" + if not text: + return "" + text = _MD_ESCAPE_RE.sub(r'\1', text) + text = re.sub( + r'\[([^\]]+)\]\(([^)]+)\)', + lambda m: f'{m.group(1)}', + text, + ) + text = re.sub(r'\*\*(.+?)\*\*', r'\1', text, flags=re.DOTALL) + text = re.sub(r'\*(.+?)\*', r'\1', text, flags=re.DOTALL) + return paragraphs_to_html(text) + + +def pick_cover_url(cover) -> "str | None": + """Selects the best cover URL from a MangaBaka cover object.""" + if not cover: + return None + if isinstance(cover, str): + return cover + if not isinstance(cover, dict): + return None + + raw = cover.get("raw") + if isinstance(raw, dict): + url = raw.get("url") + if isinstance(url, str) and url: + return url + elif isinstance(raw, str) and raw: + return raw + + for size_key in ("x350", "x250", "x150"): + variant = cover.get(size_key) + if isinstance(variant, 
dict): + for density in ("x3", "x2", "x1"): + url = variant.get(density) + if isinstance(url, str) and url: + return url + elif isinstance(variant, str) and variant: + return variant + + for val in cover.values(): + if isinstance(val, str) and val.startswith("http"): + return val + if isinstance(val, dict): + for sub in val.values(): + if isinstance(sub, str) and sub.startswith("http"): + return sub + return None + + +def pick_thumbnail_url(cover) -> "str | None": + """Picks a small cover variant suitable for a UI thumbnail.""" + if not cover: + return None + if isinstance(cover, str): + return cover + if not isinstance(cover, dict): + return None + for size_key in ("x150", "x250", "x350"): + variant = cover.get(size_key) + if isinstance(variant, dict): + for density in ("x2", "x1", "x3"): + url = variant.get(density) + if isinstance(url, str) and url: + return url + elif isinstance(variant, str) and variant: + return variant + return pick_cover_url(cover) + + +def _id_from_source(md: dict, *names: str) -> "int | None": + target = {_normalise_key(n) for n in names} + for raw_key, info in (md.get("source") or {}).items(): + if _normalise_key(raw_key) in target and isinstance(info, dict): + mid = info.get("id") + if mid is not None: + try: + return int(mid) + except (TypeError, ValueError): + pass + return None + + +# -------------------------------------------------------------------------- +# Builder +# -------------------------------------------------------------------------- +class LightNovelMetadataBuilder: + """ + Resolves a light-novel series on MangaBaka and produces a structured + metadata dict ready to be merged into Kavita. 
+ """ + + def __init__(self, *, + api_base_url: str = "https://api.mangabaka.dev/v1", + language: str = "en", + request_timeout: int = 30, + session: "requests.Session | None" = None, + mal_resolver: "MALResolver | None" = None, + al_resolver: "AniListResolver | None" = None, + matches_cache: "MatchesCache | None" = None): + self.api_base_url = api_base_url.rstrip("/") + self.language = language + self.request_timeout = request_timeout + + self._session = session or requests.Session() + self._session.headers.setdefault("User-Agent", + "LightNovelMetadataBuilder/1.0") + _apply_mangabaka_rate_limit(self._session) + + self._mal = mal_resolver or MALResolver( + request_timeout=request_timeout, search_type="lightnovel") + self._al = al_resolver or AniListResolver( + request_timeout=request_timeout, media_format="novel") + self._matches_cache = matches_cache + + # ------------------------------------------------------------------ + # MangaBaka search / fetch + # ------------------------------------------------------------------ + def search_series(self, title: str) -> "dict | None": + """Returns the top MangaBaka novel hit for `title`, or None.""" + if not title or not title.strip(): + return None + url = f"{self.api_base_url}/series/search" + try: + resp = self._session.get( + url, params={"q": title, "type": _SEARCH_TYPES, + "page": 1, "limit": 1}, + timeout=self.request_timeout) + resp.raise_for_status() + except requests.RequestException: + return None + data = resp.json().get("data") or [] + return data[0] if data else None + + def fetch_series(self, series_id) -> "dict | None": + """ + Returns the full MangaBaka series dict for the given id, following + ``merged_with`` redirects. A seen-set guards against merge cycles. 
+ """ + if series_id is None or str(series_id).strip() == "": + return None + seen: set[str] = set() + current = series_id + while str(current) not in seen: + seen.add(str(current)) + url = f"{self.api_base_url}/series/{current}" + resp = self._session.get(url, timeout=self.request_timeout) + resp.raise_for_status() + data = resp.json().get("data") + if data and data.get("state") == "merged" and data.get("merged_with"): + current = data["merged_with"] + continue + return data + return None + + # ------------------------------------------------------------------ + # Resolve title -> MangaBaka series (caches the match) + # ------------------------------------------------------------------ + def resolve(self, title: str) -> "dict | None": + """ + Returns the MangaBaka series for `title`. + + Lookup order: + 1. MatchesCache (uses stored mangabakaId, skips the search). + 2. Fresh MangaBaka search — top hit. Result is persisted to the + cache so it survives a crash. + """ + if self._matches_cache is not None: + cached = self._matches_cache.get(title) + if cached and cached.get("mangabakaId"): + try: + series = self.fetch_series(cached["mangabakaId"]) + if series: + return series + except Exception: + pass + + series = self.search_series(title) + if series and self._matches_cache is not None: + self._matches_cache.upsert( + title, + mangabaka_id=series.get("id"), + mangabaka_name=series.get("title") or "", + image_url=pick_thumbnail_url(series.get("cover")), + ) + return series + + # ------------------------------------------------------------------ + # Main entry point + # ------------------------------------------------------------------ + def build(self, *, title: str = "", + mangabaka_id=None) -> "dict | None": + """ + Fetches and enriches metadata for one series, returning the + normalised dict described in the module docstring. + + Pass either `title` (will resolve via cache/search) or + `mangabaka_id` (direct fetch). 
+ """ + if mangabaka_id is not None and str(mangabaka_id).strip(): + md = self.fetch_series(mangabaka_id) + else: + md = self.resolve(title) + if not md: + return None + return self._assemble(md) + + # ------------------------------------------------------------------ + # Internal: assemble the result dict + # ------------------------------------------------------------------ + def _assemble(self, md: dict) -> dict: + mal_id = _id_from_source(md, "myanimelist", "mal") + al_id = _id_from_source(md, "anilist") + + # Fall back to a title-based MAL lookup when the source map does + # not carry an id — Jikan is the only tracker that ships staff + # data we can use to enrich author / artist person records. + if mal_id is None: + mal_id = self._mal.find_mal_id(md.get("title") or "") + + mal_stats = self._mal.get_stats(mal_id) if mal_id else None + + characters_detailed = self._mal.get_characters_detailed(mal_id) if mal_id else [] + if not characters_detailed and al_id: + characters_detailed = self._al.get_characters_detailed(al_id) + + staff_detailed = self._mal.get_staff_detailed(mal_id) if mal_id else [] + if not staff_detailed and al_id: + staff_detailed = self._al.get_staff_detailed(al_id) + + # Character names for SeriesMetadata, disambiguated with the + # tracker character id ("Rem (MAL 118737)") because Kavita person + # records are global and keyed by name only. + character_names = [ + person_name_with_id(c["name"], + mal_id=c.get("mal_id"), + al_id=c.get("al_id")) + for c in characters_detailed if c.get("name") + ] + # Writers come from MangaBaka first (authoritative for novels) + writers = list(md.get("authors") or []) + # Illustrators / artists -> CoverArtists (Kavita has no dedicated + # illustrator field, and Pencillers is the wrong semantic for + # text-only novels). + cover_artists = list(md.get("artists") or []) + + # Publisher: prefer English licence, else original. When both + # exist, the original publisher becomes the imprint. 
+ english_pubs = self._publishers_by_type(md, "English") + original_pubs = self._publishers_by_type(md, "Original") + publishers = english_pubs or original_pubs + imprint = original_pubs[0] if english_pubs and original_pubs else None + + # Release year + release_year = None + try: + if md.get("year") is not None: + release_year = int(md["year"]) + except (TypeError, ValueError): + pass + + # Score: MangaBaka rating is 0..100 -> Kavita userRating is 0..5 + score = None + if md.get("rating") is not None: + try: + score = round(float(md["rating"]) / 20.0, 1) + except (TypeError, ValueError): + pass + + # Tags / genres come back as snake_case slugs. + genres = [_format_term(g) for g in (md.get("genres") or []) if g] + tags = [_format_term(t) for t in (md.get("tags") or []) if t] + + # Web links + web_links = self._collect_web_links(md) + + # Summary HTML + summary = self._build_summary(md, mal_stats) + + # Cover URL + cover_url = pick_cover_url(md.get("cover")) + + # Title variants + all_alt = self._collect_all_alt_titles(md) + + return { + "mangabakaId": str(md.get("id") or ""), + "mangabakaTitle": md.get("title") or "", + "originalName": md.get("native_title") or "", + "localizedName": md.get("romanized_title") or "", + "sortName": self._sort_title(md), + "altTitles": all_alt, + "summary": summary, + "genres": genres, + "tags": tags, + "characters": character_names, + "writers": writers, + "coverArtists": cover_artists, + "publishers": publishers, + "imprint": imprint, + "releaseYear": release_year, + "ageRating": _AGE_RATING_MAP.get(md.get("content_rating"), 0), + "publicationStatus": _PUB_STATUS_MAP.get( + (md.get("status") or "").lower(), 0), + "language": self.language, + "webLinks": web_links, + "score": score, + "coverUrl": cover_url, + "malId": mal_id, + "anilistId": al_id, + "relationships": list(md.get("relationships_v2") or []), + "charactersDetailed": characters_detailed, + "staffDetailed": staff_detailed, + "raw": md, + } + + # 
------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + @staticmethod + def _publishers_by_type(md: dict, ptype: str) -> list[str]: + return [p.get("name") for p in (md.get("publishers") or []) + if p.get("type") == ptype and p.get("name")] + + def _sort_title(self, md: dict) -> str: + lang = self.language.lower() + alts = self._collect_alt_titles(md) + return alts.get(lang) or md.get("title") or "" + + def _collect_alt_titles(self, md: dict) -> "dict[str, str]": + """Returns one best title per language code (en/de/jp/romaji).""" + titles = md.get("titles") or md.get("alt_titles") or [] + + def pick(language_codes: tuple, prefer_trait: "str | None" = None + ) -> "str | None": + best_score = -1 + best_title: "str | None" = None + for entry in titles: + if not isinstance(entry, dict): + continue + lang = (entry.get("language") or entry.get("lang") or "").lower() + if lang not in language_codes: + continue + title = entry.get("title") + if not title: + continue + traits = entry.get("traits") or [] + score = 0 + if prefer_trait and prefer_trait in traits: + score += 4 + if "official" in traits: + score += 2 + if entry.get("is_primary"): + score += 1 + if score > best_score: + best_score, best_title = score, title + return best_title + + result: dict[str, str] = {} + kanji = pick(("ja",), prefer_trait="native") or md.get("native_title") + if kanji: + result["jp"] = kanji + romaji = pick(("ja-latn", "ja-romaji")) + if not romaji: + rt = md.get("romanized_title") or "" + if rt and all(ord(c) < 128 for c in rt): + romaji = rt + if romaji: + result["romaji"] = romaji + en = pick(("en",)) or md.get("title") + if en: + result["en"] = en + de = pick(("de",)) + if de: + result["de"] = de + return result + + @staticmethod + def _collect_all_alt_titles(md: dict) -> "dict[str, list[str]]": + _GROUPS = { + "en": ("en",), + "de": ("de",), + "ja": ("ja",), + "ja-romaji": 
("ja-latn", "ja-romaji"), + "ko": ("ko",), + "ko-romaji": ("ko-latn", "ko-romaji"), + "zh": ("zh", "zh-hk", "zh-tw", "zh-hans", "zh-hant"), + "zh-romaji": ("zh-latn",), + } + lang_to_group = {l: g for g, ls in _GROUPS.items() for l in ls} + result: dict[str, list[str]] = {} + seen: dict[str, set] = {} + for entry in (md.get("titles") or md.get("alt_titles") or []): + if not isinstance(entry, dict): + continue + lang = (entry.get("language") or entry.get("lang") or "").lower() + group = lang_to_group.get(lang) + if not group: + continue + title = (entry.get("title") or "").strip() + if not title: + continue + result.setdefault(group, []) + seen.setdefault(group, set()) + if title not in seen[group]: + result[group].append(title) + seen[group].add(title) + return result + + def _collect_web_links(self, md: dict) -> list[str]: + links: list[str] = [l for l in (md.get("links") or []) if l] + for raw_key, info in (md.get("source") or {}).items(): + template = _TRACKER_URL_TEMPLATES.get(_normalise_key(raw_key)) + if not template or not isinstance(info, dict): + continue + source_id = info.get("id") + if source_id is not None: + links.append(template.format(id=source_id)) + seen: set[str] = set() + unique: list[str] = [] + for link in links: + if link not in seen: + seen.add(link) + unique.append(link) + return unique + + def _build_summary(self, md: dict, + mal_stats: "dict | None") -> str: + """Builds the HTML summary with stats table + description + alt titles.""" + _TD = 'style="padding-right:1.5em"' + parts: list[str] = [] + + if mal_stats: + url = mal_stats.get("url", "") + as_of = mal_stats.get("as_of", "") + rows: list[str] = [] + for label, key, fmt in ( + ("Score", "score", "{}"), + ("Ranked", "rank", "#{}"), + ("Scored by", "scored_by", "{:,} users"), + ("Popularity","popularity", "#{}"), + ("Members", "members", "{:,}"), + ("Favorites", "favorites", "{:,}"), + ): + v = mal_stats.get(key) + if v is None: + continue + try: + formatted = fmt.format(v) + except 
(TypeError, ValueError): + formatted = str(v) + rows.append(f"{label}{formatted}") + if rows: + link = f'MyAnimeList' if url else "MyAnimeList" + parts.append(f"

{link} stats as of {as_of}:

" + f"{''.join(rows)}
") + + desc_raw = (md.get("description") or "").strip() + if desc_raw: + parts.append(_md_to_html(desc_raw)) + + all_alt = self._collect_all_alt_titles(md) + if all_alt: + label_map = { + "en": "EN", + "de": "DE", + "ja": "JA", + "ja-romaji": "JA Romaji", + "ko": "KO", + "ko-romaji": "KO Romaji", + "zh": "ZH", + "zh-romaji": "ZH Romaji", + } + alt_rows: list[str] = [] + for group in ("en", "de", "ja", "ja-romaji", + "ko", "ko-romaji", "zh", "zh-romaji"): + titles = all_alt.get(group) + if not titles: + continue + cell = "
".join(titles) + alt_rows.append( + f"{label_map[group]}{cell}") + if alt_rows: + parts.append(f"{''.join(alt_rows)}
") + + return "
".join(parts) diff --git a/src/ln/LightNovelOrchestrator.py b/src/ln/LightNovelOrchestrator.py new file mode 100644 index 0000000..339819e --- /dev/null +++ b/src/ln/LightNovelOrchestrator.py @@ -0,0 +1,260 @@ +""" +light_novel_orchestrator.py +=========================== + +High-level workflow on top of the resolvers, the Kavita client and the +diff-based updaters. Exposes three operations to the WebApp: + + - build_matches(library_ids): + Scan one or more Kavita libraries, resolve every series against + MangaBaka and persist the match in matches.json. + - update_series(kavita_series_id): + Re-fetch MangaBaka, MAL and AniList data for a single Kavita + series and apply the diff (metadata + persons + relationships). + - update_all(library_ids): + Run update_series for every series that has a match in the + cache and lives in the given libraries. + +A single shared HTTP session (rate-limited for MangaBaka) and shared +resolver singletons are used across the whole run to maximise cache +hits. 
class LightNovelOrchestrator:
    """
    High-level light-novel workflow: match Kavita series against
    MangaBaka and push metadata, persons and relationships to Kavita.

    One HTTP session and one MAL / AniList resolver pair are created in
    the constructor and shared by every collaborator built here.
    """

    def __init__(self, *,
                 kavita_url: str,
                 kavita_api_key: str,
                 matches_cache: "MatchesCache",
                 language: str = "en",
                 request_timeout: int = 30,
                 api_base_url: str = "https://api.mangabaka.dev/v1"):
        """
        Wires up the Kavita client, the metadata builder and the updaters.

        `matches_cache` is shared with the builder and the relationship
        sync; `request_timeout` is handed to every HTTP-facing
        collaborator.
        """
        self._cache = matches_cache
        self._timeout = request_timeout

        session = requests.Session()
        session.headers.setdefault("User-Agent",
                                   "KavitaLightNovelOrchestrator/1.0")
        _apply_mangabaka_rate_limit(session)
        self._session = session

        # First construction in the LN container — pins the singletons to
        # light-novel search mode (manga container uses the defaults).
        self._mal = MALResolver(request_timeout=request_timeout,
                                search_type="lightnovel")
        self._al = AniListResolver(request_timeout=request_timeout,
                                   media_format="novel")

        self._client = KavitaClient(kavita_url, kavita_api_key,
                                    request_timeout=request_timeout)
        self._builder = LightNovelMetadataBuilder(
            api_base_url=api_base_url,
            language=language,
            request_timeout=request_timeout,
            session=session,
            mal_resolver=self._mal,
            al_resolver=self._al,
            matches_cache=matches_cache,
        )
        self._series_updater = KavitaSeriesUpdater(self._client)
        self._person_updater = KavitaPersonUpdater(
            self._client,
            mal_resolver=self._mal,
            al_resolver=self._al,
        )
        self._relation_sync = RelationshipSync(
            self._client, matches_cache, builder=self._builder)

    # ------------------------------------------------------------------
    # Library listings
    # ------------------------------------------------------------------
    def list_libraries(self) -> list[dict]:
        """Returns the Kavita libraries as reported by the client."""
        return self._client.list_libraries()

    def list_series_in_libraries(self, library_ids: list[int]) -> list[dict]:
        """
        Returns the series of all given libraries in one flat list.

        A library whose listing fails is logged and skipped so the
        remaining libraries are still returned.
        """
        result: list[dict] = []
        for lib_id in library_ids:
            try:
                result.extend(self._client.list_series_in_library(int(lib_id)))
            except Exception as exc:
                print(f"[orchestrator] library {lib_id} list failed: {exc}",
                      flush=True)
        return result

    # ------------------------------------------------------------------
    # Matching
    # ------------------------------------------------------------------
    def build_matches(self, library_ids: list[int]) -> dict:
        """
        Resolves every series in the given libraries against MangaBaka.

        Series already present in matches.json keep their stored
        mangabakaId; the kavitaSeriesId + libraryId fields are refreshed
        in case the user moved a series between libraries.

        Returns a counter dict with the keys "checked", "matched",
        "skipped", "missing" and "failed". A series whose matching
        raises is logged and counted as "failed"; the scan then goes on
        with the next series, as update_all does for updates.
        """
        stats = {"checked": 0, "matched": 0, "skipped": 0,
                 "missing": 0, "failed": 0}
        for series in self.list_series_in_libraries(library_ids):
            title = (series.get("name") or "").strip()
            if not title:
                continue
            stats["checked"] += 1
            try:
                outcome = self._match_one(title, series)
            except Exception as exc:
                # One broken series (bad id, failed cache write,
                # unexpected API payload) must not abort the whole batch.
                outcome = "failed"
                print(f"[match] {title!r}: FAILED {exc}", flush=True)
            stats[outcome] += 1
        return stats

    def _match_one(self, title: str, series: dict) -> str:
        """Matches one Kavita series; returns "skipped", "missing" or "matched"."""
        kavita_id = int(series.get("id") or 0)
        library_id = int(series.get("libraryId") or 0)

        cached = self._cache.get(title)
        if cached and cached.get("mangabakaId"):
            # Already matched: only refresh where the series lives.
            self._cache.upsert(
                title,
                kavita_series_id=kavita_id,
                library_id=library_id,
            )
            return "skipped"

        mb_series = self._builder.search_series(title)
        if not mb_series:
            # Record the series without a MangaBaka id so it is listed
            # in the cache.
            self._cache.upsert(
                title,
                kavita_series_id=kavita_id,
                library_id=library_id,
            )
            print(f"[match] {title!r}: no MangaBaka hit", flush=True)
            return "missing"

        self._cache.upsert(
            title,
            mangabaka_id=mb_series.get("id"),
            mangabaka_name=mb_series.get("title") or "",
            image_url=pick_thumbnail_url(mb_series.get("cover")),
            kavita_series_id=kavita_id,
            library_id=library_id,
        )
        print(f"[match] {title!r} -> {mb_series.get('title')!r} "
              f"(id={mb_series.get('id')})", flush=True)
        return "matched"

    # ------------------------------------------------------------------
    # Updating
    # ------------------------------------------------------------------
    def update_series(self, kavita_series_id: int) -> dict:
        """
        Runs a full metadata update for a single Kavita series.

        Returns ``{"ok": False, "error": ...}`` when the series cannot be
        resolved or the series update itself fails. Otherwise returns
        ``{"ok": True, ...}`` with the title, the mangabakaId and the
        three sub-reports; a failing person or relationship step is
        reported as ``{"error": ...}`` inside the result without
        failing the update.
        """
        hit = self._cache.get_by_kavita_id(int(kavita_series_id))
        if not hit:
            # Try to resolve via the Kavita series name on the fly.
            series = self._client.get_series(int(kavita_series_id))
            title = (series.get("name") or "").strip()
            if not title:
                return {"ok": False, "error": "series not in matches.json"}
            built = self._builder.build(title=title)
            if not built:
                return {"ok": False, "error": "no MangaBaka match"}
            self._cache.upsert(
                title,
                mangabaka_id=built.get("mangabakaId"),
                mangabaka_name=built.get("mangabakaTitle"),
                image_url=built.get("coverUrl"),
                kavita_series_id=int(kavita_series_id),
                library_id=int(series.get("libraryId") or 0),
            )
            cached_title = title
            cached_entry = self._cache.get(title) or {}
        else:
            cached_title, cached_entry = hit
            built = self._builder.build(mangabaka_id=cached_entry.get("mangabakaId"))
            if not built:
                return {"ok": False, "error": "mangabaka id no longer resolvable"}

        # NOTE(review): in the on-the-fly branch the cache was just
        # stamped with built["coverUrl"], so prev_cover equals the new
        # cover URL. If the series updater skips the upload when both
        # URLs match, a freshly matched series never gets its cover —
        # confirm against KavitaSeriesUpdater.
        prev_cover = cached_entry.get("imageUrl") or ""
        try:
            series_report = self._series_updater.update_series(
                int(kavita_series_id), built,
                previous_cover_url=prev_cover,
            )
        except Exception as exc:
            return {"ok": False, "error": f"series update failed: {exc}"}

        # Persons
        try:
            person_report = self._person_updater.update_for_manga(
                built.get("malId"),
                al_manga_id=built.get("anilistId"),
            )
        except Exception as exc:
            person_report = {"error": str(exc)}

        # Relationships + collection
        try:
            relation_report = self._relation_sync.sync(
                int(kavita_series_id), built)
        except Exception as exc:
            relation_report = {"error": str(exc)}

        # Stamp the new cover URL on the cache so the next run knows when
        # to re-upload.
        self._cache.upsert(
            cached_title,
            image_url=built.get("coverUrl") or prev_cover,
        )
        self._cache.mark_updated(cached_title)

        return {
            "ok": True,
            "title": cached_title,
            "mangabakaId": built.get("mangabakaId"),
            "series": series_report,
            "persons": person_report,
            "relationships": relation_report,
        }

    def update_all(self, library_ids: "list[int] | None") -> dict:
        """
        Updates every cached series in the given libraries.

        Pass None to update the whole cache. Entries without a Kavita
        series id or without a mangabakaId are skipped. Returns
        ``{"ok": <count>, "failed": <count>, "results": [...]}`` where
        each result is the update_series dict with "title" set to the
        cache key.
        """
        if library_ids is None:
            entries = self._cache.all()["matches"]
        else:
            entries = self._cache.all_in_libraries(library_ids)["matches"]

        results: list[dict] = []
        ok = fail = 0
        for title, entry in entries.items():
            ksid = int(entry.get("kavitaSeriesId") or 0)
            if not ksid or not entry.get("mangabakaId"):
                continue
            try:
                res = self.update_series(ksid)
            except Exception as exc:
                res = {"ok": False, "error": str(exc)}
            res["title"] = title
            results.append(res)
            if res.get("ok"):
                ok += 1
            else:
                fail += 1
            print(f"[update] {title!r}: "
                  f"{'ok' if res.get('ok') else 'FAIL ' + str(res.get('error'))}",
                  flush=True)
        return {"ok": ok, "failed": fail, "results": results}

    # ------------------------------------------------------------------
    # Direct helpers exposed to the WebApp
    # ------------------------------------------------------------------
    def fetch_series(self, mangabaka_id) -> "dict | None":
        """Returns the builder's MangaBaka series dict for `mangabaka_id`."""
        return self._builder.fetch_series(mangabaka_id)
def _set_int(entry: dict, key: str, value) -> None:
    """Sets entry[key] = int(value); ignores values that don't coerce."""
    try:
        entry[key] = int(value)
    except (TypeError, ValueError, OverflowError):
        pass


def _as_int(value, default=0):
    """Returns int(value), or `default` when the value does not coerce."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return default


class MatchesCache:
    """
    JSON-backed map from a Kavita series title to its MangaBaka match.

    Every mutating method rewrites the file before it returns. All
    access to the in-memory data goes through one re-entrant lock, and
    readers receive copies of the entries.
    """

    def __init__(self, path):
        """Opens the cache at `path`; a missing or unreadable file yields an empty cache."""
        self._path = Path(path)
        self._lock = threading.RLock()
        self._data: dict = {"matches": {}}
        self._load()

    # ------------------------------------------------------------------
    # Public lookup / mutation API
    # ------------------------------------------------------------------
    def get(self, title: str) -> "dict | None":
        """Returns a copy of the entry stored under `title`, or None."""
        with self._lock:
            entry = self._data["matches"].get(title)
            return dict(entry) if entry else None

    def get_by_kavita_id(self, kavita_series_id: int) -> "tuple[str, dict] | None":
        """
        Returns ``(title, entry copy)`` for the first entry whose
        kavitaSeriesId equals `kavita_series_id`, or None.

        Both sides are compared as integers so an id stored as a string
        in a hand-edited file still matches.
        """
        target = _as_int(kavita_series_id, None)
        if target is None:
            return None
        with self._lock:
            for title, entry in self._data["matches"].items():
                if _as_int(entry.get("kavitaSeriesId"), None) == target:
                    return title, dict(entry)
        return None

    def get_by_mangabaka_id(self, mangabaka_id) -> "tuple[str, dict] | None":
        """
        Returns ``(title, entry copy)`` for the first entry whose
        mangabakaId equals `mangabaka_id` (compared as strings), or None.
        """
        target = str(mangabaka_id) if mangabaka_id is not None else ""
        if not target:
            return None
        with self._lock:
            for title, entry in self._data["matches"].items():
                if str(entry.get("mangabakaId") or "") == target:
                    return title, dict(entry)
        return None

    def upsert(self, title: str, *,
               mangabaka_id=None,
               mangabaka_name=None,
               image_url=None,
               kavita_series_id=None,
               library_id=None,
               first_match_time=None,
               last_update_time=None) -> dict:
        """
        Inserts or updates an entry. Only fields passed explicitly are
        modified; the rest are preserved.

        A new entry starts with empty strings / zeros and the current
        time as firstMatchTime. Integer fields ignore values that do not
        coerce to int. The cache is saved and a copy of the entry is
        returned.
        """
        with self._lock:
            entry = self._data["matches"].get(title)
            if entry is None:
                entry = {
                    "mangabakaId": "",
                    "mangabakaName": "",
                    "imageUrl": "",
                    "kavitaSeriesId": 0,
                    "libraryId": 0,
                    "firstMatchTime": int(time.time()),
                    "lastUpdateTime": 0,
                }
                self._data["matches"][title] = entry
            if mangabaka_id is not None:
                entry["mangabakaId"] = str(mangabaka_id)
            if mangabaka_name is not None:
                entry["mangabakaName"] = mangabaka_name
            if image_url is not None:
                entry["imageUrl"] = image_url
            if kavita_series_id is not None:
                _set_int(entry, "kavitaSeriesId", kavita_series_id)
            if library_id is not None:
                _set_int(entry, "libraryId", library_id)
            if first_match_time is not None:
                _set_int(entry, "firstMatchTime", first_match_time)
            if last_update_time is not None:
                _set_int(entry, "lastUpdateTime", last_update_time)
            self._save_unlocked()
            return dict(entry)

    def mark_updated(self, title: str) -> None:
        """Sets lastUpdateTime of `title` to now; does nothing for an unknown title."""
        with self._lock:
            entry = self._data["matches"].get(title)
            if entry is not None:
                entry["lastUpdateTime"] = int(time.time())
                self._save_unlocked()

    def rename(self, old_title: str, new_title: str) -> bool:
        """
        Moves the entry from `old_title` to `new_title`.

        Returns False when `new_title` is empty, equal to `old_title`,
        or `old_title` is unknown. An entry already stored under
        `new_title` is replaced.
        """
        if not new_title or old_title == new_title:
            return False
        with self._lock:
            entry = self._data["matches"].pop(old_title, None)
            if entry is None:
                return False
            self._data["matches"][new_title] = entry
            self._save_unlocked()
            return True

    def remove(self, title: str) -> bool:
        """Deletes the entry for `title`; returns whether it existed."""
        with self._lock:
            existed = title in self._data["matches"]
            if existed:
                del self._data["matches"][title]
                self._save_unlocked()
            return existed

    def all(self) -> dict:
        """Returns ``{"matches": {...}}`` with a copy of every entry."""
        with self._lock:
            return {"matches": {k: dict(v)
                                for k, v in self._data["matches"].items()}}

    def all_in_libraries(self, library_ids: "list[int] | None") -> dict:
        """
        Returns the cache filtered to entries whose libraryId is in
        `library_ids`. Pass None to return everything.

        An entry whose libraryId is missing or not numeric counts as
        library 0.
        """
        if library_ids is None:
            return self.all()
        ids = {int(i) for i in library_ids}
        with self._lock:
            return {"matches": {
                k: dict(v) for k, v in self._data["matches"].items()
                if _as_int(v.get("libraryId"), 0) in ids
            }}

    # ------------------------------------------------------------------
    # Internal IO
    # ------------------------------------------------------------------
    def _load(self) -> None:
        """
        Reads the cache file into memory.

        A missing file, an unreadable file, or a document without a
        "matches" object leaves the cache empty. Entries that are not
        JSON objects are dropped, because every reader copies an entry
        with dict(entry).
        """
        if not self._path.is_file():
            return
        try:
            with self._path.open("r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError as well as the
            # UnicodeDecodeError raised for a file that is not UTF-8.
            print(f"[MatchesCache] failed to load {self._path}: {exc}",
                  flush=True)
            return
        if not (isinstance(loaded, dict)
                and isinstance(loaded.get("matches"), dict)):
            return
        matches = loaded["matches"]
        malformed = [title for title, entry in matches.items()
                     if not isinstance(entry, dict)]
        for title in malformed:
            del matches[title]
        if malformed:
            print(f"[MatchesCache] ignored {len(malformed)} malformed "
                  f"entries in {self._path}", flush=True)
        self._data = loaded

    def _save_unlocked(self) -> None:
        """
        Writes the cache to disk; the caller must hold the lock.

        The data goes to a sibling ``.tmp`` file that then replaces the
        real file, so the real file is never left half-written.
        """
        self._path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self._path.with_suffix(self._path.suffix + ".tmp")
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(self._data, f, ensure_ascii=False, indent=2)
        tmp.replace(self._path)
def _int_list(values) -> list[int]:
    """
    Coerces `values` to a list of positive ints.

    `values` is normally an iterable of mixed values; None yields an
    empty list. A single scalar or a string (as can arrive in a JSON
    request body) is treated as one value rather than iterated, so
    ``"12"`` yields ``[12]`` and ``5`` yields ``[5]``. Items that do not
    coerce to int, or are not greater than zero, are dropped.
    """
    if values is None:
        return []
    if isinstance(values, (str, bytes)) or not hasattr(values, "__iter__"):
        # Iterating a string would split it into characters; a bare
        # number is not iterable at all.
        values = [values]
    out: list[int] = []
    for v in values:
        try:
            n = int(v)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")), which the json module
            # produces for the literal Infinity.
            continue
        if n > 0:
            out.append(n)
    return out

Kavita light-novel metadata fetcher

+ +
+ + + + + + +
+ +
+ + +
+ + + + + + + + + + + + + + + + +
Title mangabakaIdmangabakaNamelibraryLast update Image
class _JobState:
    """
    Thread-safe container for the current background job's progress.

    At most one job runs at a time. The log is reset whenever a job
    starts and is capped so a status response stays bounded.
    """

    # Once the log exceeds _MAX_LOG_LINES only the newest
    # _KEEP_LOG_LINES lines are kept.
    _MAX_LOG_LINES = 1000
    _KEEP_LOG_LINES = 800

    def __init__(self):
        self._lock = threading.Lock()
        self._running = False
        self._label = ""
        self._log: list[str] = []
        self._last_finished_at = 0
        self._thread: "threading.Thread | None" = None

    def start(self, label: str, target, *args, **kwargs) -> bool:
        """
        Runs ``target(self, *args, **kwargs)`` on a daemon thread.

        Returns False without doing anything when a job is already
        running, True once the worker thread has been started. An
        exception raised by `target` is logged as a ``FATAL:`` line.

        Raises whatever ``Thread.start()`` raises; the state is reset
        first so later jobs can still be started.
        """
        with self._lock:
            if self._running:
                return False
            self._running = True
            self._label = label
            self._log = [f"[{time.strftime('%H:%M:%S')}] {label} started"]

        def runner():
            try:
                target(self, *args, **kwargs)
            except Exception as exc:
                self.append(f"FATAL: {exc}")
            finally:
                # Log "finished" and clear the running flag under one
                # lock hold: once a new job may start (and reset the
                # log), this job no longer writes to it.
                with self._lock:
                    self._append_unlocked(
                        f"[{time.strftime('%H:%M:%S')}] finished")
                    self._last_finished_at = int(time.time())
                    self._running = False

        self._thread = threading.Thread(target=runner,
                                        name=f"job:{label}",
                                        daemon=True)
        try:
            self._thread.start()
        except Exception as exc:
            # Without this reset a failed thread start would leave
            # _running set forever and block every later job.
            with self._lock:
                self._append_unlocked(
                    f"FATAL: could not start worker thread: {exc}")
                self._last_finished_at = int(time.time())
                self._running = False
            raise
        return True

    def append(self, line: str) -> None:
        """Adds one line to the job log."""
        with self._lock:
            self._append_unlocked(line)

    def _append_unlocked(self, line: str) -> None:
        """Adds a log line and trims the log; the caller must hold the lock."""
        self._log.append(line)
        # Cap log length so the response stays bounded.
        if len(self._log) > self._MAX_LOG_LINES:
            self._log = self._log[-self._KEEP_LOG_LINES:]

    def snapshot(self) -> dict:
        """
        Returns the current state as a plain dict with the keys
        "running", "label", "log" (a copy) and "lastFinished".
        """
        with self._lock:
            return {
                "running": self._running,
                "label": self._label,
                "log": list(self._log),
                "lastFinished": self._last_finished_at,
            }
@app.get("/api/matches") + def api_list(): + raw = request.args.get("libraryIds") or "" + lib_ids = _int_list(raw.split(",")) + if lib_ids: + return jsonify(cache.all_in_libraries(lib_ids)) + return jsonify(cache.all()) + + @app.post("/api/matches") + def api_upsert(): + body = request.get_json(silent=True) or {} + title = (body.get("title") or "").strip() + if not title: + return Response("title is required", status=400) + new_id_raw = body.get("mangabakaId") + new_id = str(new_id_raw).strip() if new_id_raw is not None else "" + if not new_id: + return Response("mangabakaId is required", status=400) + + new_name: "str | None" = None + new_image: "str | None" = None + if self._orchestrator is not None: + try: + series = self._orchestrator.fetch_series(new_id) + except Exception as exc: + return Response(f"resolve failed: {exc}", status=502) + if not series: + return Response( + f"MangaBaka has no series with id {new_id}", + status=404) + new_name = series.get("title") or "" + new_image = pick_thumbnail_url(series.get("cover")) or "" + + entry = cache.upsert( + title, + mangabaka_id=new_id, + mangabaka_name=new_name, + image_url=new_image, + ) + return jsonify({"title": title, "entry": entry}) + + @app.post("/api/matches/delete") + def api_delete(): + body = request.get_json(silent=True) or {} + title = (body.get("title") or "").strip() + if not title: + return Response("title is required", status=400) + removed = cache.remove(title) + return jsonify({"removed": removed, "title": title}) + + @app.post("/api/build") + def api_build(): + if self._orchestrator is None: + return Response("no orchestrator configured", status=503) + body = request.get_json(silent=True) or {} + library_ids = _int_list(body.get("libraryIds")) + if not library_ids: + return Response("libraryIds required", status=400) + + label = f"match libraries {library_ids}" + + def task(job: _JobState, lib_ids): + stats = self._orchestrator.build_matches(lib_ids) + 
job.append(f"matched={stats.get('matched')} " + f"skipped={stats.get('skipped')} " + f"missing={stats.get('missing')} " + f"checked={stats.get('checked')}") + + if not self._job.start(label, task, library_ids): + return Response("a job is already running", status=409) + return jsonify({"started": label}) + + @app.post("/api/update") + def api_update(): + if self._orchestrator is None: + return Response("no orchestrator configured", status=503) + body = request.get_json(silent=True) or {} + ksid = body.get("kavitaSeriesId") + try: + ksid_int = int(ksid) + except (TypeError, ValueError): + return Response("kavitaSeriesId required", status=400) + try: + res = self._orchestrator.update_series(ksid_int) + except Exception as exc: + return Response(f"update failed: {exc}", status=500) + return jsonify(res) + + @app.post("/api/update-all") + def api_update_all(): + if self._orchestrator is None: + return Response("no orchestrator configured", status=503) + body = request.get_json(silent=True) or {} + raw = body.get("libraryIds") + library_ids = None if raw is None else _int_list(raw) + + label = ("update all (every library)" if library_ids is None + else f"update all in libraries {library_ids}") + + def task(job: _JobState, lib_ids): + summary = self._orchestrator.update_all(lib_ids) + job.append(f"ok={summary.get('ok')} failed={summary.get('failed')}") + for res in summary.get("results", []): + title = res.get("title", "?") + if res.get("ok"): + flags = [] + sr = res.get("series") or {} + for k, v in sr.items(): + if v == "changed": + flags.append(k) + job.append( + f" {title}: changed=[{', '.join(flags) or '-'}]") + else: + job.append(f" {title}: FAIL {res.get('error')}") + + if not self._job.start(label, task, library_ids): + return Response("a job is already running", status=409) + return jsonify({"started": label}) + + @app.get("/api/status") + def api_status(): + snap = self._job.snapshot() + snap["defaults"] = self._defaults + return jsonify(snap) diff --git 
# MangaBaka relation_type -> Kavita UpdateRelatedSeriesDto bucket
_RELATION_MAP = {
    "prequel":             "prequels",
    "sequel":              "sequels",
    "side_story":          "sideStories",
    "spin_off":            "spinOffs",
    "spinoff":             "spinOffs",
    "alternative_version": "alternativeVersions",
    "alternative_story":   "alternativeVersions",
    "alternative_setting": "alternativeSettings",
    "adapted_from":        "adaptations",
    "adaptation":          "adaptations",
    "doujinshi":           "doujinshis",
    # NOTE(review): this files the *target* of a "parent" relation under
    # the current series' "contains" bucket; confirm the direction against
    # MangaBaka's relationships_v2 semantics.
    "parent":              "contains",   # the parent "contains" the child
}

# Every bucket sent in the update-related payload; buckets without a
# resolved series are sent as empty lists.
_ALL_BUCKETS = (
    "adaptations", "characters", "contains", "others",
    "prequels", "sequels", "sideStories", "spinOffs",
    "alternativeSettings", "alternativeVersions", "doujinshis",
    "editions", "annuals",
)


def _coerce_int(value) -> "int | None":
    """Return ``int(value)``, or None when the value is not convertible."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


class RelationshipSync:
    """Writes MangaBaka relationships into Kavita relations and a collection."""

    def __init__(self, client: "KavitaClient", cache: "MatchesCache", *,
                 builder=None):
        """
        Parameters
        ----------
        client  : KavitaClient for collection / relation writes.
        cache   : MatchesCache to resolve mangabakaId -> kavitaSeriesId.
        builder : optional LightNovelMetadataBuilder used to fetch parent
                  series titles when picking the collection name.
        """
        self._client = client
        self._cache = cache
        self._builder = builder

    # ------------------------------------------------------------------
    # Public
    # ------------------------------------------------------------------
    def sync(self, kavita_series_id: int, built: dict) -> dict:
        """
        Applies the relationship and collection links described by
        `built["relationships"]` (raw MangaBaka relationships_v2 list)
        for the given Kavita series.

        Returns a status dict with three keys:

        relations      : bucket -> list of Kavita series ids written, or
                         ``{"error": ...}`` when the write failed.
        collection     : name of the collection the series were added to,
                         an ``"error: ..."`` string, or None when no
                         collection step ran.
        missing_series : MangaBaka ids that could not be resolved to a
                         Kavita series.
        """
        report: dict = {"relations": {}, "collection": None,
                        "missing_series": []}

        relationships = built.get("relationships") or []
        if not relationships:
            return report

        current_id = int(kavita_series_id)

        # Resolve mangabakaId -> kavitaSeriesId for every related entry.
        related: dict[str, list[int]] = {b: [] for b in _ALL_BUCKETS}
        all_kavita_ids: set[int] = set()
        for rel in relationships:
            mb_id = rel.get("to_series_id")
            if mb_id is None:
                continue
            hit = self._cache.get_by_mangabaka_id(mb_id)
            # `hit` is a (title, entry) pair; only the entry is needed.
            ksid = _coerce_int(hit[1].get("kavitaSeriesId")) if hit else None
            if not ksid:
                mb_int = _coerce_int(mb_id)
                report["missing_series"].append(
                    mb_id if mb_int is None else mb_int)
                continue
            if ksid == current_id:
                # The related entry resolves to the series being synced;
                # never write a series as related to itself.
                continue
            bucket = _RELATION_MAP.get(
                (rel.get("relation_type") or "").lower(), "others")
            if ksid not in related[bucket]:
                related[bucket].append(ksid)
            all_kavita_ids.add(ksid)

        # ----- Relationships ------------------------------------------
        if any(related.values()):
            payload = {"seriesId": current_id, **related}
            try:
                self._client.update_related(payload)
                report["relations"] = {k: v for k, v in related.items() if v}
            except Exception as exc:
                report["relations"] = {"error": str(exc)}

        # ----- Collection ---------------------------------------------
        # Include the current series in the collection so it shows up too.
        all_kavita_ids.add(current_id)
        if len(all_kavita_ids) >= 2:
            collection_name = self._collection_name(built, relationships)
            if not (collection_name or "").strip():
                # Neither a parent title nor the series' own title is
                # known; do not create a collection with a blank title.
                report["collection"] = "error: no collection name available"
                return report
            collection_id = self._find_collection_id(collection_name)
            try:
                self._client.add_series_to_collection(
                    collection_id=collection_id,
                    title=collection_name,
                    series_ids=sorted(all_kavita_ids),
                )
                report["collection"] = collection_name
            except Exception as exc:
                report["collection"] = f"error: {exc}"

        return report

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------
    def _find_collection_id(self, name: str) -> int:
        """Returns the id of an existing collection by title, or 0 to create.

        The title comparison ignores case and surrounding whitespace.
        """
        if not name:
            return 0
        target = name.strip().lower()
        try:
            for col in self._client.list_collections():
                if (col.get("title") or "").strip().lower() == target:
                    try:
                        return int(col.get("id") or 0)
                    except (TypeError, ValueError):
                        return 0
        except Exception:
            # Best effort: when the listing fails, fall through to 0 and
            # let the caller create the collection.
            pass
        return 0

    def _collection_name(self, built: dict,
                         relationships: list[dict]) -> str:
        """
        Picks the collection name.  Uses the parent series title from
        MangaBaka if the current series has one; otherwise falls back to
        the current series' own title (``built["mangabakaTitle"]``), or
        an empty string when that is missing too.
        """
        for rel in relationships:
            if (rel.get("relation_type") or "").lower() != "parent":
                continue
            parent_id = rel.get("to_series_id")
            if parent_id is None:
                continue
            if self._builder is not None:
                try:
                    parent_md = self._builder.fetch_series(parent_id)
                    if parent_md and parent_md.get("title"):
                        return parent_md["title"]
                except Exception:
                    # Best effort: fall back to the cache lookup below.
                    pass
            # Even without a builder, the cache may know the parent.
            hit = self._cache.get_by_mangabaka_id(parent_id)
            if hit:
                _title, entry = hit
                name = entry.get("mangabakaName")
                if name:
                    return name
        return built.get("mangabakaTitle") or ""
# Passed to `requests` as a list so each value becomes its own `&type=...` @@ -485,9 +490,19 @@ class ComicInfoBuilder: add("Tags", ", ".join(_format_term(t) for t in (md.get("tags") or []))) # ----- Characters — MAL first, AniList fallback --------------------- - characters = self._mal_resolver.get_characters(mal_id) - if not characters and al_id: - characters = self._al_resolver.get_characters(al_id) + # Names are disambiguated with the tracker *character* id + # ("Rem (MAL 118737)") so same-named characters from different + # series stay separate Kavita person records. The format is shared + # with the light-novel updater — see TextUtils.person_name_with_id. + char_entries = self._mal_resolver.get_characters_detailed(mal_id) + if not char_entries and al_id: + char_entries = self._al_resolver.get_characters_detailed(al_id) + characters = [ + person_name_with_id(e.get("name"), + mal_id=e.get("mal_id"), + al_id=e.get("al_id")) + for e in char_entries if (e.get("name") or "").strip() + ] add("Characters", ", ".join(characters) if characters else None) # ----- Web links ---------------------------------------------------- @@ -1112,17 +1127,6 @@ def _pick_thumbnail_url(cover) -> "str | None": return _pick_cover_url(cover) -def _guess_extension(url: str, content_type: str) -> str: - url_ext = Path(url.split("?")[0]).suffix.lower() - if url_ext in _IMAGE_EXTS: - return url_ext - ct = (content_type or "").lower() - if "png" in ct: return ".png" - if "webp" in ct: return ".webp" - if "gif" in ct: return ".gif" - return ".jpg" - - # -------------------------------------------------------------------------- # Usage example # -------------------------------------------------------------------------- diff --git a/src/KavitaVolumeCoverUpdater.py b/src/manga/KavitaVolumeCoverUpdater.py similarity index 98% rename from src/KavitaVolumeCoverUpdater.py rename to src/manga/KavitaVolumeCoverUpdater.py index 5848b1d..aba084e 100644 --- a/src/KavitaVolumeCoverUpdater.py +++ 
b/src/manga/KavitaVolumeCoverUpdater.py @@ -44,6 +44,7 @@ Dependencies from __future__ import annotations import io +import sys import threading import xml.etree.ElementTree as ET import zipfile @@ -52,7 +53,12 @@ from pathlib import Path import requests -from ComicInfoBuilder import ComicInfoBuilder, _IMAGE_EXTS +# Shared modules live one level up (src/); needed when a module in this +# folder is run directly as a script (the entry points set the path). +if __name__ == "__main__": + sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + +from ComicInfoBuilder import ComicInfoBuilder from MangadexVolumeResolver import MangaDexVolumeResolver from MangaBakaWorksResolver import MangaBakaWorksResolver from MALResolver import MALResolver @@ -62,7 +68,7 @@ from SuwayomiMover import (_load_chapter_index, _save_chapter_index, _sanitize_dirname, _normalise_volume_value) from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit from CronSchedule import CronSchedule -from CoverCache import CoverCache +from CoverCache import CoverCache, _IMAGE_EXTS try: from PIL import Image diff --git a/src/MangadexVolumeResolver.py b/src/manga/MangadexVolumeResolver.py similarity index 100% rename from src/MangadexVolumeResolver.py rename to src/manga/MangadexVolumeResolver.py diff --git a/src/MatchesCache.py b/src/manga/MatchesCache.py similarity index 100% rename from src/MatchesCache.py rename to src/manga/MatchesCache.py diff --git a/src/MatchesWebApp.py b/src/manga/MatchesWebApp.py similarity index 100% rename from src/MatchesWebApp.py rename to src/manga/MatchesWebApp.py diff --git a/src/SuwayomiFolderWatcher.py b/src/manga/SuwayomiFolderWatcher.py similarity index 100% rename from src/SuwayomiFolderWatcher.py rename to src/manga/SuwayomiFolderWatcher.py diff --git a/src/SuwayomiMover.py b/src/manga/SuwayomiMover.py similarity index 98% rename from src/SuwayomiMover.py rename to src/manga/SuwayomiMover.py index b66ba67..51ae101 100644 --- 
a/src/SuwayomiMover.py +++ b/src/manga/SuwayomiMover.py @@ -46,22 +46,29 @@ from __future__ import annotations import json import re import shutil +import sys import xml.etree.ElementTree as ET import zipfile from pathlib import Path import requests +# Shared modules live one level up (src/); needed when a module in this +# folder is run directly as a script (the entry points set the path). +if __name__ == "__main__": + sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + from ComicInfoBuilder import (ComicInfoBuilder, _pick_thumbnail_url, - _SEARCH_TYPES, _IMAGE_EXTS, _natural_key) + _SEARCH_TYPES, _natural_key) from MangadexVolumeResolver import MangaDexVolumeResolver from MangaBakaWorksResolver import MangaBakaWorksResolver from MALResolver import MALResolver from AniListResolver import AniListResolver +from KavitaClient import KavitaClient from KavitaPersonUpdater import KavitaPersonUpdater from MatchesCache import MatchesCache from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit -from CoverCache import CoverCache +from CoverCache import CoverCache, _IMAGE_EXTS _CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)') @@ -347,11 +354,13 @@ class SuwayomiMover: self._person_updater: "KavitaPersonUpdater | None" = None if kavita_base_url and kavita_api_key: - self._person_updater = KavitaPersonUpdater( + kavita_client = KavitaClient( kavita_base_url, kavita_api_key, - mal_resolver=self._mal, - al_resolver=self._al, request_timeout=request_timeout) + self._person_updater = KavitaPersonUpdater( + kavita_client, + mal_resolver=self._mal, + al_resolver=self._al) # ------------------------------------------------------------------ # Public API