7 Commits

Author SHA1 Message Date
johannesbot b7bec295f2 Merge pull request 'Performance and Person Updater Improvements' (#7) from time-measurement into master
Build and Deploy / build (push) Successful in 36s
Build and Deploy / deploy (push) Successful in 37s
Build Release / build (push) Successful in 24s
Reviewed-on: #7
2026-06-16 18:46:55 +02:00
johannesbot 6ca1a245a3 Person Updater overhaul 2026-06-16 18:46:17 +02:00
johannesbot a59cff3951 Performance Improvements 2026-06-16 11:37:47 +02:00
johannesbot b6d7f2d0af time measurement 2026-06-15 11:23:37 +02:00
johannesbot b0692a6527 time measurement 2026-06-15 11:23:20 +02:00
johannesbot 216771f709 merged ln metadata into manga mover
Build and Deploy / build (push) Successful in 59s
Build and Deploy / deploy (push) Successful in 24s
2026-06-14 10:47:47 +02:00
johannesbot 8a44b85a48 cleanup
Build and Deploy / build (push) Successful in 23s
Build and Deploy / deploy (push) Successful in 41s
Build Release / build (push) Successful in 16s
2026-06-11 21:31:20 +02:00
32 changed files with 4472 additions and 1006 deletions
+28
View File
@@ -0,0 +1,28 @@
# Shared
KAVITA_URL=http://192.168.1.100:5000
KAVITA_API_KEY=your-api-key-here
LANGUAGE=en
TZ=Europe/Berlin
# Manga container (manga-mover-and-metadata-collector)
HOST_SUWAYOMI_PATH=/path/to/suwayomi/downloads
HOST_KAVITA_PATH=/path/to/kavita/library
HOST_MANGA_CONFIG_PATH=/path/to/manga-config
MANGA_WEB_PORT=8080
SETTLE_SECONDS=600
DELETE_SOURCE=true
# Periodic updaters (volume/cover + global person sync) run together on
# this cron. Sundays 10:00. Person updater also covers LN libraries.
UPDATER_ENABLED=true
UPDATER_SCHEDULE=0 10 * * 0
COVER_CACHE_PATH=/config/covers
PERF_PATH=/config/perf_stats.json
VOLUME_PERF_PATH=/config/volume_perf_stats.json
PERSON_PERF_PATH=/config/person_perf_stats.json
# Light-novel container (kavita-lightnovel-metadata-fetcher)
HOST_LN_CONFIG_PATH=/path/to/ln-config
LN_WEB_PORT=8081
LN_LIBRARY_IDS=3,5
LN_UPDATER_ENABLED=true
+17 -7
View File
@@ -5,6 +5,11 @@ on:
branches:
- master
env:
REGISTRY: gitea.johannesbot.de/johannesbot
MANGA_IMAGE: manga-mover-and-metadata-collector
LN_IMAGE: kavita-lightnovel-metadata-fetcher
jobs:
build:
runs-on: ubuntu-latest
@@ -17,11 +22,16 @@ jobs:
echo "${{ secrets.REGISTRY_PASSWORD }}" | \
docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
- name: Build Image
run: docker build -t gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest .
- name: Build Manga Image
run: docker build --build-arg APP=manga -t ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:latest .
- name: Push Image
run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest
- name: Build LN Image
run: docker build --build-arg APP=ln -t ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:latest .
- name: Push Images
run: |
docker push ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:latest
docker push ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:latest
deploy:
needs: build
@@ -37,7 +47,7 @@ jobs:
username: ${{ secrets.SSH_USER }}
password: ${{ secrets.SSH_PASSWORD }}
port: ${{ secrets.SSH_PORT || 22 }}
script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
script: mkdir -p /home/${{ secrets.SSH_USER }}/kavita-metadata-collector
- name: Copy docker-compose via SCP
uses: appleboy/scp-action@v0.1.7
@@ -47,7 +57,7 @@ jobs:
password: ${{ secrets.SSH_PASSWORD }}
port: ${{ secrets.SSH_PORT || 22 }}
source: "docker-compose.prod.yml"
target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector"
target: "/home/${{ secrets.SSH_USER }}/kavita-metadata-collector"
- name: Deploy via SSH
uses: appleboy/ssh-action@v1.0.3
@@ -57,7 +67,7 @@ jobs:
password: ${{ secrets.SSH_PASSWORD }}
port: ${{ secrets.SSH_PORT || 22 }}
script: |
cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
cd /home/${{ secrets.SSH_USER }}/kavita-metadata-collector
mv docker-compose.prod.yml docker-compose.yml
echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
sudo docker compose pull
+14 -4
View File
@@ -5,6 +5,11 @@ on:
tags:
- 'v*'
env:
REGISTRY: gitea.johannesbot.de/johannesbot
MANGA_IMAGE: manga-mover-and-metadata-collector
LN_IMAGE: kavita-lightnovel-metadata-fetcher
jobs:
build:
runs-on: ubuntu-latest
@@ -21,8 +26,13 @@ jobs:
id: tag
run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT"
- name: Build Image
run: docker build -t gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:${{ steps.tag.outputs.VERSION }} .
- name: Build Manga Image
run: docker build --build-arg APP=manga -t ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:${{ steps.tag.outputs.VERSION }} .
- name: Push Image
run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:${{ steps.tag.outputs.VERSION }}
- name: Build LN Image
run: docker build --build-arg APP=ln -t ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:${{ steps.tag.outputs.VERSION }} .
- name: Push Images
run: |
docker push ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:${{ steps.tag.outputs.VERSION }}
docker push ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:${{ steps.tag.outputs.VERSION }}
+19 -7
View File
@@ -1,8 +1,18 @@
# One Dockerfile, two images: the build arg APP selects the entry point.
#
# docker build --build-arg APP=manga -t .../manga-mover-and-metadata-collector .
# docker build --build-arg APP=ln -t .../kavita-lightnovel-metadata-fetcher .
#
# Both variants share src/; the variant-specific code lives in
# src/manga/ and src/ln/ respectively and is selected by the entry point.
FROM python:3.12-slim
ARG APP=manga
WORKDIR /app
# System deps for Pillow (image dimensions); kept minimal.
# System deps for Pillow (image dimensions, manga variant); kept minimal.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
libjpeg62-turbo \
@@ -11,15 +21,17 @@ RUN apt-get update \
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY src/ /app/src/
COPY main.py /app/main.py
COPY src/ /app/src/
COPY main_manga.py main_ln.py /app/
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1
PYTHONDONTWRITEBYTECODE=1 \
APP_VARIANT=${APP}
# Mount points used by main.py defaults
VOLUME ["/mnt/suwayomi", "/mnt/kavita", "/config"]
# /config is used by both variants; the manga variant additionally mounts
# /mnt/suwayomi and /mnt/kavita (see docker-compose.prod.yml).
VOLUME ["/config"]
EXPOSE 8080
CMD ["python", "/app/main.py"]
CMD python /app/main_${APP_VARIANT}.py
+41 -11
View File
@@ -1,5 +1,8 @@
services:
manga-mover:
# ------------------------------------------------------------------
# Manga: Suwayomi -> Kavita mover + metadata enrichment
# ------------------------------------------------------------------
manga-mover-and-metadata-collector:
image: gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest
container_name: manga-mover-and-metadata-collector
restart: unless-stopped
@@ -9,19 +12,46 @@ services:
LANGUAGE: "${LANGUAGE:-en}"
SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
DELETE_SOURCE: "${DELETE_SOURCE:-true}"
MATCH_PATH: "${MATCH_PATH:-/config/matches.json}"
WEB_PORT: "${WEB_PORT:-8080}"
# Volume/cover back-fill updater
MATCH_PATH: "/config/matches.json"
# Periodic updaters (volume/cover back-fill + global person sync) run
# together on this cron. "0 10 * * 0" = Sundays 10:00 (local time, see TZ)
UPDATER_ENABLED: "${UPDATER_ENABLED:-true}"
# Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday
# (local time, see TZ)
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
UPDATER_LOG: "${UPDATER_LOG:-/config/volume_updater.log}"
# Timezone for the cron schedule — without this 19:00 means 19:00 UTC
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
UPDATER_LOG: "/config/volume_updater.log"
# Persistent cover cache (empty = temp dir, deleted on container stop)
COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}"
# Per-step timing stats (viewable at /perf, /perf/volume, /perf/person)
PERF_PATH: "${PERF_PATH:-/config/perf_stats.json}"
VOLUME_PERF_PATH: "${VOLUME_PERF_PATH:-/config/volume_perf_stats.json}"
PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
# Timezone for the cron schedule — without this 10:00 means 10:00 UTC
TZ: "${TZ:-Europe/Berlin}"
ports:
- "${WEB_PORT:-8080}:${WEB_PORT:-8080}"
- "${MANGA_WEB_PORT:-8080}:8080"
volumes:
- "${HOST_SUWAYOMI_PATH}:/mnt/suwayomi"
- "${HOST_KAVITA_PATH}:/mnt/kavita"
- "${HOST_CONFIG_PATH}:/config"
- "${HOST_MANGA_CONFIG_PATH}:/config"
# ------------------------------------------------------------------
# Light novels: Kavita metadata fetcher (HTTP only, no file mover)
# ------------------------------------------------------------------
kavita-lightnovel-metadata-fetcher:
image: gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:latest
container_name: kavita-lightnovel-metadata-fetcher
restart: unless-stopped
environment:
KAVITA_URL: "${KAVITA_URL}"
KAVITA_API_KEY: "${KAVITA_API_KEY}"
LIBRARY_IDS: "${LN_LIBRARY_IDS}"
LANGUAGE: "${LANGUAGE:-en}"
MATCH_PATH: "/config/matches.json"
# Global person sync on cron (same default cadence as the manga side)
UPDATER_ENABLED: "${LN_UPDATER_ENABLED:-true}"
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 10 * * 0}"
PERSON_PERF_PATH: "${PERSON_PERF_PATH:-/config/person_perf_stats.json}"
TZ: "${TZ:-Europe/Berlin}"
ports:
- "${LN_WEB_PORT:-8081}:8080"
volumes:
- "${HOST_LN_CONFIG_PATH}:/config"
-155
View File
@@ -1,155 +0,0 @@
"""
main.py
=======
Container entry point. Watches the mounted Suwayomi download directory
and, after a quiet period, triggers SuwayomiMover (which also runs the
Kavita person sync for every processed series).
Mount points (Docker)
---------------------
/mnt/suwayomi -> Suwayomi downloads (read/write, sources deleted)
/mnt/kavita -> Kavita library (read/write, CBZs written here)
Environment variables
---------------------
Required:
KAVITA_URL base URL of the Kavita server, e.g. http://kavita:5000
KAVITA_API_KEY Kavita API key (Settings → User → API key)
Optional:
SUWAYOMI_PATH default /mnt/suwayomi
KAVITA_PATH default /mnt/kavita
LANGUAGE default en
SETTLE_SECONDS default 600 (10-minute quiet window)
REQUEST_TIMEOUT default 30
DELETE_SOURCE default true (delete source folders after pack)
MATCH_PATH default /config/matches.json
WEB_PORT default 8080 (Flask web UI for matches.json)
WEB_HOST default 0.0.0.0
UPDATER_ENABLED default true (volume/cover back-fill cron)
UPDATER_SCHEDULE cron expression for the updater scans,
default "0 19 * * 1,4" = 19:00 every Mon + Thu
(local time — set TZ inside the container!)
UPDATER_LOG default /config/volume_updater.log
"""
from __future__ import annotations
import os
import signal
import sys
from pathlib import Path
# Make src/ importable when running as `python main.py`.
sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
from src.SuwayomiMover import SuwayomiMover # noqa: E402
from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402
from src.MatchesCache import MatchesCache # noqa: E402
from src.MatchesWebApp import MatchesWebApp # noqa: E402
from src.KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402
def _env_str(name: str, default: "str | None" = None,
             required: bool = False) -> "str | None":
    """
    Reads a string setting from the process environment.

    name     : environment variable to look up.
    default  : value used when the variable is not set at all.
    required : when true, a missing or empty result prints the variable
               name and terminates the process with exit status 2.

    Returns the value as found (possibly an empty string) or ``default``.
    """
    resolved = os.environ.get(name, default)
    if resolved or not required:
        return resolved
    print(f"[main] missing required env var: {name}", flush=True)
    sys.exit(2)
def _env_int(name: str, default: int) -> int:
    """
    Reads an integer setting from the process environment.

    An unset or empty variable yields ``default``.  A value that ``int()``
    rejects is reported on stdout and also yields ``default``.
    """
    raw = os.environ.get(name)
    if not raw:
        # None (unset) and "" (set but empty) both mean "use the default".
        return default
    try:
        parsed = int(raw)
    except ValueError:
        print(f"[main] {name}={raw!r} is not a valid integer; "
              f"falling back to {default}", flush=True)
        return default
    return parsed
def _env_bool(name: str, default: bool) -> bool:
    """
    Reads a boolean setting from the process environment.

    An unset variable yields ``default``.  An empty or whitespace-only
    value is treated as unset as well, matching ``_env_int`` (previously
    it was silently read as False).  Otherwise the value is true exactly
    when it is one of 1 / true / yes / y / on (case-insensitive,
    surrounding whitespace ignored); anything else is false.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    token = raw.strip().lower()
    if not token:
        # "NAME=" with nothing after it carries no decision -> default.
        return default
    return token in ("1", "true", "yes", "y", "on")
def main() -> int:
    """
    Reads the configuration from the environment, starts the matches web
    UI and (when enabled) the volume/cover updater, then blocks in
    ``web_app.wait()``.

    The folder watcher wiring is currently disabled, so the web app is
    what keeps the process alive.  Always returns 0.
    """
    suwayomi_path = _env_str("SUWAYOMI_PATH", "/mnt/suwayomi")
    kavita_path = _env_str("KAVITA_PATH", "/mnt/kavita")
    kavita_url = _env_str("KAVITA_URL", "http://kavita:5000")
    api_key = _env_str("KAVITA_API_KEY", "")
    language = _env_str("LANGUAGE", "en") or "en"
    settle_seconds = _env_int("SETTLE_SECONDS", 600)
    timeout = _env_int("REQUEST_TIMEOUT", 30)
    delete_source = _env_bool("DELETE_SOURCE", True)
    match_path = _env_str("MATCH_PATH", "/config/matches.json")
    web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
    web_port = _env_int("WEB_PORT", 8080)
    run_updater = _env_bool("UPDATER_ENABLED", True)
    cron_expr = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
    log_file = _env_str("UPDATER_LOG", "/config/volume_updater.log")

    # Echo the effective configuration once at start-up.
    startup_lines = (
        f"[main] suwayomi = {suwayomi_path}",
        f"[main] kavita = {kavita_path}",
        f"[main] kavita url= {kavita_url}",
        f"[main] settle = {settle_seconds}s",
        f"[main] language = {language}",
        f"[main] delete src= {delete_source}",
        f"[main] match path= {match_path}",
        f"[main] web = {web_host}:{web_port}",
    )
    for line in startup_lines:
        print(line, flush=True)

    matches_cache = MatchesCache(match_path)

    mover_options = {
        "kavita_base_url": kavita_url,
        "kavita_api_key": api_key,
        "language": language,
        "request_timeout": timeout,
        "delete_source": delete_source,
        "matches_cache": matches_cache,
    }
    mover = SuwayomiMover(suwayomi_path, kavita_path, **mover_options)

    # The SuwayomiFolderWatcher (and its SIGTERM/SIGINT shutdown hook) is
    # intentionally not started here; moves are triggered through the web UI.
    web_app = MatchesWebApp(matches_cache, mover=mover,
                            host=web_host, port=web_port)
    web_app.start()

    if run_updater:
        updater_options = {
            "matches_cache": matches_cache,
            "language": language,
            "request_timeout": timeout,
            "log_path": log_file,
            "schedule": cron_expr,
        }
        try:
            KavitaVolumeCoverUpdater(kavita_path, **updater_options).start()
        except ValueError as exc:
            # Invalid cron expression: keep the service running without
            # the updater and make the configuration error visible.
            print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
                  f"volume/cover updater DISABLED", flush=True)

    web_app.wait()  # keep process alive while the watcher is disabled
    return 0
if __name__ == "__main__":
sys.exit(main())
+162
View File
@@ -0,0 +1,162 @@
"""
main_ln.py
==========
Container entry point for the **light-novel** variant (Kavita metadata
fetcher). The manga variant has its own entry point (main_manga.py);
both share the modules in src/ and add their variant-specific code from
src/ln/ and src/manga/ respectively.
Reads configuration from environment variables, starts the orchestrator
and exposes the Flask WebApp on WEB_HOST:WEB_PORT. Everything happens
through HTTP — there is no folder watcher and no file mover (Kavita is
the source of truth for the library content; this service only writes
metadata back to it).
Environment variables
---------------------
Required:
KAVITA_URL base URL of the Kavita server, e.g. http://kavita:5000
KAVITA_API_KEY Kavita API key (Settings -> User -> API key)
Optional:
LIBRARY_IDS comma-separated default library ids (e.g. "3,5").
Empty = user picks in the WebUI each time.
LANGUAGE default "en"
REQUEST_TIMEOUT default 30
MATCH_PATH default /config/matches.json
WEB_PORT default 8080
WEB_HOST default 0.0.0.0
UPDATER_ENABLED default true (run the person updater on cron)
UPDATER_SCHEDULE cron expression for the person updater,
default "0 10 * * 0" = Sundays 10:00
(local time — set TZ inside the container!)
PERSON_PERF_PATH JSON file for person updater timing.
Default /config/person_perf_stats.json
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
try:
from dotenv import load_dotenv
load_dotenv()
except ImportError:
pass
# Shared code in src/, LN-specific code in src/ln/. Modules are imported
# by their plain names so src-internal imports resolve to the same module
# objects (a `src.X` import would load everything twice).
_BASE = Path(__file__).resolve().parent
sys.path.insert(0, str(_BASE / "src"))
sys.path.insert(0, str(_BASE / "src" / "ln"))
from MatchesCache import MatchesCache # noqa: E402
from LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402
from MatchesWebApp import MatchesWebApp # noqa: E402
from PerfStats import PerfStats # noqa: E402
from CronRunner import CronRunner # noqa: E402
def _env_bool(name: str, default: bool) -> bool:
    """
    Reads a boolean setting from the process environment.

    An unset variable yields ``default``.  An empty or whitespace-only
    value is treated as unset as well, matching ``_env_int`` (previously
    it was silently read as False).  Otherwise the value is true exactly
    when it is one of 1 / true / yes / y / on (case-insensitive,
    surrounding whitespace ignored); anything else is false.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    token = raw.strip().lower()
    if not token:
        # "NAME=" with nothing after it carries no decision -> default.
        return default
    return token in ("1", "true", "yes", "y", "on")
def _env_str(name: str, default: "str | None" = None,
             required: bool = False) -> "str | None":
    """
    Reads a string setting from the process environment.

    name     : environment variable to look up.
    default  : value used when the variable is not set at all.
    required : when true, a missing or empty result prints the variable
               name and terminates the process with exit status 2.

    Returns the value as found (possibly an empty string) or ``default``.
    """
    resolved = os.environ.get(name, default)
    if resolved or not required:
        return resolved
    print(f"[main] missing required env var: {name}", flush=True)
    sys.exit(2)
def _env_int(name: str, default: int) -> int:
    """
    Reads an integer setting from the process environment.

    An unset or empty variable yields ``default``.  A value that ``int()``
    rejects is reported on stdout and also yields ``default``.
    """
    raw = os.environ.get(name)
    if not raw:
        # None (unset) and "" (set but empty) both mean "use the default".
        return default
    try:
        parsed = int(raw)
    except ValueError:
        print(f"[main] {name}={raw!r} is not a valid integer; "
              f"falling back to {default}", flush=True)
        return default
    return parsed
def _env_int_list(name: str) -> list[int]:
    """
    Reads a comma-separated list of integers from the environment.

    Blank items are skipped silently; items that ``int()`` rejects are
    reported on stdout and skipped.  An unset or empty variable yields an
    empty list.
    """
    values: list[int] = []
    pieces = (os.environ.get(name) or "").split(",")
    for part in (piece.strip() for piece in pieces):
        if part:
            try:
                values.append(int(part))
            except ValueError:
                print(f"[main] {name}: ignoring non-integer value {part!r}",
                      flush=True)
    return values
def main() -> int:
    """
    Reads the configuration from the environment, starts the light-novel
    orchestrator's web UI and (when enabled) the cron-driven person sync,
    then blocks in ``web_app.wait()``.

    KAVITA_URL and KAVITA_API_KEY are mandatory: the process exits with
    status 2 when either is missing or empty.  Always returns 0 otherwise.
    """
    kavita_url = _env_str("KAVITA_URL", required=True)
    api_key = _env_str("KAVITA_API_KEY", required=True)
    language = _env_str("LANGUAGE", "en") or "en"
    timeout = _env_int("REQUEST_TIMEOUT", 30)
    match_path = _env_str("MATCH_PATH", "/config/matches.json")
    web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
    web_port = _env_int("WEB_PORT", 8080)
    library_ids = _env_int_list("LIBRARY_IDS")
    run_updater = _env_bool("UPDATER_ENABLED", True)
    cron_expr = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
    # An empty PERSON_PERF_PATH is normalised to None.
    stats_file = _env_str("PERSON_PERF_PATH",
                          "/config/person_perf_stats.json") or None

    # Echo the effective configuration once at start-up.
    startup_lines = (
        f"[main] kavita url = {kavita_url}",
        f"[main] language = {language}",
        f"[main] match path = {match_path}",
        f"[main] libraries = {library_ids or '(picked in WebUI)'}",
        f"[main] web = {web_host}:{web_port}",
    )
    for line in startup_lines:
        print(line, flush=True)

    matches_cache = MatchesCache(match_path)
    person_stats = PerfStats(stats_file)

    orchestrator = LightNovelOrchestrator(
        kavita_url=kavita_url,
        kavita_api_key=api_key,
        matches_cache=matches_cache,
        language=language,
        request_timeout=timeout,
    )

    web_app = MatchesWebApp(
        matches_cache,
        orchestrator=orchestrator,
        default_library_ids=library_ids,
        person_perf=person_stats,
        host=web_host,
        port=web_port,
    )
    web_app.start()

    if run_updater:
        def _sync_persons():
            # Scheduled person sync; returns whatever sync_persons returns.
            return orchestrator.sync_persons(trigger="cron",
                                             perf=person_stats)

        try:
            runner = CronRunner(cron_expr, _sync_persons,
                                name="person-updater")
            runner.start()
        except ValueError as exc:
            # Invalid cron expression: keep the web UI running without
            # the scheduled sync and make the config error visible.
            print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
                  f"scheduled person sync DISABLED", flush=True)

    web_app.wait()
    return 0
if __name__ == "__main__":
sys.exit(main())
+199
View File
@@ -0,0 +1,199 @@
"""
main_manga.py
=============
Container entry point for the **manga** variant (Suwayomi -> Kavita mover
plus metadata enrichment). The light-novel variant has its own entry
point (main_ln.py); both share the modules in src/ and add their
variant-specific code from src/manga/ and src/ln/ respectively.
Mount points (Docker)
---------------------
/mnt/suwayomi -> Suwayomi downloads (read/write, sources deleted)
/mnt/kavita -> Kavita library (read/write, CBZs written here)
Environment variables
---------------------
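Required for the person updater (KAVITA_URL falls back to http://kavita:5000; without KAVITA_API_KEY no person updater is created):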
KAVITA_URL base URL of the Kavita server, e.g. http://kavita:5000
KAVITA_API_KEY Kavita API key (Settings -> User -> API key)
Optional:
SUWAYOMI_PATH default /mnt/suwayomi
KAVITA_PATH default /mnt/kavita
LANGUAGE default en
SETTLE_SECONDS default 600 (10-minute quiet window)
REQUEST_TIMEOUT default 30
DELETE_SOURCE default true (delete source folders after pack)
MATCH_PATH default /config/matches.json
WEB_PORT default 8080 (Flask web UI for matches.json)
WEB_HOST default 0.0.0.0
UPDATER_ENABLED default true (run volume/cover + person updaters on cron)
UPDATER_SCHEDULE cron expression for the periodic updaters,
default "0 10 * * 0" = Sundays 10:00
(local time — set TZ inside the container!)
UPDATER_LOG default /config/volume_updater.log
COVER_CACHE_PATH directory for the persistent cover cache;
empty (default) = temporary cache, deleted on exit
PERF_PATH JSON file for per-step move timing stats.
Default /config/perf_stats.json (empty disables it)
VOLUME_PERF_PATH JSON file for volume/cover updater timing.
Default /config/volume_perf_stats.json
PERSON_PERF_PATH JSON file for person updater timing.
Default /config/person_perf_stats.json
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
try:
from dotenv import load_dotenv
load_dotenv()
except ImportError:
pass
# Shared code in src/, manga-specific code in src/manga/. Modules are
# imported by their plain names so src-internal imports resolve to the
# same module objects (a `src.X` import would load everything twice).
_BASE = Path(__file__).resolve().parent
sys.path.insert(0, str(_BASE / "src"))
sys.path.insert(0, str(_BASE / "src" / "manga"))
from SuwayomiMover import SuwayomiMover # noqa: E402
from SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402,F401
from MatchesCache import MatchesCache # noqa: E402
from MatchesWebApp import MatchesWebApp # noqa: E402
from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402
from KavitaClient import KavitaClient # noqa: E402
from KavitaPersonUpdater import KavitaPersonUpdater # noqa: E402
from PerfStats import PerfStats # noqa: E402
from CronRunner import CronRunner # noqa: E402
def _env_str(name: str, default: "str | None" = None,
             required: bool = False) -> "str | None":
    """
    Reads a string setting from the process environment.

    name     : environment variable to look up.
    default  : value used when the variable is not set at all.
    required : when true, a missing or empty result prints the variable
               name and terminates the process with exit status 2.

    Returns the value as found (possibly an empty string) or ``default``.
    """
    resolved = os.environ.get(name, default)
    if resolved or not required:
        return resolved
    print(f"[main] missing required env var: {name}", flush=True)
    sys.exit(2)
def _env_int(name: str, default: int) -> int:
    """
    Reads an integer setting from the process environment.

    An unset or empty variable yields ``default``.  A value that ``int()``
    rejects is reported on stdout and also yields ``default``.
    """
    raw = os.environ.get(name)
    if not raw:
        # None (unset) and "" (set but empty) both mean "use the default".
        return default
    try:
        parsed = int(raw)
    except ValueError:
        print(f"[main] {name}={raw!r} is not a valid integer; "
              f"falling back to {default}", flush=True)
        return default
    return parsed
def _env_bool(name: str, default: bool) -> bool:
    """
    Reads a boolean setting from the process environment.

    An unset variable yields ``default``.  An empty or whitespace-only
    value is treated as unset as well, matching ``_env_int`` (previously
    it was silently read as False).  Otherwise the value is true exactly
    when it is one of 1 / true / yes / y / on (case-insensitive,
    surrounding whitespace ignored); anything else is false.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    token = raw.strip().lower()
    if not token:
        # "NAME=" with nothing after it carries no decision -> default.
        return default
    return token in ("1", "true", "yes", "y", "on")
def main() -> int:
    """
    Reads the configuration from the environment, starts the matches web
    UI and (when enabled) the cron-driven updaters, then blocks in
    ``web_app.wait()``.

    The person updater is only created when KAVITA_API_KEY is non-empty;
    its absence is now reported at start-up instead of being silent.
    The folder watcher wiring is currently disabled, so the web app is
    what keeps the process alive.  Always returns 0.
    """
    suwayomi_path = _env_str("SUWAYOMI_PATH", "/mnt/suwayomi")
    kavita_path = _env_str("KAVITA_PATH", "/mnt/kavita")
    kavita_url = _env_str("KAVITA_URL", "http://kavita:5000")
    kavita_api_key = _env_str("KAVITA_API_KEY", "")
    language = _env_str("LANGUAGE", "en") or "en"
    settle_seconds = _env_int("SETTLE_SECONDS", 600)
    request_timeout = _env_int("REQUEST_TIMEOUT", 30)
    delete_source = _env_bool("DELETE_SOURCE", True)
    match_path = _env_str("MATCH_PATH", "/config/matches.json")
    web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
    web_port = _env_int("WEB_PORT", 8080)
    updater_enabled = _env_bool("UPDATER_ENABLED", True)
    updater_schedule = _env_str("UPDATER_SCHEDULE", "0 10 * * 0")
    updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
    # Empty path settings are normalised to None.
    cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
    perf_path = _env_str("PERF_PATH", "/config/perf_stats.json") or None
    volume_perf_path = _env_str("VOLUME_PERF_PATH",
                                "/config/volume_perf_stats.json") or None
    person_perf_path = _env_str("PERSON_PERF_PATH",
                                "/config/person_perf_stats.json") or None

    print(f"[main] suwayomi = {suwayomi_path}", flush=True)
    print(f"[main] kavita = {kavita_path}", flush=True)
    print(f"[main] kavita url= {kavita_url}", flush=True)
    print(f"[main] settle = {settle_seconds}s", flush=True)
    print(f"[main] language = {language}", flush=True)
    print(f"[main] delete src= {delete_source}", flush=True)
    print(f"[main] match path= {match_path}", flush=True)
    print(f"[main] web = {web_host}:{web_port}", flush=True)

    matches_cache = MatchesCache(match_path)
    perf_move = PerfStats(perf_path)
    perf_volume = PerfStats(volume_perf_path)
    perf_person = PerfStats(person_perf_path)

    mover = SuwayomiMover(
        suwayomi_path, kavita_path,
        language=language,
        request_timeout=request_timeout,
        delete_source=delete_source,
        matches_cache=matches_cache,
        cover_cache_dir=cover_cache_path,
        perf_stats=perf_move,
    )

    # Standalone, global, id-based person updater (manga + LN libraries).
    person_updater = None
    if kavita_api_key:
        kavita_client = KavitaClient(kavita_url, kavita_api_key,
                                     request_timeout=request_timeout)
        person_updater = KavitaPersonUpdater(kavita_client)
    else:
        # Without a key no Kavita client is built; say so rather than
        # leaving the operator to wonder why person syncs never run.
        print("[main] KAVITA_API_KEY not set; person updater DISABLED",
              flush=True)

    # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
    web_app = MatchesWebApp(
        matches_cache, mover=mover,
        person_updater=person_updater, person_trigger="web",
        perf_stats={"move": perf_move, "volume": perf_volume,
                    "person": perf_person},
        host=web_host, port=web_port)
    web_app.start()

    if updater_enabled:
        updater = KavitaVolumeCoverUpdater(
            kavita_path,
            matches_cache=matches_cache,
            language=language,
            request_timeout=request_timeout,
            log_path=updater_log,
            cover_cache_dir=cover_cache_path,
            perf_stats=perf_volume,
        )

        def _scheduled_job():
            # Volume/cover back-fill first, then the person sync (if any).
            updater.update_all()
            if person_updater is not None:
                person_updater.update_all_persons(trigger="cron",
                                                  perf=perf_person)

        try:
            CronRunner(updater_schedule, _scheduled_job,
                       name="updaters").start()
        except ValueError as exc:
            # Invalid cron expression — keep the service up, just without
            # the scheduled updaters, and surface the config error.
            print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
                  f"scheduled updaters DISABLED", flush=True)

    # watcher.start()
    # watcher.wait()  # blocks until stop() is called via a signal
    web_app.wait()  # keep process alive while the watcher is disabled
    return 0
if __name__ == "__main__":
sys.exit(main())
+1
View File
@@ -2,3 +2,4 @@ requests>=2.31
Pillow>=10.0
watchdog>=4.0
Flask>=3.0
python-dotenv>=1.0
+32 -17
View File
@@ -32,27 +32,35 @@ Dependencies
from __future__ import annotations
import datetime
import difflib
import time
import requests
from MediaResolver import MediaResolver
from TextUtils import best_similarity
# --------------------------------------------------------------------------
# GraphQL query strings
# --------------------------------------------------------------------------
_SEARCH_MANGA = """
# AniList models both manga and light novels as type MANGA; the format
# clause decides which of the two a search returns. The placeholder is
# substituted at construction time (see `media_format`).
_SEARCH_MANGA_TEMPLATE = """
query ($search: String) {
Page(page: 1, perPage: 5) {
media(search: $search, type: MANGA, format_not_in: [NOVEL]) {
media(search: $search, type: MANGA, __FORMAT_CLAUSE__) {
id title { romaji english native } siteUrl
}
}
}
"""
_FORMAT_CLAUSES = {
"manga": "format_not_in: [NOVEL]",
"novel": "format_in: [NOVEL]",
}
_MANGA_STATS = """
query ($id: Int) {
Media(id: $id, type: MANGA) {
@@ -131,10 +139,24 @@ class AniListResolver(MediaResolver):
cls._instance._initialized = False
return cls._instance
def __init__(self, *, request_timeout: int = 30):
def __init__(self, *, request_timeout: int = 30,
media_format: str = "manga"):
"""
media_format : "manga" (excludes novels) or "novel" (novels only).
Only the FIRST construction in the process sets it
(singleton); construct the resolver with the correct
format in the entry point / orchestrator.
"""
if self._initialized:
return
if media_format not in _FORMAT_CLAUSES:
raise ValueError(f"media_format must be one of "
f"{sorted(_FORMAT_CLAUSES)}, got {media_format!r}")
self.media_format = media_format
self._search_query = _SEARCH_MANGA_TEMPLATE.replace(
"__FORMAT_CLAUSE__", _FORMAT_CLAUSES[media_format])
self.request_timeout = request_timeout
self._session = requests.Session()
@@ -178,7 +200,7 @@ class AniListResolver(MediaResolver):
return self._id_cache[key]
try:
data = self._gql(_SEARCH_MANGA, {"search": title})
data = self._gql(self._search_query, {"search": title})
results = ((data.get("data") or {})
.get("Page", {})
.get("media") or [])
@@ -469,18 +491,11 @@ class AniListResolver(MediaResolver):
def _score_title(query: str, entry: dict) -> float:
"""Returns the best title-similarity score for an AniList media entry."""
title_obj = entry.get("title") or {}
candidates = [
title_obj.get("romaji") or "",
title_obj.get("english") or "",
title_obj.get("native") or "",
]
best = 0.0
q = query.lower()
for t in candidates:
if t:
ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
best = max(best, ratio)
return best
return best_similarity(query, (
title_obj.get("romaji"),
title_obj.get("english"),
title_obj.get("native"),
))
# --------------------------------------------------------------------------
+148
View File
@@ -0,0 +1,148 @@
"""
cover_cache.py
==============
Disk-backed cache for downloaded cover images, keyed by URL.
Why
---
The mover packs every chapter of a series individually, and each chapter
needs a cover image. Without caching, the same multi-megabyte cover is
downloaded once per chapter (20-chapter volume = 20 identical downloads).
This cache turns that into a single download per unique URL.
Persistence
-----------
* ``cache_dir`` given -> covers persist across runs in that directory.
* ``cache_dir`` omitted -> a temporary directory is used and removed
automatically when the process exits.
Files are stored as ``<sha256(url)[:32]><ext>``; the extension is derived
from the URL / Content-Type at download time so it can be reused when
writing the cover into a chapter folder.
Thread safety: downloads are serialised per cache instance, so concurrent
mover / updater threads that share one instance never fetch the same URL twice.
Dependencies
------------
requests -> pip install requests
"""
from __future__ import annotations
import atexit
import hashlib
import shutil
import tempfile
import threading
from pathlib import Path
import requests
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}

# Content-Type fragments and the extension each one maps to, checked in
# order.  Covers every non-JPEG type listed in _IMAGE_EXTS; anything
# unrecognised falls back to ".jpg".
_CONTENT_TYPE_EXTS = (
    ("png", ".png"),
    ("webp", ".webp"),
    ("gif", ".gif"),
    ("avif", ".avif"),
    ("bmp", ".bmp"),
)


def _guess_extension(url: str, content_type: str) -> str:
    """
    Derives an image file extension from a URL or HTTP Content-Type.

    The URL wins when its path (query string and fragment stripped) ends
    in a known image extension; the extension is returned lower-cased.
    Otherwise the Content-Type is searched for a known image type.
    Returns ".jpg" when neither source identifies the format.
    """
    # Drop "?query" and "#fragment" so neither hides the real suffix.
    path_part = url.split("?", 1)[0].split("#", 1)[0]
    url_ext = Path(path_part).suffix.lower()
    if url_ext in _IMAGE_EXTS:
        return url_ext

    ct = (content_type or "").lower()
    for marker, ext in _CONTENT_TYPE_EXTS:
        if marker in ct:
            return ext
    return ".jpg"
class CoverCache:
    """
    URL-keyed image cache on disk.

    Parameters
    ----------
    cache_dir       : Directory for cached covers.  None -> temporary
                      directory, deleted automatically at process exit.
    session         : Optional shared requests.Session for downloads.
    request_timeout : HTTP timeout in seconds.
    """

    # Suffix of the scratch file a download is written to before it is
    # renamed into place.  Such a file is never a valid cache entry.
    _TMP_SUFFIX = ".tmp"

    def __init__(self, cache_dir=None, *,
                 session: "requests.Session | None" = None,
                 request_timeout: int = 30):
        self._persistent = cache_dir is not None
        if self._persistent:
            self._dir = Path(cache_dir)
            self._dir.mkdir(parents=True, exist_ok=True)
        else:
            self._dir = Path(tempfile.mkdtemp(prefix="cover_cache_"))
            # Temporary caches clean up after themselves at interpreter exit.
            atexit.register(self.close)

        self._session = session or requests.Session()
        # NOTE(review): requests.Session presumably already carries a
        # default User-Agent, in which case setdefault changes nothing —
        # confirm whether "CoverCache/1.0" was meant to override it.
        self._session.headers.setdefault("User-Agent", "CoverCache/1.0")
        self._timeout = request_timeout
        # Guards lookup + download so they run one at a time per instance.
        self._lock = threading.Lock()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get(self, url: str) -> "tuple[bytes, str] | None":
        """
        Returns ``(image_bytes, extension)`` for the URL — from cache when
        present, downloading (and caching) otherwise.  Returns None when
        the URL is empty or the download fails.
        """
        if not url:
            return None
        with self._lock:
            cached = self._find_cached(url)
            if cached is not None:
                try:
                    return cached.read_bytes(), cached.suffix
                except OSError:
                    pass  # unreadable cache file -> re-download
            return self._download(url)

    def clear(self) -> None:
        """Removes all cached covers (the directory itself is kept)."""
        with self._lock:
            for f in self._dir.glob("*"):
                if f.is_file():
                    f.unlink(missing_ok=True)

    def close(self) -> None:
        """Deletes the cache directory when it is non-persistent."""
        if not self._persistent:
            shutil.rmtree(self._dir, ignore_errors=True)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    @staticmethod
    def _key(url: str) -> str:
        """Returns the cache file stem: first 32 hex chars of sha256(url)."""
        return hashlib.sha256(url.encode("utf-8")).hexdigest()[:32]

    def _find_cached(self, url: str) -> "Path | None":
        """
        Returns the cached file for the URL, or None when there is none.

        Leftover ``<key><ext>.tmp`` scratch files (e.g. from an
        interrupted write) also match the ``<key>.*`` pattern; they are
        skipped so a partial download is never served as a cover.
        """
        candidates = sorted(
            path for path in self._dir.glob(self._key(url) + ".*")
            if path.suffix != self._TMP_SUFFIX and path.is_file()
        )
        return candidates[0] if candidates else None

    def _download(self, url: str) -> "tuple[bytes, str] | None":
        """
        Fetches the URL and stores the body in the cache directory.

        Returns ``(image_bytes, extension)``, or None when the request
        fails or the server answers with an error status.  A failure to
        write the cache file is not an error: the bytes are still returned.
        """
        try:
            resp = self._session.get(url, timeout=self._timeout)
            resp.raise_for_status()
        except requests.RequestException:
            return None

        ext = _guess_extension(url, resp.headers.get("Content-Type", ""))
        target = self._dir / f"{self._key(url)}{ext}"
        tmp = target.with_suffix(target.suffix + self._TMP_SUFFIX)
        try:
            # Write to a scratch file first, then rename, so readers never
            # see a half-written cache entry.
            tmp.write_bytes(resp.content)
            tmp.replace(target)
        except OSError:
            # Cache write failure is non-fatal — still return the bytes,
            # but do not leave the scratch file behind.
            try:
                tmp.unlink(missing_ok=True)
            except OSError:
                pass
        return resp.content, ext
+87
View File
@@ -0,0 +1,87 @@
"""
cron_runner.py
==============
Runs a single callable on a cron schedule on a background thread.
Decouples *what* runs from *when*: both the manga container (volume/cover
updater + person updater) and the LN container (person updater) schedule
their work through this one helper, using a shared ``CronSchedule`` for the
``next_after`` arithmetic.
Usage::
runner = CronRunner("0 10 * * 0", job=my_callable) # Sundays 10:00
runner.start()
...
runner.stop()
When the schedule string is invalid, the CronSchedule constructor raises
ValueError — the caller decides whether to disable the runner or fall back.
The schedule is evaluated in local time (set TZ inside the container).
"""
from __future__ import annotations
import threading
from datetime import datetime
from CronSchedule import CronSchedule
def _now() -> str:
    """Returns the current local time as an ISO-8601 string, to the second."""
    stamp = datetime.now()
    return stamp.isoformat(timespec="seconds")
class CronRunner:
    """
    Fires ``job()`` whenever the cron ``schedule`` elapses.

    Parameters
    ----------
    schedule : 5-field cron expression (see CronSchedule).
    job      : Zero-arg callable invoked on each scheduled tick. Exceptions
               are caught and logged so a failing run does not kill the loop.
    name     : Thread name (for logs).
    """

    def __init__(self, schedule: str, job, *, name: str = "CronRunner"):
        self._cron = CronSchedule(schedule)
        self._job = job
        self._name = name
        self._stop = threading.Event()
        self._thread: "threading.Thread | None" = None

    def start(self) -> None:
        """Starts the scheduling thread. Non-blocking; no-op when running."""
        if self._thread is not None and self._thread.is_alive():
            return
        self._stop.clear()
        self._thread = threading.Thread(
            target=self._loop, name=self._name, daemon=True)
        self._thread.start()
        print(f"[{_now()}] [{self._name}] scheduled on "
              f"cron '{self._cron.expression}'", flush=True)

    def stop(self) -> None:
        """
        Signals the loop to stop (a job already running finishes first).

        Waits up to 10 seconds for the scheduling thread to exit. When
        called from the scheduling thread itself (i.e. from inside the
        job), the join is skipped: a thread cannot join itself, and the
        loop exits on its own once the job returns.
        """
        self._stop.set()
        thread = self._thread
        if thread is not None and thread is not threading.current_thread():
            thread.join(timeout=10)

    def wait(self) -> None:
        """Blocks the calling thread until stop() is invoked."""
        self._stop.wait()

    def _loop(self) -> None:
        """Thread body: sleep until the next cron tick, run the job, repeat."""
        while not self._stop.is_set():
            next_run = self._cron.next_after(datetime.now())
            delay = max(0.0, (next_run - datetime.now()).total_seconds())
            print(f"[{_now()}] [{self._name}] next run: "
                  f"{next_run.isoformat(timespec='minutes')}", flush=True)
            # Event.wait returns True when stop() fired during the sleep.
            if self._stop.wait(delay):
                break
            try:
                self._job()
            except Exception as exc:
                # Top-level boundary: a failing job must not end scheduling.
                print(f"[{_now()}] [{self._name}] job ERROR: {exc}", flush=True)
+297
View File
@@ -0,0 +1,297 @@
"""
kavita_client.py
================
Thin HTTP client for the Kavita server REST API (v0.9.x).
Authenticates via the ``x-api-key`` header. All series / library /
collection / metadata reads and writes used by the light-novel updater
go through this single client so request shaping (paging, content types,
timeouts, retries) is consistent.
The class is intentionally state-light: no caching layer, just one
``requests.Session``. Higher-level diff / update logic lives in
KavitaSeriesUpdater, KavitaPersonUpdater and RelationshipSync.
"""
from __future__ import annotations
import base64
from typing import Iterable
import requests
class KavitaClient:
    """
    Thin wrapper around one authenticated ``requests.Session`` for the
    Kavita REST API, plus a second header-less session for image downloads.

    Parameters
    ----------
    base_url        : Kavita server URL; a trailing slash is stripped.
    api_key         : API key sent in the ``x-api-key`` header.
    request_timeout : HTTP timeout in seconds applied to every request.
    """

    def __init__(self, base_url: str, api_key: str, *,
                 request_timeout: int = 30):
        self._base = base_url.rstrip("/")
        self._timeout = request_timeout
        # API session: sends + receives JSON.
        self._session = requests.Session()
        self._session.headers.update({
            "x-api-key": api_key,
            "Accept": "application/json",
            "Content-Type": "application/json",
        })
        # Plain session for downloading external images (covers). Must NOT
        # carry the API headers — some CDNs refuse to return image bytes
        # when the client sends Accept: application/json.
        self._image_session = requests.Session()
        self._image_session.headers.update({
            "User-Agent": "KavitaLightNovelUpdater/1.0",
        })

    # ------------------------------------------------------------------
    # Libraries
    # ------------------------------------------------------------------
    def list_libraries(self) -> list[dict]:
        """Returns all libraries the authenticated user can access."""
        r = self._session.get(f"{self._base}/api/Library/libraries",
                              timeout=self._timeout)
        r.raise_for_status()
        return r.json() or []

    # ------------------------------------------------------------------
    # Series
    # ------------------------------------------------------------------
    def list_series_in_library(self, library_id: int, *,
                               page_size: int = 200) -> list[dict]:
        """
        Returns all SeriesDto entries in the given library.

        Uses POST /api/Series/all-v2 with a FilterV2 that scopes by
        library id. Pages through until an empty or short page is returned.
        """
        body = {
            "statements": [
                {
                    "comparison": 0,   # Equal
                    "field": 19,       # Libraries field id (Kavita v0.9.x)
                    "value": str(library_id),
                }
            ],
            "combination": 1,          # And
            "sortOptions": {"isAscending": True, "sortField": 1},
            "limitTo": 0,
        }
        return self._post_paged("/api/Series/all-v2", body, page_size)

    def get_series(self, series_id: int) -> dict:
        """Returns the SeriesDto for the given series id."""
        r = self._session.get(f"{self._base}/api/Series/{series_id}",
                              timeout=self._timeout)
        r.raise_for_status()
        return r.json() or {}

    def update_series(self, series: dict) -> None:
        """Updates the Series-level data (name, sortName, malId, …)."""
        r = self._session.post(f"{self._base}/api/Series/update",
                               json=series, timeout=self._timeout)
        r.raise_for_status()

    # ------------------------------------------------------------------
    # Series metadata
    # ------------------------------------------------------------------
    def get_series_metadata(self, series_id: int) -> dict:
        """Returns the SeriesMetadataDto for a series."""
        r = self._session.get(
            f"{self._base}/api/Series/metadata",
            params={"seriesId": series_id}, timeout=self._timeout)
        r.raise_for_status()
        return r.json() or {}

    def update_series_metadata(self, metadata: dict) -> None:
        """
        Writes a SeriesMetadataDto back to Kavita.

        Kavita expects the payload wrapped: {seriesMetadata: {...}}.
        """
        r = self._session.post(
            f"{self._base}/api/Series/metadata",
            json={"seriesMetadata": metadata},
            timeout=self._timeout)
        r.raise_for_status()

    # ------------------------------------------------------------------
    # Related series
    # ------------------------------------------------------------------
    def get_related(self, series_id: int) -> dict:
        """Returns all related series grouped by relation type."""
        r = self._session.get(
            f"{self._base}/api/Series/all-related",
            params={"seriesId": series_id}, timeout=self._timeout)
        r.raise_for_status()
        return r.json() or {}

    def update_related(self, payload: dict) -> None:
        """
        Sets the related-series relationships for a series.

        Payload shape (UpdateRelatedSeriesDto):
            {seriesId, prequels, sequels, sideStories, spinOffs,
             adaptations, characters, contains, others,
             alternativeSettings, alternativeVersions, doujinshis,
             editions, annuals}
        Each relation list contains target series ids (ints).
        """
        r = self._session.post(
            f"{self._base}/api/Series/update-related",
            json=payload, timeout=self._timeout)
        r.raise_for_status()

    # ------------------------------------------------------------------
    # Collections
    # ------------------------------------------------------------------
    def list_collections(self) -> list[dict]:
        """Returns all collection tags visible to the authenticated user."""
        r = self._session.get(
            f"{self._base}/api/Collection",
            params={"ownedOnly": "false", "sortByLastModified": "false"},
            timeout=self._timeout)
        r.raise_for_status()
        return r.json() or []

    def add_series_to_collection(self, *, collection_id: int,
                                 title: str,
                                 series_ids: Iterable[int]) -> dict:
        """
        Adds (or creates) a collection and attaches series to it.

        Pass collection_id=0 to create a new collection named `title`.
        For an existing collection set collection_id to its id (title is
        still required by the API but acts as no-op when the id matches).

        Returns the decoded JSON response, or {} when the response body is
        empty or not JSON.
        """
        body = {
            "collectionTagId": int(collection_id),
            "collectionTagTitle": title,
            "seriesIds": [int(s) for s in series_ids],
        }
        r = self._session.post(
            f"{self._base}/api/Collection/update-for-series",
            json=body, timeout=self._timeout)
        r.raise_for_status()
        try:
            return r.json() or {}
        except ValueError:
            return {}

    # ------------------------------------------------------------------
    # Persons
    # ------------------------------------------------------------------
    def search_persons(self, name: str) -> list[dict]:
        """Returns PersonDto entries matching `name` (Kavita's own search)."""
        r = self._session.get(
            f"{self._base}/api/Person/search",
            params={"queryString": name}, timeout=self._timeout)
        r.raise_for_status()
        return r.json() or []

    def list_all_persons(self, *, page_size: int = 200) -> list[dict]:
        """
        Returns every PersonDto in the instance.

        Pages through POST /api/Person/all (the browse endpoint) with an
        empty filter until an empty or short page is returned — same paging
        pattern as list_series_in_library.
        """
        return self._post_paged("/api/Person/all", {}, page_size)

    def update_person(self, payload: dict) -> None:
        """Writes a person record (malId, aniListId, description, …)."""
        r = self._session.post(f"{self._base}/api/Person/update",
                               json=payload, timeout=self._timeout)
        r.raise_for_status()

    # ------------------------------------------------------------------
    # Cover uploads
    # ------------------------------------------------------------------
    def upload_series_cover(self, series_id: int, image_url: str, *,
                            lock: bool = False) -> None:
        """Downloads an external image and uploads it as the series cover."""
        self._upload_cover("/api/Upload/series", series_id, image_url, lock)

    def upload_person_cover(self, person_id: int, image_url: str, *,
                            lock: bool = False) -> None:
        """Downloads an external image and uploads it as a person cover."""
        self._upload_cover("/api/Upload/person", person_id, image_url, lock)

    def _upload_cover(self, endpoint: str, entity_id: int,
                      image_url: str, lock: bool) -> None:
        """
        Shared cover-upload path. Kavita's upload endpoints accept a raw
        base64 blob (no ``data:`` prefix) in the ``url`` field — a data
        URI or the two-step upload-by-url flow are rejected with HTTP 400
        (verified against Kavita 0.9.0.2).

        Raises requests.HTTPError when the image download or the upload
        returns an error status.
        """
        img = self._image_session.get(image_url, timeout=self._timeout)
        img.raise_for_status()
        b64 = base64.b64encode(img.content).decode()
        r = self._session.post(
            f"{self._base}{endpoint}",
            json={"id": entity_id, "url": b64, "lockCover": lock},
            timeout=self._timeout)
        if r.status_code >= 400:
            # Include the body excerpt — Kavita's upload errors carry the
            # actual reason there, not in the status line.
            raise requests.HTTPError(
                f"{endpoint} HTTP {r.status_code}: {_short_body(r)}",
                response=r)

    # ------------------------------------------------------------------
    # Generic GET / POST helpers (for callers that need a response object)
    # ------------------------------------------------------------------
    def get(self, path: str,
            params: "dict | None" = None) -> "requests.Response":
        """Issues a GET against `path`; the status is NOT checked."""
        return self._session.get(f"{self._base}{path}",
                                 params=params, timeout=self._timeout)

    def post(self, path: str, *,
             json: "dict | list | None" = None,
             params: "dict | None" = None) -> "requests.Response":
        """Issues a POST against `path`; the status is NOT checked."""
        return self._session.post(f"{self._base}{path}",
                                  json=json, params=params,
                                  timeout=self._timeout)

    # ------------------------------------------------------------------
    # Internal: paging
    # ------------------------------------------------------------------
    def _post_paged(self, path: str, body: dict,
                    page_size: int) -> list[dict]:
        """
        Collects every page of a POST-based list endpoint.

        Sends `body` with ``PageNumber`` / ``PageSize`` query parameters,
        starting at page 1, and stops at the first empty page or the first
        page shorter than `page_size`.
        """
        results: list[dict] = []
        page = 1
        while True:
            r = self._session.post(
                f"{self._base}{path}",
                params={"PageNumber": page, "PageSize": page_size},
                json=body, timeout=self._timeout)
            r.raise_for_status()
            chunk = r.json() or []
            if not chunk:
                break
            results.extend(chunk)
            if len(chunk) < page_size:
                break  # short page -> nothing further to fetch
            page += 1
        return results
def _short_body(resp: "requests.Response", limit: int = 400) -> str:
    """
    Returns the response body as a single line trimmed to `limit` chars,
    for use in error messages.

    Runs of whitespace (including CR/LF) collapse to one space. A body
    longer than `limit` is cut and marked with a trailing "…" so a
    truncated excerpt is distinguishable from a complete one. Placeholder
    strings are returned for an empty or unreadable body.
    """
    try:
        text = resp.text or ""
    except Exception:
        # Decoding the body must never mask the error being reported.
        return "<unreadable response body>"
    text = " ".join(text.split())
    if len(text) > limit:
        text = text[:limit] + "…"
    return text or "<empty body>"
+151 -459
View File
@@ -2,544 +2,236 @@
kavita_person_updater.py
========================
Synchronises Kavita person / character records with MyAnimeList data.
Synchronises Kavita character person-records with MyAnimeList / AniList data.
For every character and staff member that MAL knows about for a given manga
the updater:
1. Searches Kavita for a matching Person record (by name similarity /
alias match, configurable threshold).
2. Sets the MAL ID on the Kavita person if it is not yet linked.
3. Uploads the MAL profile image when the cover is not locked and has
not been set in a previous sync run.
4. Populates the description field when Kavita has none and MAL provides
an 'about' text (requires an extra Jikan request per character; only
performed when update_descriptions=True).
Global, id-based mode
---------------------
Kavita person-records are created with a disambiguated name carrying the
tracker *character* id, e.g. ``Rem (MAL 118737)`` (manga: written into
ComicInfo <Characters>; light novels: written by the metadata builder).
``update_all_persons`` walks **every** person in the Kavita instance, reads
that id from the name, looks the character up on MAL / AniList by id, and
writes back:
* the tracker id into the ``malId`` / ``aniListId`` field (when still empty),
* a description (when the record has none),
* the profile image (when not locked and not already set).
Persons whose name carries no id (authors / staff, which are not
disambiguated) are skipped. A record already linked to a *different*
tracker id than its name says is reported as a conflict and left untouched.
This mode is format-independent (it only does id lookups, never title
searches) so a single pass covers both the manga and light-novel libraries.
All HTTP traffic to Kavita goes through the shared :class:`KavitaClient`
(`/api/Person/all`, `/api/Person/update`, `/api/Upload/person`).
Kavita API version
------------------
Tested against Kavita 0.9.0.2.
Authentication
--------------
Uses the `x-api-key` header (API key from Kavita user settings).
No JWT login is required.
Relevant endpoints (Kavita 0.9.0.2)
-------------------------------------
GET /api/Person/search find persons by name / alias
POST /api/Person/update write metadata (malId, description, …)
POST /api/Upload/person set cover image (base64 data URI)
POST /api/Upload/upload-by-url download an external URL to temp storage
(used as an alternative upload path)
Cover upload flow
-----------------
The image is downloaded locally, base64-encoded, and sent as a data URI
to POST /api/Upload/person. This is more reliable than the
upload-by-url → upload/person two-step because it avoids Kavita's temp
file handling (which had known issues in 0.8.x–0.9.x, GitHub #3900).
Dependencies
------------
requests -> pip install requests
"""
from __future__ import annotations
import base64
import datetime
import difflib
import re
import requests
from KavitaClient import KavitaClient
from MALResolver import MALResolver
from AniListResolver import AniListResolver
from PerfStats import PerfStats
from TextUtils import paragraphs_to_html, parse_person_tracker_id
class KavitaPersonUpdater:
"""
Syncs Kavita Person records with MyAnimeList data.
Syncs Kavita character person-records with MAL / AniList data, keyed by
the tracker id embedded in each person's name.
Parameters
----------
kavita_base_url : Base URL of the Kavita server, e.g. "http://192.168.2.2:5000"
api_key : Kavita API key (Settings → User → API key)
mal_resolver : Shared MALResolver singleton (created automatically if omitted)
request_timeout : HTTP timeout in seconds for both Kavita and image requests
min_name_score : Minimum difflib similarity ratio (0–1) required to accept a
Kavita person as a match for a MAL name. Default 0.80.
client : Shared KavitaClient (session, auth, cover uploads).
mal_resolver : Shared MALResolver singleton (created if omitted).
al_resolver : Shared AniListResolver singleton (created if omitted).
"""
def __init__(self, kavita_base_url: str, api_key: str, *,
def __init__(self, client: KavitaClient, *,
mal_resolver: "MALResolver | None" = None,
al_resolver: "AniListResolver | None" = None,
request_timeout: int = 30,
min_name_score: float = 0.80):
self._base = kavita_base_url.rstrip("/")
self._timeout = request_timeout
self._min_score = min_name_score
al_resolver: "AniListResolver | None" = None):
self._client = client
self._mal = mal_resolver or MALResolver()
self._al = al_resolver or AniListResolver()
# Session used for Kavita API calls.
self._session = requests.Session()
self._session.headers.update({
"x-api-key": api_key,
"Content-Type": "application/json",
"Accept": "application/json",
})
# Plain session used to download external images (MAL CDN etc.).
# Must NOT carry the Kavita API headers — Accept: application/json
# would prevent MAL CDN from returning the image bytes.
self._image_session = requests.Session()
self._image_session.headers.update({
"User-Agent": "KavitaPersonUpdater/1.0",
})
# Cache: normalised name -> list of PersonDto dicts (best matches first)
self._person_search_cache: dict[str, list[dict]] = {}
# ------------------------------------------------------------------
# Public: combined update
# Public: global person sync
# ------------------------------------------------------------------
def update_for_manga(self, mal_manga_id: "int | None", *,
al_manga_id: "int | None" = None,
update_covers: bool = True,
update_descriptions: bool = True) -> dict:
def update_all_persons(self, *,
trigger: str = "cron",
perf: "PerfStats | None" = None,
update_covers: bool = True,
update_descriptions: bool = True) -> dict:
"""
Runs a full update pass for both characters and staff of the manga.
MAL is tried first; AniList is used as fallback when MAL returns nothing.
Walks every Kavita person, syncing the ones whose name carries a
tracker character id.
Returns
-------
{
"characters": {"updated": n, "skipped": n, "not_found": n},
"staff": {"updated": n, "skipped": n, "not_found": n},
}
Parameters
----------
trigger : Source that started this run ("cron" | "web" | "ln") —
recorded in the perf-stats run meta.
perf : Optional PerfStats for per-person step timing.
Returns {"trigger", "updated", "skipped", "not_found",
"conflicts", "errors"}.
"""
return {
"characters": self.update_characters(
mal_manga_id, al_manga_id=al_manga_id,
update_covers=update_covers,
update_descriptions=update_descriptions),
"staff": self.update_staff(
mal_manga_id, al_manga_id=al_manga_id,
update_covers=update_covers,
update_descriptions=update_descriptions),
}
perf = perf or PerfStats(None)
run = perf.begin_run(meta={"trigger": trigger})
result: dict = {"trigger": trigger, "updated": 0, "skipped": 0,
"not_found": 0, "conflicts": 0, "errors": []}
# ------------------------------------------------------------------
# Public: character update
# ------------------------------------------------------------------
def update_characters(self, mal_manga_id: "int | None", *,
al_manga_id: "int | None" = None,
update_covers: bool = True,
update_descriptions: bool = True) -> dict:
"""
Updates Kavita persons that match MAL/AniList characters for the manga.
MAL is tried first; AniList is the fallback when MAL returns nothing.
try:
persons = self._client.list_all_persons()
except requests.RequestException as exc:
result["errors"].append(f"list persons failed: {exc}")
run.finish()
return result
Returns {"updated": n, "skipped": n, "not_found": n}.
"""
entries = self._mal.get_characters_detailed(mal_manga_id) if mal_manga_id else []
resolver = self._mal
if not entries and al_manga_id:
entries = self._al.get_characters_detailed(al_manga_id)
resolver = self._al
return self._sync_entries(entries, "character", resolver,
update_covers=update_covers,
update_descriptions=update_descriptions)
# ------------------------------------------------------------------
# Public: staff update
# ------------------------------------------------------------------
def update_staff(self, mal_manga_id: "int | None", *,
al_manga_id: "int | None" = None,
update_covers: bool = True,
update_descriptions: bool = True) -> dict:
"""
Updates Kavita persons that match MAL/AniList staff for the manga.
MAL is tried first; AniList is the fallback when MAL returns nothing.
Returns {"updated": n, "skipped": n, "not_found": n}.
"""
entries = self._mal.get_staff_detailed(mal_manga_id) if mal_manga_id else []
resolver = self._mal
if not entries and al_manga_id:
entries = self._al.get_staff_detailed(al_manga_id)
resolver = self._al
return self._sync_entries(entries, "staff", resolver,
update_covers=update_covers,
update_descriptions=update_descriptions)
# ------------------------------------------------------------------
# Public: cache management
# ------------------------------------------------------------------
def clear_cache(self) -> None:
"""Clears the Kavita person search cache."""
self._person_search_cache.clear()
# ------------------------------------------------------------------
# Internal: main sync loop
# ------------------------------------------------------------------
def _sync_entries(self, entries: list[dict], kind: str, resolver, *,
update_covers: bool,
update_descriptions: bool) -> dict:
result: dict = {"updated": 0, "skipped": 0, "not_found": 0,
"errors": []}
for entry in entries:
name = (entry.get("name") or "").strip()
raw_name = (entry.get("raw_name") or "").strip()
if not name and not raw_name:
for person in persons:
name = (person.get("name") or "").strip()
parsed = parse_person_tracker_id(name)
if not parsed:
result["skipped"] += 1 # author/staff or un-tagged
continue
# Search by the cleaned (XML-safe) name first; if Kavita stores
# the legacy comma form, retry with the raw MAL name.
matches = self._find_kavita_person(name) if name else []
if not matches and raw_name and raw_name != name:
matches = self._find_kavita_person(raw_name)
if not matches:
result["not_found"] += 1
continue
changed = self._apply_mal_data(
matches[0], entry, kind, resolver,
update_cover=update_covers,
update_desc=update_descriptions,
errors=result["errors"])
result["updated" if changed else "skipped"] += 1
source, tracker_id = parsed
item = run.begin_item(name)
ok = True
try:
category = self._apply_to_person(
person, source, tracker_id, item,
update_cover=update_covers,
update_desc=update_descriptions,
errors=result["errors"])
result[category] += 1
ok = category != "conflicts"
except Exception as exc:
result["errors"].append(f"{name}: {exc}")
ok = False
finally:
item.finish(ok=ok)
run.finish()
print(f"[persons] trigger={trigger} updated={result['updated']} "
f"skipped={result['skipped']} not_found={result['not_found']} "
f"conflicts={result['conflicts']} errors={len(result['errors'])}",
flush=True)
return result
# ------------------------------------------------------------------
# Internal: Kavita person search
# Internal: apply tracker data to one person
# ------------------------------------------------------------------
def _find_kavita_person(self, name: str) -> list[dict]:
def _apply_to_person(self, person: dict, source: str, tracker_id: int,
item, *, update_cover: bool, update_desc: bool,
errors: list) -> str:
"""
Searches Kavita for persons matching `name`.
Checks both the main name and any stored aliases.
Returns persons sorted by similarity, filtered by min_name_score.
Results are cached per (normalised) query name.
Applies MAL/AniList character data to one Kavita person.
Returns the result category: "updated" | "skipped" | "not_found"
| "conflicts".
"""
key = name.lower().strip()
if key in self._person_search_cache:
return self._person_search_cache[key]
try:
resp = self._session.get(
f"{self._base}/api/Person/search",
params={"queryString": name},
timeout=self._timeout,
)
resp.raise_for_status()
persons: list[dict] = resp.json() or []
except requests.RequestException:
self._person_search_cache[key] = []
return []
def score(p: dict) -> float:
candidates = [p.get("name") or ""]
candidates += [a for a in (p.get("aliases") or []) if a]
best = 0.0
q = key
for c in candidates:
r = difflib.SequenceMatcher(None, q, c.lower()).ratio()
best = max(best, r)
return best
ranked = sorted(persons, key=score, reverse=True)
filtered = [p for p in ranked if score(p) >= self._min_score]
self._person_search_cache[key] = filtered
return filtered
# ------------------------------------------------------------------
# Internal: apply MAL data to a single Kavita person
# ------------------------------------------------------------------
def _apply_mal_data(self, person: dict, mal_entry: dict, kind: str,
resolver, *,
update_cover: bool, update_desc: bool,
errors: "list | None" = None) -> bool:
"""
Applies tracker data (MAL or AniList) to one Kavita person record.
Fields updated
--------------
- malId : set when the entry carries a MAL ID and it differs
- aniListId : set when the entry carries an AniList ID and it differs
- description: set when empty and the tracker provides a description
- cover image: uploaded when not locked and no prior sync cover exists
Returns True if any change was made. Failures are appended to the
`errors` list (if provided) instead of being silently swallowed.
"""
person_id: "int | None" = person.get("id")
person_id = person.get("id")
if not person_id:
return False
return "skipped"
person_name = person.get("name") or ""
resolver = self._mal if source == "mal" else self._al
id_field = "malId" if source == "mal" else "aniListId"
current = person.get(id_field) or 0
# Tracker IDs — a MAL entry has mal_id set; an AniList entry has al_id.
mal_id: "int | None" = mal_entry.get("mal_id")
al_id: "int | None" = mal_entry.get("al_id")
entity_id = mal_id or al_id # used for resolver detail calls
# The name is authoritative; a record linked to a different id is a
# data conflict — never overwrite it.
if current and current != tracker_id:
errors.append(
f"conflict: '{person.get('name')}' (#{person_id}) has "
f"{id_field}={current} but name says {tracker_id} — skipped")
return "conflicts"
current_mal_id: int = person.get("malId") or 0
current_al_id: int = person.get("aniListId") or 0
needs_mal_id = bool(mal_id and current_mal_id != mal_id)
needs_al_id = bool(al_id and current_al_id != al_id)
with item.measure("detail_fetch"):
details = resolver.get_character_details(tracker_id)
if not details:
return "not_found"
# ------ Lazy description fetch -----------------------------------
description: "str | None" = None
need_id = not current # write id when still missing
description = None
if update_desc and not (person.get("description") or "").strip():
if entity_id:
if kind == "character":
details = resolver.get_character_details(entity_id)
if details:
description = _build_character_description(details) or None
else:
details = resolver.get_person_details(entity_id)
if details:
description = _build_person_description(details) or None
description = _build_character_description(details) or None
need_desc = bool(description)
needs_desc = bool(description)
# ------ Metadata update ------------------------------------------
changed = False
if needs_mal_id or needs_al_id or needs_desc:
payload: dict = {
if need_id or need_desc:
payload = {
"id": person_id,
"name": person_name,
# MUST stay a boolean — the cover image itself is uploaded
# separately via POST /api/Upload/person (below). Putting a
# URL here makes Kavita reject the whole payload with HTTP 400.
"name": person.get("name") or "",
# MUST stay a boolean — the cover is uploaded separately.
"coverImageLocked": bool(person.get("coverImageLocked", False)),
"aliases": person.get("aliases") or [],
"description": description or person.get("description"),
"malId": mal_id if needs_mal_id else (current_mal_id or None),
"aniListId": al_id if needs_al_id else (current_al_id or None),
"malId": tracker_id if source == "mal"
else (person.get("malId") or None),
"aniListId": tracker_id if source == "al"
else (person.get("aniListId") or None),
}
try:
resp = self._session.post(
f"{self._base}/api/Person/update",
json=payload,
timeout=self._timeout,
)
resp.raise_for_status()
with item.measure("person_update"):
self._client.update_person(payload)
changed = True
except requests.RequestException as e:
if errors is not None:
errors.append(
f"Person/update failed for #{person_id} "
f"'{person_name}': {e}")
except requests.RequestException as exc:
errors.append(f"update failed #{person_id} "
f"'{person.get('name')}': {exc}")
# ------ Cover image upload ----------------------------------------
# Upload whenever:
# - caller requested cover updates
# - cover is NOT locked (user did not manually pin it)
# - we have not already uploaded this exact tracker entity's image
# (i.e. the tracked ID differs OR there is no cover yet).
# Cover: upload when not locked and not already set for this id.
if update_cover and not person.get("coverImageLocked"):
image_url = mal_entry.get("image_url")
already_uploaded = (
entity_id is not None
and (current_mal_id == mal_id or current_al_id == al_id)
and bool(person.get("coverImage"))
)
if image_url and not already_uploaded:
if self._upload_cover(person_id, image_url,
person_name=person_name,
errors=errors):
image_url = details.get("image_url")
already = bool(current) and bool(person.get("coverImage"))
if image_url and not already:
try:
with item.measure("cover_upload"):
self._client.upload_person_cover(person_id, image_url)
changed = True
except requests.RequestException as exc:
errors.append(f"cover upload failed #{person_id} "
f"'{person.get('name')}': {exc}")
return changed
# ------------------------------------------------------------------
# Internal: cover upload
# ------------------------------------------------------------------
def _upload_cover(self, person_id: int, image_url: str,
lock: bool = False, *,
person_name: str = "",
errors: "list | None" = None) -> bool:
"""
Uploads a cover image to a Kavita person.
The image is downloaded with the plain (header-less) image session
and posted to `POST /api/Upload/person` as a raw base64 string in
the `url` field.
Notes on protocol quirks discovered against Kavita 0.9.0.2:
- The two-step `upload-by-url` -> `Upload/person` flow returns
"Unable to save cover image to Person" (HTTP 400).
- A `data:image/jpeg;base64,...` data URI is rejected with the
same error.
- Only the raw base64 blob (no prefix) is accepted.
"""
label = (f"#{person_id} '{person_name}'"
if person_name else f"#{person_id}")
# 1) Download the image with a clean session — the Kavita session's
# `Accept: application/json` header makes some CDNs refuse to
# return image bytes.
try:
img_resp = self._image_session.get(image_url,
timeout=self._timeout)
img_resp.raise_for_status()
except requests.RequestException as e:
if errors is not None:
errors.append(
f"image download failed for {label} ({image_url}): {e}")
return False
b64 = base64.b64encode(img_resp.content).decode()
# 2) POST the raw base64 blob.
try:
resp = self._session.post(
f"{self._base}/api/Upload/person",
json={"id": person_id, "url": b64, "lockCover": lock},
timeout=self._timeout,
)
if resp.status_code >= 400:
if errors is not None:
errors.append(
f"Upload/person HTTP {resp.status_code} for {label}: "
f"{_short_body(resp)}")
return False
return True
except requests.RequestException as e:
if errors is not None:
errors.append(
f"Upload/person failed for {label}: {e}")
return False
return "updated" if changed else "skipped"
# --------------------------------------------------------------------------
# Module helpers: description builders
# Module helper: character description builder
# --------------------------------------------------------------------------
def _plain_to_html(text: str) -> str:
"""Converts plain text with paragraph breaks to compact HTML (no raw \\n)."""
if not text:
return ""
parts: list[str] = []
for para in re.split(r"\n{2,}", text.strip()):
para = para.strip()
if para:
parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
return "".join(parts)
def _format_birthday(birthday: str) -> str:
"""Converts an ISO 8601 birthday string to "D Month YYYY"."""
if not birthday:
return ""
try:
dt = datetime.date.fromisoformat(birthday.split("T")[0])
return f"{dt.day} {dt.strftime('%B %Y')}"
except (ValueError, AttributeError):
return ""
def _build_character_description(details: dict) -> str:
"""
Builds a Kavita-safe HTML description for a MAL character.
Builds a Kavita-safe HTML description for a MAL / AniList character.
Top line: "Favorites: N" as a link to the character's MAL page.
Top line: "Favorites: N" linked to the character page (when available).
Remainder: the character's `about` text converted to HTML paragraphs.
"""
parts: list[str] = []
url = details.get("url") or ""
favorites = details.get("favorites")
if url and favorites is not None:
parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>')
parts.append(f'<p><a href="{url}" target="_blank">'
f'Favorites: {favorites:,}</a></p>')
about = (details.get("about") or "").strip()
if about:
parts.append(_plain_to_html(about))
parts.append(paragraphs_to_html(about))
return "<br>".join(parts)
def _build_person_description(details: dict) -> str:
    """
    Builds a Kavita-safe HTML description for a MAL person (mangaka / staff).

    The output is a summary table (given name, family name, birthday,
    website, member favorites — each row only when its value is present)
    followed by the `about` biography as HTML paragraphs.  Table and
    biography are joined with "<br>".

    details : dict read for the keys `given_name`, `family_name`,
              `birthday`, `favorites`, `website_url`, `url` and `about`.
    """
    label_cell_style = 'style="padding-right:1.5em"'

    given_name = (details.get("given_name") or "").strip()
    family_name = (details.get("family_name") or "").strip()
    raw_birthday = details.get("birthday") or ""
    favorite_count = details.get("favorites")
    website = (details.get("website_url") or "").strip()
    profile_url = (details.get("url") or "").strip()

    table_rows: list[str] = []
    if given_name:
        table_rows.append(
            f"<tr><td {label_cell_style}>Given name</td><td>{given_name}</td></tr>")
    if family_name:
        table_rows.append(
            f"<tr><td {label_cell_style}>Family name</td><td>{family_name}</td></tr>")

    birthday_text = _format_birthday(raw_birthday)
    if birthday_text:
        table_rows.append(
            f"<tr><td {label_cell_style}>Birthday</td><td>{birthday_text}</td></tr>")

    if website:
        table_rows.append(
            f'<tr><td {label_cell_style}>Website</td>'
            f'<td><a href="{website}">{website}</a></td></tr>'
        )

    if favorite_count is not None:
        # The count links to the person's page when a URL is known.
        if profile_url:
            favorites_html = (f'<a href="{profile_url}" target="_blank">'
                              f'{favorite_count:,}</a>')
        else:
            favorites_html = f"{favorite_count:,}"
        table_rows.append(
            f"<tr><td {label_cell_style}>Member Favorites</td>"
            f"<td>{favorites_html}</td></tr>")

    sections: list[str] = []
    if table_rows:
        sections.append(f'<table>{"".join(table_rows)}</table>')

    biography = (details.get("about") or "").strip()
    if biography:
        sections.append(_plain_to_html(biography))
    return "<br>".join(sections)
# --------------------------------------------------------------------------
# Module helper
# --------------------------------------------------------------------------
def _short_body(resp: requests.Response, limit: int = 400) -> str:
"""Returns the response body trimmed to `limit` chars for error logging."""
try:
text = resp.text or ""
except Exception:
return "<unreadable response body>"
text = text.strip().replace("\n", " ").replace("\r", " ")
if len(text) > limit:
text = text[:limit] + ""
return text or "<empty body>"
# --------------------------------------------------------------------------
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
    # Manual smoke run: syncs all persons once and prints the report.
    # Connection settings come from the environment (KAVITA_URL,
    # KAVITA_API_KEY) — never hard-code an API key in the source.
    import os

    client = KavitaClient(os.environ["KAVITA_URL"],
                          os.environ["KAVITA_API_KEY"])
    updater = KavitaPersonUpdater(client)
    report = updater.update_all_persons(trigger="cron")
    print(report)
    # Surface the collected non-fatal errors one per line.
    for err in report["errors"]:
        print(" ", err)
+17 -15
View File
@@ -30,12 +30,12 @@ Dependencies
from __future__ import annotations
import datetime
import difflib
import time
import requests
from MediaResolver import MediaResolver
from TextUtils import best_similarity
class MALResolver(MediaResolver):
@@ -57,12 +57,21 @@ class MALResolver(MediaResolver):
cls._instance._initialized = False
return cls._instance
def __init__(self, *, request_timeout: int = 30):
def __init__(self, *, request_timeout: int = 30,
search_type: str = "manga"):
"""
search_type : Jikan `type` filter for title searches — "manga" for
the manga container, "lightnovel" for the LN container.
Only the FIRST construction in the process sets it
(singleton); construct the resolver with the correct
type in the entry point / orchestrator.
"""
if self._initialized:
return
self.JIKAN_BASE = "https://api.jikan.moe/v4"
self.request_timeout = request_timeout
self.search_type = search_type
self._session = requests.Session()
self._session.headers.setdefault("User-Agent", "MALResolver/1.0")
@@ -106,7 +115,7 @@ class MALResolver(MediaResolver):
try:
data = self._get(f"{self.JIKAN_BASE}/manga",
{"q": title, "limit": 5, "type": "manga"})
{"q": title, "limit": 5, "type": self.search_type})
results = data.get("data") or []
except requests.RequestException:
return None
@@ -404,19 +413,12 @@ def _clean_mal_name(name: str) -> str:
def _score_title(query: str, entry: dict) -> float:
    """
    Returns the best title-similarity score for a Jikan manga entry.

    The query is compared against the entry's `title`, `title_english`,
    `title_japanese` and every `title` found in its `titles` list; the
    comparison itself is delegated to `best_similarity`.

    query : title that was searched for.
    entry : one result dict from the Jikan response.
    """
    # Missing titles are passed through as None and left for
    # best_similarity to ignore.
    candidates = [
        entry.get("title"),
        entry.get("title_english"),
        entry.get("title_japanese"),
    ]
    candidates += [alt.get("title") for alt in (entry.get("titles") or [])]
    return best_similarity(query, candidates)
# --------------------------------------------------------------------------
+31 -36
View File
@@ -119,26 +119,18 @@ class MangaBakaWorksResolver:
# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------
def get_works(self, series_id: str) -> list[dict]:
def _fetch_all_pages(self, endpoint: str) -> list[dict]:
"""
Returns volume-level works for a series, filtered to those that have
a usable cover image. Results are cached per series.
Pages through the API (limit=50) until the response returns an empty
page, collecting all works before applying the cover filter.
Pages through a MangaBaka list endpoint (limit=50 per page) and
returns all collected `data` items. Network errors end the
pagination early; items fetched so far are returned.
"""
if not series_id:
return []
if series_id in self._cache:
return self._cache[series_id]
all_works: list[dict] = []
items: list[dict] = []
page = 1
try:
while True:
resp = self._session.get(
f"{self.api_base_url}/series/{series_id}/works",
f"{self.api_base_url}/series/{endpoint}",
params={"limit": 50, "page": page},
timeout=self.request_timeout,
)
@@ -146,13 +138,31 @@ class MangaBakaWorksResolver:
page_data = resp.json().get("data") or []
if not page_data:
break
all_works.extend(page_data)
items.extend(page_data)
if len(page_data) < 50:
break
page += 1
except requests.RequestException:
if not all_works:
return []
pass
return items
def get_works(self, series_id: str) -> list[dict]:
"""
Returns volume-level works for a series, filtered to those that have
a usable cover image.
Results are cached per series — including empty results, so a series
without works is not re-paginated for every chapter of a move run.
The periodic cover updater calls clear_cache() before each scan, so
works added on MangaBaka later are still picked up there.
"""
if not series_id:
return []
if series_id in self._cache:
return self._cache[series_id]
all_works = self._fetch_all_pages(f"{series_id}/works")
# Discard works that carry no usable cover
works_with_cover = [w for w in all_works if w.get("images")]
@@ -190,25 +200,7 @@ class MangaBakaWorksResolver:
if series_id in self._images_cache:
return self._images_cache[series_id]
raw_items: list[dict] = []
page = 1
try:
while True:
resp = self._session.get(
f"{self.api_base_url}/series/{series_id}/images",
params={"limit": 50, "page": page},
timeout=self.request_timeout,
)
resp.raise_for_status()
page_data = resp.json().get("data") or []
if not page_data:
break
raw_items.extend(page_data)
if len(page_data) < 50:
break
page += 1
except requests.RequestException:
pass
raw_items = self._fetch_all_pages(f"{series_id}/images")
# Group by normalised volume index; collect all languages per volume.
by_volume: dict[str, dict[str, str]] = {} # norm_vol -> {lang: url}
@@ -236,6 +228,9 @@ class MangaBakaWorksResolver:
if url:
result[norm] = url
# Cache even an empty result so a series without volume images is not
# re-paginated for every chapter. The periodic cover updater clears
# this cache before each scan, so newly added images are still found.
self._images_cache[series_id] = result
return result
+254
View File
@@ -0,0 +1,254 @@
"""
perf_stats.py
=============
Generic run/step performance profiler with JSON persistence, shared by the
move pipeline and the periodic updaters (volume/cover, persons).
Each run is a tree of *items* (e.g. series -> chapter, or one person) and
every item carries named *step* timings. A run also carries free-form
``meta`` (e.g. the trigger source ``"cron" | "web" | "ln"`` for the person
updater).
Data model (one entry per run, newest first)::
{
"runs": [
{
"runId": "",
"startedAt": 1700000000,
"finishedAt": 1700000123,
"totalSeconds": 123.4,
"meta": {"trigger": "cron"},
"itemCount": 2, # top-level items
"leafCount": 31, # items without children
"stepTotals": {"cover": 41.2, "image_dimensions": 55.8, ...},
"items": [
{"label": "Call of the Night", "totalSeconds": 60.2, "ok": true,
"steps": {"fetch_metadata": 1.2},
"items": [
{"label": "1", "totalSeconds": 11.5, "ok": true,
"steps": {"cover": 1.8, "pack_cbz": 2.9}, "items": []}
]}
]
}
]
}
Usage::
perf = PerfStats(path) # path=None -> disabled
run = perf.begin_run(meta={"trigger": "cron"})
item = run.begin_item("Call of the Night")
with item.measure("fetch_metadata"):
...
chap = item.begin_item("1")
with chap.measure("pack_cbz"):
...
chap.finish()
item.finish() # flushes the run to disk
run.finish()
When ``path`` is None every recorder is a no-op and nothing is written, so
the profiler can be left permanently wired in at negligible cost. The run
is flushed after every top-level item finishes, so a long run is observable
live and survives a crash mid-run.
"""
from __future__ import annotations
import json
import threading
import time
import uuid
from contextlib import contextmanager
from pathlib import Path
# Keep the JSON small: only the most recent runs are retained on disk.
_MAX_RUNS = 30
class _StepTimer:
"""
Base recorder: accumulates ``{step_name: seconds}`` and tracks its own
wall-clock lifetime. ``enabled=False`` turns every method into a no-op.
"""
def __init__(self, enabled: bool = True):
self.steps: dict[str, float] = {}
self._enabled = enabled
self._t0 = time.monotonic()
@contextmanager
def measure(self, name: str):
"""Context manager timing a named step (accumulates on repeat use)."""
if not self._enabled:
yield
return
start = time.monotonic()
try:
yield
finally:
self.steps[name] = round(
self.steps.get(name, 0.0) + (time.monotonic() - start), 4)
def elapsed(self) -> float:
return round(time.monotonic() - self._t0, 4)
class ItemRecorder(_StepTimer):
    """
    One node of a run's item tree.

    Carries its own step timings (inherited from `_StepTimer`) and collects
    the finished nodes of its child items, e.g. a series item holding
    chapter items.
    """

    def __init__(self, run: "RunRecorder", label: str, *,
                 parent: "ItemRecorder | None" = None,
                 enabled: bool = True):
        super().__init__(enabled)
        self._run = run
        self._parent = parent
        self._label = label
        # Finished child nodes (plain dicts), in the order they finished.
        self._children: list[dict] = []

    def begin_item(self, label: str) -> "ItemRecorder":
        """Starts a child item that inherits this item's run and enabled flag."""
        child = ItemRecorder(self._run, label, parent=self,
                             enabled=self._enabled)
        return child

    def finish(self, *, ok: bool = True) -> None:
        """
        Closes the item and hands its node to the parent.

        A nested item is appended to its parent's children.  A top-level
        item is appended to the run, which is then flushed so progress is
        persisted.  Does nothing when the recorder is disabled.
        """
        if not self._enabled:
            return
        node = {
            "label": self._label,
            "totalSeconds": self.elapsed(),
            "ok": ok,
            "steps": self.steps,
            "items": self._children,
        }
        if self._parent is None:
            # Top-level item: attach to the run and persist progress.
            self._run._items.append(node)
            self._run.flush()
            return
        self._parent._children.append(node)
class RunRecorder:
    """
    Top-level recorder for one full run.

    Holds the finished top-level item nodes and turns them into the run
    dict that `PerfStats` persists.
    """

    def __init__(self, stats: "PerfStats", meta: "dict | None" = None,
                 enabled: bool = True):
        self._stats = stats
        self._enabled = enabled
        self._meta = meta or {}
        self._items: list[dict] = []
        self._run_id = uuid.uuid4().hex
        # Wall-clock start for reporting, monotonic start for the duration.
        self._started = time.time()
        self._t0 = time.monotonic()

    def begin_item(self, label: str) -> "ItemRecorder":
        """Starts a top-level item of this run."""
        return ItemRecorder(self, label, parent=None, enabled=self._enabled)

    def _snapshot(self) -> dict:
        """
        Builds the run dict from the items finished so far.

        Step seconds are summed over every node of the tree
        (``stepTotals``); nodes without children are counted as leaves.
        """
        totals: dict[str, float] = {}
        leaves = 0
        # Explicit stack, children pushed reversed: nodes are visited in
        # pre-order, which fixes the key order of the step totals.
        pending = list(reversed(self._items))
        while pending:
            node = pending.pop()
            for step_name, seconds in node["steps"].items():
                totals[step_name] = round(
                    totals.get(step_name, 0.0) + seconds, 4)
            children = node["items"]
            if children:
                pending.extend(reversed(children))
            else:
                leaves += 1

        return {
            "runId": self._run_id,
            "startedAt": round(self._started),
            "finishedAt": round(time.time()),
            "totalSeconds": round(time.monotonic() - self._t0, 4),
            "meta": self._meta,
            "itemCount": len(self._items),
            "leafCount": leaves,
            "stepTotals": totals,
            "items": self._items,
        }

    def flush(self) -> "dict | None":
        """
        Writes the run's current state to disk (upsert by runId).

        Returns the written run dict, or None when the recorder is disabled.
        """
        if not self._enabled:
            return None
        snapshot = self._snapshot()
        self._stats._upsert_run(snapshot)
        return snapshot

    def finish(self) -> "dict | None":
        """Persists the final run state. Returns the run dict."""
        return self.flush()
class PerfStats:
    """
    Profiler facade + JSON persistence.

    Parameters
    ----------
    path : Destination JSON file.  None disables the profiler entirely
           (every recorder becomes a no-op and nothing is written).
    """

    def __init__(self, path=None):
        self._path = Path(path) if path else None
        # Serialises the read-modify-write cycle in _upsert_run.
        self._lock = threading.Lock()

    @property
    def enabled(self) -> bool:
        """True when a destination path was configured."""
        return self._path is not None

    def begin_run(self, meta: "dict | None" = None) -> "RunRecorder":
        """Starts a new run; `meta` is stored verbatim on the run."""
        return RunRecorder(self, meta=meta, enabled=self.enabled)

    # ------------------------------------------------------------------
    # Read / write
    # ------------------------------------------------------------------

    def all(self) -> dict:
        """
        Returns the persisted runs ({"runs": [...]}); newest first.

        A missing, unreadable, undecodable or structurally invalid file
        yields {"runs": []} instead of raising.
        """
        if not self._path or not self._path.is_file():
            return {"runs": []}
        try:
            with self._path.open("r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and also the
            # UnicodeDecodeError raised for a file that is not valid UTF-8.
            return {"runs": []}
        if not isinstance(data, dict) or not isinstance(data.get("runs"), list):
            return {"runs": []}
        return data

    def _upsert_run(self, run: dict) -> None:
        """
        Inserts a new run (newest first) or replaces the existing entry with
        the same runId — so incremental flushes during a run update one entry
        rather than appending a duplicate after every item.

        The file is written to a temporary sibling and then moved over the
        destination.  Only the newest `_MAX_RUNS` runs are kept.
        """
        if not self._path:
            return
        with self._lock:
            runs = self.all()["runs"]
            run_id = run.get("runId")
            for i, existing in enumerate(runs):
                # Entries that are not dicts (hand-edited file) cannot match.
                if isinstance(existing, dict) and existing.get("runId") == run_id:
                    runs[i] = run
                    break
            else:
                runs.insert(0, run)          # newest first
                del runs[_MAX_RUNS:]         # cap history

            self._path.parent.mkdir(parents=True, exist_ok=True)
            tmp = self._path.with_suffix(self._path.suffix + ".tmp")
            with tmp.open("w", encoding="utf-8") as f:
                json.dump({"runs": runs}, f, ensure_ascii=False, indent=2)
            tmp.replace(self._path)
+160
View File
@@ -0,0 +1,160 @@
"""
perf_web_page.py
================
Shared HTML page for browsing PerfStats output, used by both container web
UIs. ``render_perf_page(name, tabs)`` returns a standalone page that loads
``/api/perf/<name>`` and renders each run's step totals plus the nested item
tree (series -> chapter, or one person, …) and the run trigger from meta.
``tabs`` is a list of ``(label, name)`` pairs for cross-links between the
available perf datasets in that container.
"""
from __future__ import annotations
_PERF_PAGE = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>__PERF_NAME__ performance</title>
<style>
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
h2 { font-size: 1.05rem; margin: 1.4rem 0 .5rem; color:#cbd5e1; }
a { color:#60a5fa; text-decoration:none; }
a:hover { text-decoration:underline; }
.tabs { margin-bottom:1rem; }
.tabs a { margin-right:1rem; }
.tabs a.active { font-weight:bold; text-decoration:underline; }
.bar { display:flex; gap:.6rem; align-items:center; margin-bottom:1rem; flex-wrap:wrap; }
select, button { padding:.35rem .6rem; background:#222; color:#eee; border:1px solid #555; }
.summary { color:#9ca3af; margin:.3rem 0 1rem; }
table { border-collapse: collapse; width: 100%; margin-bottom:.5rem; }
th, td { border: 1px solid #333; padding: .35rem .6rem; text-align: left; }
th { background:#1d1d1d; }
td.num { text-align:right; font-variant-numeric: tabular-nums; white-space:nowrap; }
.barcell { position:relative; }
.barfill { position:absolute; left:0; top:0; bottom:0; background:#2563eb33; z-index:0; }
.barcell span { position:relative; z-index:1; }
details { margin:.2rem 0 .2rem 1rem; }
summary { cursor:pointer; padding:.2rem 0; }
.chip { color:#9ca3af; font-size:.85rem; }
.err { color:#f87171; }
</style>
</head>
<body>
<h1>Performance: __PERF_NAME__ <a href="/" style="font-size:.9rem;">&#9666; back</a></h1>
<div class="tabs">__PERF_TABS__</div>
<div class="bar">
<label>Run: <select id="runSelect"></select></label>
<button id="reload">Reload</button>
<span class="summary" id="summary"></span>
</div>
<div id="content"></div>
<script>
const PERF_NAME = "__PERF_NAME__";
let runs = [];
for (const a of document.querySelectorAll(".tabs a")) {
if (a.getAttribute("href") === "/perf/" + PERF_NAME) a.classList.add("active");
}
function fmtSecs(s) { return (s || 0).toFixed(2) + "s"; }
function fmtTime(unix) { return unix ? new Date(unix * 1000).toLocaleString() : ""; }
function esc(s) {
return String(s).replace(/[&<>]/g, c => ({"&":"&amp;","<":"&lt;",">":"&gt;"}[c]));
}
function stepTable(totals, grandTotal) {
const entries = Object.entries(totals || {}).sort((a, b) => b[1] - a[1]);
if (!entries.length) return "<p class=chip>(no steps recorded)</p>";
const max = entries[0][1] || 1;
let rows = "";
for (const [name, secs] of entries) {
const pct = grandTotal ? (secs / grandTotal * 100) : 0;
const w = (secs / max * 100);
rows += "<tr><td>" + esc(name) + "</td>"
+ "<td class='num'>" + fmtSecs(secs) + "</td>"
+ "<td class='num'>" + pct.toFixed(1) + "%</td>"
+ "<td class='barcell'><div class='barfill' style='width:" + w + "%'></div>"
+ "<span>&nbsp;</span></td></tr>";
}
return "<table><thead><tr><th>Step</th><th class=num>Total</th>"
+ "<th class=num>% of run</th><th>&nbsp;</th></tr></thead><tbody>"
+ rows + "</tbody></table>";
}
// Renders one item node (and its children) as a nested <details> block.
function itemNode(it) {
const steps = Object.entries(it.steps || {}).sort((a, b) => b[1] - a[1])
.map(([n, v]) => esc(n) + " " + fmtSecs(v)).join(", ") || "";
const head = "<summary><b>" + esc(it.label) + "</b>"
+ (it.ok === false ? " <span class=err>(failed)</span>" : "")
+ " <span class=chip>" + fmtSecs(it.totalSeconds) + " · " + steps + "</span></summary>";
const kids = (it.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
const body = kids.map(itemNode).join("");
return "<details>" + head + body + "</details>";
}
function renderRun(run) {
const c = document.getElementById("content");
if (!run) { c.innerHTML = "<p class=chip>No runs recorded yet.</p>"; return; }
const trigger = (run.meta && run.meta.trigger) ? " · trigger: " + run.meta.trigger : "";
document.getElementById("summary").textContent =
fmtTime(run.startedAt) + " · " + fmtSecs(run.totalSeconds) + " · "
+ run.itemCount + " items · " + run.leafCount + " leaves" + trigger;
let html = "<h2>Steps (summed over all items)</h2>"
+ stepTable(run.stepTotals, run.totalSeconds)
+ "<h2>Detail</h2>";
const items = (run.items || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
html += items.map(itemNode).join("") || "<p class=chip>(no items)</p>";
c.innerHTML = html;
}
function renderSelect() {
const sel = document.getElementById("runSelect");
sel.innerHTML = "";
runs.forEach((r, i) => {
const o = document.createElement("option");
o.value = i;
const trig = (r.meta && r.meta.trigger) ? " " + r.meta.trigger : "";
o.textContent = fmtTime(r.startedAt) + " (" + fmtSecs(r.totalSeconds) + ")" + trig;
sel.appendChild(o);
});
}
async function load() {
const r = await fetch("/api/perf/" + PERF_NAME);
const data = await r.json();
runs = data.runs || [];
renderSelect();
renderRun(runs[0]);
}
document.getElementById("runSelect").addEventListener("change", e => {
renderRun(runs[e.target.value]);
});
document.getElementById("reload").addEventListener("click", load);
load();
</script>
</body>
</html>
"""
def render_perf_page(name: str, tabs: "list[tuple[str, str]]") -> str:
    """
    Returns the perf page HTML for dataset ``name``.

    name : dataset name; substituted for every ``__PERF_NAME__`` placeholder
           of the page template.
    tabs : list of (label, dataset_name) for the cross-link bar; each pair
           becomes a link to ``/perf/<dataset_name>``.

    All substituted values are HTML-escaped (including quotes), so a name
    or label containing markup cannot break out of the page.  Values made
    of plain letters, digits, ``_`` and ``-`` are inserted unchanged.
    """
    # Local import keeps this function self-contained.
    from html import escape

    safe_name = escape(str(name), quote=True)
    tab_html = " ".join(
        f'<a href="/perf/{escape(str(n), quote=True)}">'
        f'{escape(str(label), quote=True)}</a>'
        for label, n in tabs)
    return (_PERF_PAGE
            .replace("__PERF_TABS__", tab_html)
            .replace("__PERF_NAME__", safe_name))
+99
View File
@@ -0,0 +1,99 @@
"""
text_utils.py
=============
Small text helpers shared across modules:
* ``paragraphs_to_html`` — converts plain text with blank-line paragraph
breaks into compact HTML (used for Kavita summary / description fields,
which must not contain raw newlines).
* ``best_similarity`` — best difflib ratio between a query string and a
list of candidate strings (used for title / person-name matching).
"""
from __future__ import annotations
import difflib
import re
from typing import Iterable
def paragraphs_to_html(text: str) -> str:
    """
    Converts plain text with paragraph breaks to compact HTML.

    Runs of two or more newlines separate paragraphs (each wrapped in
    ``<p>``); a single newline inside a paragraph becomes ``<br>``.
    CRLF and bare-CR line endings are normalised to LF first, so the
    result contains no raw CR or LF.  The text is not HTML-escaped.

    Returns "" for empty or whitespace-only input.
    """
    if not text:
        return ""
    # Without normalisation "a\r\n\r\nb" contains no "\n\n" run: it would
    # stay one paragraph and keep its raw "\r" characters.
    unified = text.replace("\r\n", "\n").replace("\r", "\n")
    paragraphs = (chunk.strip()
                  for chunk in re.split(r"\n{2,}", unified.strip()))
    return "".join(
        f"<p>{para.replace(chr(10), '<br>')}</p>"
        for para in paragraphs
        if para
    )
def best_similarity(query: str, candidates: Iterable[str]) -> float:
    """
    Returns the highest case-insensitive difflib ratio (0..1) between
    `query` and the candidates.

    Falsy candidates (None, "") are ignored; the others are converted with
    ``str()`` before comparison.  A missing query is treated as "".
    Returns 0.0 when no candidate is left to compare.
    """
    needle = (query or "").lower()
    ratios = (
        difflib.SequenceMatcher(None, needle, str(option).lower()).ratio()
        for option in candidates
        if option
    )
    return max(ratios, default=0.0)
def person_name_with_id(name: str, *,
                        mal_id: "int | None" = None,
                        al_id: "int | None" = None) -> str:
    """
    Disambiguates a character name with its tracker id: "Rem (MAL 118737)".

    Kavita Person records are global and keyed by name only, so two
    different characters who share a name would collapse into one record.
    Suffixing the tracker *character* id keeps them apart while still
    sharing the record across the manga and light-novel version of the
    same series (MAL/AniList character ids are per character, not per
    medium).

    The MAL id wins when both ids are given; AniList ids get an "AL"
    marker so the two id spaces cannot collide.  Without a usable id the
    stripped name is returned as-is, and an empty name stays empty.

    The format must stay in sync with the manga project so both tools
    address the same Kavita person records.
    """
    cleaned = (name or "").strip()
    if cleaned:
        # First tracker with a truthy id decides the suffix.
        for marker, tracker_id in (("MAL", mal_id), ("AL", al_id)):
            if tracker_id:
                return f"{cleaned} ({marker} {tracker_id})"
    return cleaned
# Recognises the "(MAL <id>)" / "(AL <id>)" suffix written by
# person_name_with_id at the very end of a name (marker case-insensitive).
_TRACKER_ID_RE = re.compile(r"\s*\((MAL|AL)\s+(\d+)\)\s*$", re.IGNORECASE)


def parse_person_tracker_id(name: str) -> "tuple[str, int] | None":
    """
    Inverse of person_name_with_id: reads the tracker id back out of a
    disambiguated Kavita person name.

        "Rem (MAL 118737)"   -> ("mal", 118737)
        "Subaru (AL 88311)"  -> ("al", 88311)
        "Kotoyama"           -> None  (no id suffix — e.g. an author/staff record)

    Returns ("mal" | "al", id), or None when the name is empty or carries
    no id suffix.
    """
    found = _TRACKER_ID_RE.search(name) if name else None
    if found is None:
        return None
    marker, digits = found.groups()
    try:
        tracker_id = int(digits)
    except ValueError:
        return None
    tracker = "mal" if marker.upper() == "MAL" else "al"
    return tracker, tracker_id
+313
View File
@@ -0,0 +1,313 @@
"""
kavita_series_updater.py
========================
Diff-based update of a single Kavita series record from a
LightNovelMetadataBuilder output dict.
Behaviour
---------
* Locked fields in Kavita (``*Locked`` flags) are never touched, no matter
what MangaBaka returns.
* Scalar fields (summary, releaseYear, ageRating, publicationStatus,
language, score, sortName, localizedName) are overwritten when the
newly-built value differs from the value currently stored in Kavita.
* List fields (genres, tags, characters, writers, coverArtists,
publishers, imprints) are diff-merged: a name appearing in the new
set but not in the current one is added (id=0 so Kavita creates the
record); a name that is in Kavita but no longer in the new set is
dropped. Comparison is case-insensitive on the ``name`` field.
* Web links are stored as a comma-separated string in Kavita; this
updater treats them as a set and re-joins on write.
* Series-level cover image (URL different from last time) is re-uploaded
whenever ``coverImageLocked`` is False. The MangaBaka cover URL is
stamped onto matches.json as ``imageUrl`` so a subsequent run can skip
the upload when nothing changed.
Returns a small diff report ({field: 'changed' / 'unchanged' / 'skipped' /
'locked'}, or 'error: …' for a failed cover upload) per series so the
WebApp can surface what happened.
"""
from __future__ import annotations
from typing import Iterable
from KavitaClient import KavitaClient
# Maps Kavita "list" fields on SeriesMetadataDto to (lock_flag, item_key).
# `item_key` is the dict key Kavita uses for the display name on each item:
# GenreTagDto / TagDto use "title", PersonDto uses "name".
_LIST_FIELDS: list[tuple[str, str, str]] = [
("genres", "genresLocked", "title"),
("tags", "tagsLocked", "title"),
("characters", "characterLocked", "name"),
("writers", "writerLocked", "name"),
("coverArtists", "coverArtistLocked", "name"),
("publishers", "publisherLocked", "name"),
("imprints", "imprintLocked", "name"),
]
def _norm(name: str) -> str:
return (name or "").strip().lower()
def _merge_list(
current: list[dict],
new_names: Iterable[str],
item_key: str,
) -> "tuple[list[dict], bool]":
"""
Diff-merges a Kavita list field with the canonical name list from
MangaBaka. Returns (merged_list, changed_flag).
`item_key` is the dict key Kavita uses for the display name on each
item ("title" for GenreTagDto/TagDto, "name" for PersonDto).
* Items in `current` whose display value appears in `new_names` are
kept verbatim so existing ids and ancillary fields survive.
* New names (no matching entry in `current`) are appended with
``{"id": 0, <item_key>: <name>}`` — Kavita creates the record on save.
* Items in `current` whose display value is *not* in `new_names` are
dropped.
"""
new_set = [n for n in new_names if n and n.strip()]
new_index = {_norm(n): n.strip() for n in new_set}
merged: list[dict] = []
kept_keys: set[str] = set()
for item in (current or []):
key = _norm(item.get(item_key))
if key in new_index:
merged.append(item)
kept_keys.add(key)
added = False
for key, display in new_index.items():
if key not in kept_keys:
merged.append({"id": 0, item_key: display})
added = True
removed = len(current or []) != len(kept_keys)
return merged, added or removed
def _parse_web_links(value) -> list[str]:
if not value:
return []
if isinstance(value, list):
return [str(v).strip() for v in value if v]
return [p.strip() for p in str(value).split(",") if p.strip()]
def _merge_web_links(current_str, new_links: list[str]) -> "tuple[str, bool]":
current = _parse_web_links(current_str)
new_norm = [l for l in new_links if l]
if not new_norm:
return ",".join(current), False
# Mirror MangaBaka's set: keep order from new_norm, then anything from
# current that's still in new_norm (already covered above). Anything
# in current that's not in new_norm is dropped.
new_set = set(new_norm)
merged = list(new_norm)
changed = sorted(new_set) != sorted(set(current))
return ",".join(merged), changed
class KavitaSeriesUpdater:
    """
    Applies a freshly built metadata dict to one Kavita series.

    Only values that differ are written, and a field whose lock flag is
    set in Kavita is never modified.  All Kavita access goes through the
    client passed to the constructor.
    """

    def __init__(self, client: "KavitaClient"):
        self._client = client

    # ------------------------------------------------------------------
    # Public
    # ------------------------------------------------------------------

    def update_series(self, series_id: int, built: dict, *,
                      previous_cover_url: "str | None" = None) -> dict:
        """
        Applies the diff between Kavita's current state for `series_id`
        and the freshly-built MangaBaka dict.

        series_id          : Kavita series to update.
        built              : freshly built metadata dict.
        previous_cover_url : cover URL uploaded by the previous run; an
                             identical `coverUrl` is not uploaded again.

        Returns a report dict mapping each handled field to its outcome.
        """
        client = self._client
        series = client.get_series(series_id)
        metadata = client.get_series_metadata(series_id)
        report: dict = {}

        # Each DTO is written back only when its diff found a change.
        if self._diff_metadata(metadata, built, report):
            client.update_series_metadata(metadata)
        if self._diff_series(series, built, report):
            client.update_series(series)

        # Cover: only re-upload when not locked AND the URL actually changed.
        cover_url = built.get("coverUrl")
        cover_locked = series.get("coverImageLocked")
        if cover_url and not cover_locked and cover_url != previous_cover_url:
            try:
                client.upload_series_cover(series_id, cover_url)
            except Exception as exc:
                # Best effort: a failed upload is reported, not raised.
                report["coverImage"] = f"error: {exc}"
            else:
                report["coverImage"] = "changed"
        elif cover_locked:
            report["coverImage"] = "locked"
        else:
            report["coverImage"] = "skipped"

        return report

    # ------------------------------------------------------------------
    # Internal: SeriesMetadataDto
    # ------------------------------------------------------------------

    @staticmethod
    def _apply_scalar(metadata: dict, meta_key: str, lock_key: str,
                      value, cast, zero_is_empty: bool) -> str:
        """
        Writes one scalar into `metadata` when allowed and different.

        Returns the outcome: "skipped" (no usable new value), "locked",
        "unchanged" or "changed".
        """
        if value is None or value == "":
            return "skipped"
        if cast is not None:
            try:
                value = cast(value)
            except (TypeError, ValueError):
                return "skipped"
        if zero_is_empty and value == 0:
            return "skipped"
        if metadata.get(lock_key):
            return "locked"
        if metadata.get(meta_key) == value:
            return "unchanged"
        metadata[meta_key] = value
        return "changed"

    def _diff_metadata(self, metadata: dict, built: dict,
                       report: dict) -> bool:
        """
        Updates `metadata` in place from `built` (scalars, web links, list
        fields) and records each field's outcome in `report`.

        Returns True when at least one value was modified.
        """
        dirty = False

        # ----- Scalars ------------------------------------------------
        # (built_key, metadata_key, locked_key, transform, skip_when_zero)
        # `skip_when_zero` covers fields where 0 means "no data" rather
        # than a real value (releaseYear, ageRating).  publicationStatus 0
        # is a valid "Ongoing" status — never skip it.
        scalar_rules = (
            ("summary",           "summary",           "summaryLocked",           None, False),
            ("releaseYear",       "releaseYear",       "releaseYearLocked",       int,  True),
            ("ageRating",         "ageRating",         "ageRatingLocked",         int,  True),
            ("publicationStatus", "publicationStatus", "publicationStatusLocked", int,  False),
            ("language",          "language",          "languageLocked",          None, False),
        )
        for built_key, meta_key, lock_key, cast, zero_is_empty in scalar_rules:
            outcome = self._apply_scalar(
                metadata, meta_key, lock_key,
                built.get(built_key), cast, zero_is_empty)
            report[meta_key] = outcome
            if outcome == "changed":
                dirty = True

        # ----- Web links (single comma-separated string) ---------------
        # SeriesMetadataDto has no dedicated lock for webLinks — always update.
        links, links_differ = _merge_web_links(
            metadata.get("webLinks"), built.get("webLinks") or [])
        if links_differ:
            metadata["webLinks"] = links
            dirty = True
        report["webLinks"] = "changed" if links_differ else "unchanged"

        # ----- List fields --------------------------------------------
        desired = {
            key: built.get(key)
            for key in ("genres", "tags", "characters", "writers",
                        "coverArtists", "publishers")
        }
        # The built dict carries a single imprint; Kavita stores a list.
        imprint = built.get("imprint")
        desired["imprints"] = [imprint] if imprint else []

        for meta_key, lock_key, item_key in _LIST_FIELDS:
            names = desired.get(meta_key) or []
            if metadata.get(lock_key):
                report[meta_key] = "locked"
                continue
            present = metadata.get(meta_key) or []
            if not names and not present:
                report[meta_key] = "unchanged"
                continue
            merged, differs = _merge_list(present, names, item_key)
            if differs:
                metadata[meta_key] = merged
                dirty = True
                report[meta_key] = "changed"
            else:
                report[meta_key] = "unchanged"

        return dirty

    # ------------------------------------------------------------------
    # Internal: SeriesDto  (sortName, userRating, tracker ids)
    # ------------------------------------------------------------------

    @staticmethod
    def _to_float(value) -> "float | None":
        """Returns `value` as float, or None when missing / not numeric."""
        if value is None:
            return None
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    def _diff_series(self, series: dict, built: dict, report: dict) -> bool:
        """
        Updates `series` in place from `built` (sortName, localizedName,
        tracker ids, userRating) and records the outcomes in `report`.

        Returns True when at least one value was modified.
        """
        dirty = False

        # sortName / localizedName: same key in both dicts, each with its
        # own lock flag.  An empty new value never overwrites.
        for field, lock_key in (("sortName", "sortNameLocked"),
                                ("localizedName", "localizedNameLocked")):
            if series.get(lock_key):
                report[field] = "locked"
                continue
            wanted = built.get(field) or ""
            if wanted and series.get(field) != wanted:
                series[field] = wanted
                dirty = True
                report[field] = "changed"
            else:
                report[field] = "unchanged"

        # Tracker ids — Kavita exposes malId, aniListId, mangaBakaId.
        # Missing or non-numeric ids are ignored without a report entry.
        tracker_keys = (
            ("malId",       "malId"),
            ("anilistId",   "aniListId"),
            ("mangabakaId", "mangaBakaId"),
        )
        for built_key, series_key in tracker_keys:
            raw_id = built.get(built_key)
            if raw_id in (None, "", 0):
                continue
            try:
                tracker_id = int(raw_id)
            except (TypeError, ValueError):
                continue
            if int(series.get(series_key) or 0) != tracker_id:
                series[series_key] = tracker_id
                dirty = True
                report[series_key] = "changed"

        # userRating from MangaBaka (0..5)
        wanted_score = self._to_float(built.get("score"))
        if wanted_score is not None:
            if self._to_float(series.get("userRating")) != wanted_score:
                series["userRating"] = wanted_score
                series["hasUserRated"] = True
                dirty = True
                report["userRating"] = "changed"
            else:
                report["userRating"] = "unchanged"

        return dirty
+571
View File
@@ -0,0 +1,571 @@
"""
light_novel_metadata_builder.py
===============================
Fetches series-level metadata for a light novel from MangaBaka, enriches
it with MyAnimeList / AniList tracker statistics and character data, and
returns a structured dict ready to be diffed against Kavita's
SeriesMetadataDto.
Differences vs. the manga project's ComicInfoBuilder:
- No chapter / page handling — Kavita reads volumes from the files.
- No XML output — produces a plain dict.
- No MangaDex resolver — light novels don't have a chapter→volume
mapping problem.
- MangaBaka search type is fixed to ``novel`` so only light/web novels
are returned.
"""
from __future__ import annotations
import re
import requests
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from MALResolver import MALResolver
from AniListResolver import AniListResolver
from MatchesCache import MatchesCache
from TextUtils import paragraphs_to_html, person_name_with_id
# MangaBaka series type for the search endpoint.
_SEARCH_TYPES = ["novel"]
# MangaBaka content_rating -> Kavita AgeRating enum
# Kavita AgeRating values (from openapi.json):
# 0=Unknown, 3=Everyone, 8=Teen, 10=Mature17Plus, 13=AdultsOnly
_AGE_RATING_MAP = {
"safe": 3, # Everyone
"suggestive": 8, # Teen
"erotica": 10, # Mature17Plus
"pornographic": 13, # AdultsOnly
}
# MangaBaka status -> Kavita PublicationStatus enum
# Kavita PublicationStatus (from openapi.json):
# 0=OnGoing, 1=Hiatus, 2=Completed, 3=Cancelled, 4=Ended
_PUB_STATUS_MAP = {
"ongoing": 0,
"hiatus": 1,
"completed": 2,
"cancelled": 3,
"ended": 4,
}
# External-tracker URL templates used to enrich the web-links list.
# Keys are compared against source names after _normalise_key(), so they
# must be lowercase and contain only letters and digits.
_TRACKER_URL_TEMPLATES = {
"anilist": "https://anilist.co/manga/{id}",
"myanimelist": "https://myanimelist.net/manga/{id}",
"mal": "https://myanimelist.net/manga/{id}",
"mangaupdates": "https://www.mangaupdates.com/series.html?id={id}",
"kitsu": "https://kitsu.app/manga/{id}",
"animenewsnetwork": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
"ann": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
"animeplanet": "https://www.anime-planet.com/manga/{id}",
"shikimori": "https://shikimori.one/mangas/{id}",
"bookwalker": "https://bookwalker.jp/{id}",
}
# Matches a backslash followed by one Markdown punctuation character and
# captures that character; _md_to_html() uses it to drop the backslash.
_MD_ESCAPE_RE = re.compile(r'\\([\\`*_{}\[\]()\#+\-.!|~])')
# --------------------------------------------------------------------------
# Helpers
# --------------------------------------------------------------------------
def _normalise_key(key) -> str:
return re.sub(r"[^a-z0-9]", "", str(key).lower())
def _format_term(value: str) -> str:
return str(value).replace("_", " ").strip().title() if value else ""
def _md_to_html(text: str) -> str:
"""Converts the subset of Markdown produced by MangaBaka to compact HTML."""
if not text:
return ""
text = _MD_ESCAPE_RE.sub(r'\1', text)
text = re.sub(
r'\[([^\]]+)\]\(([^)]+)\)',
lambda m: f'<a href="{m.group(2)}">{m.group(1)}</a>',
text,
)
text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text, flags=re.DOTALL)
text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text, flags=re.DOTALL)
return paragraphs_to_html(text)
def pick_cover_url(cover) -> "str | None":
    """
    Returns the preferred cover URL from a MangaBaka cover value.

    A plain string is returned as-is. For a dict the lookup order is:
    ``raw`` (either ``raw["url"]`` or a string), then the sized variants
    x350 > x250 > x150 (densities x3 > x2 > x1), then the first string
    starting with "http" found one or two levels deep. Anything else
    yields None.
    """
    if isinstance(cover, str):
        return cover or None
    if not cover or not isinstance(cover, dict):
        return None

    # 1. The original upload.
    original = cover.get("raw")
    if isinstance(original, dict):
        original_url = original.get("url")
        if isinstance(original_url, str) and original_url:
            return original_url
    elif isinstance(original, str) and original:
        return original

    # 2. Sized variants, largest first.
    for size in ("x350", "x250", "x150"):
        variant = cover.get(size)
        if isinstance(variant, str):
            if variant:
                return variant
            continue
        if not isinstance(variant, dict):
            continue
        for density in ("x3", "x2", "x1"):
            candidate = variant.get(density)
            if isinstance(candidate, str) and candidate:
                return candidate

    # 3. Last resort: anything that looks like a URL.
    for value in cover.values():
        if isinstance(value, str):
            if value.startswith("http"):
                return value
        elif isinstance(value, dict):
            for nested in value.values():
                if isinstance(nested, str) and nested.startswith("http"):
                    return nested
    return None
def pick_thumbnail_url(cover) -> "str | None":
    """
    Returns a small cover variant suitable for a UI thumbnail.

    Sizes are tried smallest first (x150, x250, x350) and densities in
    the order x2, x1, x3. When no sized variant is usable the result of
    pick_cover_url() is returned instead.
    """
    if isinstance(cover, str):
        return cover or None
    if not cover or not isinstance(cover, dict):
        return None

    for size in ("x150", "x250", "x350"):
        variant = cover.get(size)
        if isinstance(variant, str):
            if variant:
                return variant
            continue
        if not isinstance(variant, dict):
            continue
        for density in ("x2", "x1", "x3"):
            candidate = variant.get(density)
            if isinstance(candidate, str) and candidate:
                return candidate

    # No sized variant available — fall back to the full-size picker.
    return pick_cover_url(cover)
def _id_from_source(md: dict, *names: str) -> "int | None":
target = {_normalise_key(n) for n in names}
for raw_key, info in (md.get("source") or {}).items():
if _normalise_key(raw_key) in target and isinstance(info, dict):
mid = info.get("id")
if mid is not None:
try:
return int(mid)
except (TypeError, ValueError):
pass
return None
# --------------------------------------------------------------------------
# Builder
# --------------------------------------------------------------------------
class LightNovelMetadataBuilder:
    """
    Resolves a light-novel series on MangaBaka and produces a structured
    metadata dict ready to be merged into Kavita.

    ``build()`` is the main entry point; ``search_series``,
    ``fetch_series`` and ``resolve`` expose the individual lookup steps.
    """

    def __init__(self, *,
                 api_base_url: str = "https://api.mangabaka.dev/v1",
                 language: str = "en",
                 request_timeout: int = 30,
                 session: "requests.Session | None" = None,
                 mal_resolver: "MALResolver | None" = None,
                 al_resolver: "AniListResolver | None" = None,
                 matches_cache: "MatchesCache | None" = None):
        """
        All arguments are keyword-only.

        api_base_url    -- MangaBaka API root; a trailing "/" is stripped.
        language        -- stored in the result's ``language`` field and
                           used to pick the sort title.
        request_timeout -- timeout passed to every HTTP call.
        session         -- HTTP session to reuse; a new one is created
                           when omitted.
        mal_resolver / al_resolver
                        -- tracker resolvers to reuse; novel-mode
                           instances are created when omitted.
        matches_cache   -- optional cache consulted/updated by resolve().
        """
        self.api_base_url = api_base_url.rstrip("/")
        self.language = language
        self.request_timeout = request_timeout
        self._session = session or requests.Session()
        self._session.headers.setdefault("User-Agent",
                                         "LightNovelMetadataBuilder/1.0")
        # NOTE(review): LightNovelOrchestrator applies the rate limit to the
        # session before passing it in, so it is applied twice on that path —
        # presumably idempotent; confirm in MangaBakaRateLimit.
        _apply_mangabaka_rate_limit(self._session)
        self._mal = mal_resolver or MALResolver(
            request_timeout=request_timeout, search_type="lightnovel")
        self._al = al_resolver or AniListResolver(
            request_timeout=request_timeout, media_format="novel")
        self._matches_cache = matches_cache

    # ------------------------------------------------------------------
    # MangaBaka search / fetch
    # ------------------------------------------------------------------
    def search_series(self, title: str) -> "dict | None":
        """
        Returns the top MangaBaka novel hit for `title`, or None.

        None is returned for a blank title, for any request failure and
        for a response body that is not the expected
        ``{"data": [ {...}, ... ]}`` JSON shape, so one bad response
        cannot abort a batch run.
        """
        if not title or not title.strip():
            return None
        url = f"{self.api_base_url}/series/search"
        try:
            resp = self._session.get(
                url, params={"q": title, "type": _SEARCH_TYPES,
                             "page": 1, "limit": 1},
                timeout=self.request_timeout)
            resp.raise_for_status()
        except requests.RequestException:
            return None
        # JSON decode errors are ValueError subclasses; handled separately
        # so a malformed body is treated like "no hit".
        try:
            payload = resp.json()
        except ValueError:
            return None
        hits = payload.get("data") if isinstance(payload, dict) else None
        if not isinstance(hits, list) or not hits:
            return None
        top = hits[0]
        return top if isinstance(top, dict) else None

    def fetch_series(self, series_id) -> "dict | None":
        """
        Returns the full MangaBaka series dict for the given id, following
        ``merged_with`` redirects. A seen-set guards against merge cycles.

        Returns None for an empty id or when a merge cycle is detected.
        Errors from the HTTP call (including raise_for_status()) are not
        caught here and propagate to the caller.
        """
        if series_id is None or str(series_id).strip() == "":
            return None
        seen: set[str] = set()
        current = series_id
        while str(current) not in seen:
            seen.add(str(current))
            url = f"{self.api_base_url}/series/{current}"
            resp = self._session.get(url, timeout=self.request_timeout)
            resp.raise_for_status()
            data = resp.json().get("data")
            if data and data.get("state") == "merged" and data.get("merged_with"):
                current = data["merged_with"]
                continue
            return data
        return None

    # ------------------------------------------------------------------
    # Resolve title -> MangaBaka series (caches the match)
    # ------------------------------------------------------------------
    def resolve(self, title: str) -> "dict | None":
        """
        Returns the MangaBaka series for `title`.

        Lookup order:
        1. MatchesCache (uses stored mangabakaId, skips the search).
        2. Fresh MangaBaka search — top hit. Result is persisted to the
           cache so it survives a crash.
        """
        if self._matches_cache is not None:
            cached = self._matches_cache.get(title)
            if cached and cached.get("mangabakaId"):
                try:
                    series = self.fetch_series(cached["mangabakaId"])
                    if series:
                        return series
                except Exception as exc:
                    # Best effort: a stale or unreachable cached id falls
                    # through to a fresh search instead of failing.
                    print(f"[builder] cached id {cached['mangabakaId']!r} "
                          f"for {title!r} failed: {exc}", flush=True)
        series = self.search_series(title)
        if series and self._matches_cache is not None:
            self._matches_cache.upsert(
                title,
                mangabaka_id=series.get("id"),
                mangabaka_name=series.get("title") or "",
                image_url=pick_thumbnail_url(series.get("cover")),
            )
        return series

    # ------------------------------------------------------------------
    # Main entry point
    # ------------------------------------------------------------------
    def build(self, *, title: str = "",
              mangabaka_id=None) -> "dict | None":
        """
        Fetches and enriches metadata for one series and returns the
        dict assembled by ``_assemble``; None when nothing was found.

        Pass either `title` (will resolve via cache/search) or
        `mangabaka_id` (direct fetch). A non-blank `mangabaka_id` takes
        precedence over `title`.
        """
        if mangabaka_id is not None and str(mangabaka_id).strip():
            md = self.fetch_series(mangabaka_id)
        else:
            md = self.resolve(title)
        if not md:
            return None
        return self._assemble(md)

    # ------------------------------------------------------------------
    # Internal: assemble the result dict
    # ------------------------------------------------------------------
    def _assemble(self, md: dict) -> dict:
        """Builds the normalised result dict from a MangaBaka series dict."""
        mal_id = _id_from_source(md, "myanimelist", "mal")
        al_id = _id_from_source(md, "anilist")
        # Fall back to a title-based MAL lookup when the source map does
        # not carry an id — Jikan is the only tracker that ships staff
        # data we can use to enrich author / artist person records.
        if mal_id is None:
            mal_id = self._mal.find_mal_id(md.get("title") or "")
        mal_stats = self._mal.get_stats(mal_id) if mal_id else None
        characters_detailed = self._mal.get_characters_detailed(mal_id) if mal_id else []
        if not characters_detailed and al_id:
            characters_detailed = self._al.get_characters_detailed(al_id)
        staff_detailed = self._mal.get_staff_detailed(mal_id) if mal_id else []
        if not staff_detailed and al_id:
            staff_detailed = self._al.get_staff_detailed(al_id)
        # Character names for SeriesMetadata, disambiguated with the
        # tracker character id ("Rem (MAL 118737)") because Kavita person
        # records are global and keyed by name only.
        character_names = [
            person_name_with_id(c["name"],
                                mal_id=c.get("mal_id"),
                                al_id=c.get("al_id"))
            for c in characters_detailed if c.get("name")
        ]
        # Writers come from MangaBaka first (authoritative for novels)
        writers = list(md.get("authors") or [])
        # Illustrators / artists -> CoverArtists (Kavita has no dedicated
        # illustrator field, and Pencillers is the wrong semantic for
        # text-only novels).
        cover_artists = list(md.get("artists") or [])
        # Publisher: prefer English licence, else original. When both
        # exist, the original publisher becomes the imprint.
        english_pubs = self._publishers_by_type(md, "English")
        original_pubs = self._publishers_by_type(md, "Original")
        publishers = english_pubs or original_pubs
        imprint = original_pubs[0] if english_pubs and original_pubs else None
        # Release year
        release_year = None
        try:
            if md.get("year") is not None:
                release_year = int(md["year"])
        except (TypeError, ValueError):
            pass
        # Score: MangaBaka rating is 0..100 -> Kavita userRating is 0..5
        score = None
        if md.get("rating") is not None:
            try:
                score = round(float(md["rating"]) / 20.0, 1)
            except (TypeError, ValueError):
                pass
        # Tags / genres come back as snake_case slugs.
        genres = [_format_term(g) for g in (md.get("genres") or []) if g]
        tags = [_format_term(t) for t in (md.get("tags") or []) if t]
        # Web links
        web_links = self._collect_web_links(md)
        # Summary HTML
        summary = self._build_summary(md, mal_stats)
        # Cover URL
        cover_url = pick_cover_url(md.get("cover"))
        # Title variants
        all_alt = self._collect_all_alt_titles(md)
        # Both enum lookups are case-insensitive; unknown values map to 0.
        content_rating = str(md.get("content_rating") or "").lower()
        status = (md.get("status") or "").lower()
        return {
            "mangabakaId": str(md.get("id") or ""),
            "mangabakaTitle": md.get("title") or "",
            "originalName": md.get("native_title") or "",
            "localizedName": md.get("romanized_title") or "",
            "sortName": self._sort_title(md),
            "altTitles": all_alt,
            "summary": summary,
            "genres": genres,
            "tags": tags,
            "characters": character_names,
            "writers": writers,
            "coverArtists": cover_artists,
            "publishers": publishers,
            "imprint": imprint,
            "releaseYear": release_year,
            "ageRating": _AGE_RATING_MAP.get(content_rating, 0),
            "publicationStatus": _PUB_STATUS_MAP.get(status, 0),
            "language": self.language,
            "webLinks": web_links,
            "score": score,
            "coverUrl": cover_url,
            "malId": mal_id,
            "anilistId": al_id,
            "relationships": list(md.get("relationships_v2") or []),
            "charactersDetailed": characters_detailed,
            "staffDetailed": staff_detailed,
            "raw": md,
        }

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _publishers_by_type(md: dict, ptype: str) -> list[str]:
        """Returns the names of all publishers whose ``type`` equals `ptype`."""
        return [p.get("name") for p in (md.get("publishers") or [])
                if p.get("type") == ptype and p.get("name")]

    def _sort_title(self, md: dict) -> str:
        """Returns the best title for the configured language, else the main title."""
        lang = self.language.lower()
        alts = self._collect_alt_titles(md)
        return alts.get(lang) or md.get("title") or ""

    def _collect_alt_titles(self, md: dict) -> "dict[str, str]":
        """Returns one best title per language code (en/de/jp/romaji)."""
        titles = md.get("titles") or md.get("alt_titles") or []

        def pick(language_codes: tuple, prefer_trait: "str | None" = None
                 ) -> "str | None":
            # Scores every entry in the requested languages and keeps the
            # first one with the highest score: preferred trait (+4),
            # "official" trait (+2), primary flag (+1).
            best_score = -1
            best_title: "str | None" = None
            for entry in titles:
                if not isinstance(entry, dict):
                    continue
                lang = (entry.get("language") or entry.get("lang") or "").lower()
                if lang not in language_codes:
                    continue
                title = entry.get("title")
                if not title:
                    continue
                traits = entry.get("traits") or []
                score = 0
                if prefer_trait and prefer_trait in traits:
                    score += 4
                if "official" in traits:
                    score += 2
                if entry.get("is_primary"):
                    score += 1
                if score > best_score:
                    best_score, best_title = score, title
            return best_title

        result: dict[str, str] = {}
        kanji = pick(("ja",), prefer_trait="native") or md.get("native_title")
        if kanji:
            result["jp"] = kanji
        romaji = pick(("ja-latn", "ja-romaji"))
        if not romaji:
            # Only accept romanized_title as romaji when it is pure ASCII.
            rt = md.get("romanized_title") or ""
            if rt and all(ord(c) < 128 for c in rt):
                romaji = rt
        if romaji:
            result["romaji"] = romaji
        en = pick(("en",)) or md.get("title")
        if en:
            result["en"] = en
        de = pick(("de",))
        if de:
            result["de"] = de
        return result

    @staticmethod
    def _collect_all_alt_titles(md: dict) -> "dict[str, list[str]]":
        """
        Groups every known title by language group, de-duplicated and in
        first-seen order. Entries in unlisted languages are dropped.
        """
        _GROUPS = {
            "en": ("en",),
            "de": ("de",),
            "ja": ("ja",),
            "ja-romaji": ("ja-latn", "ja-romaji"),
            "ko": ("ko",),
            "ko-romaji": ("ko-latn", "ko-romaji"),
            "zh": ("zh", "zh-hk", "zh-tw", "zh-hans", "zh-hant"),
            "zh-romaji": ("zh-latn",),
        }
        lang_to_group = {l: g for g, ls in _GROUPS.items() for l in ls}
        result: dict[str, list[str]] = {}
        seen: dict[str, set] = {}
        for entry in (md.get("titles") or md.get("alt_titles") or []):
            if not isinstance(entry, dict):
                continue
            lang = (entry.get("language") or entry.get("lang") or "").lower()
            group = lang_to_group.get(lang)
            if not group:
                continue
            title = (entry.get("title") or "").strip()
            if not title:
                continue
            result.setdefault(group, [])
            seen.setdefault(group, set())
            if title not in seen[group]:
                result[group].append(title)
                seen[group].add(title)
        return result

    def _collect_web_links(self, md: dict) -> list[str]:
        """
        Returns the series' own links plus one URL per known tracker in
        ``md["source"]``, de-duplicated in first-seen order.
        """
        links: list[str] = [l for l in (md.get("links") or []) if l]
        for raw_key, info in (md.get("source") or {}).items():
            template = _TRACKER_URL_TEMPLATES.get(_normalise_key(raw_key))
            if not template or not isinstance(info, dict):
                continue
            source_id = info.get("id")
            if source_id is not None:
                links.append(template.format(id=source_id))
        seen: set[str] = set()
        unique: list[str] = []
        for link in links:
            if link not in seen:
                seen.add(link)
                unique.append(link)
        return unique

    def _build_summary(self, md: dict,
                       mal_stats: "dict | None") -> str:
        """Builds the HTML summary with stats table + description + alt titles."""
        _TD = 'style="padding-right:1.5em"'
        parts: list[str] = []
        if mal_stats:
            url = mal_stats.get("url", "")
            as_of = mal_stats.get("as_of", "")
            rows: list[str] = []
            for label, key, fmt in (
                ("Score", "score", "{}"),
                ("Ranked", "rank", "#{}"),
                ("Scored by", "scored_by", "{:,} users"),
                ("Popularity", "popularity", "#{}"),
                ("Members", "members", "{:,}"),
                ("Favorites", "favorites", "{:,}"),
            ):
                v = mal_stats.get(key)
                if v is None:
                    continue
                try:
                    formatted = fmt.format(v)
                except (TypeError, ValueError):
                    # e.g. "{:,}" applied to a string — show the raw value.
                    formatted = str(v)
                rows.append(f"<tr><td {_TD}>{label}</td><td>{formatted}</td></tr>")
            if rows:
                link = f'<a href="{url}" target="_blank">MyAnimeList</a>' if url else "MyAnimeList"
                parts.append(f"<p>{link} stats as of {as_of}:</p>"
                             f"<table>{''.join(rows)}</table>")
        desc_raw = (md.get("description") or "").strip()
        if desc_raw:
            parts.append(_md_to_html(desc_raw))
        all_alt = self._collect_all_alt_titles(md)
        if all_alt:
            label_map = {
                "en": "EN",
                "de": "DE",
                "ja": "JA",
                "ja-romaji": "JA Romaji",
                "ko": "KO",
                "ko-romaji": "KO Romaji",
                "zh": "ZH",
                "zh-romaji": "ZH Romaji",
            }
            alt_rows: list[str] = []
            for group in ("en", "de", "ja", "ja-romaji",
                          "ko", "ko-romaji", "zh", "zh-romaji"):
                titles = all_alt.get(group)
                if not titles:
                    continue
                cell = "<br>".join(titles)
                alt_rows.append(
                    f"<tr><td {_TD}>{label_map[group]}</td><td>{cell}</td></tr>")
            if alt_rows:
                parts.append(f"<table>{''.join(alt_rows)}</table>")
        return "<br>".join(parts)
+265
View File
@@ -0,0 +1,265 @@
"""
light_novel_orchestrator.py
===========================
High-level workflow on top of the resolvers, the Kavita client and the
diff-based updaters. Exposes three operations to the WebApp:
- build_matches(library_ids):
Scan one or more Kavita libraries, resolve every series against
MangaBaka and persist the match in matches.json.
- update_series(kavita_series_id):
Re-fetch MangaBaka, MAL and AniList data for a single Kavita
series and apply the diff (metadata + relationships). Person
records are synced separately via sync_persons().
- update_all(library_ids):
Run update_series for every series that has a match in the
cache and lives in the given libraries.
A single shared HTTP session (rate-limited for MangaBaka) and shared
resolver singletons are used across the whole run to maximise cache
hits.
"""
from __future__ import annotations
import requests
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from MALResolver import MALResolver
from AniListResolver import AniListResolver
from MatchesCache import MatchesCache
from KavitaClient import KavitaClient
from KavitaPersonUpdater import KavitaPersonUpdater
from KavitaSeriesUpdater import KavitaSeriesUpdater
from LightNovelMetadataBuilder import (
LightNovelMetadataBuilder,
pick_thumbnail_url,
)
from RelationshipSync import RelationshipSync
class LightNovelOrchestrator:
    """
    Wires the MangaBaka builder, the tracker resolvers, the Kavita client
    and the updaters together and exposes the match / update operations
    used by the WebApp.
    """

    def __init__(self, *,
                 kavita_url: str,
                 kavita_api_key: str,
                 matches_cache: MatchesCache,
                 language: str = "en",
                 request_timeout: int = 30,
                 api_base_url: str = "https://api.mangabaka.dev/v1"):
        """
        Builds one shared HTTP session and one set of resolvers and hands
        them to every collaborator. All arguments are keyword-only.
        """
        self._cache = matches_cache
        self._timeout = request_timeout
        session = requests.Session()
        session.headers.setdefault("User-Agent",
                                   "KavitaLightNovelOrchestrator/1.0")
        _apply_mangabaka_rate_limit(session)
        self._session = session
        # First construction in the LN container — pins the singletons to
        # light-novel search mode (manga container uses the defaults).
        self._mal = MALResolver(request_timeout=request_timeout,
                                search_type="lightnovel")
        self._al = AniListResolver(request_timeout=request_timeout,
                                   media_format="novel")
        self._client = KavitaClient(kavita_url, kavita_api_key,
                                    request_timeout=request_timeout)
        self._builder = LightNovelMetadataBuilder(
            api_base_url=api_base_url,
            language=language,
            request_timeout=request_timeout,
            session=session,
            mal_resolver=self._mal,
            al_resolver=self._al,
            matches_cache=matches_cache,
        )
        self._series_updater = KavitaSeriesUpdater(self._client)
        self._person_updater = KavitaPersonUpdater(
            self._client,
            mal_resolver=self._mal,
            al_resolver=self._al,
        )
        self._relation_sync = RelationshipSync(
            self._client, matches_cache, builder=self._builder)

    # ------------------------------------------------------------------
    # Library listings
    # ------------------------------------------------------------------
    def list_libraries(self) -> list[dict]:
        """Returns the Kavita libraries as reported by the client."""
        return self._client.list_libraries()

    def list_series_in_libraries(self, library_ids: list[int]) -> list[dict]:
        """
        Returns the series of all given libraries in one flat list.
        A library that fails to list is logged and skipped.
        """
        result: list[dict] = []
        for lib_id in library_ids:
            try:
                result.extend(self._client.list_series_in_library(int(lib_id)))
            except Exception as exc:
                print(f"[orchestrator] library {lib_id} list failed: {exc}",
                      flush=True)
        return result

    # ------------------------------------------------------------------
    # Matching
    # ------------------------------------------------------------------
    def build_matches(self, library_ids: list[int]) -> dict:
        """
        Resolves every series in the given libraries against MangaBaka.

        Series already present in matches.json keep their stored
        mangabakaId; the kavitaSeriesId + libraryId fields are refreshed
        in case the user moved a series between libraries.

        Returns counters: checked, matched, skipped (already matched)
        and missing (no MangaBaka hit).
        """
        stats = {"checked": 0, "matched": 0, "skipped": 0, "missing": 0}
        for series in self.list_series_in_libraries(library_ids):
            title = (series.get("name") or "").strip()
            if not title:
                continue
            stats["checked"] += 1
            kavita_id = int(series.get("id") or 0)
            library_id = int(series.get("libraryId") or 0)
            cached = self._cache.get(title)
            if cached and cached.get("mangabakaId"):
                self._cache.upsert(
                    title,
                    kavita_series_id=kavita_id,
                    library_id=library_id,
                )
                stats["skipped"] += 1
                continue
            mb_series = self._builder.search_series(title)
            if not mb_series:
                # Still record the Kavita ids so the series shows up as
                # unmatched in the cache.
                self._cache.upsert(
                    title,
                    kavita_series_id=kavita_id,
                    library_id=library_id,
                )
                stats["missing"] += 1
                print(f"[match] {title!r}: no MangaBaka hit", flush=True)
                continue
            self._cache.upsert(
                title,
                mangabaka_id=mb_series.get("id"),
                mangabaka_name=mb_series.get("title") or "",
                image_url=pick_thumbnail_url(mb_series.get("cover")),
                kavita_series_id=kavita_id,
                library_id=library_id,
            )
            stats["matched"] += 1
            print(f"[match] {title!r} -> {mb_series.get('title')!r} "
                  f"(id={mb_series.get('id')})", flush=True)
        return stats

    # ------------------------------------------------------------------
    # Updating
    # ------------------------------------------------------------------
    def update_series(self, kavita_series_id: int) -> dict:
        """
        Runs a full metadata update for a single Kavita series.

        Returns ``{"ok": True, "title", "mangabakaId", "series",
        "relationships"}`` on success and ``{"ok": False, "error"}``
        otherwise. A relationship-sync failure is reported inside
        ``relationships`` and does not fail the update.
        """
        hit = self._cache.get_by_kavita_id(int(kavita_series_id))
        if not hit:
            # Try to resolve via the Kavita series name on the fly.
            series = self._client.get_series(int(kavita_series_id)) or {}
            title = (series.get("name") or "").strip()
            if not title:
                return {"ok": False, "error": "series not in matches.json"}
            built = self._builder.build(title=title)
            if not built:
                return {"ok": False, "error": "no MangaBaka match"}
            # The cover URL is deliberately NOT stamped here: it is written
            # after the update below. Stamping it first would make the
            # "previous" cover equal the new one on the very first run.
            self._cache.upsert(
                title,
                mangabaka_id=built.get("mangabakaId"),
                mangabaka_name=built.get("mangabakaTitle"),
                kavita_series_id=int(kavita_series_id),
                library_id=int(series.get("libraryId") or 0),
            )
            cached_title = title
            # No update has run for this Kavita series yet.
            prev_cover = ""
        else:
            cached_title, cached_entry = hit
            built = self._builder.build(mangabaka_id=cached_entry.get("mangabakaId"))
            if not built:
                return {"ok": False, "error": "mangabaka id no longer resolvable"}
            prev_cover = cached_entry.get("imageUrl") or ""
        try:
            series_report = self._series_updater.update_series(
                int(kavita_series_id), built,
                previous_cover_url=prev_cover,
            )
        except Exception as exc:
            return {"ok": False, "error": f"series update failed: {exc}"}
        # Person sync no longer runs per series — it has its own global,
        # id-based updater (sync_persons / KavitaPersonUpdater.update_all_persons)
        # on a separate cron schedule.
        # Relationships + collection
        try:
            relation_report = self._relation_sync.sync(
                int(kavita_series_id), built)
        except Exception as exc:
            relation_report = {"error": str(exc)}
        # Stamp the new cover URL on the cache so the next run knows when
        # to re-upload. None leaves the stored URL untouched.
        self._cache.upsert(
            cached_title,
            image_url=built.get("coverUrl") or None,
        )
        self._cache.mark_updated(cached_title)
        return {
            "ok": True,
            "title": cached_title,
            "mangabakaId": built.get("mangabakaId"),
            "series": series_report,
            "relationships": relation_report,
        }

    # ------------------------------------------------------------------
    # Person sync (global, id-based — independent of series updates)
    # ------------------------------------------------------------------
    def sync_persons(self, *, trigger: str = "ln", perf=None) -> dict:
        """
        Runs the global, id-based person updater over every Kavita person.
        Covers both manga and light-novel libraries in one pass.
        """
        return self._person_updater.update_all_persons(
            trigger=trigger, perf=perf)

    def update_all(self, library_ids: "list[int] | None") -> dict:
        """
        Updates every cached series in the given libraries (all libraries
        when `library_ids` is None). Entries without a Kavita id or a
        MangaBaka id are skipped.

        Returns ``{"ok": <count>, "failed": <count>, "results": [...]}``.
        """
        if library_ids is None:
            entries = self._cache.all()["matches"]
        else:
            entries = self._cache.all_in_libraries(library_ids)["matches"]
        results: list[dict] = []
        ok = fail = 0
        for title, entry in entries.items():
            ksid = int(entry.get("kavitaSeriesId") or 0)
            if not ksid or not entry.get("mangabakaId"):
                continue
            try:
                res = self.update_series(ksid)
            except Exception as exc:
                # One failing series must not abort the batch.
                res = {"ok": False, "error": str(exc)}
            res["title"] = title
            results.append(res)
            if res.get("ok"):
                ok += 1
            else:
                fail += 1
            print(f"[update] {title!r}: "
                  f"{'ok' if res.get('ok') else 'FAIL ' + str(res.get('error'))}",
                  flush=True)
        return {"ok": ok, "failed": fail, "results": results}

    # ------------------------------------------------------------------
    # Direct helpers exposed to the WebApp
    # ------------------------------------------------------------------
    def fetch_series(self, mangabaka_id) -> "dict | None":
        """Returns the MangaBaka series dict for `mangabaka_id` via the builder."""
        return self._builder.fetch_series(mangabaka_id)
+187
View File
@@ -0,0 +1,187 @@
"""
matches_cache.py
================
Persistent JSON cache that maps a Kavita series title to the MangaBaka
series it was matched against, plus enough context to update the right
Kavita record later.
Structure on disk::
{
"matches": {
"<kavita series name>": {
"mangabakaId": "12345",
"mangabakaName": "Re:Zero",
"imageUrl": "https://.../cover.jpg",
"kavitaSeriesId": 42,
"libraryId": 3,
"firstMatchTime": 1700000000,
"lastUpdateTime": 1700100000
},
...
}
}
The cache is the source of truth for the WebUI's matches table and is
written back on every mutation so a crash mid-batch does not lose
matches that were resolved in the current run.
"""
from __future__ import annotations
import json
import threading
import time
from pathlib import Path
def _set_int(entry: dict, key: str, value) -> None:
"""Sets entry[key] = int(value); ignores values that don't coerce."""
try:
entry[key] = int(value)
except (TypeError, ValueError):
pass
class MatchesCache:
def __init__(self, path):
self._path = Path(path)
self._lock = threading.RLock()
self._data: dict = {"matches": {}}
self._load()
# ------------------------------------------------------------------
# Public lookup / mutation API
# ------------------------------------------------------------------
def get(self, title: str) -> "dict | None":
with self._lock:
entry = self._data["matches"].get(title)
return dict(entry) if entry else None
def get_by_kavita_id(self, kavita_series_id: int) -> "tuple[str, dict] | None":
with self._lock:
for title, entry in self._data["matches"].items():
if entry.get("kavitaSeriesId") == kavita_series_id:
return title, dict(entry)
return None
def get_by_mangabaka_id(self, mangabaka_id) -> "tuple[str, dict] | None":
target = str(mangabaka_id) if mangabaka_id is not None else ""
if not target:
return None
with self._lock:
for title, entry in self._data["matches"].items():
if str(entry.get("mangabakaId") or "") == target:
return title, dict(entry)
return None
def upsert(self, title: str, *,
mangabaka_id=None,
mangabaka_name=None,
image_url=None,
kavita_series_id=None,
library_id=None,
first_match_time=None,
last_update_time=None) -> dict:
"""
Inserts or updates an entry. Only fields passed explicitly are
modified; the rest are preserved.
"""
with self._lock:
entry = self._data["matches"].get(title)
if entry is None:
entry = {
"mangabakaId": "",
"mangabakaName": "",
"imageUrl": "",
"kavitaSeriesId": 0,
"libraryId": 0,
"firstMatchTime": int(time.time()),
"lastUpdateTime": 0,
}
self._data["matches"][title] = entry
if mangabaka_id is not None:
entry["mangabakaId"] = str(mangabaka_id)
if mangabaka_name is not None:
entry["mangabakaName"] = mangabaka_name
if image_url is not None:
entry["imageUrl"] = image_url
if kavita_series_id is not None:
_set_int(entry, "kavitaSeriesId", kavita_series_id)
if library_id is not None:
_set_int(entry, "libraryId", library_id)
if first_match_time is not None:
_set_int(entry, "firstMatchTime", first_match_time)
if last_update_time is not None:
_set_int(entry, "lastUpdateTime", last_update_time)
self._save_unlocked()
return dict(entry)
def mark_updated(self, title: str) -> None:
with self._lock:
entry = self._data["matches"].get(title)
if entry is not None:
entry["lastUpdateTime"] = int(time.time())
self._save_unlocked()
def rename(self, old_title: str, new_title: str) -> bool:
if not new_title or old_title == new_title:
return False
with self._lock:
entry = self._data["matches"].pop(old_title, None)
if entry is None:
return False
self._data["matches"][new_title] = entry
self._save_unlocked()
return True
def remove(self, title: str) -> bool:
with self._lock:
existed = title in self._data["matches"]
if existed:
del self._data["matches"][title]
self._save_unlocked()
return existed
def all(self) -> dict:
with self._lock:
return {"matches": {k: dict(v)
for k, v in self._data["matches"].items()}}
def all_in_libraries(self, library_ids: "list[int] | None") -> dict:
"""
Returns the cache filtered to entries whose libraryId is in
`library_ids`. Pass None to return everything.
"""
if library_ids is None:
return self.all()
ids = {int(i) for i in library_ids}
with self._lock:
return {"matches": {
k: dict(v) for k, v in self._data["matches"].items()
if int(v.get("libraryId") or 0) in ids
}}
# ------------------------------------------------------------------
# Internal IO
# ------------------------------------------------------------------
def _load(self) -> None:
if not self._path.is_file():
return
try:
with self._path.open("r", encoding="utf-8") as f:
loaded = json.load(f)
except (OSError, json.JSONDecodeError) as exc:
print(f"[MatchesCache] failed to load {self._path}: {exc}",
flush=True)
return
if isinstance(loaded, dict) and isinstance(loaded.get("matches"), dict):
self._data = loaded
def _save_unlocked(self) -> None:
    """
    Writes ``self._data`` to the cache path as pretty-printed UTF-8
    JSON. The data goes to a sibling ``<name>.tmp`` file first, which
    is then renamed over the real file. Does not take the lock itself.
    """
    target = self._path
    target.parent.mkdir(parents=True, exist_ok=True)
    scratch = target.with_suffix(f"{target.suffix}.tmp")
    with scratch.open("w", encoding="utf-8") as handle:
        json.dump(self._data, handle, ensure_ascii=False, indent=2)
    scratch.replace(target)
+815
View File
@@ -0,0 +1,815 @@
"""
matches_web_app.py
==================
Flask web UI for the Kavita light-novel metadata fetcher.
Pages
-----
GET / HTML UI (matches table + actions)
Match cache (JSON)
------------------
GET /api/libraries Lists Kavita libraries
GET /api/matches Full cache, optionally filtered by libraryIds=
POST /api/matches Upsert a single match
body: {title, mangabakaId}
POST /api/matches/delete Remove a match
body: {title}
Background jobs
---------------
POST /api/build Build matches for libraries
body: {libraryIds: [int, ...]}
POST /api/update Update a single series
body: {kavitaSeriesId}
POST /api/update-all Update every cached series in libraries
body: {libraryIds: [int, ...] | null}
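POST /api/persons/sync Start the person sync job (no request body)
GET /api/status Current background job status
returns: {running, label, log, lastFinished, defaults}
Performance
-----------
GET /perf, /perf/<name> HTML performance page (defaults to "person")
GET /api/perf/<name> Performance stats for a dataset (only "person" has data)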
"""
from __future__ import annotations
import threading
import time
from flask import Flask, jsonify, request, Response
from MatchesCache import MatchesCache
from LightNovelMetadataBuilder import pick_thumbnail_url
from PerfWebPage import render_perf_page
# Only the person dataset exists in the LN container.
_PERF_TABS = [("persons", "person")]
def _int_list(values) -> list[int]:
    """Coerces an iterable of mixed values to a list of positive ints.

    Items that cannot be converted to an int (including infinities and
    NaN) or that are not strictly positive are skipped. A falsy or
    non-iterable `values` yields an empty list, so malformed request
    data never raises here.
    """
    if not values:
        return []
    try:
        candidates = iter(values)
    except TypeError:
        # A bare scalar (e.g. JSON `5` instead of `[5]`) is not an id list.
        return []
    out: list[int] = []
    for v in candidates:
        try:
            n = int(v)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")); ValueError also covers NaN.
            continue
        if n > 0:
            out.append(n)
    return out
_INDEX_HTML = r"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Kavita light-novel metadata fetcher</title>
<style>
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
.bar { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
.bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
.bar select[multiple] { background:#222; color:#eee; border:1px solid #444; min-width: 14rem; min-height: 4.2rem; }
button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
button.primary { background:#2563eb; border-color:#2563eb; color:white; }
button.danger { background:#7f1d1d; border-color:#7f1d1d; color:white; }
button.success { background:#15803d; border-color:#15803d; color:white; }
button:disabled { opacity:.5; cursor:default; }
table { border-collapse: collapse; width: 100%; }
th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
th.sortable { cursor: pointer; user-select: none; }
th.sortable:hover { background:#252525; }
th .arrow { display:inline-block; width:.8em; color:#9ca3af; }
tr:nth-child(even) td { background: #161616; }
td.image img { max-width: 90px; max-height: 130px; display:block; }
td.id input { width: 12rem; padding: .25rem; background:#222; color:#eee; border:1px solid #444; font-family: monospace; }
td.title a { color: #60a5fa; text-decoration: none; }
td.title a:hover { text-decoration: underline; }
td.actions { white-space: nowrap; }
.status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
.dirty td { background: #1f2937 !important; }
.count { color:#9ca3af; font-size:.9rem; margin-left:.5rem; }
pre.log { background:#0a0a0a; color:#9ca3af; padding:.5rem .75rem; max-height:18rem; overflow:auto; border:1px solid #333; font-size:.8rem; white-space:pre-wrap; }
label { font-size:.9rem; color:#9ca3af; }
</style>
</head>
<body>
<h1>Kavita light-novel metadata fetcher <span id="count" class="count"></span></h1>
<div class="bar">
<label>Libraries
<select id="libraries" multiple size="3"></select>
</label>
<button id="reload">Reload</button>
<button id="build">Match all in libraries</button>
<button id="updateAll" class="success">Update all in libraries</button>
<button id="syncPersons">Sync persons</button>
<button id="batchSave" class="primary">Save dirty (0)</button>
<a href="/perf/person" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
<span class="status" id="status"></span>
</div>
<div class="bar">
<input id="filter" type="search" placeholder="Filter by title…">
<span class="count" id="jobStatus"></span>
</div>
<pre id="jobLog" class="log" hidden></pre>
<table>
<thead>
<tr>
<th class="sortable" data-col="title">Title <span class="arrow" id="arrow-title"></span></th>
<th>mangabakaId</th>
<th>mangabakaName</th>
<th>library</th>
<th class="sortable" data-col="lastUpdateTime">Last update <span class="arrow" id="arrow-lastUpdateTime"></span></th>
<th>Image</th>
<th></th>
</tr>
</thead>
<tbody id="rows"></tbody>
</table>
<script>
const MB_SEARCH = "https://mangabaka.org/search?q=";
let matchesData = {};
let librariesById = {};
let currentSort = { col: "title", asc: true };
let jobPollHandle = null;
function fmtTime(unix) {
if (!unix) return "";
const d = new Date(unix * 1000);
return d.toLocaleString();
}
function setStatus(msg) { document.getElementById("status").textContent = msg; }
function selectedLibraryIds() {
const sel = document.getElementById("libraries");
return Array.from(sel.selectedOptions).map(o => parseInt(o.value, 10));
}
function updateDirtyCount() {
const n = document.querySelectorAll("#rows tr.dirty").length;
const btn = document.getElementById("batchSave");
btn.textContent = "Save dirty (" + n + ")";
btn.disabled = n === 0;
}
function makeRow(title, e) {
const tr = document.createElement("tr");
tr.dataset.title = title;
// Title — links to MangaBaka search
const titleTd = document.createElement("td");
titleTd.className = "title";
const a = document.createElement("a");
a.href = MB_SEARCH + encodeURIComponent(title) + "&type=novel";
a.target = "_blank";
a.rel = "noopener";
a.textContent = title;
titleTd.appendChild(a);
tr.appendChild(titleTd);
// mangabakaId (editable)
const idTd = document.createElement("td");
idTd.className = "id";
const idInp = document.createElement("input");
idInp.value = e.mangabakaId || "";
idInp.dataset.original = e.mangabakaId || "";
idInp.addEventListener("input", () => {
if (idInp.value !== idInp.dataset.original) tr.classList.add("dirty");
else tr.classList.remove("dirty");
updateDirtyCount();
});
idTd.appendChild(idInp);
tr.appendChild(idTd);
// mangabakaName
const nameTd = document.createElement("td");
nameTd.textContent = e.mangabakaName || "";
tr.appendChild(nameTd);
// library
const libTd = document.createElement("td");
const libId = e.libraryId || 0;
libTd.textContent = librariesById[libId] || (libId ? "#" + libId : "");
tr.appendChild(libTd);
// lastUpdateTime
const timeTd = document.createElement("td");
timeTd.textContent = e.lastUpdateTime ? fmtTime(e.lastUpdateTime) : "";
tr.appendChild(timeTd);
// Image
const imgTd = document.createElement("td");
imgTd.className = "image";
const img = document.createElement("img");
img.src = e.imageUrl || "";
img.alt = "";
img.loading = "lazy";
imgTd.appendChild(img);
tr.appendChild(imgTd);
// Actions
const actTd = document.createElement("td");
actTd.className = "actions";
const save = document.createElement("button");
save.textContent = "Save";
save.className = "primary";
save.addEventListener("click", () => saveRow(tr));
actTd.appendChild(save);
const update = document.createElement("button");
update.textContent = "Update";
update.className = "success";
update.style.marginLeft = ".25rem";
update.disabled = !e.kavitaSeriesId;
update.title = e.kavitaSeriesId
? "Push metadata to Kavita series #" + e.kavitaSeriesId
: "Run a Match cycle first so we know the Kavita series id";
update.addEventListener("click", () => updateRow(tr));
actTd.appendChild(update);
const del = document.createElement("button");
del.textContent = "Delete";
del.className = "danger";
del.style.marginLeft = ".25rem";
del.addEventListener("click", () => deleteRow(tr));
actTd.appendChild(del);
tr.appendChild(actTd);
tr._idInp = idInp;
tr._nameTd = nameTd;
tr._img = img;
tr._timeTd = timeTd;
tr._update = update;
return tr;
}
async function saveRow(tr) {
const title = tr.dataset.title;
const newId = tr._idInp.value.trim();
setStatus("Saving " + title + "");
try {
const r = await fetch("/api/matches", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ title: title, mangabakaId: newId }),
});
if (!r.ok) throw new Error(await r.text());
const data = await r.json();
const entry = data.entry || {};
matchesData[title] = entry;
tr._idInp.value = entry.mangabakaId || "";
tr._idInp.dataset.original = entry.mangabakaId || "";
tr._nameTd.textContent = entry.mangabakaName || "";
tr._img.src = entry.imageUrl || "";
tr.classList.remove("dirty");
updateDirtyCount();
setStatus("Saved " + title);
return true;
} catch (err) {
setStatus("Save failed (" + title + "): " + err.message);
return false;
}
}
async function deleteRow(tr) {
const title = tr.dataset.title;
if (!confirm("Delete " + title + "?")) return;
setStatus("Deleting " + title + "");
try {
const r = await fetch("/api/matches/delete", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ title: title }),
});
if (!r.ok) throw new Error(await r.text());
delete matchesData[title];
tr.remove();
document.getElementById("count").textContent =
"(" + Object.keys(matchesData).length + " entries)";
setStatus("Deleted");
} catch (err) {
setStatus("Delete failed: " + err.message);
}
}
async function updateRow(tr) {
const title = tr.dataset.title;
const entry = matchesData[title] || {};
if (!entry.kavitaSeriesId) {
setStatus("No kavitaSeriesId for " + title + " — run match first");
return;
}
setStatus("Updating " + title + "");
tr._update.disabled = true;
try {
const r = await fetch("/api/update", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ kavitaSeriesId: entry.kavitaSeriesId }),
});
if (!r.ok) throw new Error(await r.text());
const res = await r.json();
setStatus(res.ok ? "Updated " + title : "Update failed: " + res.error);
if (res.ok) {
entry.lastUpdateTime = Math.floor(Date.now() / 1000);
tr._timeTd.textContent = fmtTime(entry.lastUpdateTime);
}
} catch (err) {
setStatus("Update failed: " + err.message);
} finally {
tr._update.disabled = false;
}
}
async function batchSave() {
const dirty = Array.from(document.querySelectorAll("#rows tr.dirty"));
if (dirty.length === 0) return;
if (!confirm("Save " + dirty.length + " changed row(s)?")) return;
setStatus("Batch saving " + dirty.length + " rows…");
let ok = 0, fail = 0;
for (const tr of dirty) {
const success = await saveRow(tr);
if (success) ok++; else fail++;
}
setStatus("Batch: " + ok + " ok, " + fail + " failed");
}
function sortedTitles() {
const titles = Object.keys(matchesData);
const dir = currentSort.asc ? 1 : -1;
if (currentSort.col === "title") {
return titles.sort((a, b) => a.localeCompare(b) * dir);
}
if (currentSort.col === "lastUpdateTime") {
return titles.sort((a, b) => {
const av = matchesData[a].lastUpdateTime || 0;
const bv = matchesData[b].lastUpdateTime || 0;
return (av - bv) * dir;
});
}
return titles;
}
function updateSortArrows() {
for (const a of document.querySelectorAll("th .arrow")) a.textContent = "";
const id = "arrow-" + currentSort.col;
const el = document.getElementById(id);
if (el) el.textContent = currentSort.asc ? "" : "";
}
function applyFilter() {
const q = document.getElementById("filter").value.toLowerCase();
const libs = new Set(selectedLibraryIds());
for (const tr of document.querySelectorAll("#rows tr")) {
const title = tr.dataset.title;
const entry = matchesData[title] || {};
const titleMatch = title.toLowerCase().includes(q);
const libMatch = libs.size === 0 || libs.has(entry.libraryId || 0);
tr.style.display = (titleMatch && libMatch) ? "" : "none";
}
}
function render() {
const tbody = document.getElementById("rows");
tbody.innerHTML = "";
for (const t of sortedTitles()) {
tbody.appendChild(makeRow(t, matchesData[t]));
}
updateSortArrows();
applyFilter();
updateDirtyCount();
document.getElementById("count").textContent =
"(" + Object.keys(matchesData).length + " entries)";
}
async function loadLibraries() {
try {
const r = await fetch("/api/libraries");
const data = await r.json();
const libs = data.libraries || [];
const defaults = new Set(data.defaults || []);
librariesById = {};
const sel = document.getElementById("libraries");
sel.innerHTML = "";
for (const lib of libs) {
librariesById[lib.id] = lib.name;
const opt = document.createElement("option");
opt.value = lib.id;
opt.textContent = lib.name + " (#" + lib.id + ")";
if (defaults.has(lib.id)) opt.selected = true;
sel.appendChild(opt);
}
} catch (err) {
setStatus("Failed to load libraries: " + err.message);
}
}
async function load() {
setStatus("Loading…");
try {
const r = await fetch("/api/matches");
const data = await r.json();
matchesData = data.matches || {};
render();
setStatus(Object.keys(matchesData).length + " entries");
} catch (err) {
setStatus("Load failed: " + err.message);
}
}
async function pollJob() {
try {
const r = await fetch("/api/status");
const s = await r.json();
const jobStatus = document.getElementById("jobStatus");
const jobLog = document.getElementById("jobLog");
if (!s.running && !s.lastFinished) {
jobStatus.textContent = "";
jobLog.hidden = true;
stopPolling();
return;
}
jobLog.hidden = false;
jobLog.textContent = (s.log || []).join("\n");
jobLog.scrollTop = jobLog.scrollHeight;
if (s.running) {
jobStatus.textContent = "Running: " + (s.label || "");
} else {
jobStatus.textContent = "Done: " + (s.label || "");
stopPolling();
load();
}
} catch (err) {
/* keep polling silently */
}
}
function startPolling() {
if (jobPollHandle) return;
jobPollHandle = setInterval(pollJob, 1000);
pollJob();
}
function stopPolling() {
if (jobPollHandle) clearInterval(jobPollHandle);
jobPollHandle = null;
}
async function startBuild() {
const libs = selectedLibraryIds();
if (libs.length === 0) {
setStatus("Pick at least one library");
return;
}
if (!confirm("Match every series in " + libs.length + " library(ies)?")) return;
setStatus("Build started");
try {
const r = await fetch("/api/build", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ libraryIds: libs }),
});
if (!r.ok) throw new Error(await r.text());
startPolling();
} catch (err) {
setStatus("Build failed: " + err.message);
}
}
async function startUpdateAll() {
const libs = selectedLibraryIds();
if (libs.length === 0) {
if (!confirm("No libraries selected — update every cached series?")) return;
} else if (!confirm("Update every cached series in " + libs.length + " library(ies)?")) {
return;
}
setStatus("Update-all started");
try {
const r = await fetch("/api/update-all", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ libraryIds: libs.length ? libs : null }),
});
if (!r.ok) throw new Error(await r.text());
startPolling();
} catch (err) {
setStatus("Update-all failed: " + err.message);
}
}
async function startSyncPersons() {
if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return;
setStatus("Person sync started");
try {
const r = await fetch("/api/persons/sync", { method: "POST" });
if (!r.ok) throw new Error(await r.text());
startPolling();
} catch (err) {
setStatus("Person sync failed: " + err.message);
}
}
document.getElementById("filter").addEventListener("input", applyFilter);
document.getElementById("libraries").addEventListener("change", applyFilter);
document.getElementById("reload").addEventListener("click", load);
document.getElementById("batchSave").addEventListener("click", batchSave);
document.getElementById("build").addEventListener("click", startBuild);
document.getElementById("updateAll").addEventListener("click", startUpdateAll);
document.getElementById("syncPersons").addEventListener("click", startSyncPersons);
for (const th of document.querySelectorAll("th.sortable")) {
th.addEventListener("click", () => {
const col = th.dataset.col;
if (currentSort.col === col) currentSort.asc = !currentSort.asc;
else { currentSort.col = col; currentSort.asc = true; }
render();
});
}
(async () => {
await loadLibraries();
await load();
// Resume polling if there's a job running from a previous session
pollJob();
})();
</script>
</body>
</html>
"""
class _JobState:
    """
    Thread-safe container for the current background job's progress.

    At most one job runs at a time. The job's log and flags are only
    touched while holding ``_lock``; ``snapshot()`` returns a consistent
    copy for the status endpoint.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._running = False
        self._label = ""
        self._log: list[str] = []
        self._last_finished_at = 0
        self._thread: "threading.Thread | None" = None

    def start(self, label: str, target, *args, **kwargs) -> bool:
        """
        Runs ``target(self, *args, **kwargs)`` on a daemon thread.

        Returns False without starting anything when a job is already
        running; otherwise resets the log, starts the thread and returns
        True. An exception escaping `target` is logged as ``FATAL: ...``.
        """
        with self._lock:
            if self._running:
                return False
            self._running = True
            self._label = label
            self._log = [f"[{time.strftime('%H:%M:%S')}] {label} started"]

        def runner():
            try:
                target(self, *args, **kwargs)
            except Exception as exc:
                self.append(f"FATAL: {exc}")
            finally:
                # Write the closing log line and clear the running flag in
                # one critical section, so a poller that observes
                # running == False always sees the complete log.
                with self._lock:
                    self._append_unlocked(
                        f"[{time.strftime('%H:%M:%S')}] finished")
                    self._last_finished_at = int(time.time())
                    self._running = False

        worker = threading.Thread(target=runner,
                                  name=f"job:{label}",
                                  daemon=True)
        self._thread = worker
        try:
            worker.start()
        except RuntimeError:
            # The thread could not be spawned: release the slot so later
            # jobs are not rejected forever.
            with self._lock:
                self._running = False
            raise
        return True

    def append(self, line: str) -> None:
        """Adds one line to the job log."""
        with self._lock:
            self._append_unlocked(line)

    def _append_unlocked(self, line: str) -> None:
        """Appends `line` and trims the log; caller must hold ``_lock``."""
        self._log.append(line)
        # Cap log length so the response stays bounded.
        if len(self._log) > 1000:
            self._log = self._log[-800:]

    def snapshot(self) -> dict:
        """Returns a copy of the job state (running, label, log, lastFinished)."""
        with self._lock:
            return {
                "running": self._running,
                "label": self._label,
                "log": list(self._log),
                "lastFinished": self._last_finished_at,
            }
class MatchesWebApp:
    """
    Flask application exposing the match cache and the background jobs
    (match build, update-all, person sync) of the light-novel metadata
    fetcher.

    All background work goes through one shared ``_JobState``, so only a
    single job can run at a time; a competing request gets HTTP 409.
    """

    def __init__(self, cache: MatchesCache, *,
                 orchestrator=None,
                 default_library_ids: "list[int] | None" = None,
                 person_perf=None,
                 host: str = "0.0.0.0",
                 port: int = 8080):
        """
        Parameters
        ----------
        cache               : MatchesCache backing the /api/matches routes.
        orchestrator        : optional object used for Kavita / MangaBaka
                              work (list_libraries, fetch_series,
                              build_matches, update_series, update_all,
                              sync_persons). Job and update routes answer
                              503 when it is None.
        default_library_ids : library ids reported to the UI as defaults.
        person_perf         : optional stats object; its ``all()`` result
                              is served by /api/perf/person and it is
                              passed to ``sync_persons``.
        host, port          : bind address of the Flask server.
        """
        self._cache = cache
        self._orchestrator = orchestrator
        self._defaults = list(default_library_ids or [])
        self._person_perf = person_perf
        self._host = host
        self._port = port
        self._job = _JobState()
        self._app = Flask(__name__)
        self._thread: "threading.Thread | None" = None
        self._register_routes()

    @property
    def app(self) -> Flask:
        """The underlying Flask application."""
        return self._app

    def start(self) -> threading.Thread:
        """
        Starts the Flask server on its own (non-daemon) thread and
        returns that thread. Calling it again while the server thread is
        alive returns the existing thread.
        """
        if self._thread is not None and self._thread.is_alive():
            return self._thread
        self._thread = threading.Thread(
            target=self._app.run,
            kwargs={"host": self._host, "port": self._port,
                    "debug": False, "use_reloader": False,
                    "threaded": True},
            name="MatchesWebApp",
            daemon=False,
        )
        self._thread.start()
        print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
              flush=True)
        return self._thread

    def wait(self) -> None:
        """Blocks until the server thread exits (no-op if never started)."""
        if self._thread is not None:
            self._thread.join()

    # ------------------------------------------------------------------
    # Routes
    # ------------------------------------------------------------------
    def _register_routes(self) -> None:
        """Registers every HTTP route on the Flask app."""
        app = self._app
        cache = self._cache

        @app.get("/")
        def index() -> Response:
            # content_type (not mimetype): Werkzeug appends its own
            # "; charset=utf-8" to text/* mimetypes, which would duplicate
            # the charset parameter in the header.
            return Response(_INDEX_HTML,
                            content_type="text/html; charset=utf-8")

        @app.get("/api/libraries")
        def api_libraries():
            if self._orchestrator is None:
                # Same shape as the normal response so clients need no
                # special case for the "no orchestrator" setup.
                return jsonify({"libraries": [],
                                "defaults": self._defaults})
            try:
                libs = self._orchestrator.list_libraries()
            except Exception as exc:
                return Response(f"libraries failed: {exc}", status=502)
            return jsonify({"libraries": libs, "defaults": self._defaults})

        @app.get("/api/matches")
        def api_list():
            # libraryIds is a comma-separated query parameter; when absent
            # or without usable ids the whole cache is returned.
            raw = request.args.get("libraryIds") or ""
            lib_ids = _int_list(raw.split(","))
            if lib_ids:
                return jsonify(cache.all_in_libraries(lib_ids))
            return jsonify(cache.all())

        @app.post("/api/matches")
        def api_upsert():
            body = request.get_json(silent=True) or {}
            title = (body.get("title") or "").strip()
            if not title:
                return Response("title is required", status=400)
            new_id_raw = body.get("mangabakaId")
            new_id = str(new_id_raw).strip() if new_id_raw is not None else ""
            if not new_id:
                return Response("mangabakaId is required", status=400)
            # Without an orchestrator the id cannot be resolved, so name
            # and image are passed to the cache as None.
            new_name: "str | None" = None
            new_image: "str | None" = None
            if self._orchestrator is not None:
                try:
                    series = self._orchestrator.fetch_series(new_id)
                except Exception as exc:
                    return Response(f"resolve failed: {exc}", status=502)
                if not series:
                    return Response(
                        f"MangaBaka has no series with id {new_id}",
                        status=404)
                new_name = series.get("title") or ""
                new_image = pick_thumbnail_url(series.get("cover")) or ""
            entry = cache.upsert(
                title,
                mangabaka_id=new_id,
                mangabaka_name=new_name,
                image_url=new_image,
            )
            return jsonify({"title": title, "entry": entry})

        @app.post("/api/matches/delete")
        def api_delete():
            body = request.get_json(silent=True) or {}
            title = (body.get("title") or "").strip()
            if not title:
                return Response("title is required", status=400)
            removed = cache.remove(title)
            return jsonify({"removed": removed, "title": title})

        @app.post("/api/build")
        def api_build():
            if self._orchestrator is None:
                return Response("no orchestrator configured", status=503)
            body = request.get_json(silent=True) or {}
            library_ids = _int_list(body.get("libraryIds"))
            if not library_ids:
                return Response("libraryIds required", status=400)
            label = f"match libraries {library_ids}"

            def task(job: _JobState, lib_ids):
                stats = self._orchestrator.build_matches(lib_ids)
                job.append(f"matched={stats.get('matched')} "
                           f"skipped={stats.get('skipped')} "
                           f"missing={stats.get('missing')} "
                           f"checked={stats.get('checked')}")

            if not self._job.start(label, task, library_ids):
                return Response("a job is already running", status=409)
            return jsonify({"started": label})

        @app.post("/api/update")
        def api_update():
            # Runs synchronously in the request, not as a background job.
            if self._orchestrator is None:
                return Response("no orchestrator configured", status=503)
            body = request.get_json(silent=True) or {}
            ksid = body.get("kavitaSeriesId")
            try:
                ksid_int = int(ksid)
            except (TypeError, ValueError):
                return Response("kavitaSeriesId required", status=400)
            try:
                res = self._orchestrator.update_series(ksid_int)
            except Exception as exc:
                return Response(f"update failed: {exc}", status=500)
            return jsonify(res)

        @app.post("/api/update-all")
        def api_update_all():
            if self._orchestrator is None:
                return Response("no orchestrator configured", status=503)
            body = request.get_json(silent=True) or {}
            raw = body.get("libraryIds")
            # A missing / null libraryIds means "every library".
            library_ids = None if raw is None else _int_list(raw)
            label = ("update all (every library)" if library_ids is None
                     else f"update all in libraries {library_ids}")

            def task(job: _JobState, lib_ids):
                summary = self._orchestrator.update_all(lib_ids)
                job.append(f"ok={summary.get('ok')} failed={summary.get('failed')}")
                for res in summary.get("results", []):
                    title = res.get("title", "?")
                    if res.get("ok"):
                        # List the fields whose status is "changed".
                        sr = res.get("series") or {}
                        flags = [k for k, v in sr.items() if v == "changed"]
                        job.append(
                            f" {title}: changed=[{', '.join(flags) or '-'}]")
                    else:
                        job.append(f" {title}: FAIL {res.get('error')}")

            if not self._job.start(label, task, library_ids):
                return Response("a job is already running", status=409)
            return jsonify({"started": label})

        @app.post("/api/persons/sync")
        def api_persons_sync():
            if self._orchestrator is None:
                return Response("no orchestrator configured", status=503)

            def task(job: _JobState):
                report = self._orchestrator.sync_persons(
                    trigger="ln", perf=self._person_perf)
                job.append(f"updated={report['updated']} "
                           f"skipped={report['skipped']} "
                           f"not_found={report['not_found']} "
                           f"conflicts={report['conflicts']}")
                for err in report.get("errors", []):
                    job.append(f" {err}")

            if not self._job.start("person sync", task):
                return Response("a job is already running", status=409)
            return jsonify({"started": "person sync"})

        @app.get("/api/status")
        def api_status():
            snap = self._job.snapshot()
            snap["defaults"] = self._defaults
            return jsonify(snap)

        @app.get("/perf")
        @app.get("/perf/<name>")
        def perf_page(name: str = "person") -> Response:
            # See index(): content_type avoids a duplicated charset.
            return Response(render_perf_page(name, _PERF_TABS),
                            content_type="text/html; charset=utf-8")

        @app.get("/api/perf/<name>")
        def api_perf(name: str):
            # Only the person dataset exists here; anything else (or a
            # missing recorder) yields an empty run list.
            stats = self._person_perf if name == "person" else None
            return jsonify(stats.all() if stats is not None else {"runs": []})
+174
View File
@@ -0,0 +1,174 @@
"""
relationship_sync.py
====================
Mirrors MangaBaka's ``relationships_v2`` graph into Kavita:
1. Every related MangaBaka series that is *also* present in Kavita
(resolved via MatchesCache) is added to a shared Kavita collection
so the whole franchise can be browsed in one place.
2. Series-level relationships (prequel / sequel / spin-off / …) are
written via ``POST /api/Series/update-related`` so navigating
between entries surfaces the right neighbours.
Only relationships where both endpoints exist in Kavita are written.
Relationships pointing to series that have not been imported yet are
silently skipped (the next match run picks them up).
"""
from __future__ import annotations
from KavitaClient import KavitaClient
from MatchesCache import MatchesCache
# MangaBaka relation_type -> Kavita UpdateRelatedSeriesDto bucket.
# Relation types missing from this map are filed under "others".
_RELATION_MAP = {
    "prequel": "prequels",
    "sequel": "sequels",
    "side_story": "sideStories",
    "spin_off": "spinOffs",
    "spinoff": "spinOffs",
    "alternative_version": "alternativeVersions",
    "alternative_story": "alternativeVersions",
    "alternative_setting": "alternativeSettings",
    "adapted_from": "adaptations",
    "adaptation": "adaptations",
    "doujinshi": "doujinshis",
    "parent": "contains",  # the parent "contains" the child
}

# Every bucket sent in the update-related payload; buckets without any
# resolved series are sent as empty lists.
_ALL_BUCKETS = (
    "adaptations", "characters", "contains", "others",
    "prequels", "sequels", "sideStories", "spinOffs",
    "alternativeSettings", "alternativeVersions", "doujinshis",
    "editions", "annuals",
)


class RelationshipSync:
    """Writes MangaBaka relationships into Kavita relations and collections."""

    def __init__(self, client: "KavitaClient", cache: "MatchesCache", *,
                 builder=None):
        """
        Parameters
        ----------
        client : KavitaClient for collection / relation writes.
        cache : MatchesCache to resolve mangabakaId -> kavitaSeriesId.
        builder : optional LightNovelMetadataBuilder used to fetch parent
        series titles when picking the collection name.
        """
        self._client = client
        self._cache = cache
        self._builder = builder

    # ------------------------------------------------------------------
    # Public
    # ------------------------------------------------------------------
    def sync(self, kavita_series_id: int, built: dict) -> dict:
        """
        Applies the relationship and collection links described by
        `built["relationships"]` (raw MangaBaka relationships_v2 list)
        for the given Kavita series.

        Returns a status dict with:
          relations      : bucket -> Kavita series ids that were written,
                           or {"error": ...} when the write failed
          collection     : collection name written, "error: ..." on
                           failure, or None when no collection was written
          missing_series : MangaBaka ids that have no Kavita series yet
        """
        report: dict = {"relations": {}, "collection": None,
                        "missing_series": []}
        relationships = built.get("relationships") or []
        if not relationships:
            return report

        # Resolve mangabakaId -> kavitaSeriesId for every related entry.
        related: dict[str, list[int]] = {b: [] for b in _ALL_BUCKETS}
        all_kavita_ids: set[int] = set()
        for rel in relationships:
            mb_id = rel.get("to_series_id")
            if mb_id is None:
                continue
            hit = self._cache.get_by_mangabaka_id(mb_id)
            if not hit:
                report["missing_series"].append(int(mb_id))
                continue
            _title, entry = hit
            ksid = int(entry.get("kavitaSeriesId") or 0)
            if not ksid:
                # Cached match without a Kavita series id yet.
                report["missing_series"].append(int(mb_id))
                continue
            bucket = _RELATION_MAP.get(
                (rel.get("relation_type") or "").lower(), "others")
            if ksid not in related[bucket]:
                related[bucket].append(ksid)
            all_kavita_ids.add(ksid)

        # ----- Relationships ------------------------------------------
        if any(related.values()):
            payload = {"seriesId": int(kavita_series_id)}
            for bucket in _ALL_BUCKETS:
                payload[bucket] = related[bucket]
            try:
                self._client.update_related(payload)
                report["relations"] = {k: v for k, v in related.items() if v}
            except Exception as exc:
                report["relations"] = {"error": str(exc)}

        # ----- Collection ---------------------------------------------
        # Include the current series in the collection so it shows up too.
        all_kavita_ids.add(int(kavita_series_id))
        if len(all_kavita_ids) >= 2:
            collection_name = self._collection_name(built, relationships)
            # Without a usable name there is nothing sensible to create;
            # skip rather than writing a collection with an empty title.
            if (collection_name or "").strip():
                collection_id = self._find_collection_id(collection_name)
                try:
                    self._client.add_series_to_collection(
                        collection_id=collection_id,
                        title=collection_name,
                        series_ids=sorted(all_kavita_ids),
                    )
                    report["collection"] = collection_name
                except Exception as exc:
                    report["collection"] = f"error: {exc}"
        return report

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------
    def _find_collection_id(self, name: str) -> int:
        """Returns the id of an existing collection by title, or 0 to create."""
        if not name:
            return 0
        target = name.strip().lower()
        try:
            for col in self._client.list_collections():
                if (col.get("title") or "").strip().lower() == target:
                    try:
                        return int(col.get("id") or 0)
                    except (TypeError, ValueError):
                        return 0
        except Exception:
            # Best effort: when the lookup fails, fall back to "create".
            pass
        return 0

    def _collection_name(self, built: dict,
                         relationships: list[dict]) -> str:
        """
        Picks the collection name. Uses the parent series title from
        MangaBaka if the current series has one; otherwise falls back to
        the current series' own title (``built["mangabakaTitle"]``), or
        "" when that is missing too.
        """
        for rel in relationships:
            if (rel.get("relation_type") or "").lower() != "parent":
                continue
            parent_id = rel.get("to_series_id")
            if parent_id is None:
                # Nothing to look up for a parent link without a target.
                continue
            if self._builder is not None:
                try:
                    parent_md = self._builder.fetch_series(parent_id)
                    if parent_md and parent_md.get("title"):
                        return parent_md["title"]
                except Exception:
                    # Best effort: fall through to the cache lookup.
                    pass
            # Even without a builder, the cache may know the parent.
            hit = self._cache.get_by_mangabaka_id(parent_id)
            if hit:
                _title, entry = hit
                name = entry.get("mangabakaName")
                if name:
                    return name
        return built.get("mangabakaTitle") or ""
@@ -37,19 +37,27 @@ Data source notes
from __future__ import annotations
import difflib
import re
import sys
import xml.etree.ElementTree as ET
from contextlib import contextmanager
from pathlib import Path
import requests
# Shared modules live one level up (src/); needed when a module in this
# folder is run directly as a script (the entry points set the path).
if __name__ == "__main__":
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver, _pick_image_url
from MALResolver import MALResolver
from AniListResolver import AniListResolver
from MatchesCache import MatchesCache
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CoverCache import CoverCache, _IMAGE_EXTS
from TextUtils import person_name_with_id
try:
from PIL import Image
@@ -58,11 +66,19 @@ except ImportError:
_HAS_PIL = False
@contextmanager
def _no_measure():
    """Context manager that does nothing; used when no perf recorder is attached."""
    yield None
# Sentinel marking a per-chapter memo slot as "not computed yet".
_UNSET = object()
# --------------------------------------------------------------------------
# Constants
# --------------------------------------------------------------------------
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
# Series types accepted by the MangaBaka search endpoint. Light/web novels
# are filtered out because this pipeline only handles image-based manga.
# Passed to `requests` as a list so each value becomes its own `&type=...`
@@ -179,7 +195,8 @@ class ComicInfoBuilder:
works_resolver: "MangaBakaWorksResolver | None" = None,
mal_resolver: "MALResolver | None" = None,
al_resolver: "AniListResolver | None" = None,
matches_cache: "MatchesCache | None" = None):
matches_cache: "MatchesCache | None" = None,
cover_cache: "CoverCache | None" = None):
if not manga_title or not str(manga_title).strip():
raise ValueError("manga_title must not be empty.")
@@ -210,11 +227,24 @@ class ComicInfoBuilder:
self._al_resolver = al_resolver or AniListResolver(
request_timeout=request_timeout)
self._matches_cache = matches_cache
self._cover_cache = cover_cache or _default_cover_cache()
# Optional performance recorder (duck-typed: any object with a
# .measure(name) context manager). The mover sets this per chapter;
# when None, _measure() is a no-op so the builder stays decoupled
# from PerfStats and works standalone (e.g. the cover updater).
self.perf = None
self._metadata: "dict | None" = None
self._pages: list[dict] = []
self._cover_path: "Path | None" = None
self._suwayomi_data: dict = {}
# Per-chapter memo for _determine_volume (resolved up to 3x/chapter
# otherwise: cover download, explicit volume step, XML build).
self._volume_memo = _UNSET
# Per-series cache for full series fetches by id (parent series for
# SeriesGroup, merged-series redirects) — reused across all chapters.
self._series_by_id_cache: dict[str, dict] = {}
# ----- Repr -----------------------------------------------------------
def __repr__(self) -> str:
@@ -254,6 +284,13 @@ class ComicInfoBuilder:
self._pages = []
self._cover_path = None
self._suwayomi_data = {}
self._volume_memo = _UNSET
def _measure(self, name: str):
    """
    Returns the context manager used to time the step `name`.

    When a recorder is attached (`self.perf`), its `measure(name)` result
    is returned; otherwise the `_no_measure()` fallback is returned so
    callers can always write `with self._measure(...)`.
    """
    recorder = self.perf
    if recorder is None:
        return _no_measure()
    return recorder.measure(name)
# ======================================================================
# Public XML functions
@@ -298,11 +335,13 @@ class ComicInfoBuilder:
if not folder.is_dir():
raise NotADirectoryError(f"Folder not found: {folder}")
self._suwayomi_data = self._read_existing_comicinfo(folder)
with self._measure("read_comicinfo"):
self._suwayomi_data = self._read_existing_comicinfo(folder)
self._cover_path = None
if download_cover:
self._cover_path = self._download_cover(folder, cover_filename)
with self._measure("cover"):
self._cover_path = self._download_cover(folder, cover_filename)
cover_resolved = self._cover_path.resolve() if self._cover_path else None
story_images: list[Path] = []
@@ -322,20 +361,23 @@ class ComicInfoBuilder:
ordered.extend((img, "Story") for img in story_images)
self._pages = []
for index, (img_path, page_type) in enumerate(ordered):
width, height = self._image_dimensions(img_path)
try:
size = img_path.stat().st_size
except OSError:
size = None
self._pages.append({
"image": index,
"type": page_type,
"width": width,
"height": height,
"size": size,
"double": bool(width and height and width > height),
})
# Probing every page for its pixel dimensions reads each file — on a
# network share this is often the dominant per-chapter cost.
with self._measure("image_dimensions"):
for index, (img_path, page_type) in enumerate(ordered):
width, height = self._image_dimensions(img_path)
try:
size = img_path.stat().st_size
except OSError:
size = None
self._pages.append({
"image": index,
"type": page_type,
"width": width,
"height": height,
"size": size,
"double": bool(width and height and width > height),
})
return {
"page_count": len(self._pages),
@@ -406,12 +448,20 @@ class ComicInfoBuilder:
return series
def _fetch_series_by_id(self, series_id) -> dict:
    """
    Returns the series record for `series_id` from the series endpoint.

    Results are memoised in `self._series_by_id_cache`, keyed by the
    stringified id, so repeated lookups of the same id (e.g. the parent
    series resolved for every chapter) cost a single HTTP request per
    builder.

    Raises RuntimeError when the response carries no "data" payload;
    HTTP errors propagate from `raise_for_status()`.
    """
    cache_key = str(series_id)
    hit = self._series_by_id_cache.get(cache_key)
    if hit is not None:
        return hit

    response = self._session.get(
        f"{self.api_base_url}/series/{series_id}",
        timeout=self.request_timeout)
    response.raise_for_status()

    payload = response.json().get("data")
    if not payload:
        # Failed lookups are deliberately not cached.
        raise RuntimeError(f"Series with ID {series_id} not found.")

    self._series_by_id_cache[cache_key] = payload
    return payload
# ======================================================================
@@ -483,9 +533,19 @@ class ComicInfoBuilder:
add("Tags", ", ".join(_format_term(t) for t in (md.get("tags") or [])))
# ----- Characters — MAL first, AniList fallback ---------------------
characters = self._mal_resolver.get_characters(mal_id)
if not characters and al_id:
characters = self._al_resolver.get_characters(al_id)
# Names are disambiguated with the tracker *character* id
# ("Rem (MAL 118737)") so same-named characters from different
# series stay separate Kavita person records. The format is shared
# with the light-novel updater — see TextUtils.person_name_with_id.
char_entries = self._mal_resolver.get_characters_detailed(mal_id)
if not char_entries and al_id:
char_entries = self._al_resolver.get_characters_detailed(al_id)
characters = [
person_name_with_id(e.get("name"),
mal_id=e.get("mal_id"),
al_id=e.get("al_id"))
for e in char_entries if (e.get("name") or "").strip()
]
add("Characters", ", ".join(characters) if characters else None)
# ----- Web links ----------------------------------------------------
@@ -537,6 +597,18 @@ class ComicInfoBuilder:
# Volume determination
# ======================================================================
def _determine_volume(self) -> "str | None":
    """
    Returns the volume for the current chapter, computing it at most once.

    The first call delegates to `_resolve_volume()` and stores the result
    in `self._volume_memo`; later calls return the stored value (which may
    legitimately be None — hence the `_UNSET` sentinel rather than a None
    check). The memo is reset elsewhere by assigning `_UNSET` again.
    """
    memo = self._volume_memo
    if memo is _UNSET:
        memo = self._volume_memo = self._resolve_volume()
    return memo
def _resolve_volume(self) -> "str | None":
"""
Resolves the volume for the current chapter via MangaDex.
Falls back to estimation when the chapter is absent from MangaDex.
@@ -580,11 +652,13 @@ class ComicInfoBuilder:
# ======================================================================
def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None":
"""
Downloads the cover for the current chapter/volume.
Fetches the cover for the current chapter/volume and writes it into
`folder`.
If a volume is known and a volume-specific cover exists in MangaBaka
works, that cover is used. Otherwise the series default cover is
downloaded (raw variant preferred).
If a volume is known and a volume-specific cover exists in MangaBaka,
that cover is used; otherwise the series default cover. The image
itself comes from the CoverCache, so a cover shared by many chapters
is downloaded only once.
"""
md = self._get_metadata()
volume = self._determine_volume()
@@ -602,18 +676,13 @@ class ComicInfoBuilder:
if not cover_url:
cover_url = _pick_cover_url(md.get("cover"))
if not cover_url:
fetched = self._cover_cache.get(cover_url) if cover_url else None
if not fetched:
return None
try:
resp = self._session.get(cover_url, timeout=self.request_timeout)
resp.raise_for_status()
except requests.RequestException:
return None
ext = _guess_extension(cover_url, resp.headers.get("Content-Type", ""))
data, ext = fetched
target = folder / f"{cover_filename}{ext}"
target.write_bytes(resp.content)
target.write_bytes(data)
return target
# ======================================================================
@@ -656,6 +725,41 @@ class ComicInfoBuilder:
"manhua": ("zh-latn",),
}
@staticmethod
def _pick_best_title(titles, language_codes: tuple,
                     prefer_trait: "str | None" = None) -> "str | None":
    """
    Selects the best-ranked title from a `titles` list, restricted to
    entries whose language is one of `language_codes`.

    Ranking: an entry earns 4 points for carrying `prefer_trait`, 2 for
    the "official" trait and 1 when flagged `is_primary`. The earliest
    entry wins a tie. Non-dict entries, entries in other languages and
    entries without a title are ignored. Returns None when `titles` is
    not a list or nothing qualifies.
    """
    if not isinstance(titles, list):
        return None

    winner: "str | None" = None
    winner_points = -1
    for candidate in titles:
        if not isinstance(candidate, dict):
            continue
        code = (candidate.get("language") or candidate.get("lang") or "").lower()
        if code not in language_codes:
            continue
        name = candidate.get("title")
        if not name:
            continue

        tags = candidate.get("traits") or []
        points = sum((
            4 if prefer_trait and prefer_trait in tags else 0,
            2 if "official" in tags else 0,
            1 if candidate.get("is_primary") else 0,
        ))
        # Strict comparison keeps the first-seen entry on equal points.
        if points > winner_points:
            winner, winner_points = name, points
    return winner
@classmethod
def _romanized_for_native(cls, md: dict) -> "str | None":
"""
@@ -686,30 +790,7 @@ class ComicInfoBuilder:
return None
titles = md.get("titles") or md.get("alt_titles") or []
if not isinstance(titles, list):
return None
best_score = -1
best_title: "str | None" = None
for entry in titles:
if not isinstance(entry, dict):
continue
lang = (entry.get("language") or entry.get("lang") or "").lower()
if lang not in langs:
continue
title = entry.get("title")
if not title:
continue
traits = entry.get("traits") or []
score = 0
if "official" in traits:
score += 2
if entry.get("is_primary"):
score += 1
if score > best_score:
best_score = score
best_title = title
return best_title
return cls._pick_best_title(titles, langs)
def _get_sort_title(self, md: dict) -> "str | None":
"""
@@ -745,31 +826,7 @@ class ComicInfoBuilder:
def pick(language_codes: tuple, prefer_trait: "str | None" = None
) -> "str | None":
"""Picks the best title entry for any of the given language codes."""
if not isinstance(titles, list):
return None
best_score = -1
best_title: "str | None" = None
for entry in titles:
if not isinstance(entry, dict):
continue
lang = (entry.get("language") or entry.get("lang") or "").lower()
if lang not in language_codes:
continue
title = entry.get("title")
if not title:
continue
traits = entry.get("traits") or []
score = 0
if prefer_trait and prefer_trait in traits:
score += 4
if "official" in traits:
score += 2
if entry.get("is_primary"):
score += 1
if score > best_score:
best_score, best_title = score, title
return best_title
return self._pick_best_title(titles, language_codes, prefer_trait)
result: dict[str, str] = {}
@@ -1080,6 +1137,18 @@ class ComicInfoBuilder:
# generic image-block picker; _pick_cover_url is kept for backward compat.
_pick_cover_url = _pick_image_url
# Shared fallback CoverCache for builders constructed without an explicit
# one (temporary directory, removed at process exit). Created lazily so
# importing this module never touches the filesystem.
_shared_cover_cache: "CoverCache | None" = None
def _default_cover_cache() -> CoverCache:
    """
    Returns the module-wide fallback CoverCache.

    The instance is created with default arguments on first use and stored
    in `_shared_cover_cache`, so every later call returns the same object.
    """
    global _shared_cover_cache
    cache = _shared_cover_cache
    if cache is None:
        cache = _shared_cover_cache = CoverCache()
    return cache
def _pick_thumbnail_url(cover) -> "str | None":
"""
@@ -1113,17 +1182,6 @@ def _pick_thumbnail_url(cover) -> "str | None":
return _pick_cover_url(cover)
def _guess_extension(url: str, content_type: str) -> str:
    """
    Chooses a file extension for a downloaded image.

    The URL's own suffix (query string removed, lower-cased) is used when
    it is one of `_IMAGE_EXTS`. Otherwise the Content-Type is searched for
    "png", "webp" and "gif" in that order; anything else yields ".jpg".
    """
    suffix = Path(url.split("?")[0]).suffix.lower()
    if suffix in _IMAGE_EXTS:
        return suffix

    mime = (content_type or "").lower()
    for marker in ("png", "webp", "gif"):
        if marker in mime:
            return f".{marker}"
    return ".jpg"
# --------------------------------------------------------------------------
# Usage example
# --------------------------------------------------------------------------
@@ -44,7 +44,7 @@ Dependencies
from __future__ import annotations
import io
import threading
import sys
import xml.etree.ElementTree as ET
import zipfile
from datetime import datetime
@@ -52,7 +52,12 @@ from pathlib import Path
import requests
from ComicInfoBuilder import (ComicInfoBuilder, _guess_extension, _IMAGE_EXTS)
# Shared modules live one level up (src/); needed when a module in this
# folder is run directly as a script (the entry points set the path).
if __name__ == "__main__":
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from ComicInfoBuilder import ComicInfoBuilder
from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver
@@ -61,7 +66,8 @@ from MatchesCache import MatchesCache
from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
_sanitize_dirname, _normalise_volume_value)
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CronSchedule import CronSchedule
from PerfStats import PerfStats
from CoverCache import CoverCache, _IMAGE_EXTS
try:
from PIL import Image
@@ -129,10 +135,12 @@ class KavitaVolumeCoverUpdater:
request_timeout : HTTP timeout in seconds.
log_path : File that receives one line per updated chapter.
Default: <kavita_path>/volume_updater.log
schedule : Cron expression (5 fields) defining when scans run,
e.g. "0 19 * * 1,4" = 19:00 every Monday and
Thursday. Evaluated in local time — set the TZ env
var inside Docker. Default: "0 19 * * 1,4".
cover_cache_dir : Directory for the persistent cover cache. None ->
temporary cache, deleted at process exit.
perf_stats : Optional PerfStats instance for per-step timing.
Scheduling lives outside this class (see CronRunner); call update_all()
on whatever cadence you like.
"""
def __init__(self,
@@ -143,7 +151,8 @@ class KavitaVolumeCoverUpdater:
request_timeout: int = 30,
api_base_url: str = "https://api.mangabaka.dev/v1",
log_path=None,
schedule: str = "0 19 * * 1,4"):
cover_cache_dir=None,
perf_stats: "PerfStats | None" = None):
self._dst = Path(kavita_path)
self._matches_cache = matches_cache
self._language = language
@@ -151,7 +160,7 @@ class KavitaVolumeCoverUpdater:
self._api_base_url = api_base_url.rstrip("/")
self._log_path = (Path(log_path) if log_path
else self._dst / "volume_updater.log")
self._cron = CronSchedule(schedule)
self._perf = perf_stats or PerfStats(None)
session = requests.Session()
session.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0")
@@ -165,51 +174,8 @@ class KavitaVolumeCoverUpdater:
self._works_resolver = MangaBakaWorksResolver(
api_base_url=api_base_url,
request_timeout=request_timeout, session=session)
self._stop = threading.Event()
self._thread: "threading.Thread | None" = None
# ------------------------------------------------------------------
# Cron API (mirrors SuwayomiFolderWatcher)
# ------------------------------------------------------------------
def start(self) -> None:
    """
    Launches the periodic scan thread without blocking the caller.

    Does nothing when a previously started thread is still alive.
    Otherwise the stop event is cleared and a daemon thread running
    `_loop` is started, followed by a log line naming the scanned path
    and the cron expression.
    """
    running = self._thread
    if running is not None and running.is_alive():
        return

    self._stop.clear()
    worker = threading.Thread(
        target=self._loop, name="KavitaVolumeCoverUpdater", daemon=True)
    self._thread = worker
    worker.start()
    print(f"[{_now()}] [updater] scanning {self._dst} "
          f"on cron '{self._cron.expression}'", flush=True)
def stop(self) -> None:
    """
    Signals the scan thread to stop and waits up to 10 seconds for it.

    Setting the stop event lets a scan that is already running finish its
    current series before the loop exits.
    """
    self._stop.set()
    worker = self._thread
    if worker is None:
        return
    worker.join(timeout=10)
def wait(self) -> None:
    """Parks the calling thread until the stop event is set (see stop())."""
    stop_event = self._stop
    stop_event.wait()
def _loop(self) -> None:
    """
    Body of the scan thread: sleep until the next cron slot, then scan.

    Each iteration asks the cron schedule for the next run time, logs it,
    and waits on the stop event for the remaining seconds. If the event
    fires during the wait the loop ends; otherwise `update_all()` runs and
    its summary is logged. Any exception from a scan is logged and the
    loop carries on with the next slot.
    """
    while not self._stop.is_set():
        due = self._cron.next_after(datetime.now())
        delay = max(0.0, (due - datetime.now()).total_seconds())
        print(f"[{_now()}] [updater] next scheduled scan: "
              f"{due.isoformat(timespec='minutes')}", flush=True)

        # Event.wait returns True when stop() was called while sleeping.
        if self._stop.wait(delay):
            return

        try:
            summary = self.update_all()
            print(f"[{_now()}] [updater] scan done: "
                  f"{summary['series_updated']} series / "
                  f"{summary['chapters_updated']} chapters updated",
                  flush=True)
        except Exception as exc:
            print(f"[{_now()}] [updater] scan ERROR: {exc}", flush=True)
self._cover_cache = CoverCache(
cover_cache_dir, session=session, request_timeout=request_timeout)
# ------------------------------------------------------------------
# Public scan API
@@ -225,23 +191,31 @@ class KavitaVolumeCoverUpdater:
print(f"[updater] kavita path missing: {self._dst}", flush=True)
return summary
for series_dir in sorted(self._dst.iterdir()):
if self._stop.is_set():
break
if not series_dir.is_dir():
continue
summary["series_scanned"] += 1
try:
updated = self.update_series(series_dir)
except Exception as exc:
print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
continue
if updated:
summary["series_updated"] += 1
summary["chapters_updated"] += updated
# The whole point of a scan is detecting volume assignments added
# since the previous run — start from fresh API data, not the
# process-lifetime resolver caches.
self._vol_resolver.clear_cache()
self._works_resolver.clear_cache()
run = self._perf.begin_run()
try:
for series_dir in sorted(self._dst.iterdir()):
if not series_dir.is_dir():
continue
summary["series_scanned"] += 1
try:
updated = self.update_series(series_dir, run)
except Exception as exc:
print(f"[updater] {series_dir.name}: ERROR {exc}", flush=True)
continue
if updated:
summary["series_updated"] += 1
summary["chapters_updated"] += updated
finally:
run.finish()
return summary
def update_series(self, series_dir: Path) -> int:
def update_series(self, series_dir: Path, run=None) -> int:
"""
Updates one series folder. Returns the number of updated chapters.
@@ -277,24 +251,29 @@ class KavitaVolumeCoverUpdater:
mal_resolver=self._mal,
al_resolver=self._al,
matches_cache=self._matches_cache,
cover_cache=self._cover_cache,
)
md = builder.fetch_metadata()
series_id = str(md.get("id") or "")
series_rec = (run or self._perf.begin_run()).begin_item(series_dir.name)
# Resolve volumes for all null-volume chapters first (API only).
updates: dict[str, dict] = {} # num -> {"volume": str, "cover": tuple|None}
for num in sorted(missing, key=_chapter_sort_value):
builder.chapter = num
try:
volume = builder._determine_volume()
except Exception:
volume = None
if not volume:
continue
updates[num] = {"volume": volume,
"cover": self._fetch_cover(series_id, volume)}
with series_rec.measure("resolve_volumes"):
for num in sorted(missing, key=_chapter_sort_value):
builder.chapter = num
try:
volume = builder._determine_volume()
except Exception:
volume = None
if not volume:
continue
updates[num] = {"volume": volume,
"cover": self._fetch_cover(series_id, volume)}
if not updates:
series_rec.finish(ok=True)
return 0
first = min(chapters, key=_chapter_sort_value)
@@ -309,10 +288,13 @@ class KavitaVolumeCoverUpdater:
continue
# The first chapter gets a full metadata rebuild (Kavita reads
# series metadata from it); other chapters only a volume edit.
ok, cover_swapped = self._apply_update(
cbz, builder, num,
volume=up["volume"], cover=up["cover"],
full_rebuild=(num == first))
chap_rec = series_rec.begin_item(num)
with chap_rec.measure("archive_rewrite"):
ok, cover_swapped = self._apply_update(
cbz, builder, num,
volume=up["volume"], cover=up["cover"],
full_rebuild=(num == first))
chap_rec.finish(ok=ok)
if not ok:
continue
entry["volume"] = _normalise_volume_value(up["volume"])
@@ -327,15 +309,19 @@ class KavitaVolumeCoverUpdater:
first_entry = chapters.get(first) or {}
cbz = series_dir / (first_entry.get("archiveName") or "")
if first_entry.get("archiveName") and cbz.is_file():
ok, _ = self._apply_update(
cbz, builder, first,
volume=None, cover=None, full_rebuild=True)
chap_rec = series_rec.begin_item(f"{first} (refresh)")
with chap_rec.measure("archive_rewrite"):
ok, _ = self._apply_update(
cbz, builder, first,
volume=None, cover=None, full_rebuild=True)
chap_rec.finish(ok=ok)
if ok:
self._log(f"{series_dir.name} | chapter {first} | "
f"first-chapter metadata refreshed | {cbz.name}")
if updated:
_save_chapter_index(series_dir, index)
series_rec.finish(ok=True)
return updated
# ------------------------------------------------------------------
@@ -367,7 +353,8 @@ class KavitaVolumeCoverUpdater:
# ------------------------------------------------------------------
def _fetch_cover(self, series_id: str, volume) -> "tuple[str, bytes] | None":
"""
Downloads the MangaBaka volume cover.
Fetches the MangaBaka volume cover via the CoverCache (one download
per unique URL, even across chapters sharing a volume).
Returns ("000<ext>", bytes) or None when no cover is available.
"""
try:
@@ -376,13 +363,11 @@ class KavitaVolumeCoverUpdater:
url = None
if not url:
return None
try:
resp = self._session.get(url, timeout=self._timeout)
resp.raise_for_status()
except requests.RequestException:
fetched = self._cover_cache.get(url)
if not fetched:
return None
ext = _guess_extension(url, resp.headers.get("Content-Type", ""))
return (f"000{ext}", resp.content)
data, ext = fetched
return (f"000{ext}", data)
# ------------------------------------------------------------------
# Archive update (single read + single write per archive)
@@ -527,10 +512,7 @@ if __name__ == "__main__":
matches_cache=MatchesCache(MATCHES_PATH),
)
# One-shot scan (no cron thread):
# One-shot scan. Scheduling is handled externally via CronRunner
# (see main_manga.py).
summary = updater.update_all()
print(f"\n[updater] {summary}")
# Or run on the cron schedule (default: 19:00 every Mon + Thu):
# updater.start()
# updater.wait()
@@ -43,7 +43,6 @@ Dependencies
from __future__ import annotations
import difflib
import re
import requests
@@ -94,6 +93,9 @@ class MangaDexVolumeResolver:
self._cache: dict[str, dict] = {}
# Cache: manga_id -> {relation_type: [title, ...]}
self._relations_cache: dict[str, dict] = {}
# Cache: title_lower -> manga_id (or None) — avoids repeating the
# MangaDex search for every chapter of the same series.
self._id_cache: dict[str, "str | None"] = {}
# ----------------------------------------------------------------------
# Locate the manga ID
@@ -106,15 +108,25 @@ class MangaDexVolumeResolver:
if not title or not title.strip():
return None
resp = self._session.get(
f"{self.base_url}/manga",
params={"title": title, "limit": 5,
"contentRating[]": ["safe", "suggestive",
"erotica", "pornographic"]},
timeout=self.request_timeout)
resp.raise_for_status()
results = resp.json().get("data") or []
key = title.strip().lower()
if key in self._id_cache:
return self._id_cache[key]
try:
resp = self._session.get(
f"{self.base_url}/manga",
params={"title": title, "limit": 5,
"contentRating[]": ["safe", "suggestive",
"erotica", "pornographic"]},
timeout=self.request_timeout)
resp.raise_for_status()
results = resp.json().get("data") or []
except requests.RequestException:
# Don't cache transient failures — allow a retry next time.
return None
if not results:
self._id_cache[key] = None
return None
def score(entry) -> float:
@@ -131,7 +143,9 @@ class MangaDexVolumeResolver:
return best
results.sort(key=score, reverse=True)
return results[0].get("id")
manga_id = results[0].get("id")
self._id_cache[key] = manga_id
return manga_id
# ----------------------------------------------------------------------
# Main function: retrieve and return volume / chapter data
@@ -30,6 +30,10 @@ from flask import Flask, jsonify, request, Response
from MatchesCache import MatchesCache
from ComicInfoBuilder import _pick_thumbnail_url
from PerfWebPage import render_perf_page
# Cross-link tabs shown on every perf page in the manga container.
_PERF_TABS = [("move", "move"), ("volume/cover", "volume"), ("persons", "person")]
_INDEX_HTML = """<!doctype html>
@@ -71,6 +75,8 @@ _INDEX_HTML = """<!doctype html>
<button id="batchSave" class="primary">Save dirty (0)</button>
<button id="build">Build all (rescan)</button>
<button id="move">Start move</button>
<button id="syncPersons">Sync persons</button>
<a href="/perf/move" style="margin-left:.5rem;color:#60a5fa;">Performance </a>
<span class="status" id="status"></span>
</div>
@@ -341,6 +347,23 @@ document.getElementById("move").addEventListener("click", async () => {
btn.disabled = false;
}
});
document.getElementById("syncPersons").addEventListener("click", async () => {
if (!confirm("Sync all Kavita persons against MAL/AniList? May take a while.")) return;
const btn = document.getElementById("syncPersons");
btn.disabled = true;
setStatus("Syncing persons… (running on the server)");
try {
const r = await fetch("/api/persons/sync", { method: "POST" });
if (!r.ok) throw new Error(await r.text());
const d = await r.json();
setStatus("Persons: " + d.updated + " updated, " + d.skipped + " skipped, "
+ d.not_found + " not found, " + d.conflicts + " conflicts");
} catch (err) {
setStatus("Person sync failed: " + err.message);
} finally {
btn.disabled = false;
}
});
for (const th of document.querySelectorAll("th.sortable")) {
th.addEventListener("click", () => {
const col = th.dataset.col;
@@ -357,6 +380,8 @@ load();
"""
class MatchesWebApp:
"""
Flask app exposing the MatchesCache. `mover` is required when you want
@@ -367,14 +392,22 @@ class MatchesWebApp:
def __init__(self, cache: MatchesCache, *,
mover=None,
person_updater=None,
person_trigger: str = "web",
perf_stats=None,
host: str = "0.0.0.0",
port: int = 8080):
self._cache = cache
self._mover = mover
self._person_updater = person_updater
self._person_trigger = person_trigger
# perf_stats: dict {name -> PerfStats}, e.g. {"move", "volume", "person"}.
self._perf = perf_stats or {}
self._host = host
self._port = port
self._build_lock = threading.Lock()
self._move_lock = threading.Lock()
self._person_lock = threading.Lock()
self._app = Flask(__name__)
self._thread: "threading.Thread | None" = None
self._register_routes()
@@ -498,3 +531,31 @@ class MatchesWebApp:
finally:
self._move_lock.release()
return jsonify({"results": results})
@app.post("/api/persons/sync")
def api_persons_sync():
    """
    Runs a full person sync on request and returns its report as JSON.

    Responses: 503 when no person updater was configured, 409 when a sync
    is already in progress (the lock is taken without blocking), 500 with
    the exception text when the sync raises. The lock is always released
    before the response is built.
    """
    updater = self._person_updater
    if updater is None:
        return Response("no person updater configured", status=503)

    lock = self._person_lock
    if not lock.acquire(blocking=False):
        return Response("person sync already running", status=409)
    try:
        report = updater.update_all_persons(
            trigger=self._person_trigger,
            perf=self._perf.get("person"))
    except Exception as exc:
        return Response(f"person sync failed: {exc}", status=500)
    finally:
        lock.release()
    return jsonify(report)
# Perf pages: /perf (move) + /perf/<name> for the updaters.
@app.get("/perf")
@app.get("/perf/<name>")
def perf_page(name: str = "move") -> Response:
    """Serves the HTML perf page for `name` ("move" when omitted)."""
    html = render_perf_page(name, _PERF_TABS)
    return Response(html, mimetype="text/html; charset=utf-8")
@app.get("/api/perf/<name>")
def api_perf(name: str):
    """
    Returns the recorded perf data for `name` as JSON.

    Unknown names yield an empty run list instead of an error.
    """
    stats = self._perf.get(name)
    if stats is None:
        return jsonify({"runs": []})
    return jsonify(stats.all())
@@ -29,7 +29,6 @@ from __future__ import annotations
import queue
import threading
import time
from datetime import datetime
from pathlib import Path
@@ -46,23 +46,30 @@ from __future__ import annotations
import json
import re
import shutil
import sys
import xml.etree.ElementTree as ET
import zipfile
from pathlib import Path
import requests
from ComicInfoBuilder import (ComicInfoBuilder, _pick_cover_url, _pick_thumbnail_url, _SEARCH_TYPES)
# Shared modules live one level up (src/); needed when a module in this
# folder is run directly as a script (the entry points set the path).
if __name__ == "__main__":
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from ComicInfoBuilder import (ComicInfoBuilder, _pick_thumbnail_url,
_SEARCH_TYPES, _natural_key)
from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver
from AniListResolver import AniListResolver
from KavitaPersonUpdater import KavitaPersonUpdater
from MatchesCache import MatchesCache
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
from CoverCache import CoverCache, _IMAGE_EXTS
from PerfStats import PerfStats
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
# JSON file written into each Kavita series folder, listing every chapter
@@ -133,11 +140,6 @@ _SOURCE_LABEL_RE = re.compile(
_WIN_ILLEGAL_RE = re.compile(r'[\\/*?"<>|]')
def _natural_key(name: str) -> list:
    """
    Builds a sort key that orders embedded numbers numerically.

    The name is split on digit runs; digit chunks become ints and all
    other chunks are lower-cased, so "Chapter 2" sorts before "Chapter 10".
    """
    key = []
    for chunk in re.split(r"(\d+)", name):
        key.append(int(chunk) if chunk.isdigit() else chunk.lower())
    return key
def _sanitize_dirname(name: str) -> str:
"""
Makes a string safe to use as a Windows (or SMB) directory name.
@@ -192,34 +194,6 @@ def _clean_suwayomi_title(title: str) -> str:
return _SOURCE_LABEL_RE.sub("", title).strip()
def _mal_id_from_metadata(md: dict) -> "int | None":
    """
    Reads the MyAnimeList id out of a series dict's "source" map.

    Source keys are compared after lower-casing and dropping every
    character that is not a-z / 0-9, so "My_Anime_List" and "MAL" both
    match. The first matching entry whose "id" converts to int wins;
    entries that are not dicts, lack an id, or hold a non-numeric id are
    passed over. Returns None when nothing usable is found.
    """
    sources = md.get("source") or {}
    for label, details in sources.items():
        normalised = re.sub(r"[^a-z0-9]", "", label.lower())
        if normalised not in ("myanimelist", "mal"):
            continue
        if not isinstance(details, dict):
            continue
        raw_id = details.get("id")
        if raw_id is None:
            continue
        try:
            return int(raw_id)
        except (TypeError, ValueError):
            # Unusable id: keep looking at the remaining source entries.
            continue
    return None
def _al_id_from_metadata(md: dict) -> "int | None":
    """
    Reads the AniList id out of a series dict's "source" map.

    Source keys are compared after lower-casing and dropping every
    character that is not a-z / 0-9, so "Ani-List" matches "anilist".
    The first matching entry whose "id" converts to int wins; entries
    that are not dicts, lack an id, or hold a non-numeric id are passed
    over. Returns None when nothing usable is found.
    """
    sources = md.get("source") or {}
    for label, details in sources.items():
        if re.sub(r"[^a-z0-9]", "", label.lower()) != "anilist":
            continue
        if not isinstance(details, dict):
            continue
        raw_id = details.get("id")
        if raw_id is None:
            continue
        try:
            return int(raw_id)
        except (TypeError, ValueError):
            # Unusable id: keep looking at the remaining source entries.
            continue
    return None
def _chapter_image_size(chapter_dir: Path) -> int:
"""Returns the total file size of all images in a chapter folder."""
return sum(
@@ -330,25 +304,30 @@ class SuwayomiMover:
Expected layout: <root>/<Source>/<Title>/<Chapter N>/
kavita_path : Root of the Kavita library.
Series sub-directories are created automatically.
kavita_base_url : Kavita server URL — required only for person sync,
e.g. "http://192.168.2.2:5000".
kavita_api_key : Kavita API key — required only for person sync.
language : ComicInfo LanguageISO and SeriesSort language ("en").
request_timeout : HTTP timeout in seconds for all API / image requests.
delete_source : Remove the source chapter folder after successful pack.
cover_cache_dir : Directory for the persistent cover cache. None ->
temporary cache, deleted at process exit.
perf_stats : Optional PerfStats instance for per-step timing. None
(default) disables profiling.
Note: Kavita person sync is no longer done here — it runs as a separate,
global, id-based updater on its own cron schedule (KavitaPersonUpdater).
The mover only touches MangaBaka / MangaDex / MAL / AniList.
"""
def __init__(self,
suwayomi_path,
kavita_path,
*,
kavita_base_url: "str | None" = None,
kavita_api_key: "str | None" = None,
language: str = "en",
request_timeout: int = 30,
delete_source: bool = True,
matches_cache: "MatchesCache | None" = None,
api_base_url: str = "https://api.mangabaka.dev/v1"):
api_base_url: str = "https://api.mangabaka.dev/v1",
cover_cache_dir=None,
perf_stats: "PerfStats | None" = None):
self._src = Path(suwayomi_path)
self._dst = Path(kavita_path)
self._language = language
@@ -356,6 +335,7 @@ class SuwayomiMover:
self._delete_source = delete_source
self._matches_cache = matches_cache
self._api_base_url = api_base_url.rstrip("/")
self._perf = perf_stats or PerfStats(None)
# Shared HTTP session and resolvers — reused across all series/chapters
# to maximise cache hits and minimise API round-trips.
@@ -371,14 +351,8 @@ class SuwayomiMover:
request_timeout=request_timeout, session=session)
self._works_resolver = MangaBakaWorksResolver(
request_timeout=request_timeout, session=session)
self._person_updater: "KavitaPersonUpdater | None" = None
if kavita_base_url and kavita_api_key:
self._person_updater = KavitaPersonUpdater(
kavita_base_url, kavita_api_key,
mal_resolver=self._mal,
al_resolver=self._al,
request_timeout=request_timeout)
self._cover_cache = CoverCache(
cover_cache_dir, session=session, request_timeout=request_timeout)
# ------------------------------------------------------------------
# Public API
@@ -394,15 +368,19 @@ class SuwayomiMover:
dict from _process_series_dir.
"""
results: dict = {}
for source_dir in sorted(self._src.iterdir()):
if not source_dir.is_dir():
continue
for manga_dir in sorted(source_dir.iterdir()):
if not manga_dir.is_dir():
run = self._perf.begin_run()
try:
for source_dir in sorted(self._src.iterdir()):
if not source_dir.is_dir():
continue
title = manga_dir.name
print(f"[SuwayomiMover] {title}")
results[title] = self._process_series_dir(manga_dir)
for manga_dir in sorted(source_dir.iterdir()):
if not manga_dir.is_dir():
continue
title = manga_dir.name
print(f"[SuwayomiMover] {title}")
results[title] = self._process_series_dir(manga_dir, run)
finally:
run.finish()
return results
def process_series(self, manga_title: str) -> dict:
@@ -418,7 +396,11 @@ class SuwayomiMover:
continue
candidate = source_dir / manga_title
if candidate.is_dir():
return self._process_series_dir(candidate)
run = self._perf.begin_run()
try:
return self._process_series_dir(candidate, run)
finally:
run.finish()
raise FileNotFoundError(
f"No Suwayomi directory found for '{manga_title}' under {self._src}")
@@ -505,8 +487,9 @@ class SuwayomiMover:
# ------------------------------------------------------------------
# Internal: series
# ------------------------------------------------------------------
def _process_series_dir(self, manga_dir: Path) -> dict:
def _process_series_dir(self, manga_dir: Path, run=None) -> dict:
manga_title = manga_dir.name
series_rec = (run or self._perf.begin_run()).begin_item(manga_title)
chapter_dirs = sorted(
(d for d in manga_dir.iterdir() if d.is_dir()),
@@ -550,13 +533,15 @@ class SuwayomiMover:
mal_resolver=self._mal,
al_resolver=self._al,
matches_cache=self._matches_cache,
cover_cache=self._cover_cache,
)
# Fetch MangaBaka metadata now to get the canonical title and MAL ID.
md: "dict | None" = None
mangabaka_title = manga_title
try:
md = builder.fetch_metadata()
with series_rec.measure("fetch_metadata"):
md = builder.fetch_metadata()
mangabaka_title = md.get("title") or manga_title
except Exception as exc:
print(f" [warn] metadata fetch failed: {exc}")
@@ -588,7 +573,7 @@ class SuwayomiMover:
chapter_results: list[dict] = []
for chapter_dir, _fields, chapter_num in pending:
result = self._process_chapter(
builder, chapter_num, chapter_dir, dest_series)
builder, chapter_num, chapter_dir, dest_series, series_rec)
chapter_results.append(result)
status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
print(f" Chapter {chapter_num}: {status}")
@@ -599,25 +584,11 @@ class SuwayomiMover:
}
_save_chapter_index(dest_series, chapter_index)
# Sync Kavita persons once per series.
# Both MAL and AniList IDs come from MangaBaka's source map;
# AniList is used as fallback when MAL returns no characters/staff.
person_result: "dict | None" = None
if self._person_updater:
mal_id = (_mal_id_from_metadata(md) if md else None
or self._mal.find_mal_id(builder_title))
al_id = _al_id_from_metadata(md) if md else None
if mal_id or al_id:
try:
person_result = self._person_updater.update_for_manga(
mal_id, al_manga_id=al_id)
print(f" Persons: chars={person_result['characters'].get('updated')} "
f"staff={person_result['staff'].get('updated')}")
except Exception as exc:
person_result = {"error": str(exc)}
print(f" Persons: ERROR {exc}")
return {"chapters": chapter_results, "persons": person_result}
# Person sync no longer runs here — it has its own global,
# id-based updater on a separate cron schedule (see
# KavitaPersonUpdater.update_all_persons).
series_rec.finish()
return {"chapters": chapter_results}
# ------------------------------------------------------------------
# Internal: chapter
@@ -626,7 +597,8 @@ class SuwayomiMover:
builder: ComicInfoBuilder,
chapter_num: str,
chapter_dir: Path,
dest_series: Path) -> dict:
dest_series: Path,
series_rec=None) -> dict:
"""
Generates ComicInfo.xml for one chapter, packs it to CBZ, and
optionally removes the source folder.
@@ -636,6 +608,11 @@ class SuwayomiMover:
<Pages> element correctly points to the front cover).
"""
cbz_path = dest_series / f"{chapter_dir.name}.cbz"
chap_rec = (series_rec or self._perf.begin_run().begin_item("")
).begin_item(chapter_num)
# add_pages_from_folder records its own sub-steps on this recorder.
builder.perf = chap_rec
ok = False
try:
builder.chapter = chapter_num
builder.add_pages_from_folder(chapter_dir, cover_filename="000")
@@ -643,18 +620,26 @@ class SuwayomiMover:
# by add_pages_from_folder, so it's effectively free. Used by
# the chapter index in the Kavita destination folder.
try:
volume = builder._determine_volume()
with chap_rec.measure("volume"):
volume = builder._determine_volume()
except Exception:
volume = None
builder.save_xml(chapter_dir)
_pack_to_cbz(chapter_dir, cbz_path)
with chap_rec.measure("save_xml"):
builder.save_xml(chapter_dir)
with chap_rec.measure("pack_cbz"):
_pack_to_cbz(chapter_dir, cbz_path)
if self._delete_source:
shutil.rmtree(chapter_dir)
with chap_rec.measure("delete_source"):
shutil.rmtree(chapter_dir)
ok = True
return {"chapter": chapter_num, "cbz": str(cbz_path),
"ok": True, "volume": volume}
except Exception as exc:
return {"chapter": chapter_num, "cbz": str(cbz_path),
"ok": False, "error": str(exc)}
finally:
builder.perf = None
chap_rec.finish(ok=ok)
# --------------------------------------------------------------------------
@@ -664,8 +649,6 @@ if __name__ == "__main__":
# Local (no-Docker) smoke test. Adjust paths to your environment.
SUWAYOMI_PATH = r"M:\config\downloads\mangas"
KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
KAVITA_URL = "http://192.168.2.2:5000"
KAVITA_KEY = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA"
# matches.json lives next to this script during local testing.
MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"
@@ -674,8 +657,6 @@ if __name__ == "__main__":
mover = SuwayomiMover(
SUWAYOMI_PATH,
KAVITA_PATH,
kavita_base_url=KAVITA_URL,
kavita_api_key=KAVITA_KEY,
delete_source=False,
matches_cache=matches_cache,
)