Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b6d7f2d0af | |||
| b0692a6527 | |||
| 216771f709 | |||
| 8a44b85a48 | |||
| 4996026b91 | |||
| 7fbe5f94a5 | |||
| 4557137ad0 | |||
| 59ea1f8c8f | |||
| d724e9ffcd | |||
| 2f30ac4e05 | |||
| 97e4b10ac8 | |||
| 054f974ddc | |||
| 3288ab9de7 | |||
| 12ef254424 | |||
| 76050eeda9 | |||
| 7887892737 | |||
| 79d64d7ed5 |
@@ -0,0 +1,23 @@
|
|||||||
|
# Shared
|
||||||
|
KAVITA_URL=http://192.168.1.100:5000
|
||||||
|
KAVITA_API_KEY=your-api-key-here
|
||||||
|
LANGUAGE=en
|
||||||
|
TZ=Europe/Berlin
|
||||||
|
|
||||||
|
# Manga container (manga-mover-and-metadata-collector)
|
||||||
|
HOST_SUWAYOMI_PATH=/path/to/suwayomi/downloads
|
||||||
|
HOST_KAVITA_PATH=/path/to/kavita/library
|
||||||
|
HOST_MANGA_CONFIG_PATH=/path/to/manga-config
|
||||||
|
MANGA_WEB_PORT=8080
|
||||||
|
SETTLE_SECONDS=600
|
||||||
|
DELETE_SOURCE=true
|
||||||
|
UPDATER_ENABLED=true
|
||||||
|
UPDATER_SCHEDULE=0 19 * * 1,4
|
||||||
|
COVER_CACHE_PATH=/config/covers
|
||||||
|
PERF_PATH=/config/perf_stats.json
|
||||||
|
|
||||||
|
# Light-novel container (kavita-lightnovel-metadata-fetcher)
|
||||||
|
HOST_LN_CONFIG_PATH=/path/to/ln-config
|
||||||
|
LN_WEB_PORT=8081
|
||||||
|
LN_LIBRARY_IDS=3,5
|
||||||
|
|
||||||
@@ -5,6 +5,11 @@ on:
|
|||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
|
|
||||||
|
env:
|
||||||
|
REGISTRY: gitea.johannesbot.de/johannesbot
|
||||||
|
MANGA_IMAGE: manga-mover-and-metadata-collector
|
||||||
|
LN_IMAGE: kavita-lightnovel-metadata-fetcher
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -17,11 +22,16 @@ jobs:
|
|||||||
echo "${{ secrets.REGISTRY_PASSWORD }}" | \
|
echo "${{ secrets.REGISTRY_PASSWORD }}" | \
|
||||||
docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
|
docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
|
||||||
|
|
||||||
- name: Build Image
|
- name: Build Manga Image
|
||||||
run: docker build -t gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest .
|
run: docker build --build-arg APP=manga -t ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:latest .
|
||||||
|
|
||||||
- name: Push Image
|
- name: Build LN Image
|
||||||
run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest
|
run: docker build --build-arg APP=ln -t ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:latest .
|
||||||
|
|
||||||
|
- name: Push Images
|
||||||
|
run: |
|
||||||
|
docker push ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:latest
|
||||||
|
docker push ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:latest
|
||||||
|
|
||||||
deploy:
|
deploy:
|
||||||
needs: build
|
needs: build
|
||||||
@@ -37,7 +47,7 @@ jobs:
|
|||||||
username: ${{ secrets.SSH_USER }}
|
username: ${{ secrets.SSH_USER }}
|
||||||
password: ${{ secrets.SSH_PASSWORD }}
|
password: ${{ secrets.SSH_PASSWORD }}
|
||||||
port: ${{ secrets.SSH_PORT || 22 }}
|
port: ${{ secrets.SSH_PORT || 22 }}
|
||||||
script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
|
script: mkdir -p /home/${{ secrets.SSH_USER }}/kavita-metadata-collector
|
||||||
|
|
||||||
- name: Copy docker-compose via SCP
|
- name: Copy docker-compose via SCP
|
||||||
uses: appleboy/scp-action@v0.1.7
|
uses: appleboy/scp-action@v0.1.7
|
||||||
@@ -47,7 +57,7 @@ jobs:
|
|||||||
password: ${{ secrets.SSH_PASSWORD }}
|
password: ${{ secrets.SSH_PASSWORD }}
|
||||||
port: ${{ secrets.SSH_PORT || 22 }}
|
port: ${{ secrets.SSH_PORT || 22 }}
|
||||||
source: "docker-compose.prod.yml"
|
source: "docker-compose.prod.yml"
|
||||||
target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector"
|
target: "/home/${{ secrets.SSH_USER }}/kavita-metadata-collector"
|
||||||
|
|
||||||
- name: Deploy via SSH
|
- name: Deploy via SSH
|
||||||
uses: appleboy/ssh-action@v1.0.3
|
uses: appleboy/ssh-action@v1.0.3
|
||||||
@@ -57,7 +67,7 @@ jobs:
|
|||||||
password: ${{ secrets.SSH_PASSWORD }}
|
password: ${{ secrets.SSH_PASSWORD }}
|
||||||
port: ${{ secrets.SSH_PORT || 22 }}
|
port: ${{ secrets.SSH_PORT || 22 }}
|
||||||
script: |
|
script: |
|
||||||
cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
|
cd /home/${{ secrets.SSH_USER }}/kavita-metadata-collector
|
||||||
mv docker-compose.prod.yml docker-compose.yml
|
mv docker-compose.prod.yml docker-compose.yml
|
||||||
echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
|
echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
|
||||||
sudo docker compose pull
|
sudo docker compose pull
|
||||||
|
|||||||
@@ -0,0 +1,38 @@
|
|||||||
|
name: Build Release
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
tags:
|
||||||
|
- 'v*'
|
||||||
|
|
||||||
|
env:
|
||||||
|
REGISTRY: gitea.johannesbot.de/johannesbot
|
||||||
|
MANGA_IMAGE: manga-mover-and-metadata-collector
|
||||||
|
LN_IMAGE: kavita-lightnovel-metadata-fetcher
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Login to Gitea Registry
|
||||||
|
run: |
|
||||||
|
echo "${{ secrets.REGISTRY_PASSWORD }}" | \
|
||||||
|
docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
|
||||||
|
|
||||||
|
- name: Extract Tag
|
||||||
|
id: tag
|
||||||
|
run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- name: Build Manga Image
|
||||||
|
run: docker build --build-arg APP=manga -t ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:${{ steps.tag.outputs.VERSION }} .
|
||||||
|
|
||||||
|
- name: Build LN Image
|
||||||
|
run: docker build --build-arg APP=ln -t ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:${{ steps.tag.outputs.VERSION }} .
|
||||||
|
|
||||||
|
- name: Push Images
|
||||||
|
run: |
|
||||||
|
docker push ${{ env.REGISTRY }}/${{ env.MANGA_IMAGE }}:${{ steps.tag.outputs.VERSION }}
|
||||||
|
docker push ${{ env.REGISTRY }}/${{ env.LN_IMAGE }}:${{ steps.tag.outputs.VERSION }}
|
||||||
+18
-6
@@ -1,8 +1,18 @@
|
|||||||
|
# One Dockerfile, two images: the build arg APP selects the entry point.
|
||||||
|
#
|
||||||
|
# docker build --build-arg APP=manga -t .../manga-mover-and-metadata-collector .
|
||||||
|
# docker build --build-arg APP=ln -t .../kavita-lightnovel-metadata-fetcher .
|
||||||
|
#
|
||||||
|
# Both variants share src/; the variant-specific code lives in
|
||||||
|
# src/manga/ or src/ln/ respectively, and is selected by the entry point.
|
||||||
|
|
||||||
FROM python:3.12-slim
|
FROM python:3.12-slim
|
||||||
|
|
||||||
|
ARG APP=manga
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# System deps for Pillow (image dimensions); kept minimal.
|
# System deps for Pillow (image dimensions, manga variant); kept minimal.
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
&& apt-get install -y --no-install-recommends \
|
&& apt-get install -y --no-install-recommends \
|
||||||
libjpeg62-turbo \
|
libjpeg62-turbo \
|
||||||
@@ -12,14 +22,16 @@ COPY requirements.txt .
|
|||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
COPY src/ /app/src/
|
COPY src/ /app/src/
|
||||||
COPY main.py /app/main.py
|
COPY main_manga.py main_ln.py /app/
|
||||||
|
|
||||||
ENV PYTHONUNBUFFERED=1 \
|
ENV PYTHONUNBUFFERED=1 \
|
||||||
PYTHONDONTWRITEBYTECODE=1
|
PYTHONDONTWRITEBYTECODE=1 \
|
||||||
|
APP_VARIANT=${APP}
|
||||||
|
|
||||||
# Mount points used by main.py defaults
|
# /config is used by both variants; the manga variant additionally mounts
|
||||||
VOLUME ["/mnt/suwayomi", "/mnt/kavita", "/config"]
|
# /mnt/suwayomi and /mnt/kavita (see docker-compose.prod.yml).
|
||||||
|
VOLUME ["/config"]
|
||||||
|
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
|
|
||||||
CMD ["python", "/app/main.py"]
|
CMD python /app/main_${APP_VARIANT}.py
|
||||||
|
|||||||
+38
-5
@@ -1,5 +1,8 @@
|
|||||||
services:
|
services:
|
||||||
manga-mover:
|
# ------------------------------------------------------------------
|
||||||
|
# Manga: Suwayomi -> Kavita mover + metadata enrichment
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
manga-mover-and-metadata-collector:
|
||||||
image: gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest
|
image: gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest
|
||||||
container_name: manga-mover-and-metadata-collector
|
container_name: manga-mover-and-metadata-collector
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
@@ -9,11 +12,41 @@ services:
|
|||||||
LANGUAGE: "${LANGUAGE:-en}"
|
LANGUAGE: "${LANGUAGE:-en}"
|
||||||
SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
|
SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
|
||||||
DELETE_SOURCE: "${DELETE_SOURCE:-true}"
|
DELETE_SOURCE: "${DELETE_SOURCE:-true}"
|
||||||
MATCH_PATH: "${MATCH_PATH:-/config/matches.json}"
|
MATCH_PATH: "/config/matches.json"
|
||||||
WEB_PORT: "${WEB_PORT:-8080}"
|
# Volume/cover back-fill updater
|
||||||
|
UPDATER_ENABLED: "${UPDATER_ENABLED:-true}"
|
||||||
|
# Cron expression: "0 19 * * 1,4" = 19:00 every Monday and Thursday
|
||||||
|
# (local time, see TZ)
|
||||||
|
UPDATER_SCHEDULE: "${UPDATER_SCHEDULE:-0 19 * * 1,4}"
|
||||||
|
UPDATER_LOG: "/config/volume_updater.log"
|
||||||
|
# Persistent cover cache (empty = temp dir, deleted on container stop)
|
||||||
|
COVER_CACHE_PATH: "${COVER_CACHE_PATH:-/config/covers}"
|
||||||
|
# Per-step move timing stats (viewable at /perf); empty disables it
|
||||||
|
PERF_PATH: "${PERF_PATH:-/config/perf_stats.json}"
|
||||||
|
# Timezone for the cron schedule — without this 19:00 means 19:00 UTC
|
||||||
|
TZ: "${TZ:-Europe/Berlin}"
|
||||||
ports:
|
ports:
|
||||||
- "${WEB_PORT:-8080}:${WEB_PORT:-8080}"
|
- "${MANGA_WEB_PORT:-8080}:8080"
|
||||||
volumes:
|
volumes:
|
||||||
- "${HOST_SUWAYOMI_PATH}:/mnt/suwayomi"
|
- "${HOST_SUWAYOMI_PATH}:/mnt/suwayomi"
|
||||||
- "${HOST_KAVITA_PATH}:/mnt/kavita"
|
- "${HOST_KAVITA_PATH}:/mnt/kavita"
|
||||||
- "${HOST_CONFIG_PATH}:/config"
|
- "${HOST_MANGA_CONFIG_PATH}:/config"
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Light novels: Kavita metadata fetcher (HTTP only, no file mover)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
kavita-lightnovel-metadata-fetcher:
|
||||||
|
image: gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:latest
|
||||||
|
container_name: kavita-lightnovel-metadata-fetcher
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
KAVITA_URL: "${KAVITA_URL}"
|
||||||
|
KAVITA_API_KEY: "${KAVITA_API_KEY}"
|
||||||
|
LIBRARY_IDS: "${LN_LIBRARY_IDS}"
|
||||||
|
LANGUAGE: "${LANGUAGE:-en}"
|
||||||
|
MATCH_PATH: "/config/matches.json"
|
||||||
|
TZ: "${TZ:-Europe/Berlin}"
|
||||||
|
ports:
|
||||||
|
- "${LN_WEB_PORT:-8081}:8080"
|
||||||
|
volumes:
|
||||||
|
- "${HOST_LN_CONFIG_PATH}:/config"
|
||||||
|
|||||||
+129
@@ -0,0 +1,129 @@
|
|||||||
|
"""
|
||||||
|
main_ln.py
|
||||||
|
==========
|
||||||
|
|
||||||
|
Container entry point for the **light-novel** variant (Kavita metadata
|
||||||
|
fetcher). The manga variant has its own entry point (main_manga.py);
|
||||||
|
both share the modules in src/ and add their variant-specific code from
|
||||||
|
src/ln/ or src/manga/, respectively.
|
||||||
|
|
||||||
|
Reads configuration from environment variables, starts the orchestrator
|
||||||
|
and exposes the Flask WebApp on WEB_HOST:WEB_PORT. Everything happens
|
||||||
|
through HTTP — there is no folder watcher and no file mover (Kavita is
|
||||||
|
the source of truth for the library content; this service only writes
|
||||||
|
metadata back to it).
|
||||||
|
|
||||||
|
Environment variables
|
||||||
|
---------------------
|
||||||
|
Required:
|
||||||
|
KAVITA_URL base URL of the Kavita server, e.g. http://kavita:5000
|
||||||
|
KAVITA_API_KEY Kavita API key (Settings -> User -> API key)
|
||||||
|
|
||||||
|
Optional:
|
||||||
|
LIBRARY_IDS comma-separated default library ids (e.g. "3,5").
|
||||||
|
Empty = user picks in the WebUI each time.
|
||||||
|
LANGUAGE default "en"
|
||||||
|
REQUEST_TIMEOUT default 30
|
||||||
|
MATCH_PATH default /config/matches.json
|
||||||
|
WEB_PORT default 8080
|
||||||
|
WEB_HOST default 0.0.0.0
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
try:
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
load_dotenv()
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Shared code in src/, LN-specific code in src/ln/. Modules are imported
|
||||||
|
# by their plain names so src-internal imports resolve to the same module
|
||||||
|
# objects (a `src.X` import would load everything twice).
|
||||||
|
_BASE = Path(__file__).resolve().parent
|
||||||
|
sys.path.insert(0, str(_BASE / "src"))
|
||||||
|
sys.path.insert(0, str(_BASE / "src" / "ln"))
|
||||||
|
|
||||||
|
from MatchesCache import MatchesCache # noqa: E402
|
||||||
|
from LightNovelOrchestrator import LightNovelOrchestrator # noqa: E402
|
||||||
|
from MatchesWebApp import MatchesWebApp # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
def _env_str(name: str, default: "str | None" = None,
             required: bool = False) -> "str | None":
    """
    Reads the environment variable ``name`` as a string.

    name     : variable to look up in ``os.environ``.
    default  : returned when the variable is not set at all. A variable
               that is set to an empty string is returned as "" (the
               default is not substituted).
    required : when True, a missing, empty or whitespace-only value is
               reported on stdout and the process exits with status 2.

    Returns the raw (unstripped) value, or ``default`` when unset.
    """
    value = os.environ.get(name, default)
    # A blank value can never be a usable URL / API key, so whitespace-only
    # input counts as "missing" for required variables.
    if required and not (value or "").strip():
        print(f"[main] missing required env var: {name}", flush=True)
        sys.exit(2)
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def _env_int(name: str, default: int) -> int:
    """
    Reads the environment variable ``name`` as an integer.

    name    : variable to look up in ``os.environ``.
    default : returned when the variable is unset, empty, or not
              parseable by ``int()`` (the latter is reported on stdout).
    """
    text = os.environ.get(name)
    if not text:
        # Unset and empty are treated alike: silently use the default.
        return default

    try:
        parsed = int(text)
    except ValueError:
        message = (f"[main] {name}={text!r} is not a valid integer; "
                   f"falling back to {default}")
        print(message, flush=True)
        return default
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _env_int_list(name: str) -> list[int]:
    """
    Reads the environment variable ``name`` as a comma-separated list of
    integers.

    Surrounding whitespace is stripped from every item and empty items are
    skipped. Items that ``int()`` rejects are reported on stdout and
    dropped. An unset or empty variable yields an empty list.
    """
    values: list[int] = []
    tokens = (piece.strip() for piece in (os.environ.get(name) or "").split(","))
    for token in tokens:
        if token:
            try:
                values.append(int(token))
            except ValueError:
                print(f"[main] {name}: ignoring non-integer value {token!r}",
                      flush=True)
    return values
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """
    Wires up and runs the light-novel service.

    Reads the configuration from the environment (KAVITA_URL and
    KAVITA_API_KEY are required; the env helper exits with status 2 when
    one is missing), prints a startup summary, builds the matches cache,
    the orchestrator and the web app, then starts the web app and waits
    on it.

    Returns 0 once ``wait()`` returns.
    """
    # The read order is kept fixed because the helpers may print
    # diagnostics or terminate the process.
    url = _env_str("KAVITA_URL", required=True)
    api_key = _env_str("KAVITA_API_KEY", required=True)
    lang = _env_str("LANGUAGE", "en") or "en"
    timeout = _env_int("REQUEST_TIMEOUT", 30)
    matches_file = _env_str("MATCH_PATH", "/config/matches.json")
    host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
    port = _env_int("WEB_PORT", 8080)
    default_libraries = _env_int_list("LIBRARY_IDS")

    startup_lines = (
        f"[main] kavita url = {url}",
        f"[main] language = {lang}",
        f"[main] match path = {matches_file}",
        f"[main] libraries = {default_libraries or '(picked in WebUI)'}",
        f"[main] web = {host}:{port}",
    )
    for line in startup_lines:
        print(line, flush=True)

    matches = MatchesCache(matches_file)
    fetcher = LightNovelOrchestrator(
        kavita_url=url,
        kavita_api_key=api_key,
        matches_cache=matches,
        language=lang,
        request_timeout=timeout,
    )

    web_app = MatchesWebApp(
        matches,
        orchestrator=fetcher,
        default_library_ids=default_libraries,
        host=host,
        port=port,
    )
    web_app.start()
    # NOTE(review): presumably blocks until the web app stops — confirm
    # against MatchesWebApp.wait().
    web_app.wait()
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||||
+64
-23
@@ -1,10 +1,11 @@
|
|||||||
"""
|
"""
|
||||||
main.py
|
main_manga.py
|
||||||
=======
|
=============
|
||||||
|
|
||||||
Container entry point. Watches the mounted Suwayomi download directory
|
Container entry point for the **manga** variant (Suwayomi -> Kavita mover
|
||||||
and, after a quiet period, triggers SuwayomiMover (which also runs the
|
plus metadata enrichment). The light-novel variant has its own entry
|
||||||
Kavita person sync for every processed series).
|
point (main_ln.py); both share the modules in src/ and add their
|
||||||
|
variant-specific code from src/manga/ or src/ln/, respectively.
|
||||||
|
|
||||||
Mount points (Docker)
|
Mount points (Docker)
|
||||||
---------------------
|
---------------------
|
||||||
@@ -15,7 +16,7 @@ Environment variables
|
|||||||
---------------------
|
---------------------
|
||||||
Required:
|
Required:
|
||||||
KAVITA_URL base URL of the Kavita server, e.g. http://kavita:5000
|
KAVITA_URL base URL of the Kavita server, e.g. http://kavita:5000
|
||||||
KAVITA_API_KEY Kavita API key (Settings → User → API key)
|
KAVITA_API_KEY Kavita API key (Settings -> User -> API key)
|
||||||
|
|
||||||
Optional:
|
Optional:
|
||||||
SUWAYOMI_PATH default /mnt/suwayomi
|
SUWAYOMI_PATH default /mnt/suwayomi
|
||||||
@@ -27,22 +28,42 @@ Environment variables
|
|||||||
MATCH_PATH default /config/matches.json
|
MATCH_PATH default /config/matches.json
|
||||||
WEB_PORT default 8080 (Flask web UI for matches.json)
|
WEB_PORT default 8080 (Flask web UI for matches.json)
|
||||||
WEB_HOST default 0.0.0.0
|
WEB_HOST default 0.0.0.0
|
||||||
|
UPDATER_ENABLED default true (volume/cover back-fill cron)
|
||||||
|
UPDATER_SCHEDULE cron expression for the updater scans,
|
||||||
|
default "0 19 * * 1,4" = 19:00 every Mon + Thu
|
||||||
|
(local time — set TZ inside the container!)
|
||||||
|
UPDATER_LOG default /config/volume_updater.log
|
||||||
|
COVER_CACHE_PATH directory for the persistent cover cache;
|
||||||
|
empty (default) = temporary cache, deleted on exit
|
||||||
|
PERF_PATH JSON file for per-step move timing stats;
|
||||||
|
empty disables profiling. Default /config/perf_stats.json
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import signal
|
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Make src/ importable when running as `python main.py`.
|
try:
|
||||||
sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
|
from dotenv import load_dotenv
|
||||||
|
load_dotenv()
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
from src.SuwayomiMover import SuwayomiMover # noqa: E402
|
# Shared code in src/, manga-specific code in src/manga/. Modules are
|
||||||
from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402
|
# imported by their plain names so src-internal imports resolve to the
|
||||||
from src.MatchesCache import MatchesCache # noqa: E402
|
# same module objects (a `src.X` import would load everything twice).
|
||||||
from src.MatchesWebApp import MatchesWebApp # noqa: E402
|
_BASE = Path(__file__).resolve().parent
|
||||||
|
sys.path.insert(0, str(_BASE / "src"))
|
||||||
|
sys.path.insert(0, str(_BASE / "src" / "manga"))
|
||||||
|
|
||||||
|
from SuwayomiMover import SuwayomiMover # noqa: E402
|
||||||
|
from SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402,F401
|
||||||
|
from MatchesCache import MatchesCache # noqa: E402
|
||||||
|
from MatchesWebApp import MatchesWebApp # noqa: E402
|
||||||
|
from KavitaVolumeCoverUpdater import KavitaVolumeCoverUpdater # noqa: E402
|
||||||
|
from PerfStats import PerfStats # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
def _env_str(name: str, default: "str | None" = None,
|
def _env_str(name: str, default: "str | None" = None,
|
||||||
@@ -76,8 +97,8 @@ def _env_bool(name: str, default: bool) -> bool:
|
|||||||
def main() -> int:
|
def main() -> int:
|
||||||
suwayomi_path = _env_str("SUWAYOMI_PATH", "/mnt/suwayomi")
|
suwayomi_path = _env_str("SUWAYOMI_PATH", "/mnt/suwayomi")
|
||||||
kavita_path = _env_str("KAVITA_PATH", "/mnt/kavita")
|
kavita_path = _env_str("KAVITA_PATH", "/mnt/kavita")
|
||||||
kavita_url = _env_str("KAVITA_URL", required=True)
|
kavita_url = _env_str("KAVITA_URL", "http://kavita:5000")
|
||||||
kavita_api_key = _env_str("KAVITA_API_KEY", required=True)
|
kavita_api_key = _env_str("KAVITA_API_KEY", "")
|
||||||
language = _env_str("LANGUAGE", "en") or "en"
|
language = _env_str("LANGUAGE", "en") or "en"
|
||||||
settle_seconds = _env_int("SETTLE_SECONDS", 600)
|
settle_seconds = _env_int("SETTLE_SECONDS", 600)
|
||||||
request_timeout = _env_int("REQUEST_TIMEOUT", 30)
|
request_timeout = _env_int("REQUEST_TIMEOUT", 30)
|
||||||
@@ -85,6 +106,11 @@ def main() -> int:
|
|||||||
match_path = _env_str("MATCH_PATH", "/config/matches.json")
|
match_path = _env_str("MATCH_PATH", "/config/matches.json")
|
||||||
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
|
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
|
||||||
web_port = _env_int("WEB_PORT", 8080)
|
web_port = _env_int("WEB_PORT", 8080)
|
||||||
|
updater_enabled = _env_bool("UPDATER_ENABLED", True)
|
||||||
|
updater_schedule = _env_str("UPDATER_SCHEDULE", "0 19 * * 1,4")
|
||||||
|
updater_log = _env_str("UPDATER_LOG", "/config/volume_updater.log")
|
||||||
|
cover_cache_path = _env_str("COVER_CACHE_PATH", "") or None
|
||||||
|
perf_path = _env_str("PERF_PATH", "/config/perf_stats.json") or None
|
||||||
|
|
||||||
print(f"[main] suwayomi = {suwayomi_path}", flush=True)
|
print(f"[main] suwayomi = {suwayomi_path}", flush=True)
|
||||||
print(f"[main] kavita = {kavita_path}", flush=True)
|
print(f"[main] kavita = {kavita_path}", flush=True)
|
||||||
@@ -96,6 +122,7 @@ def main() -> int:
|
|||||||
print(f"[main] web = {web_host}:{web_port}", flush=True)
|
print(f"[main] web = {web_host}:{web_port}", flush=True)
|
||||||
|
|
||||||
matches_cache = MatchesCache(match_path)
|
matches_cache = MatchesCache(match_path)
|
||||||
|
perf_stats = PerfStats(perf_path)
|
||||||
|
|
||||||
mover = SuwayomiMover(
|
mover = SuwayomiMover(
|
||||||
suwayomi_path, kavita_path,
|
suwayomi_path, kavita_path,
|
||||||
@@ -105,20 +132,34 @@ def main() -> int:
|
|||||||
request_timeout=request_timeout,
|
request_timeout=request_timeout,
|
||||||
delete_source=delete_source,
|
delete_source=delete_source,
|
||||||
matches_cache=matches_cache,
|
matches_cache=matches_cache,
|
||||||
|
cover_cache_dir=cover_cache_path,
|
||||||
|
perf_stats=perf_stats,
|
||||||
)
|
)
|
||||||
|
|
||||||
# watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
|
# watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
|
||||||
|
|
||||||
web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port)
|
web_app = MatchesWebApp(matches_cache, mover=mover, perf_stats=perf_stats,
|
||||||
|
host=web_host, port=web_port)
|
||||||
web_app.start()
|
web_app.start()
|
||||||
|
|
||||||
# def shutdown(signum, _frame):
|
if updater_enabled:
|
||||||
# print(f"[main] received signal {signum}", flush=True)
|
try:
|
||||||
# watcher.stop()
|
updater = KavitaVolumeCoverUpdater(
|
||||||
#
|
kavita_path,
|
||||||
# signal.signal(signal.SIGTERM, shutdown)
|
matches_cache=matches_cache,
|
||||||
# signal.signal(signal.SIGINT, shutdown)
|
language=language,
|
||||||
#
|
request_timeout=request_timeout,
|
||||||
|
log_path=updater_log,
|
||||||
|
schedule=updater_schedule,
|
||||||
|
cover_cache_dir=cover_cache_path,
|
||||||
|
)
|
||||||
|
updater.start()
|
||||||
|
except ValueError as exc:
|
||||||
|
# Invalid cron expression — keep the service up, just without
|
||||||
|
# the updater, and make the config error obvious in the logs.
|
||||||
|
print(f"[main] UPDATER_SCHEDULE invalid ({exc}); "
|
||||||
|
f"volume/cover updater DISABLED", flush=True)
|
||||||
|
|
||||||
# watcher.start()
|
# watcher.start()
|
||||||
# watcher.wait() # blocks until stop() is called via a signal
|
# watcher.wait() # blocks until stop() is called via a signal
|
||||||
web_app.wait() # keep process alive while the watcher is disabled
|
web_app.wait() # keep process alive while the watcher is disabled
|
||||||
@@ -2,3 +2,4 @@ requests>=2.31
|
|||||||
Pillow>=10.0
|
Pillow>=10.0
|
||||||
watchdog>=4.0
|
watchdog>=4.0
|
||||||
Flask>=3.0
|
Flask>=3.0
|
||||||
|
python-dotenv>=1.0
|
||||||
|
|||||||
+32
-17
@@ -32,27 +32,35 @@ Dependencies
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import difflib
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from MediaResolver import MediaResolver
|
from MediaResolver import MediaResolver
|
||||||
|
from TextUtils import best_similarity
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# GraphQL query strings
|
# GraphQL query strings
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
_SEARCH_MANGA = """
|
# AniList models both manga and light novels as type MANGA; the format
|
||||||
|
# clause decides which of the two a search returns. The placeholder is
|
||||||
|
# substituted at construction time (see `media_format`).
|
||||||
|
_SEARCH_MANGA_TEMPLATE = """
|
||||||
query ($search: String) {
|
query ($search: String) {
|
||||||
Page(page: 1, perPage: 5) {
|
Page(page: 1, perPage: 5) {
|
||||||
media(search: $search, type: MANGA, format_not_in: [NOVEL]) {
|
media(search: $search, type: MANGA, __FORMAT_CLAUSE__) {
|
||||||
id title { romaji english native } siteUrl
|
id title { romaji english native } siteUrl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
_FORMAT_CLAUSES = {
|
||||||
|
"manga": "format_not_in: [NOVEL]",
|
||||||
|
"novel": "format_in: [NOVEL]",
|
||||||
|
}
|
||||||
|
|
||||||
_MANGA_STATS = """
|
_MANGA_STATS = """
|
||||||
query ($id: Int) {
|
query ($id: Int) {
|
||||||
Media(id: $id, type: MANGA) {
|
Media(id: $id, type: MANGA) {
|
||||||
@@ -131,10 +139,24 @@ class AniListResolver(MediaResolver):
|
|||||||
cls._instance._initialized = False
|
cls._instance._initialized = False
|
||||||
return cls._instance
|
return cls._instance
|
||||||
|
|
||||||
def __init__(self, *, request_timeout: int = 30):
|
def __init__(self, *, request_timeout: int = 30,
|
||||||
|
media_format: str = "manga"):
|
||||||
|
"""
|
||||||
|
media_format : "manga" (excludes novels) or "novel" (novels only).
|
||||||
|
Only the FIRST construction in the process sets it
|
||||||
|
(singleton); construct the resolver with the correct
|
||||||
|
format in the entry point / orchestrator.
|
||||||
|
"""
|
||||||
if self._initialized:
|
if self._initialized:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if media_format not in _FORMAT_CLAUSES:
|
||||||
|
raise ValueError(f"media_format must be one of "
|
||||||
|
f"{sorted(_FORMAT_CLAUSES)}, got {media_format!r}")
|
||||||
|
self.media_format = media_format
|
||||||
|
self._search_query = _SEARCH_MANGA_TEMPLATE.replace(
|
||||||
|
"__FORMAT_CLAUSE__", _FORMAT_CLAUSES[media_format])
|
||||||
|
|
||||||
self.request_timeout = request_timeout
|
self.request_timeout = request_timeout
|
||||||
|
|
||||||
self._session = requests.Session()
|
self._session = requests.Session()
|
||||||
@@ -178,7 +200,7 @@ class AniListResolver(MediaResolver):
|
|||||||
return self._id_cache[key]
|
return self._id_cache[key]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = self._gql(_SEARCH_MANGA, {"search": title})
|
data = self._gql(self._search_query, {"search": title})
|
||||||
results = ((data.get("data") or {})
|
results = ((data.get("data") or {})
|
||||||
.get("Page", {})
|
.get("Page", {})
|
||||||
.get("media") or [])
|
.get("media") or [])
|
||||||
@@ -469,18 +491,11 @@ class AniListResolver(MediaResolver):
|
|||||||
def _score_title(query: str, entry: dict) -> float:
|
def _score_title(query: str, entry: dict) -> float:
|
||||||
"""Returns the best title-similarity score for an AniList media entry."""
|
"""Returns the best title-similarity score for an AniList media entry."""
|
||||||
title_obj = entry.get("title") or {}
|
title_obj = entry.get("title") or {}
|
||||||
candidates = [
|
return best_similarity(query, (
|
||||||
title_obj.get("romaji") or "",
|
title_obj.get("romaji"),
|
||||||
title_obj.get("english") or "",
|
title_obj.get("english"),
|
||||||
title_obj.get("native") or "",
|
title_obj.get("native"),
|
||||||
]
|
))
|
||||||
best = 0.0
|
|
||||||
q = query.lower()
|
|
||||||
for t in candidates:
|
|
||||||
if t:
|
|
||||||
ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
|
|
||||||
best = max(best, ratio)
|
|
||||||
return best
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -0,0 +1,148 @@
|
|||||||
|
"""
|
||||||
|
cover_cache.py
|
||||||
|
==============
|
||||||
|
|
||||||
|
Disk-backed cache for downloaded cover images, keyed by URL.
|
||||||
|
|
||||||
|
Why
|
||||||
|
---
|
||||||
|
The mover packs every chapter of a series individually, and each chapter
|
||||||
|
needs a cover image. Without caching, the same multi-megabyte cover is
|
||||||
|
downloaded once per chapter (20-chapter volume = 20 identical downloads).
|
||||||
|
This cache turns that into a single download per unique URL.
|
||||||
|
|
||||||
|
Persistence
|
||||||
|
-----------
|
||||||
|
* ``cache_dir`` given -> covers persist across runs in that directory.
|
||||||
|
* ``cache_dir`` omitted -> a temporary directory is used and removed
|
||||||
|
automatically when the process exits.
|
||||||
|
|
||||||
|
Files are stored as ``<sha256(url)[:32]><ext>``; the extension is derived
|
||||||
|
from the URL / Content-Type at download time so it can be reused when
|
||||||
|
writing the cover into a chapter folder.
|
||||||
|
|
||||||
|
Thread safety: downloads are serialised per cache instance, so concurrent
|
||||||
|
mover / updater threads never fetch the same URL twice.
|
||||||
|
|
||||||
|
Dependencies
|
||||||
|
------------
|
||||||
|
requests -> pip install requests
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import atexit
|
||||||
|
import hashlib
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}

# Content-Type substring -> extension, checked in order. Anything that
# matches none of these is treated as JPEG.
_CONTENT_TYPE_EXTS = (
    ("png", ".png"),
    ("webp", ".webp"),
    ("gif", ".gif"),
    ("avif", ".avif"),
    ("bmp", ".bmp"),
)


def _guess_extension(url: str, content_type: str) -> str:
    """
    Derives an image file extension from a URL or HTTP Content-Type.

    url          : source URL. Its query string and fragment are ignored;
                   a recognised image suffix (see ``_IMAGE_EXTS``) on the
                   remainder wins and is returned lower-cased.
    content_type : value of the Content-Type header (may be empty or None).
                   Consulted only when the URL has no recognised suffix.

    Returns the extension including the leading dot; ".jpg" when neither
    source identifies a known image type.
    """
    # Drop "?query" and "#fragment" so "cover.png#top" still yields ".png".
    path = url.split("?", 1)[0].split("#", 1)[0]
    url_ext = Path(path).suffix.lower()
    if url_ext in _IMAGE_EXTS:
        return url_ext

    ct = (content_type or "").lower()
    for marker, ext in _CONTENT_TYPE_EXTS:
        if marker in ct:
            return ext
    return ".jpg"
|
||||||
|
|
||||||
|
|
||||||
|
class CoverCache:
    """
    URL-keyed image cache on disk.

    Parameters
    ----------
    cache_dir       : Directory for cached covers.  None -> temporary
                      directory, deleted automatically at process exit.
    session         : Optional shared requests.Session for downloads.
    request_timeout : HTTP timeout in seconds.
    """

    # Suffix of the scratch file a download is written to before it is
    # atomically renamed to its final name.
    _TMP_SUFFIX = ".tmp"

    def __init__(self, cache_dir=None, *,
                 session: "requests.Session | None" = None,
                 request_timeout: int = 30):
        self._persistent = cache_dir is not None
        if self._persistent:
            self._dir = Path(cache_dir)
            self._dir.mkdir(parents=True, exist_ok=True)
        else:
            self._dir = Path(tempfile.mkdtemp(prefix="cover_cache_"))
            # Non-persistent caches clean up after themselves at exit.
            atexit.register(self.close)

        self._session = session or requests.Session()
        # Only sets a User-Agent when the (possibly shared) session has none.
        self._session.headers.setdefault("User-Agent", "CoverCache/1.0")
        self._timeout = request_timeout
        self._lock = threading.Lock()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get(self, url: str) -> "tuple[bytes, str] | None":
        """
        Returns ``(image_bytes, extension)`` for the URL — from cache when
        present, downloading (and caching) otherwise.  Returns None when
        the URL is empty or the download fails.

        The instance lock is held for the whole lookup + download, so
        concurrent callers are serialised and the second caller for the
        same URL finds the file the first one just cached.
        """
        if not url:
            return None

        with self._lock:
            cached = self._find_cached(url)
            if cached is not None:
                try:
                    return cached.read_bytes(), cached.suffix
                except OSError:
                    pass  # unreadable cache file -> re-download

            return self._download(url)

    def clear(self) -> None:
        """Removes all cached covers (the directory itself is kept)."""
        with self._lock:
            for f in self._dir.glob("*"):
                if f.is_file():
                    f.unlink(missing_ok=True)

    def close(self) -> None:
        """Deletes the cache directory when it is non-persistent."""
        if not self._persistent:
            shutil.rmtree(self._dir, ignore_errors=True)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------
    @staticmethod
    def _key(url: str) -> str:
        """Returns the cache file stem: first 32 hex chars of sha256(url)."""
        return hashlib.sha256(url.encode("utf-8")).hexdigest()[:32]

    def _find_cached(self, url: str) -> "Path | None":
        """
        Returns the cached file for ``url`` or None when there is none.

        Leftover ``<key><ext>.tmp`` scratch files (from a write that was
        interrupted before the rename) also match the ``<key>.*`` glob;
        they may hold partial data and are therefore never treated as a
        cache hit.  Candidates are sorted so the pick is deterministic.
        """
        candidates = sorted(
            p for p in self._dir.glob(self._key(url) + ".*")
            if p.suffix.lower() != self._TMP_SUFFIX and p.is_file())
        return candidates[0] if candidates else None

    def _download(self, url: str) -> "tuple[bytes, str] | None":
        """
        Fetches ``url`` and stores the body in the cache directory.

        Returns ``(bytes, extension)``, or None when the HTTP request
        fails.  A failure to write the cache file is non-fatal: the
        downloaded bytes are still returned.
        """
        try:
            resp = self._session.get(url, timeout=self._timeout)
            resp.raise_for_status()
        except requests.RequestException:
            return None

        ext = _guess_extension(url, resp.headers.get("Content-Type", ""))

        target = self._dir / f"{self._key(url)}{ext}"
        tmp = target.with_suffix(target.suffix + self._TMP_SUFFIX)
        try:
            # Write to a scratch file first, then rename, so readers never
            # see a half-written cover under the final name.
            tmp.write_bytes(resp.content)
            tmp.replace(target)
        except OSError:
            # Cache write failure is non-fatal — still return the bytes,
            # but do not leave a partial scratch file behind.
            try:
                tmp.unlink(missing_ok=True)
            except OSError:
                pass
        return resp.content, ext
|
||||||
@@ -0,0 +1,159 @@
|
|||||||
|
"""
|
||||||
|
cron_schedule.py
|
||||||
|
================
|
||||||
|
|
||||||
|
Minimal cron-expression parser — no external dependency.
|
||||||
|
|
||||||
|
Supports the classic 5-field syntax::
|
||||||
|
|
||||||
|
┌──────── minute (0-59)
|
||||||
|
│ ┌────── hour (0-23)
|
||||||
|
│ │ ┌──── day of month (1-31)
|
||||||
|
│ │ │ ┌── month (1-12 or jan-dec)
|
||||||
|
│ │ │ │ ┌ day of week (0-7 or sun-sat; 0 and 7 = Sunday)
|
||||||
|
│ │ │ │ │
|
||||||
|
0 19 * * 1,4 -> 19:00 every Monday and Thursday
|
||||||
|
|
||||||
|
Field syntax: ``*``, single values, ranges (``a-b``), steps (``*/n``,
|
||||||
|
``a-b/n``) and comma lists. Month / weekday names (``jan``, ``mon``, …)
|
||||||
|
are accepted case-insensitively.
|
||||||
|
|
||||||
|
As in Vixie cron, when *both* day-of-month and day-of-week are restricted
|
||||||
|
the job runs when **either** matches.
|
||||||
|
|
||||||
|
Times are evaluated against the local system clock (``datetime.now()``) —
|
||||||
|
in Docker set the ``TZ`` environment variable so "19:00" means local time.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
|
||||||
|
# Three-letter month names accepted in the month field, mapped to 1-12.
# Keys are lower-case; tokens are lower-cased before lookup.
_MONTH_NAMES = {"jan": 1, "feb": 2, "mar": 3, "apr": 4, "may": 5, "jun": 6,
                "jul": 7, "aug": 8, "sep": 9, "oct": 10, "nov": 11, "dec": 12}
# Three-letter weekday names accepted in the day-of-week field, using the
# cron numbering where Sunday is 0 and Saturday is 6.
_DAY_NAMES = {"sun": 0, "mon": 1, "tue": 2, "wed": 3, "thu": 4,
              "fri": 5, "sat": 6}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_value(token: str, lo: int, hi: int,
|
||||||
|
names: "dict[str, int] | None") -> int:
|
||||||
|
token = token.strip().lower()
|
||||||
|
if names and token in names:
|
||||||
|
return names[token]
|
||||||
|
try:
|
||||||
|
value = int(token)
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError(f"invalid cron value {token!r}") from None
|
||||||
|
if not (lo <= value <= hi):
|
||||||
|
raise ValueError(f"cron value {value} out of range {lo}-{hi}")
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_field(field: str, lo: int, hi: int,
|
||||||
|
names: "dict[str, int] | None" = None) -> "set[int]":
|
||||||
|
"""Parses one cron field into the set of matching integer values."""
|
||||||
|
result: set[int] = set()
|
||||||
|
for part in field.split(","):
|
||||||
|
part = part.strip()
|
||||||
|
if not part:
|
||||||
|
raise ValueError(f"empty element in cron field {field!r}")
|
||||||
|
|
||||||
|
step = 1
|
||||||
|
if "/" in part:
|
||||||
|
part, step_text = part.split("/", 1)
|
||||||
|
try:
|
||||||
|
step = int(step_text)
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError(f"invalid cron step {step_text!r}") from None
|
||||||
|
if step < 1:
|
||||||
|
raise ValueError(f"cron step must be >= 1, got {step}")
|
||||||
|
|
||||||
|
if part == "*":
|
||||||
|
start, end = lo, hi
|
||||||
|
elif "-" in part:
|
||||||
|
a, b = part.split("-", 1)
|
||||||
|
start = _parse_value(a, lo, hi, names)
|
||||||
|
end = _parse_value(b, lo, hi, names)
|
||||||
|
if end < start:
|
||||||
|
raise ValueError(f"inverted cron range {part!r}")
|
||||||
|
else:
|
||||||
|
start = end = _parse_value(part, lo, hi, names)
|
||||||
|
|
||||||
|
result.update(range(start, end + 1, step))
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
class CronSchedule:
    """
    Five-field cron expression, parsed once and queried via ``next_after()``.

    Example::

        cron = CronSchedule("0 19 * * mon,thu")
        run_at = cron.next_after(datetime.now())
    """

    def __init__(self, expression: str):
        """Parses ``expression``; raises ValueError when it is malformed."""
        self.expression = expression.strip()
        parts = self.expression.split()
        if len(parts) != 5:
            raise ValueError(
                f"cron expression needs 5 fields "
                f"(minute hour dom month dow), got {len(parts)}: "
                f"{expression!r}")

        minute_f, hour_f, dom_f, month_f, dow_f = parts
        self._minutes = _parse_field(minute_f, 0, 59)
        self._hours = _parse_field(hour_f, 0, 23)
        self._dom = _parse_field(dom_f, 1, 31)
        self._months = _parse_field(month_f, 1, 12, _MONTH_NAMES)
        # Fold the alias 7 onto 0 so Sunday has a single representation.
        self._dow = {v % 7 for v in _parse_field(dow_f, 0, 7, _DAY_NAMES)}

        # A field counts as restricted unless it is the bare "*"; when both
        # day fields are restricted they are OR-combined (Vixie-cron rule).
        self._dom_restricted = dom_f != "*"
        self._dow_restricted = dow_f != "*"

    def __repr__(self) -> str:
        return f"CronSchedule({self.expression!r})"

    # ------------------------------------------------------------------
    def _day_matches(self, day: "datetime.date") -> bool:
        """Tells whether the schedule can fire at all on the given date."""
        if day.month not in self._months:
            return False

        # isoweekday(): Monday=1 … Sunday=7 -> cron: Sunday=0 … Saturday=6
        cron_weekday = day.isoweekday() % 7

        verdicts = []
        if self._dom_restricted:
            verdicts.append(day.day in self._dom)
        if self._dow_restricted:
            verdicts.append(cron_weekday in self._dow)

        # No restricted day field -> every day of a matching month counts;
        # otherwise one matching restricted field is enough.
        return any(verdicts) if verdicts else True

    def next_after(self, dt: datetime) -> datetime:
        """
        Returns the earliest scheduled time strictly later than ``dt``,
        truncated to whole minutes.

        Raises ValueError when nothing matches within the search window
        of 366 * 5 days.
        """
        cursor = (dt + timedelta(minutes=1)).replace(second=0, microsecond=0)
        # Every (hour, minute) slot of a day, in chronological order.
        slots = [(h, m)
                 for h in sorted(self._hours)
                 for m in sorted(self._minutes)]

        # Walk day by day (covers rare dom/month combos like Feb 29).
        for _ in range(366 * 5):
            if self._day_matches(cursor.date()):
                earliest = (cursor.hour, cursor.minute)
                hit = next((slot for slot in slots if slot >= earliest), None)
                if hit is not None:
                    return cursor.replace(hour=hit[0], minute=hit[1])
            # Nothing left today: continue from midnight of the next day.
            cursor = (cursor + timedelta(days=1)).replace(hour=0, minute=0)

        raise ValueError(
            f"cron {self.expression!r}: no occurrence within 5 years")
|
||||||
@@ -0,0 +1,272 @@
|
|||||||
|
"""
|
||||||
|
kavita_client.py
|
||||||
|
================
|
||||||
|
|
||||||
|
Thin HTTP client for the Kavita server REST API (v0.9.x).
|
||||||
|
|
||||||
|
Authenticates via the ``x-api-key`` header. All series / library /
|
||||||
|
collection / metadata reads and writes used by the light-novel updater
|
||||||
|
go through this single client so request shaping (paging, content types,
|
||||||
|
timeouts) is consistent.
|
||||||
|
|
||||||
|
The class is intentionally state-light: no caching layer, just one
|
||||||
|
``requests.Session``. Higher-level diff / update logic lives in
|
||||||
|
KavitaSeriesUpdater, KavitaPersonUpdater and RelationshipSync.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
class KavitaClient:
    """
    HTTP client for the Kavita REST API built on two ``requests.Session``s.

    One session carries the ``x-api-key`` and JSON headers and is used for
    every Kavita call; a second, plain session is used only to download
    external cover images.  Every request uses the timeout given at
    construction.  The endpoint methods call ``raise_for_status()`` and
    therefore raise ``requests.HTTPError`` on HTTP error statuses; the
    generic ``get`` / ``post`` helpers return the raw response unchecked.

    Parameters
    ----------
    base_url        : Base URL of the Kavita server; a trailing slash is
                      stripped.
    api_key         : API key sent in the ``x-api-key`` header.
    request_timeout : HTTP timeout in seconds for all requests.
    """

    def __init__(self, base_url: str, api_key: str, *,
                 request_timeout: int = 30) -> None:
        self._base = base_url.rstrip("/")
        self._timeout = request_timeout

        # API session: sends + receives JSON.
        self._session = requests.Session()
        self._session.headers.update({
            "x-api-key": api_key,
            "Accept": "application/json",
            "Content-Type": "application/json",
        })

        # Plain session for downloading external images (covers). Must NOT
        # carry the API headers — some CDNs refuse to return image bytes
        # when the client sends Accept: application/json.
        self._image_session = requests.Session()
        self._image_session.headers.update({
            "User-Agent": "KavitaLightNovelUpdater/1.0",
        })

    # ------------------------------------------------------------------
    # Libraries
    # ------------------------------------------------------------------
    def list_libraries(self) -> list[dict]:
        """Returns all libraries the authenticated user can access."""
        r = self._session.get(f"{self._base}/api/Library/libraries",
                              timeout=self._timeout)
        r.raise_for_status()
        # A null / empty JSON body is normalised to an empty list.
        return r.json() or []

    # ------------------------------------------------------------------
    # Series
    # ------------------------------------------------------------------
    def list_series_in_library(self, library_id: int, *,
                               page_size: int = 200) -> list[dict]:
        """
        Returns all SeriesDto entries in the given library.

        Uses POST /api/Series/all-v2 with a FilterV2 that scopes by
        library id.  Pages through (starting at page 1) until an empty
        page or a page shorter than ``page_size`` is returned.
        """
        results: list[dict] = []
        page = 1
        while True:
            body = {
                "statements": [
                    {
                        "comparison": 0,  # Equal
                        "field": 19,  # Libraries field id (Kavita v0.9.x)
                        "value": str(library_id),
                    }
                ],
                "combination": 1,  # And
                "sortOptions": {"isAscending": True, "sortField": 1},
                "limitTo": 0,
            }
            r = self._session.post(
                f"{self._base}/api/Series/all-v2",
                params={"PageNumber": page, "PageSize": page_size},
                json=body, timeout=self._timeout)
            r.raise_for_status()
            chunk = r.json() or []
            if not chunk:
                break
            results.extend(chunk)
            # A short page means this was the last one — skip the extra
            # request that would only return an empty page.
            if len(chunk) < page_size:
                break
            page += 1
        return results

    def get_series(self, series_id: int) -> dict:
        """Returns the SeriesDto for the given series id."""
        r = self._session.get(f"{self._base}/api/Series/{series_id}",
                              timeout=self._timeout)
        r.raise_for_status()
        return r.json() or {}

    def update_series(self, series: dict) -> None:
        """Updates the Series-level data (name, sortName, malId, …)."""
        r = self._session.post(f"{self._base}/api/Series/update",
                               json=series, timeout=self._timeout)
        r.raise_for_status()

    # ------------------------------------------------------------------
    # Series metadata
    # ------------------------------------------------------------------
    def get_series_metadata(self, series_id: int) -> dict:
        """Returns the SeriesMetadataDto for a series."""
        r = self._session.get(
            f"{self._base}/api/Series/metadata",
            params={"seriesId": series_id}, timeout=self._timeout)
        r.raise_for_status()
        return r.json() or {}

    def update_series_metadata(self, metadata: dict) -> None:
        """
        Writes a SeriesMetadataDto back to Kavita.

        Kavita expects the payload wrapped: {seriesMetadata: {...}}.
        """
        r = self._session.post(
            f"{self._base}/api/Series/metadata",
            json={"seriesMetadata": metadata},
            timeout=self._timeout)
        r.raise_for_status()

    # ------------------------------------------------------------------
    # Related series
    # ------------------------------------------------------------------
    def get_related(self, series_id: int) -> dict:
        """Returns all related series grouped by relation type."""
        r = self._session.get(
            f"{self._base}/api/Series/all-related",
            params={"seriesId": series_id}, timeout=self._timeout)
        r.raise_for_status()
        return r.json() or {}

    def update_related(self, payload: dict) -> None:
        """
        Sets the related-series relationships for a series.

        Payload shape (UpdateRelatedSeriesDto):
            {seriesId, prequels, sequels, sideStories, spinOffs,
             adaptations, characters, contains, others,
             alternativeSettings, alternativeVersions, doujinshis,
             editions, annuals}
        Each relation field is a list of target series ids (ints).
        The payload is sent as given; it is not validated here.
        """
        r = self._session.post(
            f"{self._base}/api/Series/update-related",
            json=payload, timeout=self._timeout)
        r.raise_for_status()

    # ------------------------------------------------------------------
    # Collections
    # ------------------------------------------------------------------
    def list_collections(self) -> list[dict]:
        """Returns all collection tags visible to the authenticated user."""
        r = self._session.get(
            f"{self._base}/api/Collection",
            params={"ownedOnly": "false", "sortByLastModified": "false"},
            timeout=self._timeout)
        r.raise_for_status()
        return r.json() or []

    def add_series_to_collection(self, *, collection_id: int,
                                 title: str,
                                 series_ids: Iterable[int]) -> dict:
        """
        Adds (or creates) a collection and attaches series to it.

        Pass collection_id=0 to create a new collection named `title`.
        For an existing collection set collection_id to its id (title is
        still required by the API but acts as no-op when the id matches).

        Returns the parsed JSON response, or an empty dict when the body
        is empty or not valid JSON.
        """
        body = {
            "collectionTagId": int(collection_id),
            "collectionTagTitle": title,
            "seriesIds": [int(s) for s in series_ids],
        }
        r = self._session.post(
            f"{self._base}/api/Collection/update-for-series",
            json=body, timeout=self._timeout)
        r.raise_for_status()
        try:
            return r.json() or {}
        except ValueError:
            # Successful response without a JSON body.
            return {}

    # ------------------------------------------------------------------
    # Persons
    # ------------------------------------------------------------------
    def search_persons(self, name: str) -> list[dict]:
        """Returns PersonDto entries matching `name` (Kavita's own search)."""
        r = self._session.get(
            f"{self._base}/api/Person/search",
            params={"queryString": name}, timeout=self._timeout)
        r.raise_for_status()
        return r.json() or []

    def update_person(self, payload: dict) -> None:
        """Writes a person record (malId, aniListId, description, …)."""
        r = self._session.post(f"{self._base}/api/Person/update",
                               json=payload, timeout=self._timeout)
        r.raise_for_status()

    # ------------------------------------------------------------------
    # Cover uploads
    # ------------------------------------------------------------------
    def upload_series_cover(self, series_id: int, image_url: str, *,
                            lock: bool = False) -> None:
        """Downloads an external image and uploads it as the series cover."""
        self._upload_cover("/api/Upload/series", series_id, image_url, lock)

    def upload_person_cover(self, person_id: int, image_url: str, *,
                            lock: bool = False) -> None:
        """Downloads an external image and uploads it as a person cover."""
        self._upload_cover("/api/Upload/person", person_id, image_url, lock)

    def _upload_cover(self, endpoint: str, entity_id: int,
                      image_url: str, lock: bool) -> None:
        """
        Shared cover-upload path.  Kavita's upload endpoints accept a raw
        base64 blob (no ``data:`` prefix) in the ``url`` field — a data
        URI or the two-step upload-by-url flow are rejected with HTTP 400
        (verified against Kavita 0.9.0.2).

        Raises ``requests.HTTPError`` when the image download returns an
        error status, or when the upload responds with a status >= 400
        (the message then includes an excerpt of the response body).
        """
        # Download with the plain image session, not the API session.
        img = self._image_session.get(image_url, timeout=self._timeout)
        img.raise_for_status()
        b64 = base64.b64encode(img.content).decode()
        r = self._session.post(
            f"{self._base}{endpoint}",
            json={"id": entity_id, "url": b64, "lockCover": lock},
            timeout=self._timeout)
        if r.status_code >= 400:
            # Include the body excerpt — Kavita's upload errors carry the
            # actual reason there, not in the status line.
            raise requests.HTTPError(
                f"{endpoint} HTTP {r.status_code}: {_short_body(r)}",
                response=r)

    # ------------------------------------------------------------------
    # Generic GET / POST helpers (used by callers that need a response
    # object; the status is NOT checked here)
    # ------------------------------------------------------------------
    def get(self, path: str, params: "dict | None" = None) -> requests.Response:
        """Sends a GET to ``base_url + path`` and returns the raw response."""
        return self._session.get(f"{self._base}{path}",
                                 params=params, timeout=self._timeout)

    def post(self, path: str, *,
             json: "dict | list | None" = None,
             params: "dict | None" = None) -> requests.Response:
        """Sends a JSON POST to ``base_url + path`` and returns the raw response."""
        return self._session.post(f"{self._base}{path}",
                                  json=json, params=params,
                                  timeout=self._timeout)
|
||||||
|
|
||||||
|
|
||||||
|
def _short_body(resp: requests.Response, limit: int = 400) -> str:
|
||||||
|
"""Returns the response body trimmed to `limit` chars for error messages."""
|
||||||
|
try:
|
||||||
|
text = resp.text or ""
|
||||||
|
except Exception:
|
||||||
|
return "<unreadable response body>"
|
||||||
|
text = text.strip().replace("\n", " ").replace("\r", " ")
|
||||||
|
if len(text) > limit:
|
||||||
|
text = text[:limit] + "…"
|
||||||
|
return text or "<empty body>"
|
||||||
+65
-175
@@ -15,46 +15,22 @@ the updater:
|
|||||||
an 'about' text (requires an extra Jikan request per character; only
|
an 'about' text (requires an extra Jikan request per character; only
|
||||||
performed when update_descriptions=True).
|
performed when update_descriptions=True).
|
||||||
|
|
||||||
Kavita API version
|
All HTTP traffic to Kavita goes through the shared :class:`KavitaClient`
|
||||||
------------------
|
(`/api/Person/search`, `/api/Person/update`, `/api/Upload/person`).
|
||||||
|
|
||||||
Tested against Kavita 0.9.0.2.
|
Tested against Kavita 0.9.0.2.
|
||||||
|
|
||||||
Authentication
|
|
||||||
--------------
|
|
||||||
Uses the `x-api-key` header (API key from Kavita user settings).
|
|
||||||
No JWT login is required.
|
|
||||||
|
|
||||||
Relevant endpoints (Kavita 0.9.0.2)
|
|
||||||
-------------------------------------
|
|
||||||
GET /api/Person/search find persons by name / alias
|
|
||||||
POST /api/Person/update write metadata (malId, description, …)
|
|
||||||
POST /api/Upload/person set cover image (base64 data URI)
|
|
||||||
POST /api/Upload/upload-by-url download an external URL to temp storage
|
|
||||||
(used as an alternative upload path)
|
|
||||||
|
|
||||||
Cover upload flow
|
|
||||||
-----------------
|
|
||||||
The image is downloaded locally, base64-encoded, and sent as a data URI
|
|
||||||
to POST /api/Upload/person. This is more reliable than the
|
|
||||||
upload-by-url → upload/person two-step because it avoids Kavita's temp
|
|
||||||
file handling (which had known issues in 0.8.x – 0.9.x, GitHub #3900).
|
|
||||||
|
|
||||||
Dependencies
|
|
||||||
------------
|
|
||||||
requests -> pip install requests
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
|
||||||
import datetime
|
import datetime
|
||||||
import difflib
|
|
||||||
import re
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from KavitaClient import KavitaClient
|
||||||
from MALResolver import MALResolver
|
from MALResolver import MALResolver
|
||||||
from AniListResolver import AniListResolver
|
from AniListResolver import AniListResolver
|
||||||
|
from TextUtils import best_similarity, paragraphs_to_html, person_name_with_id
|
||||||
|
|
||||||
|
|
||||||
class KavitaPersonUpdater:
|
class KavitaPersonUpdater:
|
||||||
@@ -63,41 +39,22 @@ class KavitaPersonUpdater:
|
|||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
kavita_base_url : Base URL of the Kavita server, e.g. "http://192.168.2.2:5000"
|
client : Shared KavitaClient (session, auth, cover uploads)
|
||||||
api_key : Kavita API key (Settings → User → API key)
|
|
||||||
mal_resolver : Shared MALResolver singleton (created automatically if omitted)
|
mal_resolver : Shared MALResolver singleton (created automatically if omitted)
|
||||||
request_timeout : HTTP timeout in seconds for both Kavita and image requests
|
al_resolver : Shared AniListResolver singleton (created automatically if omitted)
|
||||||
min_name_score : Minimum difflib similarity ratio (0–1) required to accept a
|
min_name_score : Minimum difflib similarity ratio (0–1) required to accept a
|
||||||
Kavita person as a match for a MAL name. Default 0.80.
|
Kavita person as a match for a MAL name. Default 0.80.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, kavita_base_url: str, api_key: str, *,
|
def __init__(self, client: KavitaClient, *,
|
||||||
mal_resolver: "MALResolver | None" = None,
|
mal_resolver: "MALResolver | None" = None,
|
||||||
al_resolver: "AniListResolver | None" = None,
|
al_resolver: "AniListResolver | None" = None,
|
||||||
request_timeout: int = 30,
|
|
||||||
min_name_score: float = 0.80):
|
min_name_score: float = 0.80):
|
||||||
self._base = kavita_base_url.rstrip("/")
|
self._client = client
|
||||||
self._timeout = request_timeout
|
|
||||||
self._min_score = min_name_score
|
self._min_score = min_name_score
|
||||||
self._mal = mal_resolver or MALResolver()
|
self._mal = mal_resolver or MALResolver()
|
||||||
self._al = al_resolver or AniListResolver()
|
self._al = al_resolver or AniListResolver()
|
||||||
|
|
||||||
# Session used for Kavita API calls.
|
|
||||||
self._session = requests.Session()
|
|
||||||
self._session.headers.update({
|
|
||||||
"x-api-key": api_key,
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
"Accept": "application/json",
|
|
||||||
})
|
|
||||||
|
|
||||||
# Plain session used to download external images (MAL CDN etc.).
|
|
||||||
# Must NOT carry the Kavita API headers — Accept: application/json
|
|
||||||
# would prevent MAL CDN from returning the image bytes.
|
|
||||||
self._image_session = requests.Session()
|
|
||||||
self._image_session.headers.update({
|
|
||||||
"User-Agent": "KavitaPersonUpdater/1.0",
|
|
||||||
})
|
|
||||||
|
|
||||||
# Cache: normalised name -> list of PersonDto dicts (best matches first)
|
# Cache: normalised name -> list of PersonDto dicts (best matches first)
|
||||||
self._person_search_cache: dict[str, list[dict]] = {}
|
self._person_search_cache: dict[str, list[dict]] = {}
|
||||||
|
|
||||||
@@ -195,11 +152,28 @@ class KavitaPersonUpdater:
|
|||||||
if not name and not raw_name:
|
if not name and not raw_name:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Search by the cleaned (XML-safe) name first; if Kavita stores
|
if kind == "character":
|
||||||
|
# Characters are stored under their disambiguated name
|
||||||
|
# ("Rem (MAL 118737)") — see person_name_with_id. The
|
||||||
|
# series metadata write creates the person under exactly
|
||||||
|
# this name, so only that form is searched.
|
||||||
|
search_names = [person_name_with_id(
|
||||||
|
name, mal_id=entry.get("mal_id"),
|
||||||
|
al_id=entry.get("al_id"))]
|
||||||
|
else:
|
||||||
|
# Staff: cleaned (XML-safe) name first; if Kavita stores
|
||||||
# the legacy comma form, retry with the raw MAL name.
|
# the legacy comma form, retry with the raw MAL name.
|
||||||
matches = self._find_kavita_person(name) if name else []
|
search_names = [name]
|
||||||
if not matches and raw_name and raw_name != name:
|
if raw_name and raw_name != name:
|
||||||
matches = self._find_kavita_person(raw_name)
|
search_names.append(raw_name)
|
||||||
|
|
||||||
|
matches: list[dict] = []
|
||||||
|
for search_name in search_names:
|
||||||
|
if not search_name:
|
||||||
|
continue
|
||||||
|
matches = self._find_kavita_person(search_name)
|
||||||
|
if matches:
|
||||||
|
break
|
||||||
|
|
||||||
if not matches:
|
if not matches:
|
||||||
result["not_found"] += 1
|
result["not_found"] += 1
|
||||||
@@ -230,29 +204,17 @@ class KavitaPersonUpdater:
|
|||||||
return self._person_search_cache[key]
|
return self._person_search_cache[key]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
resp = self._session.get(
|
persons = self._client.search_persons(name)
|
||||||
f"{self._base}/api/Person/search",
|
|
||||||
params={"queryString": name},
|
|
||||||
timeout=self._timeout,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
persons: list[dict] = resp.json() or []
|
|
||||||
except requests.RequestException:
|
except requests.RequestException:
|
||||||
self._person_search_cache[key] = []
|
self._person_search_cache[key] = []
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def score(p: dict) -> float:
|
scored = []
|
||||||
candidates = [p.get("name") or ""]
|
for p in persons:
|
||||||
candidates += [a for a in (p.get("aliases") or []) if a]
|
candidates = [p.get("name")] + list(p.get("aliases") or [])
|
||||||
best = 0.0
|
scored.append((best_similarity(key, candidates), p))
|
||||||
q = key
|
scored.sort(key=lambda pair: pair[0], reverse=True)
|
||||||
for c in candidates:
|
filtered = [p for score, p in scored if score >= self._min_score]
|
||||||
r = difflib.SequenceMatcher(None, q, c.lower()).ratio()
|
|
||||||
best = max(best, r)
|
|
||||||
return best
|
|
||||||
|
|
||||||
ranked = sorted(persons, key=score, reverse=True)
|
|
||||||
filtered = [p for p in ranked if score(p) >= self._min_score]
|
|
||||||
self._person_search_cache[key] = filtered
|
self._person_search_cache[key] = filtered
|
||||||
return filtered
|
return filtered
|
||||||
|
|
||||||
@@ -289,6 +251,20 @@ class KavitaPersonUpdater:
|
|||||||
|
|
||||||
current_mal_id: int = person.get("malId") or 0
|
current_mal_id: int = person.get("malId") or 0
|
||||||
current_al_id: int = person.get("aniListId") or 0
|
current_al_id: int = person.get("aniListId") or 0
|
||||||
|
|
||||||
|
# Collision guard: the Kavita person is already linked to a
|
||||||
|
# *different* tracker entity — same display name, different
|
||||||
|
# character/person. Never overwrite; first writer wins.
|
||||||
|
if ((mal_id and current_mal_id and current_mal_id != mal_id)
|
||||||
|
or (al_id and current_al_id and current_al_id != al_id)):
|
||||||
|
if errors is not None:
|
||||||
|
errors.append(
|
||||||
|
f"conflict: '{person_name}' (#{person_id}) is linked to "
|
||||||
|
f"malId={current_mal_id or '-'}/aniListId={current_al_id or '-'} "
|
||||||
|
f"but this entry has malId={mal_id or '-'}/aniListId={al_id or '-'} "
|
||||||
|
f"— skipped")
|
||||||
|
return False
|
||||||
|
|
||||||
needs_mal_id = bool(mal_id and current_mal_id != mal_id)
|
needs_mal_id = bool(mal_id and current_mal_id != mal_id)
|
||||||
needs_al_id = bool(al_id and current_al_id != al_id)
|
needs_al_id = bool(al_id and current_al_id != al_id)
|
||||||
|
|
||||||
@@ -323,12 +299,7 @@ class KavitaPersonUpdater:
|
|||||||
"aniListId": al_id if needs_al_id else (current_al_id or None),
|
"aniListId": al_id if needs_al_id else (current_al_id or None),
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
resp = self._session.post(
|
self._client.update_person(payload)
|
||||||
f"{self._base}/api/Person/update",
|
|
||||||
json=payload,
|
|
||||||
timeout=self._timeout,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
changed = True
|
changed = True
|
||||||
except requests.RequestException as e:
|
except requests.RequestException as e:
|
||||||
if errors is not None:
|
if errors is not None:
|
||||||
@@ -350,88 +321,21 @@ class KavitaPersonUpdater:
|
|||||||
and bool(person.get("coverImage"))
|
and bool(person.get("coverImage"))
|
||||||
)
|
)
|
||||||
if image_url and not already_uploaded:
|
if image_url and not already_uploaded:
|
||||||
if self._upload_cover(person_id, image_url,
|
try:
|
||||||
person_name=person_name,
|
self._client.upload_person_cover(person_id, image_url)
|
||||||
errors=errors):
|
|
||||||
changed = True
|
changed = True
|
||||||
|
except requests.RequestException as e:
|
||||||
|
if errors is not None:
|
||||||
|
errors.append(
|
||||||
|
f"cover upload failed for #{person_id} "
|
||||||
|
f"'{person_name}' ({image_url}): {e}")
|
||||||
|
|
||||||
return changed
|
return changed
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Internal: cover upload
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
def _upload_cover(self, person_id: int, image_url: str,
|
|
||||||
lock: bool = False, *,
|
|
||||||
person_name: str = "",
|
|
||||||
errors: "list | None" = None) -> bool:
|
|
||||||
"""
|
|
||||||
Uploads a cover image to a Kavita person.
|
|
||||||
|
|
||||||
The image is downloaded with the plain (header-less) image session
|
|
||||||
and posted to `POST /api/Upload/person` as a raw base64 string in
|
|
||||||
the `url` field.
|
|
||||||
|
|
||||||
Notes on protocol quirks discovered against Kavita 0.9.0.2:
|
|
||||||
- The two-step `upload-by-url` -> `Upload/person` flow returns
|
|
||||||
"Unable to save cover image to Person" (HTTP 400).
|
|
||||||
- A `data:image/jpeg;base64,...` data URI is rejected with the
|
|
||||||
same error.
|
|
||||||
- Only the raw base64 blob (no prefix) is accepted.
|
|
||||||
"""
|
|
||||||
label = (f"#{person_id} '{person_name}'"
|
|
||||||
if person_name else f"#{person_id}")
|
|
||||||
|
|
||||||
# 1) Download the image with a clean session — the Kavita session's
|
|
||||||
# `Accept: application/json` header makes some CDNs refuse to
|
|
||||||
# return image bytes.
|
|
||||||
try:
|
|
||||||
img_resp = self._image_session.get(image_url,
|
|
||||||
timeout=self._timeout)
|
|
||||||
img_resp.raise_for_status()
|
|
||||||
except requests.RequestException as e:
|
|
||||||
if errors is not None:
|
|
||||||
errors.append(
|
|
||||||
f"image download failed for {label} ({image_url}): {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
b64 = base64.b64encode(img_resp.content).decode()
|
|
||||||
|
|
||||||
# 2) POST the raw base64 blob.
|
|
||||||
try:
|
|
||||||
resp = self._session.post(
|
|
||||||
f"{self._base}/api/Upload/person",
|
|
||||||
json={"id": person_id, "url": b64, "lockCover": lock},
|
|
||||||
timeout=self._timeout,
|
|
||||||
)
|
|
||||||
if resp.status_code >= 400:
|
|
||||||
if errors is not None:
|
|
||||||
errors.append(
|
|
||||||
f"Upload/person HTTP {resp.status_code} for {label}: "
|
|
||||||
f"{_short_body(resp)}")
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
except requests.RequestException as e:
|
|
||||||
if errors is not None:
|
|
||||||
errors.append(
|
|
||||||
f"Upload/person failed for {label}: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Module helpers: description builders
|
# Module helpers: description builders
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
def _plain_to_html(text: str) -> str:
|
|
||||||
"""Converts plain text with paragraph breaks to compact HTML (no raw \\n)."""
|
|
||||||
if not text:
|
|
||||||
return ""
|
|
||||||
parts: list[str] = []
|
|
||||||
for para in re.split(r"\n{2,}", text.strip()):
|
|
||||||
para = para.strip()
|
|
||||||
if para:
|
|
||||||
parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
|
|
||||||
return "".join(parts)
|
|
||||||
|
|
||||||
|
|
||||||
def _format_birthday(birthday: str) -> str:
|
def _format_birthday(birthday: str) -> str:
|
||||||
"""Converts an ISO 8601 birthday string to "D Month YYYY"."""
|
"""Converts an ISO 8601 birthday string to "D Month YYYY"."""
|
||||||
if not birthday:
|
if not birthday:
|
||||||
@@ -457,7 +361,7 @@ def _build_character_description(details: dict) -> str:
|
|||||||
parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>')
|
parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>')
|
||||||
about = (details.get("about") or "").strip()
|
about = (details.get("about") or "").strip()
|
||||||
if about:
|
if about:
|
||||||
parts.append(_plain_to_html(about))
|
parts.append(paragraphs_to_html(about))
|
||||||
return "<br>".join(parts)
|
return "<br>".join(parts)
|
||||||
|
|
||||||
|
|
||||||
@@ -501,33 +405,19 @@ def _build_person_description(details: dict) -> str:
|
|||||||
parts.append(f'<table>{"".join(rows)}</table>')
|
parts.append(f'<table>{"".join(rows)}</table>')
|
||||||
about = (details.get("about") or "").strip()
|
about = (details.get("about") or "").strip()
|
||||||
if about:
|
if about:
|
||||||
parts.append(_plain_to_html(about))
|
parts.append(paragraphs_to_html(about))
|
||||||
return "<br>".join(parts)
|
return "<br>".join(parts)
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
|
||||||
# Module helper
|
|
||||||
# --------------------------------------------------------------------------
|
|
||||||
def _short_body(resp: requests.Response, limit: int = 400) -> str:
|
|
||||||
"""Returns the response body trimmed to `limit` chars for error logging."""
|
|
||||||
try:
|
|
||||||
text = resp.text or ""
|
|
||||||
except Exception:
|
|
||||||
return "<unreadable response body>"
|
|
||||||
text = text.strip().replace("\n", " ").replace("\r", " ")
|
|
||||||
if len(text) > limit:
|
|
||||||
text = text[:limit] + "…"
|
|
||||||
return text or "<empty body>"
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Usage example
|
# Usage example
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
KAVITA_URL = "http://192.168.2.2:5000"
|
import os
|
||||||
KAVITA_KEY = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA"
|
|
||||||
|
|
||||||
updater = KavitaPersonUpdater(KAVITA_URL, KAVITA_KEY)
|
client = KavitaClient(os.environ["KAVITA_URL"],
|
||||||
|
os.environ["KAVITA_API_KEY"])
|
||||||
|
updater = KavitaPersonUpdater(client)
|
||||||
|
|
||||||
mal = MALResolver()
|
mal = MALResolver()
|
||||||
mal_id = mal.find_mal_id("よふかしのうた")
|
mal_id = mal.find_mal_id("よふかしのうた")
|
||||||
|
|||||||
+17
-15
@@ -30,12 +30,12 @@ Dependencies
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import difflib
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from MediaResolver import MediaResolver
|
from MediaResolver import MediaResolver
|
||||||
|
from TextUtils import best_similarity
|
||||||
|
|
||||||
|
|
||||||
class MALResolver(MediaResolver):
|
class MALResolver(MediaResolver):
|
||||||
@@ -57,12 +57,21 @@ class MALResolver(MediaResolver):
|
|||||||
cls._instance._initialized = False
|
cls._instance._initialized = False
|
||||||
return cls._instance
|
return cls._instance
|
||||||
|
|
||||||
def __init__(self, *, request_timeout: int = 30):
|
def __init__(self, *, request_timeout: int = 30,
|
||||||
|
search_type: str = "manga"):
|
||||||
|
"""
|
||||||
|
search_type : Jikan `type` filter for title searches — "manga" for
|
||||||
|
the manga container, "lightnovel" for the LN container.
|
||||||
|
Only the FIRST construction in the process sets it
|
||||||
|
(singleton); construct the resolver with the correct
|
||||||
|
type in the entry point / orchestrator.
|
||||||
|
"""
|
||||||
if self._initialized:
|
if self._initialized:
|
||||||
return
|
return
|
||||||
|
|
||||||
self.JIKAN_BASE = "https://api.jikan.moe/v4"
|
self.JIKAN_BASE = "https://api.jikan.moe/v4"
|
||||||
self.request_timeout = request_timeout
|
self.request_timeout = request_timeout
|
||||||
|
self.search_type = search_type
|
||||||
|
|
||||||
self._session = requests.Session()
|
self._session = requests.Session()
|
||||||
self._session.headers.setdefault("User-Agent", "MALResolver/1.0")
|
self._session.headers.setdefault("User-Agent", "MALResolver/1.0")
|
||||||
@@ -106,7 +115,7 @@ class MALResolver(MediaResolver):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
data = self._get(f"{self.JIKAN_BASE}/manga",
|
data = self._get(f"{self.JIKAN_BASE}/manga",
|
||||||
{"q": title, "limit": 5, "type": "manga"})
|
{"q": title, "limit": 5, "type": self.search_type})
|
||||||
results = data.get("data") or []
|
results = data.get("data") or []
|
||||||
except requests.RequestException:
|
except requests.RequestException:
|
||||||
return None
|
return None
|
||||||
@@ -404,19 +413,12 @@ def _clean_mal_name(name: str) -> str:
|
|||||||
def _score_title(query: str, entry: dict) -> float:
|
def _score_title(query: str, entry: dict) -> float:
|
||||||
"""Returns the best title-similarity score for a Jikan manga entry."""
|
"""Returns the best title-similarity score for a Jikan manga entry."""
|
||||||
candidates = [
|
candidates = [
|
||||||
entry.get("title") or "",
|
entry.get("title"),
|
||||||
entry.get("title_english") or "",
|
entry.get("title_english"),
|
||||||
entry.get("title_japanese") or "",
|
entry.get("title_japanese"),
|
||||||
]
|
]
|
||||||
for alt in (entry.get("titles") or []):
|
candidates += [alt.get("title") for alt in (entry.get("titles") or [])]
|
||||||
candidates.append(alt.get("title") or "")
|
return best_similarity(query, candidates)
|
||||||
best = 0.0
|
|
||||||
q = query.lower()
|
|
||||||
for t in candidates:
|
|
||||||
if t:
|
|
||||||
ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
|
|
||||||
best = max(best, ratio)
|
|
||||||
return best
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -0,0 +1,92 @@
|
|||||||
|
"""
|
||||||
|
mangabaka_rate_limit.py
|
||||||
|
=======================
|
||||||
|
|
||||||
|
Process-wide rate limiter for the MangaBaka API.
|
||||||
|
|
||||||
|
Apply via:
|
||||||
|
|
||||||
|
from MangaBakaRateLimit import apply_to_session
|
||||||
|
apply_to_session(session)
|
||||||
|
|
||||||
|
This mounts a custom ``requests.adapters.HTTPAdapter`` on the given
|
||||||
|
``requests.Session`` for the ``api.mangabaka.dev`` host. Every request
|
||||||
|
going through that adapter is:
|
||||||
|
|
||||||
|
* throttled so that no two requests are dispatched within
|
||||||
|
``_MIN_INTERVAL`` seconds of one another, and
|
||||||
|
* retried on HTTP 429, honouring the ``Retry-After`` header when
|
||||||
|
present, otherwise exponential backoff capped at ``_MAX_BACKOFF``.
|
||||||
|
|
||||||
|
Throttle state is module-global, so even if several sessions exist in
|
||||||
|
the same process they share one budget — important because they all hit
|
||||||
|
the same upstream IP-based limit.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
|
from requests.adapters import HTTPAdapter
|
||||||
|
|
||||||
|
|
||||||
|
# Tune these if MangaBaka tightens or loosens limits.
|
||||||
|
_MIN_INTERVAL = 1.1 # seconds between consecutive requests
|
||||||
|
_MAX_RETRIES = 6 # retries on 429 before giving up
|
||||||
|
_MAX_BACKOFF = 60.0 # cap on per-attempt backoff sleep
|
||||||
|
|
||||||
|
|
||||||
|
# --- shared throttle state --------------------------------------------------
|
||||||
|
_state_lock = threading.Lock()
|
||||||
|
_last_request_time = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _wait_for_slot() -> None:
|
||||||
|
"""Block until the next request slot is available, then reserve it."""
|
||||||
|
global _last_request_time
|
||||||
|
while True:
|
||||||
|
with _state_lock:
|
||||||
|
now = time.monotonic()
|
||||||
|
wait = _MIN_INTERVAL - (now - _last_request_time)
|
||||||
|
if wait <= 0:
|
||||||
|
_last_request_time = now
|
||||||
|
return
|
||||||
|
time.sleep(wait)
|
||||||
|
|
||||||
|
|
||||||
|
class _MangaBakaRateLimitAdapter(HTTPAdapter):
|
||||||
|
def send(self, request, **kwargs):
|
||||||
|
response = None
|
||||||
|
for attempt in range(_MAX_RETRIES + 1):
|
||||||
|
_wait_for_slot()
|
||||||
|
response = super().send(request, **kwargs)
|
||||||
|
if response.status_code != 429:
|
||||||
|
return response
|
||||||
|
|
||||||
|
retry_after = response.headers.get("Retry-After")
|
||||||
|
try:
|
||||||
|
wait = (float(retry_after) if retry_after
|
||||||
|
else min(_MAX_BACKOFF, 2.0 * (2 ** attempt)))
|
||||||
|
except ValueError:
|
||||||
|
wait = min(_MAX_BACKOFF, 2.0 * (2 ** attempt))
|
||||||
|
|
||||||
|
print(f"[MangaBaka] 429 — backing off {wait:.1f}s "
|
||||||
|
f"(attempt {attempt + 1}/{_MAX_RETRIES})",
|
||||||
|
flush=True)
|
||||||
|
response.close()
|
||||||
|
time.sleep(wait)
|
||||||
|
|
||||||
|
# Retries exhausted — let the caller deal with the last 429.
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
def apply_to_session(session) -> None:
|
||||||
|
"""
|
||||||
|
Mount the rate-limit adapter on ``session`` so every MangaBaka call
|
||||||
|
is automatically throttled. Safe to call multiple times (later mounts
|
||||||
|
just replace the earlier adapter for the same prefix).
|
||||||
|
"""
|
||||||
|
adapter = _MangaBakaRateLimitAdapter()
|
||||||
|
session.mount("https://api.mangabaka.dev/", adapter)
|
||||||
|
session.mount("http://api.mangabaka.dev/", adapter)
|
||||||
+186
-80
@@ -2,7 +2,7 @@
|
|||||||
mangabaka_works_resolver.py
|
mangabaka_works_resolver.py
|
||||||
===========================
|
===========================
|
||||||
|
|
||||||
Fetches volume-level (work) data from the MangaBaka API.
|
Fetches volume-level (work) data and volume cover images from the MangaBaka API.
|
||||||
|
|
||||||
Each "work" is a physical tankobon volume and may carry:
|
Each "work" is a physical tankobon volume and may carry:
|
||||||
- volume number
|
- volume number
|
||||||
@@ -11,10 +11,16 @@ Each "work" is a physical tankobon volume and may carry:
|
|||||||
- release date
|
- release date
|
||||||
- cover image (raw / default / small variants)
|
- cover image (raw / default / small variants)
|
||||||
|
|
||||||
Only works that have a usable cover are kept in the cache.
|
Cover resolution order (per volume)
|
||||||
Works without a cover are discarded at fetch time.
|
------------------------------------
|
||||||
If no volume is assigned for a chapter, callers fall back to the
|
1. GET /v1/series/{id}/images — covers that exist independently of a work
|
||||||
default series cover from the series object itself.
|
(some series have covers but no works). English edition preferred;
|
||||||
|
original language used when no English cover is available.
|
||||||
|
2. GET /v1/series/{id}/works — physical tankobon data including covers.
|
||||||
|
Fallback when /images returns nothing for the requested volume.
|
||||||
|
|
||||||
|
If no volume cover is found at all, callers fall back to the series-level
|
||||||
|
default cover from the series object itself.
|
||||||
|
|
||||||
Dependencies
|
Dependencies
|
||||||
------------
|
------------
|
||||||
@@ -26,10 +32,75 @@ from __future__ import annotations
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
# Generic image-block URL picker (shared by /images and /works responses)
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
def _pick_image_url(image) -> "str | None":
|
||||||
|
"""
|
||||||
|
Returns the best URL from a MangaBaka image block.
|
||||||
|
|
||||||
|
Handles the common ``{raw, x150, x250, x350}`` structure used by both
|
||||||
|
the ``cover`` field on series/work objects and the ``image`` field on
|
||||||
|
``/images`` endpoint items::
|
||||||
|
|
||||||
|
{
|
||||||
|
"raw": {"url": "...", "size": ..., "height": ..., "width": ...},
|
||||||
|
"x150": {"x1": "...", "x2": "...", "x3": "..."},
|
||||||
|
"x250": {...},
|
||||||
|
"x350": {...}
|
||||||
|
}
|
||||||
|
|
||||||
|
Preference: raw original > x350@x3 > x250@x3 > x150@x3 > … (falling
|
||||||
|
through to lower densities and sizes as needed).
|
||||||
|
"""
|
||||||
|
if not image:
|
||||||
|
return None
|
||||||
|
if isinstance(image, str):
|
||||||
|
return image
|
||||||
|
if not isinstance(image, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
# 1) Raw / unscaled image
|
||||||
|
raw = image.get("raw")
|
||||||
|
if isinstance(raw, dict):
|
||||||
|
url = raw.get("url")
|
||||||
|
if isinstance(url, str) and url:
|
||||||
|
return url
|
||||||
|
elif isinstance(raw, str) and raw:
|
||||||
|
return raw
|
||||||
|
|
||||||
|
# 2) Size-keyed CDN variants, largest first, highest density first
|
||||||
|
for size_key in ("x350", "x250", "x150"):
|
||||||
|
variant = image.get(size_key)
|
||||||
|
if isinstance(variant, dict):
|
||||||
|
for density in ("x3", "x2", "x1"):
|
||||||
|
url = variant.get(density)
|
||||||
|
if isinstance(url, str) and url:
|
||||||
|
return url
|
||||||
|
elif isinstance(variant, str) and variant:
|
||||||
|
return variant
|
||||||
|
|
||||||
|
# 3) Last-ditch: any HTTP URL anywhere in the structure
|
||||||
|
for val in image.values():
|
||||||
|
if isinstance(val, str) and val.startswith("http"):
|
||||||
|
return val
|
||||||
|
if isinstance(val, dict):
|
||||||
|
for sub_val in val.values():
|
||||||
|
if isinstance(sub_val, str) and sub_val.startswith("http"):
|
||||||
|
return sub_val
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class MangaBakaWorksResolver:
|
class MangaBakaWorksResolver:
|
||||||
"""
|
"""
|
||||||
Fetches and caches MangaBaka volume (work) data for a series.
|
Fetches and caches MangaBaka volume (work) data and cover images.
|
||||||
Only works that have a cover image are retained in the cache.
|
|
||||||
|
Cover lookup order per volume
|
||||||
|
------------------------------
|
||||||
|
1. ``/v1/series/{id}/images`` — edition covers (English > original).
|
||||||
|
2. ``/v1/series/{id}/works`` — physical tankobon covers.
|
||||||
|
|
||||||
|
Only works that carry a cover image are retained in the works cache.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, api_base_url: str = "https://api.mangabaka.dev/v1",
|
def __init__(self, api_base_url: str = "https://api.mangabaka.dev/v1",
|
||||||
@@ -42,30 +113,24 @@ class MangaBakaWorksResolver:
|
|||||||
|
|
||||||
# Cache: series_id (str) -> list of work dicts (only those with covers)
|
# Cache: series_id (str) -> list of work dicts (only those with covers)
|
||||||
self._cache: dict[str, list[dict]] = {}
|
self._cache: dict[str, list[dict]] = {}
|
||||||
|
# Cache: series_id (str) -> {norm_vol (str): url (str)}
|
||||||
|
self._images_cache: dict[str, dict[str, str]] = {}
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Public API
|
# Public API
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def get_works(self, series_id: str) -> list[dict]:
|
def _fetch_all_pages(self, endpoint: str) -> list[dict]:
|
||||||
"""
|
"""
|
||||||
Returns volume-level works for a series, filtered to those that have
|
Pages through a MangaBaka list endpoint (limit=50 per page) and
|
||||||
a usable cover image. Results are cached per series.
|
returns all collected `data` items. Network errors end the
|
||||||
|
pagination early; items fetched so far are returned.
|
||||||
Pages through the API (limit=50) until the response returns an empty
|
|
||||||
page, collecting all works before applying the cover filter.
|
|
||||||
"""
|
"""
|
||||||
if not series_id:
|
items: list[dict] = []
|
||||||
return []
|
|
||||||
|
|
||||||
if series_id in self._cache:
|
|
||||||
return self._cache[series_id]
|
|
||||||
|
|
||||||
all_works: list[dict] = []
|
|
||||||
page = 1
|
page = 1
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
resp = self._session.get(
|
resp = self._session.get(
|
||||||
f"{self.api_base_url}/series/{series_id}/works",
|
f"{self.api_base_url}/series/{endpoint}",
|
||||||
params={"limit": 50, "page": page},
|
params={"limit": 50, "page": page},
|
||||||
timeout=self.request_timeout,
|
timeout=self.request_timeout,
|
||||||
)
|
)
|
||||||
@@ -73,16 +138,34 @@ class MangaBakaWorksResolver:
|
|||||||
page_data = resp.json().get("data") or []
|
page_data = resp.json().get("data") or []
|
||||||
if not page_data:
|
if not page_data:
|
||||||
break
|
break
|
||||||
all_works.extend(page_data)
|
items.extend(page_data)
|
||||||
if len(page_data) < 50:
|
if len(page_data) < 50:
|
||||||
break
|
break
|
||||||
page += 1
|
page += 1
|
||||||
except requests.RequestException:
|
except requests.RequestException:
|
||||||
if not all_works:
|
pass
|
||||||
|
return items
|
||||||
|
|
||||||
|
def get_works(self, series_id: str) -> list[dict]:
|
||||||
|
"""
|
||||||
|
Returns volume-level works for a series, filtered to those that have
|
||||||
|
a usable cover image.
|
||||||
|
|
||||||
|
Non-empty results are cached per series; empty results are not, so
|
||||||
|
works added on MangaBaka later become visible without restarting
|
||||||
|
the (long-running) process.
|
||||||
|
"""
|
||||||
|
if not series_id:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
if series_id in self._cache:
|
||||||
|
return self._cache[series_id]
|
||||||
|
|
||||||
|
all_works = self._fetch_all_pages(f"{series_id}/works")
|
||||||
|
|
||||||
# Discard works that carry no usable cover
|
# Discard works that carry no usable cover
|
||||||
works_with_cover = [w for w in all_works if w.get("images")]
|
works_with_cover = [w for w in all_works if w.get("images")]
|
||||||
|
if works_with_cover:
|
||||||
self._cache[series_id] = works_with_cover
|
self._cache[series_id] = works_with_cover
|
||||||
return works_with_cover
|
return works_with_cover
|
||||||
|
|
||||||
@@ -101,12 +184,85 @@ class MangaBakaWorksResolver:
|
|||||||
return work
|
return work
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_cover_for_volume(self, series_id: str, volume) -> "str | None":
|
def get_volume_covers(self, series_id: str) -> "dict[str, str]":
|
||||||
"""Returns the cover URL for a specific volume, or None if not found."""
|
"""
|
||||||
work = self.get_work_for_volume(series_id, volume)
|
Fetches all volume-type cover images for a series from
|
||||||
if not work:
|
``/v1/series/{id}/images`` and returns a
|
||||||
|
``{normalised_volume_str: url}`` mapping.
|
||||||
|
|
||||||
|
English-edition covers are preferred; the first available language
|
||||||
|
is used as fallback when no English cover exists for a volume.
|
||||||
|
Results are cached per series.
|
||||||
|
"""
|
||||||
|
if not series_id:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
if series_id in self._images_cache:
|
||||||
|
return self._images_cache[series_id]
|
||||||
|
|
||||||
|
raw_items = self._fetch_all_pages(f"{series_id}/images")
|
||||||
|
|
||||||
|
# Group by normalised volume index; collect all languages per volume.
|
||||||
|
by_volume: dict[str, dict[str, str]] = {} # norm_vol -> {lang: url}
|
||||||
|
for item in raw_items:
|
||||||
|
if item.get("type") != "volume":
|
||||||
|
continue
|
||||||
|
idx = item.get("index_numeric")
|
||||||
|
if idx is None:
|
||||||
|
continue
|
||||||
|
norm = _norm_vol(idx)
|
||||||
|
lang = (item.get("language") or "").lower() or "unknown"
|
||||||
|
url = _pick_image_url(item.get("image"))
|
||||||
|
if not url:
|
||||||
|
continue
|
||||||
|
if norm not in by_volume:
|
||||||
|
by_volume[norm] = {}
|
||||||
|
# First entry per language wins (API order reflects quality/rank).
|
||||||
|
if lang not in by_volume[norm]:
|
||||||
|
by_volume[norm][lang] = url
|
||||||
|
|
||||||
|
# Pick best language per volume: English first, then first available.
|
||||||
|
result: dict[str, str] = {}
|
||||||
|
for norm, lang_map in by_volume.items():
|
||||||
|
url = lang_map.get("en") or next(iter(lang_map.values()), None)
|
||||||
|
if url:
|
||||||
|
result[norm] = url
|
||||||
|
|
||||||
|
# Empty results are not cached — covers added on MangaBaka later
|
||||||
|
# become visible without restarting the long-running process.
|
||||||
|
if result:
|
||||||
|
self._images_cache[series_id] = result
|
||||||
|
return result
|
||||||
|
|
||||||
|
def get_cover_for_volume_from_images(self, series_id: str,
|
||||||
|
volume) -> "str | None":
|
||||||
|
"""
|
||||||
|
Returns the cover URL for a specific volume from the /images endpoint,
|
||||||
|
or None if not available.
|
||||||
|
"""
|
||||||
|
covers = self.get_volume_covers(series_id)
|
||||||
|
if not covers:
|
||||||
return None
|
return None
|
||||||
return self._pick_cover_url(work.get("images")[0].get("image"))
|
return covers.get(_norm_vol(volume))
|
||||||
|
|
||||||
|
def get_cover_for_volume(self, series_id: str, volume) -> "str | None":
|
||||||
|
"""
|
||||||
|
Returns the best cover URL for a specific volume.
|
||||||
|
|
||||||
|
Tries the ``/images`` endpoint first (covers that exist even when no
|
||||||
|
physical work has been catalogued), then falls back to the ``/works``
|
||||||
|
endpoint. Returns None if neither source has a cover for the volume.
|
||||||
|
"""
|
||||||
|
# 1. /images endpoint (covers without works)
|
||||||
|
url = self.get_cover_for_volume_from_images(series_id, volume)
|
||||||
|
if url:
|
||||||
|
return url
|
||||||
|
|
||||||
|
# 2. /works endpoint fallback
|
||||||
|
work = self.get_work_for_volume(series_id, volume)
|
||||||
|
if not work or not work.get("images"):
|
||||||
|
return None
|
||||||
|
return _pick_image_url(work["images"][0].get("image"))
|
||||||
|
|
||||||
def get_page_counts(self, series_id: str) -> "dict[str, int]":
|
def get_page_counts(self, series_id: str) -> "dict[str, int]":
|
||||||
"""
|
"""
|
||||||
@@ -125,59 +281,9 @@ class MangaBakaWorksResolver:
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
def clear_cache(self) -> None:
|
def clear_cache(self) -> None:
|
||||||
"""Clears the internal works cache."""
|
"""Clears both the works cache and the images cover cache."""
|
||||||
self._cache.clear()
|
self._cache.clear()
|
||||||
|
self._images_cache.clear()
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Helpers
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
@staticmethod
|
|
||||||
def _pick_cover_url(cover) -> "str | None":
|
|
||||||
"""
|
|
||||||
Selects the best cover URL from a MangaBaka cover object.
|
|
||||||
|
|
||||||
Real API shape:
|
|
||||||
"raw": {"url": "...", "size": ..., "height": ..., "width": ...}
|
|
||||||
"x150": {"x1": "...", "x2": "...", "x3": "..."}
|
|
||||||
"x250": {...}
|
|
||||||
"x350": {...}
|
|
||||||
|
|
||||||
Order: raw original > x350@x3 > x250@x3 > x150@x3 ...
|
|
||||||
"""
|
|
||||||
if not cover:
|
|
||||||
return None
|
|
||||||
if isinstance(cover, str):
|
|
||||||
return cover
|
|
||||||
if not isinstance(cover, dict):
|
|
||||||
return None
|
|
||||||
|
|
||||||
raw = cover.get("raw")
|
|
||||||
if isinstance(raw, dict):
|
|
||||||
url = raw.get("url")
|
|
||||||
if isinstance(url, str) and url:
|
|
||||||
return url
|
|
||||||
elif isinstance(raw, str) and raw:
|
|
||||||
return raw
|
|
||||||
|
|
||||||
for size_key in ("x350", "x250", "x150"):
|
|
||||||
variant = cover.get(size_key)
|
|
||||||
if isinstance(variant, dict):
|
|
||||||
for density in ("x3", "x2", "x1"):
|
|
||||||
url = variant.get(density)
|
|
||||||
if isinstance(url, str) and url:
|
|
||||||
return url
|
|
||||||
elif isinstance(variant, str) and variant:
|
|
||||||
return variant
|
|
||||||
|
|
||||||
# Last-ditch: any HTTP URL anywhere in the structure
|
|
||||||
for val in cover.values():
|
|
||||||
if isinstance(val, str) and val.startswith("http"):
|
|
||||||
return val
|
|
||||||
if isinstance(val, dict):
|
|
||||||
for sub_val in val.values():
|
|
||||||
if isinstance(sub_val, str) and sub_val.startswith("http"):
|
|
||||||
return sub_val
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -1,355 +0,0 @@
|
|||||||
"""
|
|
||||||
matches_web_app.py
|
|
||||||
==================
|
|
||||||
|
|
||||||
Flask web UI for inspecting and editing the matches.json file produced by
|
|
||||||
MatchesCache.
|
|
||||||
|
|
||||||
Routes
|
|
||||||
------
|
|
||||||
GET / HTML table view (one row per cached match)
|
|
||||||
GET /api/matches JSON dump of the full cache
|
|
||||||
POST /api/matches Upsert / rename an entry
|
|
||||||
body: {originalTitle?, title, mangabakaId,
|
|
||||||
mangabakaName, imageUrl, firstMatchTime?}
|
|
||||||
POST /api/matches/delete Remove an entry body: {title}
|
|
||||||
POST /api/build Trigger a full re-scan via SuwayomiMover.build_matches_only
|
|
||||||
(only available if a mover is wired in)
|
|
||||||
|
|
||||||
The Title cell is rendered as a link to MangaBaka's search page, restricted
|
|
||||||
to the manga / manhwa / manhua types.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import threading
|
|
||||||
from urllib.parse import quote_plus
|
|
||||||
|
|
||||||
from flask import Flask, jsonify, request, Response
|
|
||||||
|
|
||||||
from MatchesCache import MatchesCache
|
|
||||||
|
|
||||||
|
|
||||||
_INDEX_HTML = """<!doctype html>
|
|
||||||
<html lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8">
|
|
||||||
<title>MangaBaka matches</title>
|
|
||||||
<style>
|
|
||||||
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
|
|
||||||
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
|
|
||||||
.bar { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
|
|
||||||
.bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
|
|
||||||
button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
|
|
||||||
button.primary { background:#2563eb; border-color:#2563eb; color:white; }
|
|
||||||
button.danger { background:#7f1d1d; border-color:#7f1d1d; color:white; }
|
|
||||||
table { border-collapse: collapse; width: 100%; }
|
|
||||||
th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
|
|
||||||
th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
|
|
||||||
tr:nth-child(even) td { background: #161616; }
|
|
||||||
td.image img { max-width: 90px; max-height: 130px; display:block; }
|
|
||||||
td input { width: 100%; padding: .25rem; background:#222; color:#eee; border:1px solid #444; }
|
|
||||||
td.title a { color: #60a5fa; text-decoration: none; }
|
|
||||||
td.title a:hover { text-decoration: underline; }
|
|
||||||
td.actions { white-space: nowrap; }
|
|
||||||
.status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
|
|
||||||
.dirty td { background: #1f2937 !important; }
|
|
||||||
</style>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<h1>MangaBaka matches</h1>
|
|
||||||
<div class="bar">
|
|
||||||
<input id="filter" type="search" placeholder="Filter by title…">
|
|
||||||
<button id="reload">Reload</button>
|
|
||||||
<button id="build" class="primary">Build all (rescan)</button>
|
|
||||||
<span class="status" id="status"></span>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<table>
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Title</th>
|
|
||||||
<th>mangabakaId</th>
|
|
||||||
<th>mangabakaName</th>
|
|
||||||
<th>firstMatchTime</th>
|
|
||||||
<th>Image</th>
|
|
||||||
<th></th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody id="rows"></tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
const TYPES = "&type=manhwa&type=manhua&type=manga";
|
|
||||||
|
|
||||||
function fmtTime(unix) {
|
|
||||||
if (!unix) return "";
|
|
||||||
const d = new Date(unix * 1000);
|
|
||||||
return d.toLocaleString();
|
|
||||||
}
|
|
||||||
|
|
||||||
function searchUrl(title) {
|
|
||||||
return "https://mangabaka.org/search?q=" + encodeURIComponent(title) + TYPES;
|
|
||||||
}
|
|
||||||
|
|
||||||
function setStatus(msg) { document.getElementById("status").textContent = msg; }
|
|
||||||
|
|
||||||
function makeRow(title, e) {
|
|
||||||
const tr = document.createElement("tr");
|
|
||||||
tr.dataset.originalTitle = title;
|
|
||||||
|
|
||||||
const titleTd = document.createElement("td");
|
|
||||||
titleTd.className = "title";
|
|
||||||
const titleLink = document.createElement("a");
|
|
||||||
titleLink.href = searchUrl(title);
|
|
||||||
titleLink.target = "_blank";
|
|
||||||
titleLink.rel = "noopener";
|
|
||||||
titleLink.textContent = title;
|
|
||||||
const titleInput = document.createElement("input");
|
|
||||||
titleInput.value = title;
|
|
||||||
titleInput.style.marginTop = ".25rem";
|
|
||||||
titleInput.addEventListener("input", () => {
|
|
||||||
titleLink.textContent = titleInput.value;
|
|
||||||
titleLink.href = searchUrl(titleInput.value);
|
|
||||||
tr.classList.add("dirty");
|
|
||||||
});
|
|
||||||
titleTd.append(titleLink, titleInput);
|
|
||||||
tr.appendChild(titleTd);
|
|
||||||
|
|
||||||
function field(value) {
|
|
||||||
const td = document.createElement("td");
|
|
||||||
const inp = document.createElement("input");
|
|
||||||
inp.value = value || "";
|
|
||||||
inp.addEventListener("input", () => tr.classList.add("dirty"));
|
|
||||||
td.appendChild(inp);
|
|
||||||
return [td, inp];
|
|
||||||
}
|
|
||||||
|
|
||||||
const [idTd, idInp] = field(e.mangabakaId);
|
|
||||||
const [nameTd, nameInp] = field(e.mangabakaName);
|
|
||||||
const [urlTd, urlInp] = field(e.imageUrl);
|
|
||||||
tr.appendChild(idTd);
|
|
||||||
tr.appendChild(nameTd);
|
|
||||||
|
|
||||||
const timeTd = document.createElement("td");
|
|
||||||
timeTd.textContent = fmtTime(e.firstMatchTime);
|
|
||||||
tr.appendChild(timeTd);
|
|
||||||
|
|
||||||
const imgTd = document.createElement("td");
|
|
||||||
imgTd.className = "image";
|
|
||||||
const img = document.createElement("img");
|
|
||||||
img.src = e.imageUrl || "";
|
|
||||||
img.alt = "";
|
|
||||||
img.loading = "lazy";
|
|
||||||
urlInp.addEventListener("input", () => { img.src = urlInp.value; });
|
|
||||||
imgTd.append(img, urlInp);
|
|
||||||
tr.appendChild(imgTd);
|
|
||||||
|
|
||||||
const actTd = document.createElement("td");
|
|
||||||
actTd.className = "actions";
|
|
||||||
const save = document.createElement("button");
|
|
||||||
save.textContent = "Save";
|
|
||||||
save.className = "primary";
|
|
||||||
save.addEventListener("click", async () => {
|
|
||||||
save.disabled = true;
|
|
||||||
setStatus("Saving " + titleInput.value + "…");
|
|
||||||
const body = {
|
|
||||||
originalTitle: tr.dataset.originalTitle,
|
|
||||||
title: titleInput.value,
|
|
||||||
mangabakaId: idInp.value,
|
|
||||||
mangabakaName: nameInp.value,
|
|
||||||
imageUrl: urlInp.value,
|
|
||||||
};
|
|
||||||
try {
|
|
||||||
const r = await fetch("/api/matches", {
|
|
||||||
method: "POST",
|
|
||||||
headers: { "Content-Type": "application/json" },
|
|
||||||
body: JSON.stringify(body),
|
|
||||||
});
|
|
||||||
if (!r.ok) throw new Error(await r.text());
|
|
||||||
tr.dataset.originalTitle = titleInput.value;
|
|
||||||
tr.classList.remove("dirty");
|
|
||||||
setStatus("Saved " + titleInput.value);
|
|
||||||
} catch (err) {
|
|
||||||
setStatus("Save failed: " + err.message);
|
|
||||||
} finally {
|
|
||||||
save.disabled = false;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
const del = document.createElement("button");
|
|
||||||
del.textContent = "Delete";
|
|
||||||
del.className = "danger";
|
|
||||||
del.style.marginLeft = ".25rem";
|
|
||||||
del.addEventListener("click", async () => {
|
|
||||||
if (!confirm("Delete " + tr.dataset.originalTitle + "?")) return;
|
|
||||||
setStatus("Deleting " + tr.dataset.originalTitle + "…");
|
|
||||||
try {
|
|
||||||
const r = await fetch("/api/matches/delete", {
|
|
||||||
method: "POST",
|
|
||||||
headers: { "Content-Type": "application/json" },
|
|
||||||
body: JSON.stringify({ title: tr.dataset.originalTitle }),
|
|
||||||
});
|
|
||||||
if (!r.ok) throw new Error(await r.text());
|
|
||||||
tr.remove();
|
|
||||||
setStatus("Deleted");
|
|
||||||
} catch (err) {
|
|
||||||
setStatus("Delete failed: " + err.message);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
actTd.append(save, del);
|
|
||||||
tr.appendChild(actTd);
|
|
||||||
return tr;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function load() {
|
|
||||||
setStatus("Loading…");
|
|
||||||
const tbody = document.getElementById("rows");
|
|
||||||
tbody.innerHTML = "";
|
|
||||||
try {
|
|
||||||
const r = await fetch("/api/matches");
|
|
||||||
const data = await r.json();
|
|
||||||
const matches = data.matches || {};
|
|
||||||
const titles = Object.keys(matches).sort((a,b)=>a.localeCompare(b));
|
|
||||||
for (const t of titles) tbody.appendChild(makeRow(t, matches[t]));
|
|
||||||
setStatus(titles.length + " entries");
|
|
||||||
applyFilter();
|
|
||||||
} catch (err) {
|
|
||||||
setStatus("Load failed: " + err.message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function applyFilter() {
|
|
||||||
const q = document.getElementById("filter").value.toLowerCase();
|
|
||||||
for (const tr of document.querySelectorAll("#rows tr")) {
|
|
||||||
const t = tr.dataset.originalTitle.toLowerCase();
|
|
||||||
tr.style.display = t.includes(q) ? "" : "none";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
document.getElementById("filter").addEventListener("input", applyFilter);
|
|
||||||
document.getElementById("reload").addEventListener("click", load);
|
|
||||||
document.getElementById("build").addEventListener("click", async () => {
|
|
||||||
if (!confirm("Run full scan? This may take several minutes.")) return;
|
|
||||||
setStatus("Building… (running on the server)");
|
|
||||||
try {
|
|
||||||
const r = await fetch("/api/build", { method: "POST" });
|
|
||||||
if (!r.ok) throw new Error(await r.text());
|
|
||||||
setStatus("Build finished");
|
|
||||||
load();
|
|
||||||
} catch (err) {
|
|
||||||
setStatus("Build failed: " + err.message);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
load();
|
|
||||||
</script>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class MatchesWebApp:
    """
    Flask app exposing the MatchesCache over HTTP.

    `mover` is optional — if provided, POST /api/build calls its
    ``build_matches_only()`` method inside the request handler (the server
    is started with ``threaded=True``, so only that request blocks while
    the scan runs). Without a mover the route answers 503; while a build
    is already in progress it answers 409.
    """

    def __init__(self, cache: MatchesCache, *,
                 mover=None,
                 host: str = "0.0.0.0",
                 port: int = 8080):
        self._cache = cache
        self._mover = mover
        self._host = host
        self._port = port
        # Held for the duration of a build so concurrent requests get a 409.
        self._build_lock = threading.Lock()
        self._app = Flask(__name__)
        self._thread: "threading.Thread | None" = None
        self._register_routes()

    @property
    def app(self) -> Flask:
        """The underlying Flask application object."""
        return self._app

    def start(self) -> threading.Thread:
        """
        Starts the Flask server on a background thread and returns it.

        The thread is non-daemon so the process stays alive even when the
        caller does not explicitly join() — important when this is the
        only foreground task (e.g. watcher disabled for testing).
        Calling start() again while the thread is alive returns the
        existing thread instead of starting a second server.
        """
        if self._thread is not None and self._thread.is_alive():
            return self._thread
        self._thread = threading.Thread(
            target=self._app.run,
            kwargs={"host": self._host, "port": self._port,
                    "debug": False, "use_reloader": False,
                    "threaded": True},
            name="MatchesWebApp",
            daemon=False,
        )
        self._thread.start()
        print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
              flush=True)
        return self._thread

    def wait(self) -> None:
        """Blocks until the Flask thread exits (or returns immediately if not started)."""
        if self._thread is not None:
            self._thread.join()

    # ------------------------------------------------------------------
    # Routes
    # ------------------------------------------------------------------
    def _register_routes(self) -> None:
        """Registers all HTTP routes on the Flask app."""
        app = self._app
        cache = self._cache

        def _json_object() -> dict:
            # A syntactically valid JSON body may still be a list / number /
            # string; treat anything that is not an object as "no fields"
            # so the request is rejected with 400 instead of crashing.
            body = request.get_json(silent=True)
            return body if isinstance(body, dict) else {}

        def _text(value) -> str:
            # Non-string values (numbers, lists, null) count as missing.
            return value.strip() if isinstance(value, str) else ""

        @app.get("/")
        def index() -> Response:
            # Pass only the bare mimetype: Werkzeug appends
            # "; charset=utf-8" to text/* mimetypes on its own, so spelling
            # the charset out here would duplicate the parameter.
            return Response(_INDEX_HTML, mimetype="text/html")

        @app.get("/api/matches")
        def api_list():
            return jsonify(cache.all())

        @app.post("/api/matches")
        def api_upsert():
            body = _json_object()
            title = _text(body.get("title"))
            if not title:
                return Response("title is required", status=400)
            # A differing originalTitle means the row was renamed in the UI.
            original = _text(body.get("originalTitle")) or title
            if original != title:
                cache.rename(original, title)
            entry = cache.upsert(
                title,
                mangabaka_id=body.get("mangabakaId"),
                mangabaka_name=body.get("mangabakaName"),
                image_url=body.get("imageUrl"),
                first_match_time=body.get("firstMatchTime"),
            )
            return jsonify({"title": title, "entry": entry})

        @app.post("/api/matches/delete")
        def api_delete():
            body = _json_object()
            title = _text(body.get("title"))
            if not title:
                return Response("title is required", status=400)
            removed = cache.remove(title)
            return jsonify({"removed": removed, "title": title})

        @app.post("/api/build")
        def api_build():
            if self._mover is None:
                return Response("no mover configured", status=503)
            # Non-blocking acquire: a second build request is refused
            # rather than queued behind the running one.
            if not self._build_lock.acquire(blocking=False):
                return Response("build already running", status=409)
            try:
                result = self._mover.build_matches_only()
            finally:
                self._build_lock.release()
            return jsonify(result)
|
|
||||||
@@ -0,0 +1,72 @@
|
|||||||
|
"""
|
||||||
|
text_utils.py
|
||||||
|
=============
|
||||||
|
|
||||||
|
Small text helpers shared across modules:
|
||||||
|
|
||||||
|
* ``paragraphs_to_html`` — converts plain text with blank-line paragraph
|
||||||
|
breaks into compact HTML (used for Kavita summary / description fields,
|
||||||
|
which must not contain raw newlines).
|
||||||
|
* ``best_similarity`` — best difflib ratio between a query string and a
|
||||||
|
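list of candidate strings (used for title / person-name matching).
|
||||||
|
* ``person_name_with_id`` — appends the MAL / AniList character id to a
|
||||||
|
name so same-named characters stay separate Kavita person records.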
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import difflib
|
||||||
|
import re
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
|
||||||
|
def paragraphs_to_html(text: str) -> str:
    """
    Converts plain text with paragraph breaks to compact HTML (no raw \\n).

    Paragraphs are separated by one or more blank lines (a line holding
    only spaces / tabs counts as blank) and are wrapped in ``<p>…</p>``;
    single line breaks inside a paragraph become ``<br>``. Windows
    (``\\r\\n``) and bare ``\\r`` line endings are normalised first so no
    carriage return leaks into the output. The text itself is not
    HTML-escaped. Returns "" for empty input.
    """
    if not text:
        return ""
    # Normalise line endings up front: the split and the <br> replacement
    # below only look for "\n".
    normalised = text.replace("\r\n", "\n").replace("\r", "\n").strip()
    parts: list[str] = []
    # A newline followed by one or more (optionally whitespace-only) lines.
    for para in re.split(r"\n(?:[ \t]*\n)+", normalised):
        para = para.strip()
        if para:
            parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
    return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def best_similarity(query: str, candidates: Iterable[str]) -> float:
    """
    Returns the best case-insensitive difflib similarity ratio (0..1)
    between `query` and any non-empty candidate.

    Both sides are compared after ``str.casefold()``, so case variants
    that ``lower()`` does not unify (e.g. "ß" vs "SS") still match.
    Empty / ``None`` candidates are ignored; an empty or ``None``
    candidate collection yields 0.0.
    """
    needle = str(query or "").casefold()
    best = 0.0
    for candidate in candidates or ():
        if not candidate:
            continue
        # Argument order is kept (query first): ratio() may depend on it.
        ratio = difflib.SequenceMatcher(
            None, needle, str(candidate).casefold()).ratio()
        if ratio > best:
            best = ratio
    return best
|
||||||
|
|
||||||
|
|
||||||
|
def person_name_with_id(name: str, *,
                        mal_id: "int | None" = None,
                        al_id: "int | None" = None) -> str:
    """
    Tags a character name with a tracker id, e.g. "Rem (MAL 118737)".

    Kavita keys its global Person records by name alone, so two distinct
    characters with the same name would otherwise be merged. Appending
    the tracker's *character* id separates them, while the manga and
    light-novel editions of one series still resolve to the same record
    (character ids are per character, not per medium).

    A MAL id wins when both ids are given; an AniList id is marked "AL"
    so the two id spaces cannot clash. With no usable id — or a blank
    name — the stripped name is returned unchanged.

    Keep this format identical to the manga project's: both tools must
    address the same Kavita person records.
    """
    cleaned = (name or "").strip()
    if cleaned:
        # Checked in priority order: MAL first, AniList as fallback.
        for marker, tracker_id in (("MAL", mal_id), ("AL", al_id)):
            if tracker_id:
                return f"{cleaned} ({marker} {tracker_id})"
    return cleaned
|
||||||
@@ -0,0 +1,313 @@
|
|||||||
|
"""
|
||||||
|
kavita_series_updater.py
|
||||||
|
========================
|
||||||
|
|
||||||
|
Diff-based update of a single Kavita series record from a
|
||||||
|
LightNovelMetadataBuilder output dict.
|
||||||
|
|
||||||
|
Behaviour
|
||||||
|
---------
|
||||||
|
* Locked fields in Kavita (``*Locked`` flags) are never touched, no matter
|
||||||
|
what MangaBaka returns.
|
||||||
|
* Scalar fields (summary, releaseYear, ageRating, publicationStatus,
|
||||||
|
language, score, sortName, localizedName) are overwritten when the
|
||||||
|
newly-built value differs from the value currently stored in Kavita.
|
||||||
|
* List fields (genres, tags, characters, writers, coverArtists,
|
||||||
|
publishers, imprints) are diff-merged: a name appearing in the new
|
||||||
|
set but not in the current one is added (id=0 so Kavita creates the
|
||||||
|
record); a name that is in Kavita but no longer in the new set is
|
||||||
|
dropped. Comparison is case-insensitive on the ``name`` field.
|
||||||
|
* Web links are stored as a comma-separated string in Kavita; this
|
||||||
|
updater treats them as a set and re-joins on write.
|
||||||
|
* Series-level cover image (URL different from last time) is re-uploaded
|
||||||
|
whenever ``coverImageLocked`` is False. The MangaBaka cover URL is
|
||||||
|
stamped onto matches.json as ``imageUrl`` so a subsequent run can skip
|
||||||
|
the upload when nothing changed.
|
||||||
|
|
||||||
|
Returns a small diff report ({field: 'changed'/'unchanged'/'skipped'/'locked'};
|
||||||
|
``coverImage`` may also be 'error: <message>') per series so the WebApp can surface what happened.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
|
from KavitaClient import KavitaClient
|
||||||
|
|
||||||
|
|
||||||
|
# Maps Kavita "list" fields on SeriesMetadataDto to (lock_flag, item_key).
|
||||||
|
# `item_key` is the dict key Kavita uses for the display name on each item:
|
||||||
|
# GenreTagDto / TagDto use "title", PersonDto uses "name".
|
||||||
|
_LIST_FIELDS: list[tuple[str, str, str]] = [
|
||||||
|
("genres", "genresLocked", "title"),
|
||||||
|
("tags", "tagsLocked", "title"),
|
||||||
|
("characters", "characterLocked", "name"),
|
||||||
|
("writers", "writerLocked", "name"),
|
||||||
|
("coverArtists", "coverArtistLocked", "name"),
|
||||||
|
("publishers", "publisherLocked", "name"),
|
||||||
|
("imprints", "imprintLocked", "name"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _norm(name: str) -> str:
|
||||||
|
return (name or "").strip().lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_list(
    current: list[dict],
    new_names: Iterable[str],
    item_key: str,
) -> "tuple[list[dict], bool]":
    """
    Diff-merges a Kavita list field with the canonical name list from
    MangaBaka. Returns (merged_list, changed_flag).

    `item_key` is the dict key Kavita uses for the display name on each
    item ("title" for GenreTagDto/TagDto, "name" for PersonDto).

    * Items in `current` whose display value appears in `new_names` are
      kept verbatim so existing ids and ancillary fields survive.
    * New names (no matching entry in `current`) are appended with
      ``{"id": 0, <item_key>: <name>}`` — Kavita creates the record on save.
    * Items in `current` whose display value is *not* in `new_names` are
      dropped.

    Names are compared via ``_norm`` (stripped, case-insensitive). The
    changed flag is True only when at least one item was actually added
    or dropped; entries of `current` that merely share a normalised name
    are all kept and do not count as a change.
    """
    # normalised name -> display form; blank names are ignored.
    wanted: dict[str, str] = {}
    for raw in new_names:
        if raw and raw.strip():
            wanted[_norm(raw)] = raw.strip()

    existing = current or []
    kept = [item for item in existing if _norm(item.get(item_key)) in wanted]
    kept_keys = {_norm(item.get(item_key)) for item in kept}

    additions = [
        {"id": 0, item_key: display}
        for key, display in wanted.items()
        if key not in kept_keys
    ]

    # Count kept *items*, not unique keys: comparing against the key set
    # would flag a removal whenever `current` holds two entries with the
    # same normalised name, although both are retained.
    removed = len(kept) != len(existing)
    return kept + additions, bool(additions) or removed
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_web_links(value) -> list[str]:
|
||||||
|
if not value:
|
||||||
|
return []
|
||||||
|
if isinstance(value, list):
|
||||||
|
return [str(v).strip() for v in value if v]
|
||||||
|
return [p.strip() for p in str(value).split(",") if p.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_web_links(current_str, new_links: list[str]) -> "tuple[str, bool]":
    """
    Replaces Kavita's web links with the freshly-built set.

    `current_str` is the stored value (parsed with ``_parse_web_links``);
    `new_links` is the new list. Returns (comma-joined links, changed).

    * With no usable new links the current links are returned unchanged
      (changed=False) — missing data never wipes existing links.
    * Otherwise the result mirrors the new links in their given order,
      stripped and de-duplicated; current links absent from the new set
      are dropped. `changed` is True when the two sets differ.
    """
    current = _parse_web_links(current_str)
    # Strip and de-duplicate (order-preserving) so a padded or repeated
    # link compares equal to the stripped form that is stored.
    wanted = list(dict.fromkeys(
        str(link).strip()
        for link in (new_links or [])
        if link and str(link).strip()
    ))
    if not wanted:
        return ",".join(current), False

    changed = set(wanted) != set(current)
    return ",".join(wanted), changed
|
||||||
|
|
||||||
|
|
||||||
|
class KavitaSeriesUpdater:
    """
    Applies a freshly-built metadata dict to one Kavita series.

    Each field is compared with the value the client returns for the
    series; only differing, unlocked fields are written back. The report
    returned by ``update_series`` maps field name -> 'changed' /
    'unchanged' / 'skipped' / 'locked' (``coverImage`` may also be
    'error: <message>').
    """

    # (built_key, metadata_key, locked_key, transform, skip_when_zero)
    # `skip_when_zero` covers fields where 0 means "no data" rather than
    # a real value (releaseYear, ageRating). publicationStatus 0 is a
    # valid "Ongoing" status — never skip it.
    _SCALAR_FIELDS = (
        ("summary", "summary", "summaryLocked", None, False),
        ("releaseYear", "releaseYear", "releaseYearLocked", int, True),
        ("ageRating", "ageRating", "ageRatingLocked", int, True),
        ("publicationStatus", "publicationStatus", "publicationStatusLocked", int, False),
        ("language", "language", "languageLocked", None, False),
    )

    # (built_key, series_key) — Kavita exposes malId, aniListId, mangaBakaId
    _TRACKER_ID_FIELDS = (
        ("malId", "malId"),
        ("anilistId", "aniListId"),
        ("mangabakaId", "mangaBakaId"),
    )

    def __init__(self, client: KavitaClient):
        self._client = client

    # ------------------------------------------------------------------
    # Public
    # ------------------------------------------------------------------
    def update_series(self, series_id: int, built: dict, *,
                      previous_cover_url: "str | None" = None) -> dict:
        """
        Applies the diff between Kavita's current state for `series_id`
        and the freshly-built MangaBaka dict. Returns a per-field diff
        report.

        The metadata record and the series record are fetched, diffed and
        (only if something changed) written back separately. The cover is
        uploaded when `built["coverUrl"]` is set, the cover is not locked
        and the URL differs from `previous_cover_url`; an upload failure
        is recorded in the report instead of being raised.
        """
        series = self._client.get_series(series_id)
        metadata = self._client.get_series_metadata(series_id)
        report: dict = {}

        if self._diff_metadata(metadata, built, report):
            self._client.update_series_metadata(metadata)

        if self._diff_series(series, built, report):
            self._client.update_series(series)

        # Cover: only re-upload when not locked AND URL actually changed.
        new_cover = built.get("coverUrl")
        cover_locked = series.get("coverImageLocked")
        if new_cover and not cover_locked and new_cover != previous_cover_url:
            try:
                self._client.upload_series_cover(series_id, new_cover)
                report["coverImage"] = "changed"
            except Exception as exc:
                # Best effort: a failed cover must not abort the update.
                report["coverImage"] = f"error: {exc}"
        elif cover_locked:
            report["coverImage"] = "locked"
        else:
            report["coverImage"] = "skipped"

        return report

    # ------------------------------------------------------------------
    # Internal: SeriesMetadataDto
    # ------------------------------------------------------------------
    def _diff_metadata(self, metadata: dict, built: dict,
                       report: dict) -> bool:
        """
        Updates `metadata` in place from `built` (scalars, web links, list
        fields), records the outcome per field in `report`, and returns
        True when anything was modified.
        """
        # Run all three passes unconditionally — each one fills `report`.
        scalars_changed = self._diff_scalars(metadata, built, report)
        links_changed = self._diff_web_links(metadata, built, report)
        lists_changed = self._diff_lists(metadata, built, report)
        return scalars_changed or links_changed or lists_changed

    def _diff_scalars(self, metadata: dict, built: dict,
                      report: dict) -> bool:
        """Overwrites differing, unlocked scalar fields; missing values are skipped."""
        changed = False
        for built_key, meta_key, locked_key, transform, skip_zero in self._SCALAR_FIELDS:
            new_val = built.get(built_key)
            if new_val is None or new_val == "":
                report[meta_key] = "skipped"
                continue
            if transform is not None:
                try:
                    new_val = transform(new_val)
                except (TypeError, ValueError):
                    report[meta_key] = "skipped"
                    continue
            if skip_zero and new_val == 0:
                report[meta_key] = "skipped"
                continue
            if metadata.get(locked_key):
                report[meta_key] = "locked"
                continue
            if metadata.get(meta_key) != new_val:
                metadata[meta_key] = new_val
                changed = True
                report[meta_key] = "changed"
            else:
                report[meta_key] = "unchanged"
        return changed

    def _diff_web_links(self, metadata: dict, built: dict,
                        report: dict) -> bool:
        """Syncs the comma-separated ``webLinks`` string with the built links."""
        # SeriesMetadataDto has no dedicated lock for webLinks — always update.
        web_str, web_changed = _merge_web_links(
            metadata.get("webLinks"), built.get("webLinks") or [])
        if web_changed:
            metadata["webLinks"] = web_str
            report["webLinks"] = "changed"
        else:
            report["webLinks"] = "unchanged"
        return web_changed

    def _diff_lists(self, metadata: dict, built: dict,
                    report: dict) -> bool:
        """Diff-merges every unlocked list field named in ``_LIST_FIELDS``."""
        list_map = {
            "genres": built.get("genres"),
            "tags": built.get("tags"),
            "characters": built.get("characters"),
            "writers": built.get("writers"),
            "coverArtists": built.get("coverArtists"),
            "publishers": built.get("publishers"),
            # `built` carries a single imprint; Kavita stores a list.
            "imprints": [built["imprint"]] if built.get("imprint") else [],
        }
        changed = False
        for meta_key, locked_key, item_key in _LIST_FIELDS:
            new_names = list_map.get(meta_key) or []
            if metadata.get(locked_key):
                report[meta_key] = "locked"
                continue
            current_items = metadata.get(meta_key) or []
            if not new_names and not current_items:
                report[meta_key] = "unchanged"
                continue
            merged, list_changed = _merge_list(current_items, new_names, item_key)
            if list_changed:
                metadata[meta_key] = merged
                changed = True
                report[meta_key] = "changed"
            else:
                report[meta_key] = "unchanged"
        return changed

    # ------------------------------------------------------------------
    # Internal: SeriesDto (sortName, userRating, tracker ids)
    # ------------------------------------------------------------------
    def _diff_series(self, series: dict, built: dict, report: dict) -> bool:
        """
        Updates `series` in place (sortName, localizedName, tracker ids,
        userRating), records the outcome in `report`, and returns True
        when anything was modified. Tracker ids only appear in the report
        when they changed.
        """
        # Evaluate both name fields before combining so each is reported.
        sort_changed = self._diff_name(series, built, report,
                                       "sortName", "sortNameLocked")
        loc_changed = self._diff_name(series, built, report,
                                      "localizedName", "localizedNameLocked")
        changed = sort_changed or loc_changed

        for built_key, series_key in self._TRACKER_ID_FIELDS:
            new_val = built.get(built_key)
            if new_val in (None, "", 0):
                continue
            try:
                new_int = int(new_val)
            except (TypeError, ValueError):
                continue
            try:
                current_int = int(series.get(series_key) or 0)
            except (TypeError, ValueError):
                # An unparsable stored id counts as "not set" so the new
                # id replaces it instead of aborting the whole update.
                current_int = 0
            if current_int != new_int:
                series[series_key] = new_int
                changed = True
                report[series_key] = "changed"

        # userRating from MangaBaka (0..5)
        new_score = built.get("score")
        if new_score is not None:
            try:
                new_score = float(new_score)
            except (TypeError, ValueError):
                new_score = None
        if new_score is not None:
            current_score = series.get("userRating")
            try:
                current_score = float(current_score) if current_score is not None else None
            except (TypeError, ValueError):
                current_score = None
            if current_score != new_score:
                series["userRating"] = new_score
                series["hasUserRated"] = True
                changed = True
                report["userRating"] = "changed"
            else:
                report["userRating"] = "unchanged"

        return changed

    @staticmethod
    def _diff_name(series: dict, built: dict, report: dict,
                   key: str, locked_key: str) -> bool:
        """Overwrites one unlocked name field when `built` has a differing, non-empty value."""
        if series.get(locked_key):
            report[key] = "locked"
            return False
        new_name = built.get(key) or ""
        if new_name and series.get(key) != new_name:
            series[key] = new_name
            report[key] = "changed"
            return True
        # An empty built value is reported as "unchanged", never written.
        report[key] = "unchanged"
        return False
|
||||||
@@ -0,0 +1,571 @@
|
|||||||
|
"""
|
||||||
|
light_novel_metadata_builder.py
|
||||||
|
===============================
|
||||||
|
|
||||||
|
Fetches series-level metadata for a light novel from MangaBaka, enriches
|
||||||
|
it with MyAnimeList / AniList tracker statistics and character data, and
|
||||||
|
returns a structured dict ready to be diffed against Kavita's
|
||||||
|
SeriesMetadataDto.
|
||||||
|
|
||||||
|
Differences vs. the manga project's ComicInfoBuilder:
|
||||||
|
- No chapter / page handling — Kavita reads volumes from the files.
|
||||||
|
- No XML output — produces a plain dict.
|
||||||
|
- No MangaDex resolver — light novels don't have a chapter→volume
|
||||||
|
mapping problem.
|
||||||
|
- MangaBaka search type is fixed to ``novel`` so only light/web novels
|
||||||
|
are returned.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||||
|
from MALResolver import MALResolver
|
||||||
|
from AniListResolver import AniListResolver
|
||||||
|
from MatchesCache import MatchesCache
|
||||||
|
from TextUtils import paragraphs_to_html, person_name_with_id
|
||||||
|
|
||||||
|
|
||||||
|
# MangaBaka series type for the search endpoint.
|
||||||
|
_SEARCH_TYPES = ["novel"]
|
||||||
|
|
||||||
|
# MangaBaka content_rating -> Kavita AgeRating enum
|
||||||
|
# Kavita AgeRating values (from openapi.json):
|
||||||
|
# 0=Unknown, 3=Everyone, 8=Teen, 10=Mature17Plus, 13=AdultsOnly
|
||||||
|
_AGE_RATING_MAP = {
|
||||||
|
"safe": 3, # Everyone
|
||||||
|
"suggestive": 8, # Teen
|
||||||
|
"erotica": 10, # Mature17Plus
|
||||||
|
"pornographic": 13, # AdultsOnly
|
||||||
|
}
|
||||||
|
|
||||||
|
# MangaBaka status -> Kavita PublicationStatus enum
|
||||||
|
# Kavita PublicationStatus (from openapi.json):
|
||||||
|
# 0=OnGoing, 1=Hiatus, 2=Completed, 3=Cancelled, 4=Ended
|
||||||
|
_PUB_STATUS_MAP = {
|
||||||
|
"ongoing": 0,
|
||||||
|
"hiatus": 1,
|
||||||
|
"completed": 2,
|
||||||
|
"cancelled": 3,
|
||||||
|
"ended": 4,
|
||||||
|
}
|
||||||
|
|
||||||
|
# External-tracker URL templates used to enrich the web-links list.
|
||||||
|
_TRACKER_URL_TEMPLATES = {
|
||||||
|
"anilist": "https://anilist.co/manga/{id}",
|
||||||
|
"myanimelist": "https://myanimelist.net/manga/{id}",
|
||||||
|
"mal": "https://myanimelist.net/manga/{id}",
|
||||||
|
"mangaupdates": "https://www.mangaupdates.com/series.html?id={id}",
|
||||||
|
"kitsu": "https://kitsu.app/manga/{id}",
|
||||||
|
"animenewsnetwork": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
|
||||||
|
"ann": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
|
||||||
|
"animeplanet": "https://www.anime-planet.com/manga/{id}",
|
||||||
|
"shikimori": "https://shikimori.one/mangas/{id}",
|
||||||
|
"bookwalker": "https://bookwalker.jp/{id}",
|
||||||
|
}
|
||||||
|
|
||||||
|
_MD_ESCAPE_RE = re.compile(r'\\([\\`*_{}\[\]()\#+\-.!|~])')
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
def _normalise_key(key) -> str:
|
||||||
|
return re.sub(r"[^a-z0-9]", "", str(key).lower())
|
||||||
|
|
||||||
|
|
||||||
|
def _format_term(value: str) -> str:
|
||||||
|
return str(value).replace("_", " ").strip().title() if value else ""
|
||||||
|
|
||||||
|
|
||||||
|
def _md_to_html(text: str) -> str:
|
||||||
|
"""Converts the subset of Markdown produced by MangaBaka to compact HTML."""
|
||||||
|
if not text:
|
||||||
|
return ""
|
||||||
|
text = _MD_ESCAPE_RE.sub(r'\1', text)
|
||||||
|
text = re.sub(
|
||||||
|
r'\[([^\]]+)\]\(([^)]+)\)',
|
||||||
|
lambda m: f'<a href="{m.group(2)}">{m.group(1)}</a>',
|
||||||
|
text,
|
||||||
|
)
|
||||||
|
text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text, flags=re.DOTALL)
|
||||||
|
text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text, flags=re.DOTALL)
|
||||||
|
return paragraphs_to_html(text)
|
||||||
|
|
||||||
|
|
||||||
|
def pick_cover_url(cover) -> "str | None":
    """Selects the best cover URL from a MangaBaka cover object.

    A plain string is returned unchanged; anything that is neither a
    non-empty string nor a dict yields ``None``. For a dict the preference
    order is: ``raw`` (its ``url`` field when ``raw`` is a dict), then the
    ``x350`` / ``x250`` / ``x150`` variants (densities ``x3``, ``x2``,
    ``x1`` when the variant is a dict), and finally the first string value
    -- at most one level deep -- that starts with ``"http"``.
    """
    if not cover:
        return None
    if isinstance(cover, str):
        return cover
    if not isinstance(cover, dict):
        return None

    def _preferred():
        # Yields the known slots in descending order of preference.
        original = cover.get("raw")
        yield original.get("url") if isinstance(original, dict) else original
        for size in ("x350", "x250", "x150"):
            entry = cover.get(size)
            if isinstance(entry, dict):
                for scale in ("x3", "x2", "x1"):
                    yield entry.get(scale)
            else:
                yield entry

    for candidate in _preferred():
        if isinstance(candidate, str) and candidate:
            return candidate

    # Unknown layout: take the first thing that looks like a URL.
    for value in cover.values():
        nested = value.values() if isinstance(value, dict) else (value,)
        for item in nested:
            if isinstance(item, str) and item.startswith("http"):
                return item
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def pick_thumbnail_url(cover) -> "str | None":
    """Picks a small cover variant suitable for a UI thumbnail.

    Sizes are tried smallest first (``x150``, ``x250``, ``x350``) with
    densities in the order ``x2``, ``x1``, ``x3``. A plain string is
    returned unchanged. When no sized variant is available the choice is
    delegated to ``pick_cover_url``.
    """
    if not cover:
        return None
    if isinstance(cover, str):
        return cover
    if not isinstance(cover, dict):
        return None

    def _small_first():
        # Yields candidate values, smallest rendition first.
        for size in ("x150", "x250", "x350"):
            entry = cover.get(size)
            if isinstance(entry, dict):
                for scale in ("x2", "x1", "x3"):
                    yield entry.get(scale)
            else:
                yield entry

    for candidate in _small_first():
        if isinstance(candidate, str) and candidate:
            return candidate
    return pick_cover_url(cover)
|
||||||
|
|
||||||
|
|
||||||
|
def _id_from_source(md: dict, *names: str) -> "int | None":
|
||||||
|
target = {_normalise_key(n) for n in names}
|
||||||
|
for raw_key, info in (md.get("source") or {}).items():
|
||||||
|
if _normalise_key(raw_key) in target and isinstance(info, dict):
|
||||||
|
mid = info.get("id")
|
||||||
|
if mid is not None:
|
||||||
|
try:
|
||||||
|
return int(mid)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
# Builder
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
class LightNovelMetadataBuilder:
    """
    Resolves a light-novel series on MangaBaka and produces a structured
    metadata dict ready to be merged into Kavita.

    MangaBaka supplies the core record; MyAnimeList (via ``MALResolver``)
    and AniList (via ``AniListResolver``) are consulted for statistics,
    characters and staff.
    """

    def __init__(self, *,
                 api_base_url: str = "https://api.mangabaka.dev/v1",
                 language: str = "en",
                 request_timeout: int = 30,
                 session: "requests.Session | None" = None,
                 mal_resolver: "MALResolver | None" = None,
                 al_resolver: "AniListResolver | None" = None,
                 matches_cache: "MatchesCache | None" = None):
        """
        :param api_base_url: MangaBaka API root; a trailing slash is removed.
        :param language: language code reported in the result and used to
            pick the sort title.
        :param request_timeout: timeout in seconds for every HTTP request.
        :param session: HTTP session to reuse; a new one is created if omitted.
        :param mal_resolver: shared MAL resolver; created in light-novel
            search mode if omitted.
        :param al_resolver: shared AniList resolver; created in novel mode
            if omitted.
        :param matches_cache: optional title -> MangaBaka match cache used
            by :meth:`resolve`.
        """
        self.api_base_url = api_base_url.rstrip("/")
        self.language = language
        self.request_timeout = request_timeout

        self._session = session or requests.Session()
        self._session.headers.setdefault("User-Agent",
                                         "LightNovelMetadataBuilder/1.0")
        # NOTE(review): also runs for a caller-supplied session that may
        # already be rate-limited -- confirm apply_to_session is idempotent.
        _apply_mangabaka_rate_limit(self._session)

        self._mal = mal_resolver or MALResolver(
            request_timeout=request_timeout, search_type="lightnovel")
        self._al = al_resolver or AniListResolver(
            request_timeout=request_timeout, media_format="novel")
        self._matches_cache = matches_cache

    # ------------------------------------------------------------------
    # MangaBaka search / fetch
    # ------------------------------------------------------------------
    def search_series(self, title: str) -> "dict | None":
        """Returns the top MangaBaka novel hit for `title`, or None.

        None is also returned for a blank title, for any request failure
        and for a response body that is not the expected JSON object.
        """
        if not title or not title.strip():
            return None
        url = f"{self.api_base_url}/series/search"
        try:
            resp = self._session.get(
                url, params={"q": title, "type": _SEARCH_TYPES,
                             "page": 1, "limit": 1},
                timeout=self.request_timeout)
            resp.raise_for_status()
        except requests.RequestException:
            return None
        # A 200 response can still carry a non-JSON body (proxy error page).
        try:
            payload = resp.json()
        except ValueError:
            return None
        hits = payload.get("data") if isinstance(payload, dict) else None
        if not isinstance(hits, list) or not hits:
            return None
        return hits[0]

    def fetch_series(self, series_id) -> "dict | None":
        """
        Returns the full MangaBaka series dict for the given id, following
        ``merged_with`` redirects. A seen-set guards against merge cycles.

        Returns None for a blank id or when a merge cycle is detected.
        Unlike :meth:`search_series`, request errors are *not* swallowed:
        whatever the session or ``raise_for_status`` raises propagates.
        """
        if series_id is None or str(series_id).strip() == "":
            return None
        seen: set[str] = set()
        current = series_id
        while str(current) not in seen:
            seen.add(str(current))
            url = f"{self.api_base_url}/series/{current}"
            resp = self._session.get(url, timeout=self.request_timeout)
            resp.raise_for_status()
            data = resp.json().get("data")
            if (isinstance(data, dict) and data.get("state") == "merged"
                    and data.get("merged_with")):
                # The record was folded into another one: follow the pointer.
                current = data["merged_with"]
                continue
            return data
        return None

    # ------------------------------------------------------------------
    # Resolve title -> MangaBaka series (caches the match)
    # ------------------------------------------------------------------
    def resolve(self, title: str) -> "dict | None":
        """
        Returns the MangaBaka series for `title`.

        Lookup order:
          1. MatchesCache (uses stored mangabakaId, skips the search).
          2. Fresh MangaBaka search — top hit. Result is persisted to the
             cache so it survives a crash.
        """
        if self._matches_cache is not None:
            cached = self._matches_cache.get(title)
            if cached and cached.get("mangabakaId"):
                try:
                    series = self.fetch_series(cached["mangabakaId"])
                    if series:
                        return series
                except Exception:
                    # Deliberate best effort: a failed fetch of the cached
                    # id falls through to a fresh search below.
                    pass

        series = self.search_series(title)
        if series and self._matches_cache is not None:
            self._matches_cache.upsert(
                title,
                mangabaka_id=series.get("id"),
                mangabaka_name=series.get("title") or "",
                image_url=pick_thumbnail_url(series.get("cover")),
            )
        return series

    # ------------------------------------------------------------------
    # Main entry point
    # ------------------------------------------------------------------
    def build(self, *, title: str = "",
              mangabaka_id=None) -> "dict | None":
        """
        Fetches and enriches metadata for one series, returning the
        normalised dict described in the module docstring.

        Pass either `title` (will resolve via cache/search) or
        `mangabaka_id` (direct fetch). A non-blank `mangabaka_id` takes
        precedence. Returns None when no series could be found; errors
        raised by :meth:`fetch_series` propagate.
        """
        if mangabaka_id is not None and str(mangabaka_id).strip():
            md = self.fetch_series(mangabaka_id)
        else:
            md = self.resolve(title)
        if not md:
            return None
        return self._assemble(md)

    # ------------------------------------------------------------------
    # Internal: assemble the result dict
    # ------------------------------------------------------------------
    def _assemble(self, md: dict) -> dict:
        """Builds the result dict from a raw MangaBaka series record."""
        mal_id = _id_from_source(md, "myanimelist", "mal")
        al_id = _id_from_source(md, "anilist")

        # Fall back to a title-based MAL lookup when the source map does
        # not carry an id — Jikan is the only tracker that ships staff
        # data we can use to enrich author / artist person records.
        if mal_id is None:
            mal_id = self._mal.find_mal_id(md.get("title") or "")

        mal_stats = self._mal.get_stats(mal_id) if mal_id else None

        # MAL first, AniList only when MAL produced nothing.
        characters_detailed = self._mal.get_characters_detailed(mal_id) if mal_id else []
        if not characters_detailed and al_id:
            characters_detailed = self._al.get_characters_detailed(al_id)

        staff_detailed = self._mal.get_staff_detailed(mal_id) if mal_id else []
        if not staff_detailed and al_id:
            staff_detailed = self._al.get_staff_detailed(al_id)

        # Character names for SeriesMetadata, disambiguated with the
        # tracker character id ("Rem (MAL 118737)") because Kavita person
        # records are global and keyed by name only.
        character_names = [
            person_name_with_id(c["name"],
                                mal_id=c.get("mal_id"),
                                al_id=c.get("al_id"))
            for c in characters_detailed if c.get("name")
        ]
        # Writers come from MangaBaka first (authoritative for novels)
        writers = list(md.get("authors") or [])
        # Illustrators / artists -> CoverArtists (Kavita has no dedicated
        # illustrator field, and Pencillers is the wrong semantic for
        # text-only novels).
        cover_artists = list(md.get("artists") or [])

        # Publisher: prefer English licence, else original. When both
        # exist, the original publisher becomes the imprint.
        english_pubs = self._publishers_by_type(md, "English")
        original_pubs = self._publishers_by_type(md, "Original")
        publishers = english_pubs or original_pubs
        imprint = original_pubs[0] if english_pubs and original_pubs else None

        # Release year
        release_year = None
        try:
            if md.get("year") is not None:
                release_year = int(md["year"])
        except (TypeError, ValueError):
            pass

        # Score: MangaBaka rating is 0..100 -> Kavita userRating is 0..5
        score = None
        if md.get("rating") is not None:
            try:
                score = round(float(md["rating"]) / 20.0, 1)
            except (TypeError, ValueError):
                pass

        # Tags / genres come back as snake_case slugs.
        genres = [_format_term(g) for g in (md.get("genres") or []) if g]
        tags = [_format_term(t) for t in (md.get("tags") or []) if t]

        # Web links
        web_links = self._collect_web_links(md)

        # Summary HTML
        summary = self._build_summary(md, mal_stats)

        # Cover URL
        cover_url = pick_cover_url(md.get("cover"))

        # Title variants
        all_alt = self._collect_all_alt_titles(md)

        return {
            "mangabakaId": str(md.get("id") or ""),
            "mangabakaTitle": md.get("title") or "",
            "originalName": md.get("native_title") or "",
            "localizedName": md.get("romanized_title") or "",
            "sortName": self._sort_title(md),
            "altTitles": all_alt,
            "summary": summary,
            "genres": genres,
            "tags": tags,
            "characters": character_names,
            "writers": writers,
            "coverArtists": cover_artists,
            "publishers": publishers,
            "imprint": imprint,
            "releaseYear": release_year,
            # Both enum lookups are case-insensitive and fall back to 0.
            "ageRating": self._enum_value(
                _AGE_RATING_MAP, md.get("content_rating")),
            "publicationStatus": self._enum_value(
                _PUB_STATUS_MAP, md.get("status")),
            "language": self.language,
            "webLinks": web_links,
            "score": score,
            "coverUrl": cover_url,
            "malId": mal_id,
            "anilistId": al_id,
            "relationships": list(md.get("relationships_v2") or []),
            "charactersDetailed": characters_detailed,
            "staffDetailed": staff_detailed,
            "raw": md,
        }

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _enum_value(mapping: dict, value) -> int:
        """Maps a MangaBaka slug to a Kavita enum value.

        The lookup ignores case and surrounding whitespace; anything that
        is not a string, or is not in ``mapping``, yields 0.
        """
        if not isinstance(value, str):
            return 0
        return mapping.get(value.strip().lower(), 0)

    @staticmethod
    def _publishers_by_type(md: dict, ptype: str) -> list[str]:
        """Returns the names of all publishers whose ``type`` equals
        ``ptype``; malformed (non-dict or unnamed) entries are skipped."""
        return [p.get("name") for p in (md.get("publishers") or [])
                if isinstance(p, dict)
                and p.get("type") == ptype and p.get("name")]

    def _sort_title(self, md: dict) -> str:
        """Returns the title for the configured language, falling back to
        the MangaBaka main title and then to an empty string."""
        lang = self.language.lower()
        alts = self._collect_alt_titles(md)
        return alts.get(lang) or md.get("title") or ""

    def _collect_alt_titles(self, md: dict) -> "dict[str, str]":
        """Returns one best title per language code (en/de/jp/romaji)."""
        titles = md.get("titles") or md.get("alt_titles") or []

        def pick(language_codes: tuple, prefer_trait: "str | None" = None
                 ) -> "str | None":
            # Scores every title in the requested languages; the first
            # entry with the highest score wins.
            best_score = -1
            best_title: "str | None" = None
            for entry in titles:
                if not isinstance(entry, dict):
                    continue
                lang = (entry.get("language") or entry.get("lang") or "").lower()
                if lang not in language_codes:
                    continue
                title = entry.get("title")
                if not title:
                    continue
                traits = entry.get("traits") or []
                score = 0
                if prefer_trait and prefer_trait in traits:
                    score += 4
                if "official" in traits:
                    score += 2
                if entry.get("is_primary"):
                    score += 1
                if score > best_score:
                    best_score, best_title = score, title
            return best_title

        result: dict[str, str] = {}
        kanji = pick(("ja",), prefer_trait="native") or md.get("native_title")
        if kanji:
            result["jp"] = kanji
        romaji = pick(("ja-latn", "ja-romaji"))
        if not romaji:
            # Only trust romanized_title when it really is plain ASCII.
            rt = md.get("romanized_title") or ""
            if rt and all(ord(c) < 128 for c in rt):
                romaji = rt
        if romaji:
            result["romaji"] = romaji
        en = pick(("en",)) or md.get("title")
        if en:
            result["en"] = en
        de = pick(("de",))
        if de:
            result["de"] = de
        return result

    @staticmethod
    def _collect_all_alt_titles(md: dict) -> "dict[str, list[str]]":
        """Groups every known title by language group.

        Returns ``{group: [titles...]}`` with duplicates removed and the
        original order kept. Entries in unknown languages, non-dict
        entries and blank titles are ignored.
        """
        _GROUPS = {
            "en": ("en",),
            "de": ("de",),
            "ja": ("ja",),
            "ja-romaji": ("ja-latn", "ja-romaji"),
            "ko": ("ko",),
            "ko-romaji": ("ko-latn", "ko-romaji"),
            "zh": ("zh", "zh-hk", "zh-tw", "zh-hans", "zh-hant"),
            "zh-romaji": ("zh-latn",),
        }
        lang_to_group = {code: group
                         for group, codes in _GROUPS.items() for code in codes}
        result: dict[str, list[str]] = {}
        seen: dict[str, set] = {}
        for entry in (md.get("titles") or md.get("alt_titles") or []):
            if not isinstance(entry, dict):
                continue
            lang = (entry.get("language") or entry.get("lang") or "").lower()
            group = lang_to_group.get(lang)
            if not group:
                continue
            title = (entry.get("title") or "").strip()
            if not title:
                continue
            bucket = result.setdefault(group, [])
            known = seen.setdefault(group, set())
            if title not in known:
                bucket.append(title)
                known.add(title)
        return result

    def _collect_web_links(self, md: dict) -> list[str]:
        """Returns the de-duplicated list of web links for a series.

        Starts with the string entries of ``md["links"]`` and appends one
        URL per entry of ``md["source"]`` that has a known template and an
        id. First occurrence wins; order is preserved.
        """
        # Only strings are usable as links (and hashable for de-duplication).
        links: list[str] = [link for link in (md.get("links") or [])
                            if link and isinstance(link, str)]
        sources = md.get("source")
        if isinstance(sources, dict):
            for raw_key, info in sources.items():
                template = _TRACKER_URL_TEMPLATES.get(_normalise_key(raw_key))
                if not template or not isinstance(info, dict):
                    continue
                source_id = info.get("id")
                if source_id is not None:
                    links.append(template.format(id=source_id))
        # dict keys keep insertion order, which makes this a stable dedupe.
        return list(dict.fromkeys(links))

    def _build_summary(self, md: dict,
                       mal_stats: "dict | None") -> str:
        """Builds the HTML summary with stats table + description + alt titles.

        The three sections are optional and are joined with ``<br>``. Alt
        titles come from a third-party API and are HTML-escaped before
        being placed in the table.
        """
        from html import escape  # local import: only this method needs it

        _TD = 'style="padding-right:1.5em"'
        parts: list[str] = []

        if mal_stats:
            url = mal_stats.get("url", "")
            as_of = mal_stats.get("as_of", "")
            rows: list[str] = []
            for label, key, fmt in (
                ("Score", "score", "{}"),
                ("Ranked", "rank", "#{}"),
                ("Scored by", "scored_by", "{:,} users"),
                ("Popularity", "popularity", "#{}"),
                ("Members", "members", "{:,}"),
                ("Favorites", "favorites", "{:,}"),
            ):
                v = mal_stats.get(key)
                if v is None:
                    continue
                try:
                    formatted = fmt.format(v)
                except (TypeError, ValueError):
                    # e.g. "{:,}" applied to a string value
                    formatted = str(v)
                rows.append(f"<tr><td {_TD}>{label}</td><td>{formatted}</td></tr>")
            if rows:
                link = f'<a href="{url}" target="_blank">MyAnimeList</a>' if url else "MyAnimeList"
                parts.append(f"<p>{link} stats as of {as_of}:</p>"
                             f"<table>{''.join(rows)}</table>")

        desc_raw = (md.get("description") or "").strip()
        if desc_raw:
            parts.append(_md_to_html(desc_raw))

        all_alt = self._collect_all_alt_titles(md)
        if all_alt:
            label_map = {
                "en": "EN",
                "de": "DE",
                "ja": "JA",
                "ja-romaji": "JA Romaji",
                "ko": "KO",
                "ko-romaji": "KO Romaji",
                "zh": "ZH",
                "zh-romaji": "ZH Romaji",
            }
            alt_rows: list[str] = []
            for group in ("en", "de", "ja", "ja-romaji",
                          "ko", "ko-romaji", "zh", "zh-romaji"):
                titles = all_alt.get(group)
                if not titles:
                    continue
                # Titles may contain "&" or "<"; keep the markup well-formed.
                cell = "<br>".join(escape(t, quote=False) for t in titles)
                alt_rows.append(
                    f"<tr><td {_TD}>{label_map[group]}</td><td>{cell}</td></tr>")
            if alt_rows:
                parts.append(f"<table>{''.join(alt_rows)}</table>")

        return "<br>".join(parts)
|
||||||
@@ -0,0 +1,260 @@
|
|||||||
|
"""
|
||||||
|
light_novel_orchestrator.py
|
||||||
|
===========================
|
||||||
|
|
||||||
|
High-level workflow on top of the resolvers, the Kavita client and the
|
||||||
|
diff-based updaters. Exposes three operations to the WebApp:
|
||||||
|
|
||||||
|
- build_matches(library_ids):
|
||||||
|
Scan one or more Kavita libraries, resolve every series against
|
||||||
|
MangaBaka and persist the match in matches.json.
|
||||||
|
- update_series(kavita_series_id):
|
||||||
|
Re-fetch MangaBaka, MAL and AniList data for a single Kavita
|
||||||
|
series and apply the diff (metadata + persons + relationships).
|
||||||
|
- update_all(library_ids):
|
||||||
|
Run update_series for every series that has a match in the
|
||||||
|
cache and lives in the given libraries.
|
||||||
|
|
||||||
|
A single shared HTTP session (rate-limited for MangaBaka) and shared
|
||||||
|
resolver singletons are used across the whole run to maximise cache
|
||||||
|
hits.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||||
|
from MALResolver import MALResolver
|
||||||
|
from AniListResolver import AniListResolver
|
||||||
|
from MatchesCache import MatchesCache
|
||||||
|
from KavitaClient import KavitaClient
|
||||||
|
from KavitaPersonUpdater import KavitaPersonUpdater
|
||||||
|
from KavitaSeriesUpdater import KavitaSeriesUpdater
|
||||||
|
from LightNovelMetadataBuilder import (
|
||||||
|
LightNovelMetadataBuilder,
|
||||||
|
pick_thumbnail_url,
|
||||||
|
)
|
||||||
|
from RelationshipSync import RelationshipSync
|
||||||
|
|
||||||
|
|
||||||
|
class LightNovelOrchestrator:
    """Ties the Kavita client, the metadata builder, the match cache and
    the updaters together into the operations the WebApp calls:
    ``build_matches``, ``update_series`` and ``update_all``."""

    def __init__(self, *,
                 kavita_url: str,
                 kavita_api_key: str,
                 matches_cache: MatchesCache,
                 language: str = "en",
                 request_timeout: int = 30,
                 api_base_url: str = "https://api.mangabaka.dev/v1"):
        """
        :param kavita_url: base URL of the Kavita server.
        :param kavita_api_key: API key passed to ``KavitaClient``.
        :param matches_cache: persistent title -> MangaBaka match store.
        :param language: language code forwarded to the metadata builder.
        :param request_timeout: timeout in seconds shared by all clients.
        :param api_base_url: MangaBaka API root forwarded to the builder.
        """
        self._cache = matches_cache
        self._timeout = request_timeout

        session = requests.Session()
        session.headers.setdefault("User-Agent",
                                   "KavitaLightNovelOrchestrator/1.0")
        # NOTE(review): the builder applies the rate limit to this same
        # session again -- confirm apply_to_session is idempotent.
        _apply_mangabaka_rate_limit(session)
        self._session = session

        # First construction in the LN container — pins the singletons to
        # light-novel search mode (manga container uses the defaults).
        self._mal = MALResolver(request_timeout=request_timeout,
                                search_type="lightnovel")
        self._al = AniListResolver(request_timeout=request_timeout,
                                   media_format="novel")

        self._client = KavitaClient(kavita_url, kavita_api_key,
                                    request_timeout=request_timeout)
        self._builder = LightNovelMetadataBuilder(
            api_base_url=api_base_url,
            language=language,
            request_timeout=request_timeout,
            session=session,
            mal_resolver=self._mal,
            al_resolver=self._al,
            matches_cache=matches_cache,
        )
        self._series_updater = KavitaSeriesUpdater(self._client)
        self._person_updater = KavitaPersonUpdater(
            self._client,
            mal_resolver=self._mal,
            al_resolver=self._al,
        )
        self._relation_sync = RelationshipSync(
            self._client, matches_cache, builder=self._builder)

    # ------------------------------------------------------------------
    # Library listings
    # ------------------------------------------------------------------
    def list_libraries(self) -> list[dict]:
        """Returns the libraries reported by the Kavita client."""
        return self._client.list_libraries()

    def list_series_in_libraries(self, library_ids: list[int]) -> list[dict]:
        """Returns the series of all given libraries as one flat list.

        A library that fails to list is logged and skipped so one broken
        library does not abort the whole scan.
        """
        result: list[dict] = []
        for lib_id in library_ids:
            try:
                result.extend(self._client.list_series_in_library(int(lib_id)))
            except Exception as exc:
                print(f"[orchestrator] library {lib_id} list failed: {exc}",
                      flush=True)
        return result

    # ------------------------------------------------------------------
    # Matching
    # ------------------------------------------------------------------
    def build_matches(self, library_ids: list[int]) -> dict:
        """
        Resolves every series in the given libraries against MangaBaka.

        Series already present in matches.json keep their stored
        mangabakaId; the kavitaSeriesId + libraryId fields are refreshed
        in case the user moved a series between libraries.

        Returns counters: ``checked``, ``matched``, ``skipped`` (already
        cached) and ``missing`` (no MangaBaka hit). Series without a name
        are not counted at all.
        """
        stats = {"checked": 0, "matched": 0, "skipped": 0, "missing": 0}
        for series in self.list_series_in_libraries(library_ids):
            title = (series.get("name") or "").strip()
            if not title:
                continue
            stats["checked"] += 1
            kavita_id = int(series.get("id") or 0)
            library_id = int(series.get("libraryId") or 0)

            cached = self._cache.get(title)
            if cached and cached.get("mangabakaId"):
                # Known match: only refresh where the series lives.
                self._cache.upsert(
                    title,
                    kavita_series_id=kavita_id,
                    library_id=library_id,
                )
                stats["skipped"] += 1
                continue

            mb_series = self._builder.search_series(title)
            if not mb_series:
                # Still record the series so it shows up as unmatched.
                self._cache.upsert(
                    title,
                    kavita_series_id=kavita_id,
                    library_id=library_id,
                )
                stats["missing"] += 1
                print(f"[match] {title!r}: no MangaBaka hit", flush=True)
                continue

            self._cache.upsert(
                title,
                mangabaka_id=mb_series.get("id"),
                mangabaka_name=mb_series.get("title") or "",
                image_url=pick_thumbnail_url(mb_series.get("cover")),
                kavita_series_id=kavita_id,
                library_id=library_id,
            )
            stats["matched"] += 1
            print(f"[match] {title!r} -> {mb_series.get('title')!r} "
                  f"(id={mb_series.get('id')})", flush=True)
        return stats

    # ------------------------------------------------------------------
    # Updating
    # ------------------------------------------------------------------
    def update_series(self, kavita_series_id: int) -> dict:
        """Runs a full metadata update for a single Kavita series.

        Returns ``{"ok": True, "title", "mangabakaId", "series", "persons",
        "relationships"}`` on success. Every failure of the lookup or of
        the series update is reported as ``{"ok": False, "error": ...}``;
        person and relationship failures are non-fatal and are reported
        inside their own sub-report.
        """
        series_id = int(kavita_series_id)
        hit = self._cache.get_by_kavita_id(series_id)
        if hit:
            cached_title, cached_entry = hit
            prev_cover = cached_entry.get("imageUrl") or ""
            try:
                built = self._builder.build(
                    mangabaka_id=cached_entry.get("mangabakaId"))
            except Exception as exc:
                return {"ok": False, "error": f"metadata fetch failed: {exc}"}
            if not built:
                return {"ok": False, "error": "mangabaka id no longer resolvable"}
        else:
            # Try to resolve via the Kavita series name on the fly.
            try:
                series = self._client.get_series(series_id) or {}
                title = (series.get("name") or "").strip()
                built = self._builder.build(title=title) if title else None
            except Exception as exc:
                return {"ok": False, "error": f"metadata fetch failed: {exc}"}
            if not title:
                return {"ok": False, "error": "series not in matches.json"}
            if not built:
                return {"ok": False, "error": "no MangaBaka match"}
            # The cover URL is deliberately NOT stamped here: it is written
            # only after a successful update (below). Stamping it first
            # would make the new cover look like the previous one.
            self._cache.upsert(
                title,
                mangabaka_id=built.get("mangabakaId"),
                mangabaka_name=built.get("mangabakaTitle"),
                kavita_series_id=series_id,
                library_id=int(series.get("libraryId") or 0),
            )
            cached_title = title
            # Freshly matched series: no cover has been pushed for it yet.
            # (Assumes the updater compares previous_cover_url with the new
            # cover URL to decide whether to upload -- TODO confirm.)
            prev_cover = ""

        try:
            series_report = self._series_updater.update_series(
                series_id, built,
                previous_cover_url=prev_cover,
            )
        except Exception as exc:
            return {"ok": False, "error": f"series update failed: {exc}"}

        # Persons
        try:
            person_report = self._person_updater.update_for_manga(
                built.get("malId"),
                al_manga_id=built.get("anilistId"),
            )
        except Exception as exc:
            person_report = {"error": str(exc)}

        # Relationships + collection
        try:
            relation_report = self._relation_sync.sync(series_id, built)
        except Exception as exc:
            relation_report = {"error": str(exc)}

        # Stamp the new cover URL on the cache so the next run knows when
        # to re-upload.
        self._cache.upsert(
            cached_title,
            image_url=built.get("coverUrl") or prev_cover,
        )
        self._cache.mark_updated(cached_title)

        return {
            "ok": True,
            "title": cached_title,
            "mangabakaId": built.get("mangabakaId"),
            "series": series_report,
            "persons": person_report,
            "relationships": relation_report,
        }

    def update_all(self, library_ids: "list[int] | None") -> dict:
        """Updates every cached series in the given libraries.

        ``None`` means "all libraries". Entries without a Kavita series id
        or without a MangaBaka id are skipped. Returns
        ``{"ok": <count>, "failed": <count>, "results": [...]}``.
        """
        if library_ids is None:
            entries = self._cache.all()["matches"]
        else:
            entries = self._cache.all_in_libraries(library_ids)["matches"]

        results: list[dict] = []
        ok = fail = 0
        for title, entry in entries.items():
            ksid = int(entry.get("kavitaSeriesId") or 0)
            if not ksid or not entry.get("mangabakaId"):
                continue
            try:
                res = self.update_series(ksid)
            except Exception as exc:
                # One broken series must not stop the batch.
                res = {"ok": False, "error": str(exc)}
            res["title"] = title
            results.append(res)
            if res.get("ok"):
                ok += 1
            else:
                fail += 1
            print(f"[update] {title!r}: "
                  f"{'ok' if res.get('ok') else 'FAIL ' + str(res.get('error'))}",
                  flush=True)
        return {"ok": ok, "failed": fail, "results": results}

    # ------------------------------------------------------------------
    # Direct helpers exposed to the WebApp
    # ------------------------------------------------------------------
    def fetch_series(self, mangabaka_id) -> "dict | None":
        """Fetches the raw MangaBaka series record via the builder."""
        return self._builder.fetch_series(mangabaka_id)
|
||||||
@@ -2,26 +2,30 @@
|
|||||||
matches_cache.py
|
matches_cache.py
|
||||||
================
|
================
|
||||||
|
|
||||||
Persistent JSON cache that maps a Suwayomi/series search title to the
|
Persistent JSON cache that maps a Kavita series title to the MangaBaka
|
||||||
MangaBaka series it was matched against.
|
series it was matched against, plus enough context to update the right
|
||||||
|
Kavita record later.
|
||||||
|
|
||||||
Structure on disk::
|
Structure on disk::
|
||||||
|
|
||||||
{
|
{
|
||||||
"matches": {
|
"matches": {
|
||||||
"<search title>": {
|
"<kavita series name>": {
|
||||||
"mangabakaId": "12345",
|
"mangabakaId": "12345",
|
||||||
"mangabakaName": "One-Punch Man",
|
"mangabakaName": "Re:Zero",
|
||||||
"imageUrl": "https://.../cover.jpg",
|
"imageUrl": "https://.../cover.jpg",
|
||||||
"firstMatchTime": 1700000000
|
"kavitaSeriesId": 42,
|
||||||
|
"libraryId": 3,
|
||||||
|
"firstMatchTime": 1700000000,
|
||||||
|
"lastUpdateTime": 1700100000
|
||||||
},
|
},
|
||||||
...
|
...
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
The cache is consulted by ComicInfoBuilder before issuing a MangaBaka
|
The cache is the source of truth for the WebUI's matches table and is
|
||||||
search request, and is written back to disk on every mutation so a crash
|
written back on every mutation so a crash mid-batch does not lose
|
||||||
does not lose matches that were resolved in the current run.
|
matches that were resolved in the current run.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -32,6 +36,14 @@ import time
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def _set_int(entry: dict, key: str, value) -> None:
    """Set ``entry[key] = int(value)``; leave ``entry`` alone if that fails.

    Args:
        entry: Dict to mutate in place.
        key: Key to write.
        value: Anything ``int()`` may accept (int, float, numeric string...).

    Values that cannot be coerced are ignored silently, so an existing
    ``entry[key]`` keeps its previous value.
    """
    try:
        coerced = int(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / containers; ValueError: "abc" or NaN;
        # OverflowError: int(float("inf")), which json.loads can produce
        # from a literal ``Infinity``.
        return
    entry[key] = coerced
|
||||||
|
|
||||||
|
|
||||||
class MatchesCache:
|
class MatchesCache:
|
||||||
def __init__(self, path):
|
def __init__(self, path):
|
||||||
self._path = Path(path)
|
self._path = Path(path)
|
||||||
@@ -47,26 +59,35 @@ class MatchesCache:
|
|||||||
entry = self._data["matches"].get(title)
|
entry = self._data["matches"].get(title)
|
||||||
return dict(entry) if entry else None
|
return dict(entry) if entry else None
|
||||||
|
|
||||||
def add(self, title: str, *,
|
def get_by_kavita_id(self, kavita_series_id: int) -> "tuple[str, dict] | None":
    """Find the cache entry linked to a Kavita series id.

    Args:
        kavita_series_id: Kavita series id. Numeric strings are accepted
            and compared by integer value.

    Returns:
        ``(title, entry_copy)`` for the first entry whose
        ``kavitaSeriesId`` equals the id, or ``None`` when nothing matches
        or the id is not a positive integer.
    """
    def _as_int(value):
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return None

    target = _as_int(kavita_series_id)
    # 0 is the placeholder stored for entries that are not linked to a
    # Kavita series yet; looking it up would return an arbitrary entry.
    if target is None or target <= 0:
        return None

    with self._lock:
        for title, entry in self._data["matches"].items():
            if _as_int(entry.get("kavitaSeriesId")) == target:
                # Copy so callers cannot mutate the cached record.
                return title, dict(entry)
    return None
|
||||||
|
|
||||||
|
def get_by_mangabaka_id(self, mangabaka_id) -> "tuple[str, dict] | None":
    """Find the cache entry that was matched to a MangaBaka id.

    Ids are compared as strings. ``None`` or an id whose string form is
    empty never matches. Returns ``(title, entry_copy)`` for the first
    hit, otherwise ``None``.
    """
    if mangabaka_id is None:
        return None
    wanted = str(mangabaka_id)
    if wanted == "":
        return None

    with self._lock:
        for name, record in self._data["matches"].items():
            stored = str(record.get("mangabakaId") or "")
            if stored != wanted:
                continue
            # Hand out a copy so the cached record stays private.
            return name, dict(record)
    return None
|
||||||
|
|
||||||
def upsert(self, title: str, *,
|
def upsert(self, title: str, *,
|
||||||
mangabaka_id=None,
|
mangabaka_id=None,
|
||||||
mangabaka_name=None,
|
mangabaka_name=None,
|
||||||
image_url=None,
|
image_url=None,
|
||||||
first_match_time=None) -> dict:
|
kavita_series_id=None,
|
||||||
|
library_id=None,
|
||||||
|
first_match_time=None,
|
||||||
|
last_update_time=None) -> dict:
|
||||||
|
"""
|
||||||
|
Inserts or updates an entry. Only fields passed explicitly are
|
||||||
|
modified; the rest are preserved.
|
||||||
|
"""
|
||||||
with self._lock:
|
with self._lock:
|
||||||
entry = self._data["matches"].get(title)
|
entry = self._data["matches"].get(title)
|
||||||
if entry is None:
|
if entry is None:
|
||||||
@@ -74,7 +95,10 @@ class MatchesCache:
|
|||||||
"mangabakaId": "",
|
"mangabakaId": "",
|
||||||
"mangabakaName": "",
|
"mangabakaName": "",
|
||||||
"imageUrl": "",
|
"imageUrl": "",
|
||||||
|
"kavitaSeriesId": 0,
|
||||||
|
"libraryId": 0,
|
||||||
"firstMatchTime": int(time.time()),
|
"firstMatchTime": int(time.time()),
|
||||||
|
"lastUpdateTime": 0,
|
||||||
}
|
}
|
||||||
self._data["matches"][title] = entry
|
self._data["matches"][title] = entry
|
||||||
if mangabaka_id is not None:
|
if mangabaka_id is not None:
|
||||||
@@ -83,14 +107,24 @@ class MatchesCache:
|
|||||||
entry["mangabakaName"] = mangabaka_name
|
entry["mangabakaName"] = mangabaka_name
|
||||||
if image_url is not None:
|
if image_url is not None:
|
||||||
entry["imageUrl"] = image_url
|
entry["imageUrl"] = image_url
|
||||||
|
if kavita_series_id is not None:
|
||||||
|
_set_int(entry, "kavitaSeriesId", kavita_series_id)
|
||||||
|
if library_id is not None:
|
||||||
|
_set_int(entry, "libraryId", library_id)
|
||||||
if first_match_time is not None:
|
if first_match_time is not None:
|
||||||
try:
|
_set_int(entry, "firstMatchTime", first_match_time)
|
||||||
entry["firstMatchTime"] = int(first_match_time)
|
if last_update_time is not None:
|
||||||
except (TypeError, ValueError):
|
_set_int(entry, "lastUpdateTime", last_update_time)
|
||||||
pass
|
|
||||||
self._save_unlocked()
|
self._save_unlocked()
|
||||||
return dict(entry)
|
return dict(entry)
|
||||||
|
|
||||||
|
def mark_updated(self, title: str) -> None:
# Stamps the cached entry stored under `title` with the current Unix time
# (whole seconds) in its "lastUpdateTime" field, while holding self._lock,
# and writes the cache out through _save_unlocked().
# A title that is not in the cache leaves the stored data unchanged.
# NOTE(review): indentation is lost in this view, so it is not visible
# whether _save_unlocked() runs only when the entry exists or on every
# call -- confirm against the real file before restructuring.
|
||||||
|
with self._lock:
|
||||||
|
entry = self._data["matches"].get(title)
|
||||||
|
if entry is not None:
|
||||||
|
entry["lastUpdateTime"] = int(time.time())
|
||||||
|
||||||
|
self._save_unlocked()
|
||||||
|
|
||||||
def rename(self, old_title: str, new_title: str) -> bool:
|
def rename(self, old_title: str, new_title: str) -> bool:
|
||||||
if not new_title or old_title == new_title:
|
if not new_title or old_title == new_title:
|
||||||
return False
|
return False
|
||||||
@@ -115,6 +149,20 @@ class MatchesCache:
|
|||||||
return {"matches": {k: dict(v)
|
return {"matches": {k: dict(v)
|
||||||
for k, v in self._data["matches"].items()}}
|
for k, v in self._data["matches"].items()}}
|
||||||
|
|
||||||
|
def all_in_libraries(self, library_ids: "list[int] | None") -> dict:
    """Return a copy of the cache restricted to the given libraries.

    ``None`` means "no restriction" and yields the same result as
    ``all()``. Otherwise only entries whose ``libraryId`` (a missing or
    empty value counts as 0) is among ``library_ids`` are included. The
    result has the on-disk shape ``{"matches": {title: entry_copy}}``.
    """
    if library_ids is None:
        return self.all()

    wanted = set(map(int, library_ids))
    selected = {}
    with self._lock:
        for name, record in self._data["matches"].items():
            library = int(record.get("libraryId") or 0)
            if library in wanted:
                selected[name] = dict(record)
    return {"matches": selected}
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Internal IO
|
# Internal IO
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -0,0 +1,764 @@
|
|||||||
|
"""
|
||||||
|
matches_web_app.py
|
||||||
|
==================
|
||||||
|
|
||||||
|
Flask web UI for the Kavita light-novel metadata fetcher.
|
||||||
|
|
||||||
|
Pages
|
||||||
|
-----
|
||||||
|
GET / HTML UI (matches table + actions)
|
||||||
|
|
||||||
|
Match cache (JSON)
|
||||||
|
------------------
|
||||||
|
GET /api/libraries Lists Kavita libraries
|
||||||
|
GET /api/matches Full cache, optionally filtered by libraryIds=
|
||||||
|
POST /api/matches Upsert a single match
|
||||||
|
body: {title, mangabakaId}
|
||||||
|
POST /api/matches/delete Remove a match
|
||||||
|
body: {title}
|
||||||
|
|
||||||
|
Background jobs
|
||||||
|
---------------
|
||||||
|
POST /api/build Build matches for libraries
|
||||||
|
body: {libraryIds: [int, ...]}
|
||||||
|
POST /api/update Update a single series
|
||||||
|
body: {kavitaSeriesId}
|
||||||
|
POST /api/update-all Update every cached series in libraries
|
||||||
|
body: {libraryIds: [int, ...] | null}
|
||||||
|
GET /api/status Current background job status (status, log)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
|
from flask import Flask, jsonify, request, Response
|
||||||
|
|
||||||
|
from MatchesCache import MatchesCache
|
||||||
|
from LightNovelMetadataBuilder import pick_thumbnail_url
|
||||||
|
|
||||||
|
|
||||||
|
def _int_list(values) -> list[int]:
    """Coerce loosely-typed input to a list of positive ints.

    Accepted shapes:
      * ``None`` / empty input -> ``[]``
      * an iterable of mixed values (the usual JSON array)
      * a comma-separated string such as ``"3,5"`` (``"12"`` is one id,
        not the digits 1 and 2)
      * a single scalar such as ``5``

    Items that are not integers, are booleans, or are <= 0 are dropped
    instead of raising, so malformed request data cannot crash a handler.
    """
    if not values:
        return []

    if isinstance(values, str):
        candidates = values.split(",")
    else:
        try:
            candidates = iter(values)
        except TypeError:
            # A bare scalar (e.g. JSON ``{"libraryIds": 5}``).
            candidates = [values]

    out: list[int] = []
    for item in candidates:
        # bool is an int subclass; JSON ``true`` must not become id 1.
        if isinstance(item, bool):
            continue
        try:
            number = int(item)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers int(float("inf")).
            continue
        if number > 0:
            out.append(number)
    return out
|
||||||
|
|
||||||
|
|
||||||
|
_INDEX_HTML = r"""<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Kavita light-novel metadata fetcher</title>
|
||||||
|
<style>
|
||||||
|
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
|
||||||
|
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
|
||||||
|
.bar { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
|
||||||
|
.bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
|
||||||
|
.bar select[multiple] { background:#222; color:#eee; border:1px solid #444; min-width: 14rem; min-height: 4.2rem; }
|
||||||
|
button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
|
||||||
|
button.primary { background:#2563eb; border-color:#2563eb; color:white; }
|
||||||
|
button.danger { background:#7f1d1d; border-color:#7f1d1d; color:white; }
|
||||||
|
button.success { background:#15803d; border-color:#15803d; color:white; }
|
||||||
|
button:disabled { opacity:.5; cursor:default; }
|
||||||
|
table { border-collapse: collapse; width: 100%; }
|
||||||
|
th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
|
||||||
|
th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
|
||||||
|
th.sortable { cursor: pointer; user-select: none; }
|
||||||
|
th.sortable:hover { background:#252525; }
|
||||||
|
th .arrow { display:inline-block; width:.8em; color:#9ca3af; }
|
||||||
|
tr:nth-child(even) td { background: #161616; }
|
||||||
|
td.image img { max-width: 90px; max-height: 130px; display:block; }
|
||||||
|
td.id input { width: 12rem; padding: .25rem; background:#222; color:#eee; border:1px solid #444; font-family: monospace; }
|
||||||
|
td.title a { color: #60a5fa; text-decoration: none; }
|
||||||
|
td.title a:hover { text-decoration: underline; }
|
||||||
|
td.actions { white-space: nowrap; }
|
||||||
|
.status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
|
||||||
|
.dirty td { background: #1f2937 !important; }
|
||||||
|
.count { color:#9ca3af; font-size:.9rem; margin-left:.5rem; }
|
||||||
|
pre.log { background:#0a0a0a; color:#9ca3af; padding:.5rem .75rem; max-height:18rem; overflow:auto; border:1px solid #333; font-size:.8rem; white-space:pre-wrap; }
|
||||||
|
label { font-size:.9rem; color:#9ca3af; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Kavita light-novel metadata fetcher <span id="count" class="count"></span></h1>
|
||||||
|
|
||||||
|
<div class="bar">
|
||||||
|
<label>Libraries
|
||||||
|
<select id="libraries" multiple size="3"></select>
|
||||||
|
</label>
|
||||||
|
<button id="reload">Reload</button>
|
||||||
|
<button id="build">Match all in libraries</button>
|
||||||
|
<button id="updateAll" class="success">Update all in libraries</button>
|
||||||
|
<button id="batchSave" class="primary">Save dirty (0)</button>
|
||||||
|
<span class="status" id="status"></span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="bar">
|
||||||
|
<input id="filter" type="search" placeholder="Filter by title…">
|
||||||
|
<span class="count" id="jobStatus"></span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<pre id="jobLog" class="log" hidden></pre>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="sortable" data-col="title">Title <span class="arrow" id="arrow-title"></span></th>
|
||||||
|
<th>mangabakaId</th>
|
||||||
|
<th>mangabakaName</th>
|
||||||
|
<th>library</th>
|
||||||
|
<th class="sortable" data-col="lastUpdateTime">Last update <span class="arrow" id="arrow-lastUpdateTime"></span></th>
|
||||||
|
<th>Image</th>
|
||||||
|
<th></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="rows"></tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const MB_SEARCH = "https://mangabaka.org/search?q=";
|
||||||
|
let matchesData = {};
|
||||||
|
let librariesById = {};
|
||||||
|
let currentSort = { col: "title", asc: true };
|
||||||
|
let jobPollHandle = null;
|
||||||
|
|
||||||
|
function fmtTime(unix) {
|
||||||
|
if (!unix) return "";
|
||||||
|
const d = new Date(unix * 1000);
|
||||||
|
return d.toLocaleString();
|
||||||
|
}
|
||||||
|
|
||||||
|
function setStatus(msg) { document.getElementById("status").textContent = msg; }
|
||||||
|
|
||||||
|
function selectedLibraryIds() {
|
||||||
|
const sel = document.getElementById("libraries");
|
||||||
|
return Array.from(sel.selectedOptions).map(o => parseInt(o.value, 10));
|
||||||
|
}
|
||||||
|
|
||||||
|
function updateDirtyCount() {
|
||||||
|
const n = document.querySelectorAll("#rows tr.dirty").length;
|
||||||
|
const btn = document.getElementById("batchSave");
|
||||||
|
btn.textContent = "Save dirty (" + n + ")";
|
||||||
|
btn.disabled = n === 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
function makeRow(title, e) {
|
||||||
|
const tr = document.createElement("tr");
|
||||||
|
tr.dataset.title = title;
|
||||||
|
|
||||||
|
// Title — links to MangaBaka search
|
||||||
|
const titleTd = document.createElement("td");
|
||||||
|
titleTd.className = "title";
|
||||||
|
const a = document.createElement("a");
|
||||||
|
a.href = MB_SEARCH + encodeURIComponent(title) + "&type=novel";
|
||||||
|
a.target = "_blank";
|
||||||
|
a.rel = "noopener";
|
||||||
|
a.textContent = title;
|
||||||
|
titleTd.appendChild(a);
|
||||||
|
tr.appendChild(titleTd);
|
||||||
|
|
||||||
|
// mangabakaId (editable)
|
||||||
|
const idTd = document.createElement("td");
|
||||||
|
idTd.className = "id";
|
||||||
|
const idInp = document.createElement("input");
|
||||||
|
idInp.value = e.mangabakaId || "";
|
||||||
|
idInp.dataset.original = e.mangabakaId || "";
|
||||||
|
idInp.addEventListener("input", () => {
|
||||||
|
if (idInp.value !== idInp.dataset.original) tr.classList.add("dirty");
|
||||||
|
else tr.classList.remove("dirty");
|
||||||
|
updateDirtyCount();
|
||||||
|
});
|
||||||
|
idTd.appendChild(idInp);
|
||||||
|
tr.appendChild(idTd);
|
||||||
|
|
||||||
|
// mangabakaName
|
||||||
|
const nameTd = document.createElement("td");
|
||||||
|
nameTd.textContent = e.mangabakaName || "";
|
||||||
|
tr.appendChild(nameTd);
|
||||||
|
|
||||||
|
// library
|
||||||
|
const libTd = document.createElement("td");
|
||||||
|
const libId = e.libraryId || 0;
|
||||||
|
libTd.textContent = librariesById[libId] || (libId ? "#" + libId : "");
|
||||||
|
tr.appendChild(libTd);
|
||||||
|
|
||||||
|
// lastUpdateTime
|
||||||
|
const timeTd = document.createElement("td");
|
||||||
|
timeTd.textContent = e.lastUpdateTime ? fmtTime(e.lastUpdateTime) : "";
|
||||||
|
tr.appendChild(timeTd);
|
||||||
|
|
||||||
|
// Image
|
||||||
|
const imgTd = document.createElement("td");
|
||||||
|
imgTd.className = "image";
|
||||||
|
const img = document.createElement("img");
|
||||||
|
img.src = e.imageUrl || "";
|
||||||
|
img.alt = "";
|
||||||
|
img.loading = "lazy";
|
||||||
|
imgTd.appendChild(img);
|
||||||
|
tr.appendChild(imgTd);
|
||||||
|
|
||||||
|
// Actions
|
||||||
|
const actTd = document.createElement("td");
|
||||||
|
actTd.className = "actions";
|
||||||
|
|
||||||
|
const save = document.createElement("button");
|
||||||
|
save.textContent = "Save";
|
||||||
|
save.className = "primary";
|
||||||
|
save.addEventListener("click", () => saveRow(tr));
|
||||||
|
actTd.appendChild(save);
|
||||||
|
|
||||||
|
const update = document.createElement("button");
|
||||||
|
update.textContent = "Update";
|
||||||
|
update.className = "success";
|
||||||
|
update.style.marginLeft = ".25rem";
|
||||||
|
update.disabled = !e.kavitaSeriesId;
|
||||||
|
update.title = e.kavitaSeriesId
|
||||||
|
? "Push metadata to Kavita series #" + e.kavitaSeriesId
|
||||||
|
: "Run a Match cycle first so we know the Kavita series id";
|
||||||
|
update.addEventListener("click", () => updateRow(tr));
|
||||||
|
actTd.appendChild(update);
|
||||||
|
|
||||||
|
const del = document.createElement("button");
|
||||||
|
del.textContent = "Delete";
|
||||||
|
del.className = "danger";
|
||||||
|
del.style.marginLeft = ".25rem";
|
||||||
|
del.addEventListener("click", () => deleteRow(tr));
|
||||||
|
actTd.appendChild(del);
|
||||||
|
|
||||||
|
tr.appendChild(actTd);
|
||||||
|
|
||||||
|
tr._idInp = idInp;
|
||||||
|
tr._nameTd = nameTd;
|
||||||
|
tr._img = img;
|
||||||
|
tr._timeTd = timeTd;
|
||||||
|
tr._update = update;
|
||||||
|
return tr;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function saveRow(tr) {
|
||||||
|
const title = tr.dataset.title;
|
||||||
|
const newId = tr._idInp.value.trim();
|
||||||
|
setStatus("Saving " + title + "…");
|
||||||
|
try {
|
||||||
|
const r = await fetch("/api/matches", {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
body: JSON.stringify({ title: title, mangabakaId: newId }),
|
||||||
|
});
|
||||||
|
if (!r.ok) throw new Error(await r.text());
|
||||||
|
const data = await r.json();
|
||||||
|
const entry = data.entry || {};
|
||||||
|
matchesData[title] = entry;
|
||||||
|
tr._idInp.value = entry.mangabakaId || "";
|
||||||
|
tr._idInp.dataset.original = entry.mangabakaId || "";
|
||||||
|
tr._nameTd.textContent = entry.mangabakaName || "";
|
||||||
|
tr._img.src = entry.imageUrl || "";
|
||||||
|
tr.classList.remove("dirty");
|
||||||
|
updateDirtyCount();
|
||||||
|
setStatus("Saved " + title);
|
||||||
|
return true;
|
||||||
|
} catch (err) {
|
||||||
|
setStatus("Save failed (" + title + "): " + err.message);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function deleteRow(tr) {
|
||||||
|
const title = tr.dataset.title;
|
||||||
|
if (!confirm("Delete " + title + "?")) return;
|
||||||
|
setStatus("Deleting " + title + "…");
|
||||||
|
try {
|
||||||
|
const r = await fetch("/api/matches/delete", {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
body: JSON.stringify({ title: title }),
|
||||||
|
});
|
||||||
|
if (!r.ok) throw new Error(await r.text());
|
||||||
|
delete matchesData[title];
|
||||||
|
tr.remove();
|
||||||
|
document.getElementById("count").textContent =
|
||||||
|
"(" + Object.keys(matchesData).length + " entries)";
|
||||||
|
setStatus("Deleted");
|
||||||
|
} catch (err) {
|
||||||
|
setStatus("Delete failed: " + err.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function updateRow(tr) {
|
||||||
|
const title = tr.dataset.title;
|
||||||
|
const entry = matchesData[title] || {};
|
||||||
|
if (!entry.kavitaSeriesId) {
|
||||||
|
setStatus("No kavitaSeriesId for " + title + " — run match first");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
setStatus("Updating " + title + "…");
|
||||||
|
tr._update.disabled = true;
|
||||||
|
try {
|
||||||
|
const r = await fetch("/api/update", {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
body: JSON.stringify({ kavitaSeriesId: entry.kavitaSeriesId }),
|
||||||
|
});
|
||||||
|
if (!r.ok) throw new Error(await r.text());
|
||||||
|
const res = await r.json();
|
||||||
|
setStatus(res.ok ? "Updated " + title : "Update failed: " + res.error);
|
||||||
|
if (res.ok) {
|
||||||
|
entry.lastUpdateTime = Math.floor(Date.now() / 1000);
|
||||||
|
tr._timeTd.textContent = fmtTime(entry.lastUpdateTime);
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
setStatus("Update failed: " + err.message);
|
||||||
|
} finally {
|
||||||
|
tr._update.disabled = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function batchSave() {
|
||||||
|
const dirty = Array.from(document.querySelectorAll("#rows tr.dirty"));
|
||||||
|
if (dirty.length === 0) return;
|
||||||
|
if (!confirm("Save " + dirty.length + " changed row(s)?")) return;
|
||||||
|
setStatus("Batch saving " + dirty.length + " rows…");
|
||||||
|
let ok = 0, fail = 0;
|
||||||
|
for (const tr of dirty) {
|
||||||
|
const success = await saveRow(tr);
|
||||||
|
if (success) ok++; else fail++;
|
||||||
|
}
|
||||||
|
setStatus("Batch: " + ok + " ok, " + fail + " failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
function sortedTitles() {
|
||||||
|
const titles = Object.keys(matchesData);
|
||||||
|
const dir = currentSort.asc ? 1 : -1;
|
||||||
|
if (currentSort.col === "title") {
|
||||||
|
return titles.sort((a, b) => a.localeCompare(b) * dir);
|
||||||
|
}
|
||||||
|
if (currentSort.col === "lastUpdateTime") {
|
||||||
|
return titles.sort((a, b) => {
|
||||||
|
const av = matchesData[a].lastUpdateTime || 0;
|
||||||
|
const bv = matchesData[b].lastUpdateTime || 0;
|
||||||
|
return (av - bv) * dir;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return titles;
|
||||||
|
}
|
||||||
|
|
||||||
|
function updateSortArrows() {
|
||||||
|
for (const a of document.querySelectorAll("th .arrow")) a.textContent = "";
|
||||||
|
const id = "arrow-" + currentSort.col;
|
||||||
|
const el = document.getElementById(id);
|
||||||
|
if (el) el.textContent = currentSort.asc ? "▲" : "▼";
|
||||||
|
}
|
||||||
|
|
||||||
|
function applyFilter() {
|
||||||
|
const q = document.getElementById("filter").value.toLowerCase();
|
||||||
|
const libs = new Set(selectedLibraryIds());
|
||||||
|
for (const tr of document.querySelectorAll("#rows tr")) {
|
||||||
|
const title = tr.dataset.title;
|
||||||
|
const entry = matchesData[title] || {};
|
||||||
|
const titleMatch = title.toLowerCase().includes(q);
|
||||||
|
const libMatch = libs.size === 0 || libs.has(entry.libraryId || 0);
|
||||||
|
tr.style.display = (titleMatch && libMatch) ? "" : "none";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function render() {
|
||||||
|
const tbody = document.getElementById("rows");
|
||||||
|
tbody.innerHTML = "";
|
||||||
|
for (const t of sortedTitles()) {
|
||||||
|
tbody.appendChild(makeRow(t, matchesData[t]));
|
||||||
|
}
|
||||||
|
updateSortArrows();
|
||||||
|
applyFilter();
|
||||||
|
updateDirtyCount();
|
||||||
|
document.getElementById("count").textContent =
|
||||||
|
"(" + Object.keys(matchesData).length + " entries)";
|
||||||
|
}
|
||||||
|
|
||||||
|
async function loadLibraries() {
|
||||||
|
try {
|
||||||
|
const r = await fetch("/api/libraries");
|
||||||
|
const data = await r.json();
|
||||||
|
const libs = data.libraries || [];
|
||||||
|
const defaults = new Set(data.defaults || []);
|
||||||
|
librariesById = {};
|
||||||
|
const sel = document.getElementById("libraries");
|
||||||
|
sel.innerHTML = "";
|
||||||
|
for (const lib of libs) {
|
||||||
|
librariesById[lib.id] = lib.name;
|
||||||
|
const opt = document.createElement("option");
|
||||||
|
opt.value = lib.id;
|
||||||
|
opt.textContent = lib.name + " (#" + lib.id + ")";
|
||||||
|
if (defaults.has(lib.id)) opt.selected = true;
|
||||||
|
sel.appendChild(opt);
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
setStatus("Failed to load libraries: " + err.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function load() {
|
||||||
|
setStatus("Loading…");
|
||||||
|
try {
|
||||||
|
const r = await fetch("/api/matches");
|
||||||
|
const data = await r.json();
|
||||||
|
matchesData = data.matches || {};
|
||||||
|
render();
|
||||||
|
setStatus(Object.keys(matchesData).length + " entries");
|
||||||
|
} catch (err) {
|
||||||
|
setStatus("Load failed: " + err.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function pollJob() {
|
||||||
|
try {
|
||||||
|
const r = await fetch("/api/status");
|
||||||
|
const s = await r.json();
|
||||||
|
const jobStatus = document.getElementById("jobStatus");
|
||||||
|
const jobLog = document.getElementById("jobLog");
|
||||||
|
if (!s.running && !s.lastFinished) {
|
||||||
|
jobStatus.textContent = "";
|
||||||
|
jobLog.hidden = true;
|
||||||
|
stopPolling();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
jobLog.hidden = false;
|
||||||
|
jobLog.textContent = (s.log || []).join("\n");
|
||||||
|
jobLog.scrollTop = jobLog.scrollHeight;
|
||||||
|
if (s.running) {
|
||||||
|
jobStatus.textContent = "Running: " + (s.label || "");
|
||||||
|
} else {
|
||||||
|
jobStatus.textContent = "Done: " + (s.label || "");
|
||||||
|
stopPolling();
|
||||||
|
load();
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
/* keep polling silently */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function startPolling() {
|
||||||
|
if (jobPollHandle) return;
|
||||||
|
jobPollHandle = setInterval(pollJob, 1000);
|
||||||
|
pollJob();
|
||||||
|
}
|
||||||
|
|
||||||
|
function stopPolling() {
|
||||||
|
if (jobPollHandle) clearInterval(jobPollHandle);
|
||||||
|
jobPollHandle = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function startBuild() {
|
||||||
|
const libs = selectedLibraryIds();
|
||||||
|
if (libs.length === 0) {
|
||||||
|
setStatus("Pick at least one library");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!confirm("Match every series in " + libs.length + " library(ies)?")) return;
|
||||||
|
setStatus("Build started");
|
||||||
|
try {
|
||||||
|
const r = await fetch("/api/build", {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
body: JSON.stringify({ libraryIds: libs }),
|
||||||
|
});
|
||||||
|
if (!r.ok) throw new Error(await r.text());
|
||||||
|
startPolling();
|
||||||
|
} catch (err) {
|
||||||
|
setStatus("Build failed: " + err.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function startUpdateAll() {
|
||||||
|
const libs = selectedLibraryIds();
|
||||||
|
if (libs.length === 0) {
|
||||||
|
if (!confirm("No libraries selected — update every cached series?")) return;
|
||||||
|
} else if (!confirm("Update every cached series in " + libs.length + " library(ies)?")) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
setStatus("Update-all started");
|
||||||
|
try {
|
||||||
|
const r = await fetch("/api/update-all", {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
body: JSON.stringify({ libraryIds: libs.length ? libs : null }),
|
||||||
|
});
|
||||||
|
if (!r.ok) throw new Error(await r.text());
|
||||||
|
startPolling();
|
||||||
|
} catch (err) {
|
||||||
|
setStatus("Update-all failed: " + err.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
document.getElementById("filter").addEventListener("input", applyFilter);
|
||||||
|
document.getElementById("libraries").addEventListener("change", applyFilter);
|
||||||
|
document.getElementById("reload").addEventListener("click", load);
|
||||||
|
document.getElementById("batchSave").addEventListener("click", batchSave);
|
||||||
|
document.getElementById("build").addEventListener("click", startBuild);
|
||||||
|
document.getElementById("updateAll").addEventListener("click", startUpdateAll);
|
||||||
|
for (const th of document.querySelectorAll("th.sortable")) {
|
||||||
|
th.addEventListener("click", () => {
|
||||||
|
const col = th.dataset.col;
|
||||||
|
if (currentSort.col === col) currentSort.asc = !currentSort.asc;
|
||||||
|
else { currentSort.col = col; currentSort.asc = true; }
|
||||||
|
render();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
(async () => {
  await loadLibraries();
  await load();
  // Resume polling if there's a job running from a previous session.
  // startPolling() installs the 1 s interval and fires the first poll;
  // a bare pollJob() would render "Running" once and never refresh it.
  // pollJob() cancels the interval itself when there is no job or the
  // job has already finished.
  startPolling();
})();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class _JobState:
    """Thread-safe container for the current background job's progress.

    At most one job runs at a time. The job callable receives this object
    as its first argument and reports progress through ``append``; readers
    get a consistent view through ``snapshot``.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._running = False
        self._label = ""
        self._log: list[str] = []
        self._last_finished_at = 0
        self._thread: "threading.Thread | None" = None

    def start(self, label: str, target, *args, **kwargs) -> bool:
        """Run ``target(self, *args, **kwargs)`` on a daemon thread.

        Args:
            label: Human-readable job name shown in the status output.
            target: Callable invoked with this ``_JobState`` followed by
                ``args`` / ``kwargs``.

        Returns:
            ``True`` if the job was started, ``False`` if another job is
            still running (nothing is changed in that case).

        Raises:
            Whatever ``threading.Thread.start`` raises; the running flag
            is cleared first so later jobs are not blocked.
        """
        with self._lock:
            if self._running:
                return False
            self._running = True
            self._label = label
            self._log = [f"[{time.strftime('%H:%M:%S')}] {label} started"]

        def runner():
            try:
                target(self, *args, **kwargs)
            except Exception as exc:
                self.append(f"FATAL: {exc}")
            finally:
                # Record the final line *before* clearing the running flag,
                # in one critical section: a poller that sees
                # running == False stops polling and must already have it.
                with self._lock:
                    self._append_unlocked(
                        f"[{time.strftime('%H:%M:%S')}] finished")
                    self._last_finished_at = int(time.time())
                    self._running = False

        worker = threading.Thread(target=runner,
                                  name=f"job:{label}",
                                  daemon=True)
        self._thread = worker
        try:
            worker.start()
        except Exception:
            # The thread never ran, so runner's ``finally`` cannot reset
            # the flag; do it here or every later start() returns False.
            with self._lock:
                self._running = False
            raise
        return True

    def _append_unlocked(self, line: str) -> None:
        """Append ``line`` and trim the log; caller must hold the lock."""
        self._log.append(line)
        # Cap log length so the response stays bounded.
        if len(self._log) > 1000:
            self._log = self._log[-800:]

    def append(self, line: str) -> None:
        """Add one line to the job log (oldest lines are dropped past 1000)."""
        with self._lock:
            self._append_unlocked(line)

    def snapshot(self) -> dict:
        """Return a copy of the current state.

        Keys: ``running`` (bool), ``label`` (str), ``log`` (list of str)
        and ``lastFinished`` (Unix seconds, 0 if no job has finished).
        """
        with self._lock:
            return {
                "running": self._running,
                "label": self._label,
                "log": list(self._log),
                "lastFinished": self._last_finished_at,
            }
|
||||||
|
|
||||||
|
|
||||||
|
class MatchesWebApp:
|
||||||
|
def __init__(self, cache: MatchesCache, *,
             orchestrator=None,
             default_library_ids: "list[int] | None" = None,
             host: str = "0.0.0.0",
             port: int = 8080):
    """Wire up the Flask app around a match cache.

    Args:
        cache: Match cache served and edited through the JSON routes.
        orchestrator: Optional backend used by the routes for library
            listing and MangaBaka lookups; routes check for ``None``.
        default_library_ids: Library ids reported as defaults by
            ``/api/libraries``; copied, so later changes to the caller's
            list have no effect.
        host: Interface the server binds to.
        port: TCP port the server listens on.
    """
    # Network settings.
    self._host = host
    self._port = port

    # Collaborators.
    self._cache = cache
    self._orchestrator = orchestrator
    self._defaults = list(default_library_ids) if default_library_ids else []

    # Runtime state: background job tracker, Flask app, server thread.
    self._job = _JobState()
    self._app = Flask(__name__)
    self._thread: "threading.Thread | None" = None

    # Routes are registered last so every attribute above already exists.
    self._register_routes()
|
||||||
|
|
||||||
|
@property
def app(self) -> Flask:
    """The Flask application object created in ``__init__``."""
    return self._app
|
||||||
|
|
||||||
|
def start(self) -> threading.Thread:
    """Start the Flask server on a background thread.

    If a server thread already exists and is alive it is returned as-is.
    Otherwise a new non-daemon thread running ``self._app.run`` (debug
    and reloader off, threaded request handling on) is created, started,
    announced on stdout and returned.
    """
    existing = self._thread
    if existing is not None and existing.is_alive():
        return existing

    run_options = {
        "host": self._host,
        "port": self._port,
        "debug": False,
        "use_reloader": False,
        "threaded": True,
    }
    server = threading.Thread(
        target=self._app.run,
        kwargs=run_options,
        name="MatchesWebApp",
        daemon=False,
    )
    self._thread = server
    server.start()
    print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
          flush=True)
    return self._thread
|
||||||
|
|
||||||
|
def wait(self) -> None:
    """Block until the server thread exits; no-op if it was never started."""
    server = self._thread
    if server is None:
        return
    server.join()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Routes
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def _register_routes(self) -> None:
    """
    Registers every HTTP route of the matches web UI on the Flask app.

    Routes
    ------
    GET  /                   : the single-page UI (``_INDEX_HTML``).
    GET  /api/libraries      : libraries from the orchestrator + defaults.
    GET  /api/matches        : cached matches, optionally filtered by the
                               ``libraryIds`` query parameter.
    POST /api/matches        : create/replace the match for a title.
    POST /api/matches/delete : remove the match for a title.
    POST /api/build          : start a background "build matches" job.
    POST /api/update         : update a single Kavita series synchronously.
    POST /api/update-all     : start a background "update all" job.
    GET  /api/status         : snapshot of the current job + defaults.

    Routes that need the orchestrator answer 503 when none is configured;
    /api/libraries and POST /api/matches degrade gracefully instead.
    """
    app = self._app
    cache = self._cache

    def json_body() -> dict:
        """Returns the request's JSON object, or {} for anything else.

        A syntactically valid but non-object body (e.g. ``[1, 2]``) would
        otherwise reach ``.get()`` and surface as an unhandled 500.
        """
        data = request.get_json(silent=True)
        return data if isinstance(data, dict) else {}

    @app.get("/")
    def index() -> Response:
        # Pass the full header through content_type: a text/* ``mimetype``
        # gets "; charset=utf-8" appended by Werkzeug, which would
        # duplicate the charset parameter.
        return Response(_INDEX_HTML, content_type="text/html; charset=utf-8")

    @app.get("/api/libraries")
    def api_libraries():
        if self._orchestrator is None:
            # Same object shape as the regular answer so clients never
            # have to special-case the "no orchestrator" mode.
            return jsonify({"libraries": [], "defaults": self._defaults})
        try:
            libs = self._orchestrator.list_libraries()
        except Exception as exc:
            return Response(f"libraries failed: {exc}", status=502)
        return jsonify({"libraries": libs, "defaults": self._defaults})

    @app.get("/api/matches")
    def api_list():
        raw = request.args.get("libraryIds") or ""
        lib_ids = _int_list(raw.split(","))
        if lib_ids:
            return jsonify(cache.all_in_libraries(lib_ids))
        return jsonify(cache.all())

    @app.post("/api/matches")
    def api_upsert():
        body = json_body()
        title = (body.get("title") or "").strip()
        if not title:
            return Response("title is required", status=400)
        new_id_raw = body.get("mangabakaId")
        new_id = str(new_id_raw).strip() if new_id_raw is not None else ""
        if not new_id:
            return Response("mangabakaId is required", status=400)

        # Without an orchestrator the id cannot be resolved; the entry is
        # then stored with name/image left as None.
        new_name: "str | None" = None
        new_image: "str | None" = None
        if self._orchestrator is not None:
            try:
                series = self._orchestrator.fetch_series(new_id)
            except Exception as exc:
                return Response(f"resolve failed: {exc}", status=502)
            if not series:
                return Response(
                    f"MangaBaka has no series with id {new_id}",
                    status=404)
            new_name = series.get("title") or ""
            new_image = pick_thumbnail_url(series.get("cover")) or ""

        entry = cache.upsert(
            title,
            mangabaka_id=new_id,
            mangabaka_name=new_name,
            image_url=new_image,
        )
        return jsonify({"title": title, "entry": entry})

    @app.post("/api/matches/delete")
    def api_delete():
        body = json_body()
        title = (body.get("title") or "").strip()
        if not title:
            return Response("title is required", status=400)
        removed = cache.remove(title)
        return jsonify({"removed": removed, "title": title})

    @app.post("/api/build")
    def api_build():
        if self._orchestrator is None:
            return Response("no orchestrator configured", status=503)
        body = json_body()
        library_ids = _int_list(body.get("libraryIds"))
        if not library_ids:
            return Response("libraryIds required", status=400)

        label = f"match libraries {library_ids}"

        def task(job: _JobState, lib_ids):
            stats = self._orchestrator.build_matches(lib_ids)
            job.append(f"matched={stats.get('matched')} "
                       f"skipped={stats.get('skipped')} "
                       f"missing={stats.get('missing')} "
                       f"checked={stats.get('checked')}")

        # start() returning a falsy value is reported as "job already running".
        if not self._job.start(label, task, library_ids):
            return Response("a job is already running", status=409)
        return jsonify({"started": label})

    @app.post("/api/update")
    def api_update():
        if self._orchestrator is None:
            return Response("no orchestrator configured", status=503)
        body = json_body()
        ksid = body.get("kavitaSeriesId")
        try:
            ksid_int = int(ksid)
        except (TypeError, ValueError):
            return Response("kavitaSeriesId required", status=400)
        try:
            res = self._orchestrator.update_series(ksid_int)
        except Exception as exc:
            return Response(f"update failed: {exc}", status=500)
        return jsonify(res)

    @app.post("/api/update-all")
    def api_update_all():
        if self._orchestrator is None:
            return Response("no orchestrator configured", status=503)
        body = json_body()
        raw = body.get("libraryIds")
        # A missing key means "every library"; a present key is parsed.
        library_ids = None if raw is None else _int_list(raw)

        label = ("update all (every library)" if library_ids is None
                 else f"update all in libraries {library_ids}")

        def task(job: _JobState, lib_ids):
            summary = self._orchestrator.update_all(lib_ids)
            job.append(f"ok={summary.get('ok')} failed={summary.get('failed')}")
            for res in summary.get("results", []):
                title = res.get("title", "?")
                if res.get("ok"):
                    # Log only the series fields reported as "changed".
                    sr = res.get("series") or {}
                    flags = [k for k, v in sr.items() if v == "changed"]
                    job.append(
                        f"  {title}: changed=[{', '.join(flags) or '-'}]")
                else:
                    job.append(f"  {title}: FAIL {res.get('error')}")

        if not self._job.start(label, task, library_ids):
            return Response("a job is already running", status=409)
        return jsonify({"started": label})

    @app.get("/api/status")
    def api_status():
        snap = self._job.snapshot()
        snap["defaults"] = self._defaults
        return jsonify(snap)
|
||||||
@@ -0,0 +1,174 @@
|
|||||||
|
"""
|
||||||
|
relationship_sync.py
|
||||||
|
====================
|
||||||
|
|
||||||
|
Mirrors MangaBaka's ``relationships_v2`` graph into Kavita:
|
||||||
|
|
||||||
|
1. Every related MangaBaka series that is *also* present in Kavita
|
||||||
|
(resolved via MatchesCache) is added to a shared Kavita collection
|
||||||
|
so the whole franchise can be browsed in one place.
|
||||||
|
2. Series-level relationships (prequel / sequel / spin-off / …) are
|
||||||
|
written via ``POST /api/Series/update-related`` so navigating
|
||||||
|
between entries surfaces the right neighbours.
|
||||||
|
|
||||||
|
Only relationships where both endpoints exist in Kavita are written.
|
||||||
|
Relationships pointing to series that have not been imported yet are
|
||||||
|
silently skipped (the next match run picks them up).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from KavitaClient import KavitaClient
|
||||||
|
from MatchesCache import MatchesCache
|
||||||
|
|
||||||
|
|
||||||
|
# MangaBaka relation_type -> Kavita UpdateRelatedSeriesDto bucket
# Keys are lowercase because RelationshipSync.sync() lowercases the incoming
# relation_type before the lookup; a type missing from this table lands in
# the "others" bucket there.
_RELATION_MAP = {
    "prequel": "prequels",
    "sequel": "sequels",
    "side_story": "sideStories",
    "spin_off": "spinOffs",
    "spinoff": "spinOffs",
    "alternative_version": "alternativeVersions",
    "alternative_story": "alternativeVersions",
    "alternative_setting": "alternativeSettings",
    "adapted_from": "adaptations",
    "adaptation": "adaptations",
    "doujinshi": "doujinshis",
    "parent": "contains",  # the parent "contains" the child
}

# Every bucket key written into the update-related payload. sync() sends all
# of them on each call, with an empty list for buckets that have no entries.
_ALL_BUCKETS = (
    "adaptations", "characters", "contains", "others",
    "prequels", "sequels", "sideStories", "spinOffs",
    "alternativeSettings", "alternativeVersions", "doujinshis",
    "editions", "annuals",
)
|
||||||
|
|
||||||
|
|
||||||
|
class RelationshipSync:
    """
    Writes MangaBaka relationships for one Kavita series into Kavita, both
    as series relations and as a shared collection.
    """

    def __init__(self, client: KavitaClient, cache: MatchesCache, *,
                 builder=None):
        """
        Parameters
        ----------
        client  : KavitaClient for collection / relation writes.
        cache   : MatchesCache to resolve mangabakaId -> kavitaSeriesId.
        builder : optional LightNovelMetadataBuilder used to fetch parent
                  series titles when picking the collection name.
        """
        self._client = client
        self._cache = cache
        self._builder = builder

    # ------------------------------------------------------------------
    # Public
    # ------------------------------------------------------------------
    def sync(self, kavita_series_id: int, built: dict) -> dict:
        """
        Applies the relationship and collection links described by
        `built["relationships"]` (raw MangaBaka relationships_v2 list)
        for the given Kavita series.

        Returns a report dict with three keys:

        relations      : bucket -> list of related Kavita series ids that
                         were written, or ``{"error": "..."}`` when the
                         write failed; ``{}`` when nothing was written.
        collection     : the collection name on success, ``"error: ..."``
                         on failure, ``None`` when no collection was
                         written.
        missing_series : MangaBaka ids of related series that could not be
                         resolved to a Kavita series (ints when numeric,
                         otherwise the raw value).

        Write failures are recorded in the report, never raised.
        """
        report: dict = {"relations": {}, "collection": None,
                        "missing_series": []}

        relationships = built.get("relationships") or []
        if not relationships:
            return report

        current_id = int(kavita_series_id)

        # Resolve mangabakaId -> kavitaSeriesId for every related entry.
        related: dict[str, list[int]] = {b: [] for b in _ALL_BUCKETS}
        all_kavita_ids: set[int] = set()
        for rel in relationships:
            mb_id = rel.get("to_series_id")
            if mb_id is None:
                continue

            ksid = 0
            hit = self._cache.get_by_mangabaka_id(mb_id)
            if hit:
                _title, entry = hit
                ksid = self._to_int(entry.get("kavitaSeriesId") or 0, 0)
            if not ksid:
                # A non-numeric id must not abort the whole sync; report
                # it as-is instead of forcing it through int().
                report["missing_series"].append(self._to_int(mb_id, mb_id))
                continue
            if ksid == current_id:
                # Never relate a series to itself.
                continue

            relation_type = str(rel.get("relation_type") or "").lower()
            bucket = _RELATION_MAP.get(relation_type, "others")
            if ksid not in related[bucket]:
                related[bucket].append(ksid)
            all_kavita_ids.add(ksid)

        # ----- Relationships ------------------------------------------
        if any(related.values()):
            payload = {"seriesId": current_id}
            for bucket in _ALL_BUCKETS:
                payload[bucket] = related[bucket]
            try:
                self._client.update_related(payload)
                report["relations"] = {k: v for k, v in related.items() if v}
            except Exception as exc:
                report["relations"] = {"error": str(exc)}

        # ----- Collection ---------------------------------------------
        # Include the current series in the collection so it shows up too.
        all_kavita_ids.add(current_id)
        if len(all_kavita_ids) >= 2:
            collection_name = self._collection_name(built, relationships)
            # Without any usable title there is nothing sensible to name
            # the collection, so no write is attempted.
            if collection_name:
                collection_id = self._find_collection_id(collection_name)
                try:
                    self._client.add_series_to_collection(
                        collection_id=collection_id,
                        title=collection_name,
                        series_ids=sorted(all_kavita_ids),
                    )
                    report["collection"] = collection_name
                except Exception as exc:
                    report["collection"] = f"error: {exc}"

        return report

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------
    @staticmethod
    def _to_int(value, default=None):
        """Returns int(value), or `default` when value is not convertible."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    def _find_collection_id(self, name: str) -> int:
        """Returns the id of an existing collection by title, or 0 to create."""
        if not name:
            return 0
        target = name.strip().lower()
        try:
            for col in self._client.list_collections():
                if (col.get("title") or "").strip().lower() == target:
                    return self._to_int(col.get("id") or 0, 0)
        except Exception:
            # Best effort: when the listing fails the caller proceeds with
            # id 0 rather than failing the sync.
            pass
        return 0

    def _collection_name(self, built: dict,
                         relationships: list[dict]) -> str:
        """
        Picks the collection name. Uses the parent series title from
        MangaBaka if the current series has one; otherwise falls back to
        the current series' own title (``built["mangabakaTitle"]``), or
        an empty string when that is missing too.
        """
        for rel in relationships:
            if (rel.get("relation_type") or "").lower() != "parent":
                continue
            parent_id = rel.get("to_series_id")
            if parent_id is None:
                # Nothing to look up for a parent relation without an id.
                continue
            if self._builder is not None:
                try:
                    parent_md = self._builder.fetch_series(parent_id)
                    if parent_md and parent_md.get("title"):
                        return parent_md["title"]
                except Exception:
                    # Fall through to the cache lookup below.
                    pass
            # Even without a builder, the cache may know the parent.
            hit = self._cache.get_by_mangabaka_id(parent_id)
            if hit:
                _title, entry = hit
                name = entry.get("mangabakaName")
                if name:
                    return name
        return built.get("mangabakaTitle") or ""
|
||||||
@@ -37,18 +37,27 @@ Data source notes
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import difflib
|
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
from contextlib import contextmanager
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
# Shared modules live one level up (src/); needed when a module in this
|
||||||
|
# folder is run directly as a script (the entry points set the path).
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
||||||
|
|
||||||
from MangadexVolumeResolver import MangaDexVolumeResolver
|
from MangadexVolumeResolver import MangaDexVolumeResolver
|
||||||
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
from MangaBakaWorksResolver import MangaBakaWorksResolver, _pick_image_url
|
||||||
from MALResolver import MALResolver
|
from MALResolver import MALResolver
|
||||||
from AniListResolver import AniListResolver
|
from AniListResolver import AniListResolver
|
||||||
from MatchesCache import MatchesCache
|
from MatchesCache import MatchesCache
|
||||||
|
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||||
|
from CoverCache import CoverCache, _IMAGE_EXTS
|
||||||
|
from TextUtils import person_name_with_id
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
@@ -57,10 +66,20 @@ except ImportError:
|
|||||||
_HAS_PIL = False
|
_HAS_PIL = False
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def _no_measure():
    """Context manager that does nothing; used when no perf recorder is attached."""
    yield None
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Constants
|
# Constants
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
|
# Series types accepted by the MangaBaka search endpoint. Light/web novels
|
||||||
|
# are filtered out because this pipeline only handles image-based manga.
|
||||||
|
# Passed to `requests` as a list so each value becomes its own `&type=...`
|
||||||
|
# query parameter (MangaBaka's API expects repeated keys, not a CSV list).
|
||||||
|
_SEARCH_TYPES = ["manga", "manhwa", "manhua"]
|
||||||
|
|
||||||
_AGE_RATING_MAP = {
|
_AGE_RATING_MAP = {
|
||||||
"safe": "Everyone",
|
"safe": "Everyone",
|
||||||
@@ -172,7 +191,8 @@ class ComicInfoBuilder:
|
|||||||
works_resolver: "MangaBakaWorksResolver | None" = None,
|
works_resolver: "MangaBakaWorksResolver | None" = None,
|
||||||
mal_resolver: "MALResolver | None" = None,
|
mal_resolver: "MALResolver | None" = None,
|
||||||
al_resolver: "AniListResolver | None" = None,
|
al_resolver: "AniListResolver | None" = None,
|
||||||
matches_cache: "MatchesCache | None" = None):
|
matches_cache: "MatchesCache | None" = None,
|
||||||
|
cover_cache: "CoverCache | None" = None):
|
||||||
if not manga_title or not str(manga_title).strip():
|
if not manga_title or not str(manga_title).strip():
|
||||||
raise ValueError("manga_title must not be empty.")
|
raise ValueError("manga_title must not be empty.")
|
||||||
|
|
||||||
@@ -184,6 +204,9 @@ class ComicInfoBuilder:
|
|||||||
self.request_timeout = request_timeout
|
self.request_timeout = request_timeout
|
||||||
self._session = session or requests.Session()
|
self._session = session or requests.Session()
|
||||||
self._session.headers.setdefault("User-Agent", "ComicInfoBuilder/1.0")
|
self._session.headers.setdefault("User-Agent", "ComicInfoBuilder/1.0")
|
||||||
|
# Throttle every call to api.mangabaka.dev (idempotent — safe even
|
||||||
|
# when the session was already prepared by a parent class).
|
||||||
|
_apply_mangabaka_rate_limit(self._session)
|
||||||
|
|
||||||
self._volume_resolver = (volume_resolver
|
self._volume_resolver = (volume_resolver
|
||||||
or MangaDexVolumeResolver(
|
or MangaDexVolumeResolver(
|
||||||
@@ -200,6 +223,13 @@ class ComicInfoBuilder:
|
|||||||
self._al_resolver = al_resolver or AniListResolver(
|
self._al_resolver = al_resolver or AniListResolver(
|
||||||
request_timeout=request_timeout)
|
request_timeout=request_timeout)
|
||||||
self._matches_cache = matches_cache
|
self._matches_cache = matches_cache
|
||||||
|
self._cover_cache = cover_cache or _default_cover_cache()
|
||||||
|
|
||||||
|
# Optional performance recorder (duck-typed: any object with a
|
||||||
|
# .measure(name) context manager). The mover sets this per chapter;
|
||||||
|
# when None, _measure() is a no-op so the builder stays decoupled
|
||||||
|
# from PerfStats and works standalone (e.g. the cover updater).
|
||||||
|
self.perf = None
|
||||||
|
|
||||||
self._metadata: "dict | None" = None
|
self._metadata: "dict | None" = None
|
||||||
self._pages: list[dict] = []
|
self._pages: list[dict] = []
|
||||||
@@ -245,6 +275,12 @@ class ComicInfoBuilder:
|
|||||||
self._cover_path = None
|
self._cover_path = None
|
||||||
self._suwayomi_data = {}
|
self._suwayomi_data = {}
|
||||||
|
|
||||||
|
def _measure(self, name: str):
    """Returns a context manager timing `name` on ``self.perf``, or a no-op one when ``self.perf`` is None."""
    recorder = self.perf
    if recorder is None:
        return _no_measure()
    return recorder.measure(name)
|
||||||
|
|
||||||
# ======================================================================
|
# ======================================================================
|
||||||
# Public XML functions
|
# Public XML functions
|
||||||
# ======================================================================
|
# ======================================================================
|
||||||
@@ -288,10 +324,12 @@ class ComicInfoBuilder:
|
|||||||
if not folder.is_dir():
|
if not folder.is_dir():
|
||||||
raise NotADirectoryError(f"Folder not found: {folder}")
|
raise NotADirectoryError(f"Folder not found: {folder}")
|
||||||
|
|
||||||
|
with self._measure("read_comicinfo"):
|
||||||
self._suwayomi_data = self._read_existing_comicinfo(folder)
|
self._suwayomi_data = self._read_existing_comicinfo(folder)
|
||||||
|
|
||||||
self._cover_path = None
|
self._cover_path = None
|
||||||
if download_cover:
|
if download_cover:
|
||||||
|
with self._measure("cover"):
|
||||||
self._cover_path = self._download_cover(folder, cover_filename)
|
self._cover_path = self._download_cover(folder, cover_filename)
|
||||||
|
|
||||||
cover_resolved = self._cover_path.resolve() if self._cover_path else None
|
cover_resolved = self._cover_path.resolve() if self._cover_path else None
|
||||||
@@ -312,6 +350,9 @@ class ComicInfoBuilder:
|
|||||||
ordered.extend((img, "Story") for img in story_images)
|
ordered.extend((img, "Story") for img in story_images)
|
||||||
|
|
||||||
self._pages = []
|
self._pages = []
|
||||||
|
# Probing every page for its pixel dimensions reads each file — on a
|
||||||
|
# network share this is often the dominant per-chapter cost.
|
||||||
|
with self._measure("image_dimensions"):
|
||||||
for index, (img_path, page_type) in enumerate(ordered):
|
for index, (img_path, page_type) in enumerate(ordered):
|
||||||
width, height = self._image_dimensions(img_path)
|
width, height = self._image_dimensions(img_path)
|
||||||
try:
|
try:
|
||||||
@@ -378,7 +419,8 @@ class ComicInfoBuilder:
|
|||||||
|
|
||||||
url = f"{self.api_base_url}/series/search"
|
url = f"{self.api_base_url}/series/search"
|
||||||
resp = self._session.get(
|
resp = self._session.get(
|
||||||
url, params={"q": title, "page": 1, "limit": 1},
|
url, params={"q": title, "type": _SEARCH_TYPES,
|
||||||
|
"page": 1, "limit": 1},
|
||||||
timeout=self.request_timeout)
|
timeout=self.request_timeout)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json().get("data") or []
|
data = resp.json().get("data") or []
|
||||||
@@ -389,7 +431,7 @@ class ComicInfoBuilder:
|
|||||||
title,
|
title,
|
||||||
mangabaka_id=series.get("id"),
|
mangabaka_id=series.get("id"),
|
||||||
mangabaka_name=series.get("title") or "",
|
mangabaka_name=series.get("title") or "",
|
||||||
image_url=_pick_cover_url(series.get("cover")),
|
image_url=_pick_thumbnail_url(series.get("cover")),
|
||||||
)
|
)
|
||||||
|
|
||||||
return series
|
return series
|
||||||
@@ -428,8 +470,7 @@ class ComicInfoBuilder:
|
|||||||
# ----- Title / Series -----------------------------------------------
|
# ----- Title / Series -----------------------------------------------
|
||||||
add("Title", sd.get("Title") or f"Chapter {self._chapter}")
|
add("Title", sd.get("Title") or f"Chapter {self._chapter}")
|
||||||
add("Series", md.get("title") or self._manga_title)
|
add("Series", md.get("title") or self._manga_title)
|
||||||
add("LocalizedSeries",
|
add("LocalizedSeries", self._romanized_for_native(md))
|
||||||
md.get("native_title") or md.get("romanized_title"))
|
|
||||||
add("SeriesSort", self._get_sort_title(md))
|
add("SeriesSort", self._get_sort_title(md))
|
||||||
add("Number", sd.get("Number") or self._chapter)
|
add("Number", sd.get("Number") or self._chapter)
|
||||||
add("Count", md.get("total_chapters"))
|
add("Count", md.get("total_chapters"))
|
||||||
@@ -473,9 +514,19 @@ class ComicInfoBuilder:
|
|||||||
add("Tags", ", ".join(_format_term(t) for t in (md.get("tags") or [])))
|
add("Tags", ", ".join(_format_term(t) for t in (md.get("tags") or [])))
|
||||||
|
|
||||||
# ----- Characters — MAL first, AniList fallback ---------------------
|
# ----- Characters — MAL first, AniList fallback ---------------------
|
||||||
characters = self._mal_resolver.get_characters(mal_id)
|
# Names are disambiguated with the tracker *character* id
|
||||||
if not characters and al_id:
|
# ("Rem (MAL 118737)") so same-named characters from different
|
||||||
characters = self._al_resolver.get_characters(al_id)
|
# series stay separate Kavita person records. The format is shared
|
||||||
|
# with the light-novel updater — see TextUtils.person_name_with_id.
|
||||||
|
char_entries = self._mal_resolver.get_characters_detailed(mal_id)
|
||||||
|
if not char_entries and al_id:
|
||||||
|
char_entries = self._al_resolver.get_characters_detailed(al_id)
|
||||||
|
characters = [
|
||||||
|
person_name_with_id(e.get("name"),
|
||||||
|
mal_id=e.get("mal_id"),
|
||||||
|
al_id=e.get("al_id"))
|
||||||
|
for e in char_entries if (e.get("name") or "").strip()
|
||||||
|
]
|
||||||
add("Characters", ", ".join(characters) if characters else None)
|
add("Characters", ", ".join(characters) if characters else None)
|
||||||
|
|
||||||
# ----- Web links ----------------------------------------------------
|
# ----- Web links ----------------------------------------------------
|
||||||
@@ -570,11 +621,13 @@ class ComicInfoBuilder:
|
|||||||
# ======================================================================
|
# ======================================================================
|
||||||
def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None":
|
def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None":
|
||||||
"""
|
"""
|
||||||
Downloads the cover for the current chapter/volume.
|
Fetches the cover for the current chapter/volume and writes it into
|
||||||
|
`folder`.
|
||||||
|
|
||||||
If a volume is known and a volume-specific cover exists in MangaBaka
|
If a volume is known and a volume-specific cover exists in MangaBaka,
|
||||||
works, that cover is used. Otherwise the series default cover is
|
that cover is used; otherwise the series default cover. The image
|
||||||
downloaded (raw variant preferred).
|
itself comes from the CoverCache, so a cover shared by many chapters
|
||||||
|
is downloaded only once.
|
||||||
"""
|
"""
|
||||||
md = self._get_metadata()
|
md = self._get_metadata()
|
||||||
volume = self._determine_volume()
|
volume = self._determine_volume()
|
||||||
@@ -592,18 +645,13 @@ class ComicInfoBuilder:
|
|||||||
if not cover_url:
|
if not cover_url:
|
||||||
cover_url = _pick_cover_url(md.get("cover"))
|
cover_url = _pick_cover_url(md.get("cover"))
|
||||||
|
|
||||||
if not cover_url:
|
fetched = self._cover_cache.get(cover_url) if cover_url else None
|
||||||
|
if not fetched:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
data, ext = fetched
|
||||||
resp = self._session.get(cover_url, timeout=self.request_timeout)
|
|
||||||
resp.raise_for_status()
|
|
||||||
except requests.RequestException:
|
|
||||||
return None
|
|
||||||
|
|
||||||
ext = _guess_extension(cover_url, resp.headers.get("Content-Type", ""))
|
|
||||||
target = folder / f"{cover_filename}{ext}"
|
target = folder / f"{cover_filename}{ext}"
|
||||||
target.write_bytes(resp.content)
|
target.write_bytes(data)
|
||||||
return target
|
return target
|
||||||
|
|
||||||
# ======================================================================
|
# ======================================================================
|
||||||
@@ -637,6 +685,82 @@ class ComicInfoBuilder:
|
|||||||
# ======================================================================
|
# ======================================================================
|
||||||
# Title helpers
|
# Title helpers
|
||||||
# ======================================================================
|
# ======================================================================
|
||||||
|
# Mapping from series type to the matching romanized language code(s)
# in the MangaBaka titles array. Used to pick the correct romaji /
# romaja / pinyin for LocalizedSeries.
# Both the keys and the codes are lowercase: _romanized_for_native()
# lowercases the series ``type`` before the lookup and _pick_best_title()
# lowercases each entry's language tag before comparing.
_ROMANIZED_LANG_BY_TYPE = {
    "manga": ("ja-latn", "ja-romaji"),
    "manhwa": ("ko-latn", "ko-romaji"),
    "manhua": ("zh-latn",),
}
|
||||||
|
|
||||||
|
@staticmethod
def _pick_best_title(titles, language_codes: tuple,
                     prefer_trait: "str | None" = None) -> "str | None":
    """
    Returns the best-ranked title from a MangaBaka `titles` list among
    the entries whose language tag is one of `language_codes`.

    Ranking points: +4 when the entry carries `prefer_trait`, +2 for the
    "official" trait, +1 when ``is_primary`` is set. On equal points the
    earlier entry is kept. Entries that are not dicts or have no title
    are ignored; the result is None when nothing qualifies or `titles`
    is not a list.
    """
    if not isinstance(titles, list):
        return None

    def points_for(candidate: dict) -> int:
        traits = candidate.get("traits") or []
        points = 0
        if prefer_trait and prefer_trait in traits:
            points += 4
        if "official" in traits:
            points += 2
        if candidate.get("is_primary"):
            points += 1
        return points

    winner: "str | None" = None
    top_points = -1
    for candidate in titles:
        if not isinstance(candidate, dict):
            continue
        # The tag may live under either key; compare case-insensitively.
        tag = (candidate.get("language") or candidate.get("lang") or "").lower()
        if tag not in language_codes:
            continue
        text = candidate.get("title")
        if not text:
            continue
        points = points_for(candidate)
        # Strict comparison keeps the first entry on ties.
        if points > top_points:
            top_points = points
            winner = text
    return winner
|
||||||
|
|
||||||
|
@classmethod
def _romanized_for_native(cls, md: dict) -> "str | None":
    """
    Returns the romanized title matching the series' original language,
    taken from the ``titles`` array (``alt_titles`` when that is empty).

    The original language follows from ``type``::

        manga  -> ja-Latn (Japanese romaji)
        manhwa -> ko-Latn (Korean romaja)
        manhua -> zh-Latn (Chinese pinyin)

    When several entries match, ``_pick_best_title`` decides (``official``
    trait > ``is_primary`` > first seen).

    The root-level ``romanized_title`` field is intentionally **not**
    consulted as a fallback: MangaBaka often stores another language's
    romanization there (e.g. Korean romaja on a Japanese manga), which is
    precisely the mix-up this function avoids.

    Returns ``None`` for an unknown type or when no romanized title
    exists for the inferred language.
    """
    series_type = (md.get("type") or "").lower()
    codes = cls._ROMANIZED_LANG_BY_TYPE.get(series_type)
    if codes:
        candidates = md.get("titles") or md.get("alt_titles") or []
        return cls._pick_best_title(candidates, codes)
    return None
|
||||||
|
|
||||||
def _get_sort_title(self, md: dict) -> "str | None":
|
def _get_sort_title(self, md: dict) -> "str | None":
|
||||||
"""
|
"""
|
||||||
Returns the SeriesSort title in the configured language.
|
Returns the SeriesSort title in the configured language.
|
||||||
@@ -671,31 +795,7 @@ class ComicInfoBuilder:
|
|||||||
|
|
||||||
def pick(language_codes: tuple, prefer_trait: "str | None" = None
|
def pick(language_codes: tuple, prefer_trait: "str | None" = None
|
||||||
) -> "str | None":
|
) -> "str | None":
|
||||||
"""Picks the best title entry for any of the given language codes."""
|
return self._pick_best_title(titles, language_codes, prefer_trait)
|
||||||
if not isinstance(titles, list):
|
|
||||||
return None
|
|
||||||
best_score = -1
|
|
||||||
best_title: "str | None" = None
|
|
||||||
for entry in titles:
|
|
||||||
if not isinstance(entry, dict):
|
|
||||||
continue
|
|
||||||
lang = (entry.get("language") or entry.get("lang") or "").lower()
|
|
||||||
if lang not in language_codes:
|
|
||||||
continue
|
|
||||||
title = entry.get("title")
|
|
||||||
if not title:
|
|
||||||
continue
|
|
||||||
traits = entry.get("traits") or []
|
|
||||||
score = 0
|
|
||||||
if prefer_trait and prefer_trait in traits:
|
|
||||||
score += 4
|
|
||||||
if "official" in traits:
|
|
||||||
score += 2
|
|
||||||
if entry.get("is_primary"):
|
|
||||||
score += 1
|
|
||||||
if score > best_score:
|
|
||||||
best_score, best_title = score, title
|
|
||||||
return best_title
|
|
||||||
|
|
||||||
result: dict[str, str] = {}
|
result: dict[str, str] = {}
|
||||||
|
|
||||||
@@ -956,12 +1056,14 @@ class ComicInfoBuilder:
|
|||||||
return unique
|
return unique
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _read_existing_comicinfo(folder: Path) -> dict:
|
def read_comicinfo_fields(xml_source) -> dict:
|
||||||
xml_path = folder / "ComicInfo.xml"
|
"""
|
||||||
if not xml_path.is_file():
|
Parses ComicInfo.xml content (bytes or str) and returns the fields
|
||||||
return {}
|
relevant as supplementary Suwayomi data. Returns {} on parse errors.
|
||||||
|
Reusable for XML read directly from a CBZ archive (no extraction).
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
root = ET.parse(xml_path).getroot()
|
root = ET.fromstring(xml_source)
|
||||||
except ET.ParseError:
|
except ET.ParseError:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@@ -975,6 +1077,16 @@ class ComicInfoBuilder:
|
|||||||
data[tag] = child.text.strip()
|
data[tag] = child.text.strip()
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
@staticmethod
def _read_existing_comicinfo(folder: Path) -> dict:
    """
    Reads ``ComicInfo.xml`` from `folder` and returns its fields as parsed
    by ``read_comicinfo_fields``. Returns {} when the file is absent or
    cannot be read.
    """
    source = folder / "ComicInfo.xml"
    if source.is_file():
        try:
            payload = source.read_bytes()
            return ComicInfoBuilder.read_comicinfo_fields(payload)
        except OSError:
            # An unreadable file is treated like a missing one.
            pass
    return {}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _image_dimensions(path: Path):
|
def _image_dimensions(path: Path):
|
||||||
if not _HAS_PIL:
|
if not _HAS_PIL:
|
||||||
@@ -987,22 +1099,37 @@ class ComicInfoBuilder:
|
|||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Module-level helpers (shared with MangaBakaWorksResolver logic)
|
# Module-level helpers
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
def _pick_cover_url(cover) -> "str | None":
|
|
||||||
|
# Alias: _pick_image_url (from MangaBakaWorksResolver) is the canonical
|
||||||
|
# generic image-block picker; _pick_cover_url is kept for backward compat.
|
||||||
|
_pick_cover_url = _pick_image_url
|
||||||
|
|
||||||
|
# Shared fallback CoverCache for builders constructed without an explicit
|
||||||
|
# one (temporary directory, removed at process exit). Created lazily so
|
||||||
|
# importing this module never touches the filesystem.
|
||||||
|
_shared_cover_cache: "CoverCache | None" = None
|
||||||
|
|
||||||
|
|
||||||
|
def _default_cover_cache() -> CoverCache:
    """
    Returns the module-wide fallback CoverCache, constructing it (with no
    arguments) on the first call and reusing it afterwards.
    """
    global _shared_cover_cache
    cache = _shared_cover_cache
    if cache is None:
        cache = CoverCache()
        _shared_cover_cache = cache
    return cache
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_thumbnail_url(cover) -> "str | None":
|
||||||
"""
|
"""
|
||||||
Selects the best cover URL from a MangaBaka cover object.
|
Picks a small cover variant suitable for a UI thumbnail.
|
||||||
|
|
||||||
Real API shape (from `GET /v1/series/{id}` and `/works`):
|
Order of preference: x150@x2 > x150@x1 > x150@x3 > x250@x2 > x250@x1 >
|
||||||
{
|
x250@x3 > x350@x2 > x350@x1 > x350@x3 > raw. x150@x2 is roughly 300px
|
||||||
"raw": {"url": "...", "size": ..., "height": ..., "width": ...},
|
wide — sharp on HiDPI displays at the ~90px thumbnail size used in
|
||||||
"x150": {"x1": "...", "x2": "...", "x3": "..."},
|
the matches table, while still being a fraction of the raw image
|
||||||
"x250": {"x1": "...", "x2": "...", "x3": "..."},
|
weight (often 50KB vs. several MB).
|
||||||
"x350": {"x1": "...", "x2": "...", "x3": "..."}
|
|
||||||
}
|
|
||||||
|
|
||||||
Order of preference: raw original > x350@x3 > x250@x3 > x150@x3
|
Falls back to `_pick_cover_url` if no thumbnail variant is available.
|
||||||
(falling through to lower densities and sizes as needed).
|
|
||||||
"""
|
"""
|
||||||
if not cover:
|
if not cover:
|
||||||
return None
|
return None
|
||||||
@@ -1011,46 +1138,17 @@ def _pick_cover_url(cover) -> "str | None":
|
|||||||
if not isinstance(cover, dict):
|
if not isinstance(cover, dict):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# 1) Preferred: the unscaled "raw" image
|
for size_key in ("x150", "x250", "x350"):
|
||||||
raw = cover.get("raw")
|
|
||||||
if isinstance(raw, dict):
|
|
||||||
url = raw.get("url")
|
|
||||||
if isinstance(url, str) and url:
|
|
||||||
return url
|
|
||||||
elif isinstance(raw, str) and raw:
|
|
||||||
return raw
|
|
||||||
|
|
||||||
# 2) Fallback: size-keyed variants, largest first, highest density first
|
|
||||||
for size_key in ("x350", "x250", "x150"):
|
|
||||||
variant = cover.get(size_key)
|
variant = cover.get(size_key)
|
||||||
if isinstance(variant, dict):
|
if isinstance(variant, dict):
|
||||||
for density in ("x3", "x2", "x1"):
|
for density in ("x2", "x1", "x3"):
|
||||||
url = variant.get(density)
|
url = variant.get(density)
|
||||||
if isinstance(url, str) and url:
|
if isinstance(url, str) and url:
|
||||||
return url
|
return url
|
||||||
elif isinstance(variant, str) and variant:
|
elif isinstance(variant, str) and variant:
|
||||||
return variant
|
return variant
|
||||||
|
|
||||||
# 3) Last-ditch fallback: any http URL anywhere in the structure
|
return _pick_cover_url(cover)
|
||||||
for val in cover.values():
|
|
||||||
if isinstance(val, str) and val.startswith("http"):
|
|
||||||
return val
|
|
||||||
if isinstance(val, dict):
|
|
||||||
for sub in val.values():
|
|
||||||
if isinstance(sub, str) and sub.startswith("http"):
|
|
||||||
return sub
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _guess_extension(url: str, content_type: str) -> str:
|
|
||||||
url_ext = Path(url.split("?")[0]).suffix.lower()
|
|
||||||
if url_ext in _IMAGE_EXTS:
|
|
||||||
return url_ext
|
|
||||||
ct = (content_type or "").lower()
|
|
||||||
if "png" in ct: return ".png"
|
|
||||||
if "webp" in ct: return ".webp"
|
|
||||||
if "gif" in ct: return ".gif"
|
|
||||||
return ".jpg"
|
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
@@ -0,0 +1,554 @@
|
|||||||
|
"""
|
||||||
|
kavita_volume_cover_updater.py
|
||||||
|
==============================
|
||||||
|
|
||||||
|
Periodically re-checks chapters already moved to the Kavita library whose
|
||||||
|
volume could not be resolved at move time (``"volume": null`` in the
|
||||||
|
series' ``chapter_index.json``).
|
||||||
|
|
||||||
|
When MangaDex has since assigned the chapter to a volume, the updater:
|
||||||
|
|
||||||
|
1. writes the volume into ``chapter_index.json``,
|
||||||
|
2. updates ``<Volume>`` inside the chapter's ComicInfo.xml (in-archive),
|
||||||
|
3. downloads the MangaBaka volume cover and swaps it in for the
|
||||||
|
placeholder ``000.<ext>`` series cover, and
|
||||||
|
4. refreshes the *first* chapter's ComicInfo.xml with full metadata —
|
||||||
|
Kavita can be configured to take series metadata from the lowest
|
||||||
|
chapter, so it must reflect the latest state.
|
||||||
|
|
||||||
|
Host-IO policy
|
||||||
|
--------------
|
||||||
|
* Per series only ``chapter_index.json`` is read (no archive is opened to
|
||||||
|
discover its contents).
|
||||||
|
* Series without null-volume chapters are skipped before any API call.
|
||||||
|
* An archive is read+rewritten exactly once per update (single pass,
|
||||||
|
written to a ``.tmp`` file, then atomically replaced).
|
||||||
|
|
||||||
|
Every updated chapter is appended to a log file (one line per update).
|
||||||
|
|
||||||
|
Reused components
|
||||||
|
-----------------
|
||||||
|
* ``SuwayomiMover`` — chapter index helpers, dirname sanitizer
|
||||||
|
* ``ComicInfoBuilder`` — metadata fetch (matches-cache ID lookup),
|
||||||
|
chapter→volume resolution, XML build
|
||||||
|
* ``MangaBakaWorksResolver`` — volume covers (/images with /works fallback)
|
||||||
|
* ``MangaDexVolumeResolver`` — chapter→volume aggregate (shared cache)
|
||||||
|
* ``MangaBakaRateLimit`` — process-wide API throttle
|
||||||
|
|
||||||
|
Dependencies
|
||||||
|
------------
|
||||||
|
requests -> pip install requests
|
||||||
|
Pillow -> pip install pillow (optional, page-0 dimensions)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import io
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
import zipfile
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Shared modules live one level up (src/); needed when a module in this
|
||||||
|
# folder is run directly as a script (the entry points set the path).
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
||||||
|
|
||||||
|
from ComicInfoBuilder import ComicInfoBuilder
|
||||||
|
from MangadexVolumeResolver import MangaDexVolumeResolver
|
||||||
|
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
||||||
|
from MALResolver import MALResolver
|
||||||
|
from AniListResolver import AniListResolver
|
||||||
|
from MatchesCache import MatchesCache
|
||||||
|
from SuwayomiMover import (_load_chapter_index, _save_chapter_index,
|
||||||
|
_sanitize_dirname, _normalise_volume_value)
|
||||||
|
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||||
|
from CronSchedule import CronSchedule
|
||||||
|
from CoverCache import CoverCache, _IMAGE_EXTS
|
||||||
|
|
||||||
|
try:
|
||||||
|
from PIL import Image
|
||||||
|
_HAS_PIL = True
|
||||||
|
except ImportError:
|
||||||
|
_HAS_PIL = False
|
||||||
|
|
||||||
|
|
||||||
|
def _now() -> str:
|
||||||
|
return datetime.now().isoformat(timespec="seconds")
|
||||||
|
|
||||||
|
|
||||||
|
def _image_dims_from_bytes(data: bytes) -> tuple:
    """
    Returns the (width, height) of an in-memory image.

    Yields (None, None) when Pillow is not installed or when the blob
    cannot be opened as an image.
    """
    unknown = (None, None)
    if _HAS_PIL:
        try:
            with Image.open(io.BytesIO(data)) as picture:
                return picture.size
        except Exception:
            # Any decode problem simply means "dimensions unknown".
            return unknown
    return unknown
|
||||||
|
|
||||||
|
|
||||||
|
def _chapter_sort_value(num: str) -> float:
|
||||||
|
try:
|
||||||
|
return float(num)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return float("inf")
|
||||||
|
|
||||||
|
|
||||||
|
def _update_page0_attrs(pages_el: "ET.Element", cover_bytes: bytes) -> None:
    """
    Rewrites the size/dimension attributes of the first child of
    *pages_el* whose ``Image`` attribute is "0".

    ``ImageSize`` is always set to the byte length of *cover_bytes*;
    ``ImageWidth`` / ``ImageHeight`` are only set when both dimensions
    could be determined. Does nothing when no such page entry exists.
    """
    front = next((page for page in pages_el if page.get("Image") == "0"), None)
    if front is None:
        return

    front.set("ImageSize", str(len(cover_bytes)))
    width, height = _image_dims_from_bytes(cover_bytes)
    if width and height:
        front.set("ImageWidth", str(width))
        front.set("ImageHeight", str(height))
|
||||||
|
|
||||||
|
|
||||||
|
def _serialize_tree(root: "ET.Element") -> str:
|
||||||
|
tree = ET.ElementTree(root)
|
||||||
|
try:
|
||||||
|
ET.indent(tree, space=" ")
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
return ('<?xml version="1.0" encoding="UTF-8"?>\n'
|
||||||
|
+ ET.tostring(root, encoding="unicode"))
|
||||||
|
|
||||||
|
|
||||||
|
class KavitaVolumeCoverUpdater:
|
||||||
|
"""
|
||||||
|
Scans the Kavita library for chapters whose volume was unknown at move
|
||||||
|
time and back-fills volume + volume cover once MangaDex / MangaBaka
|
||||||
|
provide the data. Runs periodically on a background thread.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
kavita_path : Root of the Kavita library (series folders inside).
|
||||||
|
matches_cache : MatchesCache — provides the MangaBaka series ID per
|
||||||
|
series (mandatory; folders without a match are skipped).
|
||||||
|
language : ComicInfo language (passed to ComicInfoBuilder).
|
||||||
|
request_timeout : HTTP timeout in seconds.
|
||||||
|
log_path : File that receives one line per updated chapter.
|
||||||
|
Default: <kavita_path>/volume_updater.log
|
||||||
|
schedule : Cron expression (5 fields) defining when scans run,
|
||||||
|
e.g. "0 19 * * 1,4" = 19:00 every Monday and
|
||||||
|
Thursday. Evaluated in local time — set the TZ env
|
||||||
|
var inside Docker. Default: "0 19 * * 1,4".
|
||||||
|
cover_cache_dir : Directory for the persistent cover cache. None ->
|
||||||
|
temporary cache, deleted at process exit.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
             kavita_path,
             *,
             matches_cache: MatchesCache,
             language: str = "en",
             request_timeout: int = 30,
             api_base_url: str = "https://api.mangabaka.dev/v1",
             log_path=None,
             schedule: str = "0 19 * * 1,4",
             cover_cache_dir=None):
    """
    Stores the configuration and wires up the shared HTTP session, the
    resolvers and the cover cache. No scan is started here; call
    ``start()`` or ``update_all()``.
    """
    library_root = Path(kavita_path)
    self._dst = library_root
    self._matches_cache = matches_cache
    self._language = language
    self._timeout = request_timeout
    self._api_base_url = api_base_url.rstrip("/")
    if log_path:
        self._log_path = Path(log_path)
    else:
        # Default log location lives next to the series folders.
        self._log_path = library_root / "volume_updater.log"
    self._cron = CronSchedule(schedule)

    # One session is shared by every component that accepts one below.
    http = requests.Session()
    http.headers.setdefault("User-Agent", "KavitaVolumeCoverUpdater/1.0")
    _apply_mangabaka_rate_limit(http)
    self._session = http

    self._mal = MALResolver(request_timeout=request_timeout)
    self._al = AniListResolver(request_timeout=request_timeout)
    self._vol_resolver = MangaDexVolumeResolver(
        request_timeout=request_timeout, session=http)
    self._works_resolver = MangaBakaWorksResolver(
        api_base_url=api_base_url,
        request_timeout=request_timeout, session=http)
    self._cover_cache = CoverCache(
        cover_cache_dir, session=http, request_timeout=request_timeout)

    # Background-thread state used by start() / stop() / wait().
    self._stop = threading.Event()
    self._thread: "threading.Thread | None" = None
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Cron API (mirrors SuwayomiFolderWatcher)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def start(self) -> None:
    """
    Launches the background scan thread and returns immediately.
    Calling it while a scan thread is still alive is a no-op.
    """
    worker = self._thread
    if worker is not None and worker.is_alive():
        return

    self._stop.clear()
    worker = threading.Thread(
        target=self._loop, name="KavitaVolumeCoverUpdater", daemon=True)
    self._thread = worker
    worker.start()
    print(f"[{_now()}] [updater] scanning {self._dst} "
          f"on cron '{self._cron.expression}'", flush=True)
|
||||||
|
|
||||||
|
def stop(self) -> None:
|
||||||
|
"""Stops the scan thread (current scan finishes its series first)."""
|
||||||
|
self._stop.set()
|
||||||
|
if self._thread is not None:
|
||||||
|
self._thread.join(timeout=10)
|
||||||
|
|
||||||
|
def wait(self) -> None:
|
||||||
|
"""Blocks the calling thread until stop() is invoked."""
|
||||||
|
self._stop.wait()
|
||||||
|
|
||||||
|
def _loop(self) -> None:
    """
    Thread body: sleeps until the next cron slot, runs one full scan,
    and repeats until the stop event is set. A failing scan is reported
    and does not end the loop.
    """
    while True:
        if self._stop.is_set():
            return

        next_run = self._cron.next_after(datetime.now())
        remaining = (next_run - datetime.now()).total_seconds()
        delay = max(0.0, remaining)
        print(f"[{_now()}] [updater] next scheduled scan: "
              f"{next_run.isoformat(timespec='minutes')}", flush=True)

        # Event.wait returns True when stop() fired during the sleep.
        if self._stop.wait(delay):
            return

        try:
            summary = self.update_all()
            print(f"[{_now()}] [updater] scan done: "
                  f"{summary['series_updated']} series / "
                  f"{summary['chapters_updated']} chapters updated",
                  flush=True)
        except Exception as exc:
            print(f"[{_now()}] [updater] scan ERROR: {exc}", flush=True)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Public scan API
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def update_all(self) -> dict:
    """
    Runs one pass over every directory directly below the Kavita root.

    Returns {"series_scanned": n, "series_updated": n, "chapters_updated": n}.
    A series that raises is reported and skipped; the pass stops early
    (between series) once the stop event is set.
    """
    totals = dict.fromkeys(
        ("series_scanned", "series_updated", "chapters_updated"), 0)

    if not self._dst.is_dir():
        print(f"[updater] kavita path missing: {self._dst}", flush=True)
        return totals

    # A scan exists to notice volume assignments added since the last
    # run, so both resolver caches are dropped before querying again.
    self._vol_resolver.clear_cache()
    self._works_resolver.clear_cache()

    for candidate in sorted(self._dst.iterdir()):
        if self._stop.is_set():
            break
        if not candidate.is_dir():
            continue

        totals["series_scanned"] += 1
        try:
            changed = self.update_series(candidate)
        except Exception as exc:
            print(f"[updater] {candidate.name}: ERROR {exc}", flush=True)
        else:
            if changed:
                totals["series_updated"] += 1
                totals["chapters_updated"] += changed

    return totals
|
||||||
|
|
||||||
|
def update_series(self, series_dir: Path) -> int:
    """
    Back-fills volumes for one series folder and returns how many
    chapters were updated.

    Candidates are the entries of chapter_index.json whose ``"volume"``
    is null. Series without candidates, or without a matches.json entry
    carrying a MangaBaka ID, return 0 before any archive is opened.
    """
    index = _load_chapter_index(series_dir)
    chapters: dict = index["chapter"]
    if not chapters:
        return 0

    pending = [key for key, item in chapters.items()
               if isinstance(item, dict) and item.get("volume") is None]
    if not pending:
        return 0

    match_key, match = self._find_match_for_folder(series_dir.name)
    if not (match and match.get("mangabakaId")):
        print(f"[updater] {series_dir.name}: no matches.json entry — skip",
              flush=True)
        return 0

    # One builder per series: it supplies the metadata and the
    # chapter→volume resolution used for every chapter below.
    builder = ComicInfoBuilder(
        match_key, chapter=pending[0],
        api_base_url=self._api_base_url,
        language=self._language,
        request_timeout=self._timeout,
        session=self._session,
        volume_resolver=self._vol_resolver,
        works_resolver=self._works_resolver,
        mal_resolver=self._mal,
        al_resolver=self._al,
        matches_cache=self._matches_cache,
        cover_cache=self._cover_cache,
    )
    metadata = builder.fetch_metadata()
    series_id = str(metadata.get("id") or "")

    # Phase 1: resolve volume (and its cover) for every candidate,
    # in ascending chapter order. num -> (volume, cover-or-None)
    resolved: dict[str, tuple] = {}
    for num in sorted(pending, key=_chapter_sort_value):
        builder.chapter = num
        try:
            volume = builder._determine_volume()
        except Exception:
            continue
        if volume:
            resolved[num] = (volume, self._fetch_cover(series_id, volume))

    if not resolved:
        return 0

    first = min(chapters, key=_chapter_sort_value)
    updated = 0

    # Phase 2: rewrite the archives. The lowest chapter gets a full
    # metadata rebuild, all others only a <Volume> edit.
    for num, (volume, cover) in resolved.items():
        entry = chapters[num]
        cbz = series_dir / (entry.get("archiveName") or "")
        if not entry.get("archiveName") or not cbz.is_file():
            print(f"[updater] {series_dir.name} ch.{num}: archive missing "
                  f"({entry.get('archiveName')!r}) — skip", flush=True)
            continue

        ok, cover_swapped = self._apply_update(
            cbz, builder, num,
            volume=volume, cover=cover,
            full_rebuild=(num == first))
        if not ok:
            continue

        entry["volume"] = _normalise_volume_value(volume)
        updated += 1
        self._log(f"{series_dir.name} | chapter {num} -> volume "
                  f"{volume} | cover "
                  f"{'replaced' if cover_swapped else 'kept'} | {cbz.name}")

    if not updated:
        return updated

    # The lowest chapter was not among the resolved ones, so it has not
    # been rebuilt yet: refresh its metadata now.
    if first not in resolved:
        first_entry = chapters.get(first) or {}
        cbz = series_dir / (first_entry.get("archiveName") or "")
        if first_entry.get("archiveName") and cbz.is_file():
            ok, _ = self._apply_update(
                cbz, builder, first,
                volume=None, cover=None, full_rebuild=True)
            if ok:
                self._log(f"{series_dir.name} | chapter {first} | "
                          f"first-chapter metadata refreshed | {cbz.name}")

    _save_chapter_index(series_dir, index)
    return updated
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Matching Kavita folder -> matches.json entry
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def _find_match_for_folder(self, folder_name: str) -> tuple:
    """
    Looks up the matches.json entry belonging to a Kavita series folder.

    Both sides are compared after ``_sanitize_dirname``, stripping and
    case-folding. All entries are first checked by ``mangabakaName``;
    only if none matches are they checked by ``folderTitle`` (falling
    back to the entry key). Returns (match_key, entry) or (None, None).
    """
    wanted = folder_name.strip().casefold()
    matches = self._matches_cache.all()["matches"]

    def _same(label) -> bool:
        return _sanitize_dirname(label).strip().casefold() == wanted

    # Pass 1: MangaBaka title.
    for key, entry in matches.items():
        baka_name = entry.get("mangabakaName") or ""
        if baka_name and _same(baka_name):
            return key, entry

    # Pass 2: original folder title, or the key itself.
    for key, entry in matches.items():
        if _same(entry.get("folderTitle") or key):
            return key, entry

    return None, None
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Cover download
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def _fetch_cover(self, series_id: str, volume) -> "tuple[str, bytes] | None":
|
||||||
|
"""
|
||||||
|
Fetches the MangaBaka volume cover via the CoverCache (one download
|
||||||
|
per unique URL, even across chapters sharing a volume).
|
||||||
|
Returns ("000<ext>", bytes) or None when no cover is available.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
url = self._works_resolver.get_cover_for_volume(series_id, volume)
|
||||||
|
except Exception:
|
||||||
|
url = None
|
||||||
|
if not url:
|
||||||
|
return None
|
||||||
|
fetched = self._cover_cache.get(url)
|
||||||
|
if not fetched:
|
||||||
|
return None
|
||||||
|
data, ext = fetched
|
||||||
|
return (f"000{ext}", data)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Archive update (single read + single write per archive)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def _apply_update(self, cbz_path: Path, builder: ComicInfoBuilder,
                  chapter_num: str, *,
                  volume, cover, full_rebuild: bool) -> tuple:
    """
    Rewrites one CBZ archive with an updated ComicInfo.xml and, when a
    cover is supplied and a ``000.<ext>`` placeholder exists, a new
    cover image.

    The new archive is written to ``<name>.tmp`` and moved over the
    original only after the source archive has been closed. On any
    failure the original stays untouched and the temp file is removed.

    Parameters
    ----------
    cbz_path     : Archive to rewrite.
    builder      : ComicInfoBuilder used for full rebuilds.
    chapter_num  : Chapter key of this archive.
    volume       : Volume to write (ignored by a full rebuild).
    cover        : (filename, bytes) of the new cover, or None.
    full_rebuild : True -> rebuild the whole XML instead of editing
                   <Volume> only.

    Returns (ok, cover_swapped).
    """
    def _is_placeholder(member_name: str) -> bool:
        member = Path(member_name)
        return member.stem == "000" and member.suffix.lower() in _IMAGE_EXTS

    tmp = None
    try:
        tmp = cbz_path.with_suffix(cbz_path.suffix + ".tmp")
        with zipfile.ZipFile(cbz_path, "r") as zin:
            infos = zin.infolist()
            # Cover is only ever *replaced*: inserting one would shift
            # every <Pages> image index in the existing XML.
            swap_cover = (cover is not None
                          and any(_is_placeholder(i.filename) for i in infos))
            # The page-0 attributes in the XML must describe the image
            # that ends up in the archive, so the new cover is only
            # handed to the XML builders when it is really swapped in.
            xml_cover = cover if swap_cover else None

            try:
                old_xml = zin.read("ComicInfo.xml")
            except KeyError:
                old_xml = None

            if full_rebuild or old_xml is None:
                new_xml = self._build_full_xml(
                    builder, chapter_num, old_xml, xml_cover)
            else:
                new_xml = self._edit_volume_xml(old_xml, volume, xml_cover)
                if new_xml is None:  # parse error -> full rebuild
                    new_xml = self._build_full_xml(
                        builder, chapter_num, None, xml_cover)

            wrote_xml = False
            with zipfile.ZipFile(tmp, "w", zipfile.ZIP_STORED) as zout:
                for info in infos:
                    if swap_cover and _is_placeholder(info.filename):
                        zout.writestr(cover[0], cover[1])
                    elif info.filename == "ComicInfo.xml":
                        zout.writestr("ComicInfo.xml", new_xml)
                        wrote_xml = True
                    else:
                        zout.writestr(info, zin.read(info.filename))
                if not wrote_xml:
                    zout.writestr("ComicInfo.xml", new_xml)

        # Both archives are closed here; replacing a file that is still
        # open fails on Windows.
        tmp.replace(cbz_path)
        return True, swap_cover
    except Exception as exc:
        print(f"[updater] {cbz_path.name}: update failed: {exc}",
              flush=True)
        if tmp is not None:
            try:
                tmp.unlink()  # do not leave a half-written archive behind
            except OSError:
                pass
        return False, False
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# XML builders
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def _edit_volume_xml(self, old_xml: bytes, volume,
                     cover) -> "str | None":
    """
    Returns *old_xml* re-serialised with <Volume> set to ``str(volume)``
    (the element is appended when absent). When *cover* is given and a
    <Pages> element exists, the page-0 attributes are refreshed from the
    cover bytes. Returns None when *old_xml* cannot be parsed.
    """
    try:
        doc = ET.fromstring(old_xml)
    except ET.ParseError:
        return None

    vol_node = doc.find("Volume")
    if vol_node is None:
        vol_node = ET.SubElement(doc, "Volume")
    vol_node.text = str(volume)

    pages_node = doc.find("Pages") if cover is not None else None
    if pages_node is not None:
        _update_page0_attrs(pages_node, cover[1])

    return _serialize_tree(doc)
|
||||||
|
|
||||||
|
def _build_full_xml(self, builder: ComicInfoBuilder, chapter_num: str,
                    old_xml: "bytes | None", cover) -> str:
    """
    Produces a complete ComicInfo.xml from ``builder._build_tree()``.

    The fields read from *old_xml* are handed to the builder as its
    Suwayomi data, and the previous <PageCount> / <Pages> elements are
    appended to the new tree. With a *cover*, the page-0 attributes of
    the carried-over <Pages> are refreshed first.
    """
    # NOTE(review): the original comment says assigning the chapter also
    # clears the builder's page state — the setter is not visible here.
    builder.chapter = chapter_num
    if old_xml:
        builder._suwayomi_data = ComicInfoBuilder.read_comicinfo_fields(old_xml)
    else:
        builder._suwayomi_data = {}
    fresh = builder._build_tree().getroot()

    previous = None
    if old_xml:
        try:
            previous = ET.fromstring(old_xml)
        except ET.ParseError:
            previous = None
    if previous is None:
        # Nothing to carry over.
        return _serialize_tree(fresh)

    old_pages = previous.find("Pages")
    if old_pages is not None and cover is not None:
        _update_page0_attrs(old_pages, cover[1])

    old_count = previous.find("PageCount")
    if old_count is not None:
        fresh.append(old_count)
    if old_pages is not None:
        fresh.append(old_pages)

    return _serialize_tree(fresh)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Logging
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def _log(self, msg: str) -> None:
    """
    Prints *msg* to stdout and appends it, timestamped, as one line to
    the log file. A failure to write the file is reported on stdout and
    otherwise ignored.
    """
    line = f"[{_now()}] {msg}"
    print(f"[updater] {msg}", flush=True)

    target = self._log_path
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open("a", encoding="utf-8") as handle:
            handle.write(f"{line}\n")
    except OSError as exc:
        print(f"[updater] cannot write log file {self._log_path}: {exc}",
              flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
# Usage example
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test outside Docker — edit both paths before running.
    KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
    MATCHES_PATH = Path(__file__).resolve().parents[1] / "matches.json"

    cache = MatchesCache(MATCHES_PATH)
    updater = KavitaVolumeCoverUpdater(KAVITA_PATH, matches_cache=cache)

    # Single scan, no background thread:
    summary = updater.update_all()
    print(f"\n[updater] {summary}")

    # To follow the cron schedule instead (default: 19:00 Mon + Thu):
    # updater.start()
    # updater.wait()
|
||||||
@@ -43,7 +43,6 @@ Dependencies
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import difflib
|
import difflib
|
||||||
import re
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
@@ -0,0 +1,218 @@
|
|||||||
|
"""
|
||||||
|
matches_cache.py
|
||||||
|
================
|
||||||
|
|
||||||
|
Persistent JSON cache that maps a normalised (lowercase) search title to the
|
||||||
|
MangaBaka series it was matched against.
|
||||||
|
|
||||||
|
Structure on disk::
|
||||||
|
|
||||||
|
{
|
||||||
|
"matches": {
|
||||||
|
"<normalised lowercase key>": {
|
||||||
|
"folderTitle": "Original Folder Name",
|
||||||
|
"mangabakaId": "12345",
|
||||||
|
"mangabakaName": "One-Punch Man",
|
||||||
|
"imageUrl": "https://.../cover.jpg",
|
||||||
|
"firstMatchTime": 1700000000
|
||||||
|
},
|
||||||
|
...
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Keys are always stored lowercase so that folder names differing only in
|
||||||
|
capitalisation (e.g. "[Oshi No Ko]" vs "[oshi no ko]") are treated as
|
||||||
|
identical entries. The original casing is preserved in the ``folderTitle``
|
||||||
|
field and is used for display purposes (e.g. the web UI title link).
|
||||||
|
|
||||||
|
The cache is consulted by ComicInfoBuilder before issuing a MangaBaka
|
||||||
|
search request, and is written back to disk on every mutation so a crash
|
||||||
|
does not lose matches that were resolved in the current run.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def _norm_key(title: str) -> str:
|
||||||
|
"""Normalises a cache key to lowercase for case-insensitive deduplication."""
|
||||||
|
return title.lower()
|
||||||
|
|
||||||
|
|
||||||
|
class MatchesCache:
|
||||||
|
def __init__(self, path):
    """
    Binds the cache to the JSON file at *path* and calls ``_load()``.
    An empty ``{"matches": {}}`` structure is in place before loading.
    """
    # The three attributes are independent; _load() runs last.
    self._data: dict = {"matches": {}}
    self._lock = threading.RLock()
    self._path = Path(path)
    self._load()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Public lookup / mutation API
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def get(self, title: str) -> "dict | None":
    """
    Returns a copy of the entry stored for *title* (case-insensitive),
    or None when there is no entry or the stored entry is empty.
    """
    with self._lock:
        found = self._data["matches"].get(_norm_key(title))
        if not found:
            return None
        # Hand out a copy so callers cannot mutate the cached entry.
        return dict(found)
|
||||||
|
|
||||||
|
def add(self, title: str, *,
        mangabaka_id,
        mangabaka_name: str,
        image_url: "str | None") -> dict:
    """
    Stores a fresh entry for *title*, replacing any existing one, and
    calls ``_save_unlocked()``. ``firstMatchTime`` is set to the current
    Unix time. Returns a copy of the stored entry.
    """
    id_text = "" if mangabaka_id is None else str(mangabaka_id)
    record = {
        "folderTitle": title,
        "mangabakaId": id_text,
        "mangabakaName": mangabaka_name or "",
        "imageUrl": image_url or "",
        "firstMatchTime": int(time.time()),
    }
    with self._lock:
        self._data["matches"][_norm_key(title)] = record
        self._save_unlocked()
        return dict(record)
|
||||||
|
|
||||||
|
def upsert(self, title: str, *,
|
||||||
|
mangabaka_id=None,
|
||||||
|
mangabaka_name=None,
|
||||||
|
image_url=None,
|
||||||
|
first_match_time=None) -> dict:
|
||||||
|
norm = _norm_key(title)
|
||||||
|
with self._lock:
|
||||||
|
entry = self._data["matches"].get(norm)
|
||||||
|
if entry is None:
|
||||||
|
entry = {
|
||||||
|
"folderTitle": title,
|
||||||
|
"mangabakaId": "",
|
||||||
|
"mangabakaName": "",
|
||||||
|
"imageUrl": "",
|
||||||
|
"firstMatchTime": int(time.time()),
|
||||||
|
}
|
||||||
|
self._data["matches"][norm] = entry
|
||||||
|
# folderTitle is only set on creation; preserve original casing on updates.
|
||||||
|
if mangabaka_id is not None:
|
||||||
|
entry["mangabakaId"] = str(mangabaka_id)
|
||||||
|
if mangabaka_name is not None:
|
||||||
|
entry["mangabakaName"] = mangabaka_name
|
||||||
|
if image_url is not None:
|
||||||
|
entry["imageUrl"] = image_url
|
||||||
|
if first_match_time is not None:
|
||||||
|
try:
|
||||||
|
entry["firstMatchTime"] = int(first_match_time)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
pass
|
||||||
|
self._save_unlocked()
|
||||||
|
return dict(entry)
|
||||||
|
|
||||||
|
def rename(self, old_title: str, new_title: str) -> bool:
|
||||||
|
old_norm = _norm_key(old_title)
|
||||||
|
new_norm = _norm_key(new_title)
|
||||||
|
if not new_title or old_norm == new_norm:
|
||||||
|
return False
|
||||||
|
with self._lock:
|
||||||
|
entry = self._data["matches"].pop(old_norm, None)
|
||||||
|
if entry is None:
|
||||||
|
return False
|
||||||
|
entry["folderTitle"] = new_title
|
||||||
|
self._data["matches"][new_norm] = entry
|
||||||
|
self._save_unlocked()
|
||||||
|
return True
|
||||||
|
|
||||||
|
def remove(self, title: str) -> bool:
|
||||||
|
norm = _norm_key(title)
|
||||||
|
with self._lock:
|
||||||
|
existed = norm in self._data["matches"]
|
||||||
|
if existed:
|
||||||
|
del self._data["matches"][norm]
|
||||||
|
self._save_unlocked()
|
||||||
|
return existed
|
||||||
|
|
||||||
|
def all(self) -> dict:
|
||||||
|
with self._lock:
|
||||||
|
return {"matches": {k: dict(v)
|
||||||
|
for k, v in self._data["matches"].items()}}
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Internal IO
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def _load(self) -> None:
|
||||||
|
if not self._path.is_file():
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
with self._path.open("r", encoding="utf-8") as f:
|
||||||
|
loaded = json.load(f)
|
||||||
|
except (OSError, json.JSONDecodeError) as exc:
|
||||||
|
print(f"[MatchesCache] failed to load {self._path}: {exc}",
|
||||||
|
flush=True)
|
||||||
|
return
|
||||||
|
if not isinstance(loaded, dict) or not isinstance(loaded.get("matches"), dict):
|
||||||
|
return
|
||||||
|
|
||||||
|
normalized, changed = self._normalize_on_load(loaded["matches"])
|
||||||
|
loaded["matches"] = normalized
|
||||||
|
self._data = loaded
|
||||||
|
if changed:
|
||||||
|
print(f"[MatchesCache] migrated {changed} entr{'y' if changed == 1 else 'ies'} "
|
||||||
|
f"(lowercase keys / folderTitle), saving", flush=True)
|
||||||
|
self._save_unlocked()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _normalize_on_load(raw: dict) -> "tuple[dict, int]":
|
||||||
|
"""
|
||||||
|
Normalises the raw matches dict loaded from disk.
|
||||||
|
|
||||||
|
- Keys are lowercased.
|
||||||
|
- ``folderTitle`` is added from the original key when missing.
|
||||||
|
- Duplicate keys (same normalised form) are merged by keeping the
|
||||||
|
entry with the higher ``firstMatchTime``.
|
||||||
|
|
||||||
|
Returns (normalised_dict, number_of_changed_entries).
|
||||||
|
"""
|
||||||
|
result: dict = {}
|
||||||
|
changed = 0
|
||||||
|
|
||||||
|
for orig_key, entry in raw.items():
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
norm = _norm_key(orig_key)
|
||||||
|
entry = dict(entry)
|
||||||
|
|
||||||
|
# Add folderTitle if absent
|
||||||
|
if "folderTitle" not in entry:
|
||||||
|
entry["folderTitle"] = orig_key
|
||||||
|
changed += 1
|
||||||
|
|
||||||
|
if norm != orig_key:
|
||||||
|
changed += 1
|
||||||
|
|
||||||
|
# Merge duplicates: keep data from the more recent entry, but
|
||||||
|
# prefer the folderTitle that contains uppercase letters (= the
|
||||||
|
# original folder name) regardless of which entry is newer.
|
||||||
|
if norm in result:
|
||||||
|
existing = result[norm]
|
||||||
|
if entry.get("firstMatchTime", 0) > existing.get("firstMatchTime", 0):
|
||||||
|
# Newer entry wins for data; preserve better-cased folderTitle
|
||||||
|
existing_ft = existing.get("folderTitle", norm)
|
||||||
|
new_ft = entry.get("folderTitle", norm)
|
||||||
|
if existing_ft != existing_ft.lower() and new_ft == new_ft.lower():
|
||||||
|
entry["folderTitle"] = existing_ft
|
||||||
|
result[norm] = entry
|
||||||
|
else:
|
||||||
|
# Existing entry stays; but adopt new folderTitle if it has casing
|
||||||
|
existing_ft = existing.get("folderTitle", norm)
|
||||||
|
new_ft = entry.get("folderTitle", norm)
|
||||||
|
if new_ft != new_ft.lower() and existing_ft == existing_ft.lower():
|
||||||
|
existing["folderTitle"] = new_ft
|
||||||
|
else:
|
||||||
|
result[norm] = entry
|
||||||
|
|
||||||
|
return result, changed
|
||||||
|
|
||||||
|
def _save_unlocked(self) -> None:
|
||||||
|
self._path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
tmp = self._path.with_suffix(self._path.suffix + ".tmp")
|
||||||
|
with tmp.open("w", encoding="utf-8") as f:
|
||||||
|
json.dump(self._data, f, ensure_ascii=False, indent=2)
|
||||||
|
tmp.replace(self._path)
|
||||||
@@ -0,0 +1,642 @@
|
|||||||
|
"""
|
||||||
|
matches_web_app.py
|
||||||
|
==================
|
||||||
|
|
||||||
|
Flask web UI for inspecting and editing the matches.json file produced by
|
||||||
|
MatchesCache.
|
||||||
|
|
||||||
|
Routes
|
||||||
|
------
|
||||||
|
GET / HTML table view (one row per cached match)
|
||||||
|
GET /api/matches JSON dump of the full cache
|
||||||
|
POST /api/matches Update an entry's mangabakaId
|
||||||
|
body: {title, mangabakaId}
|
||||||
|
Server resolves the id against MangaBaka and
|
||||||
|
refreshes the mangabakaName + imageUrl fields.
|
||||||
|
POST /api/matches/delete Remove an entry body: {title}
|
||||||
|
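POST /api/build           Trigger a full re-scan via
                          SuwayomiMover.build_matches_only
POST /api/move            Run the move pipeline via the mover's
                          process_all()
GET  /perf                HTML view of the recorded move-performance runs
GET  /api/perf            JSON dump of the recorded performance runs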
|
||||||
|
|
||||||
|
The Title cell is rendered as a link to MangaBaka's search page restricted
|
||||||
|
to the manga / manhwa / manhua types. Only mangabakaId is editable; title
|
||||||
|
(folder name) and mangabakaName (info only) are read-only.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import threading
|
||||||
|
|
||||||
|
from flask import Flask, jsonify, request, Response
|
||||||
|
|
||||||
|
from MatchesCache import MatchesCache
|
||||||
|
from ComicInfoBuilder import _pick_thumbnail_url
|
||||||
|
|
||||||
|
|
||||||
|
# Single-page UI served at "/": a sortable, filterable table of cached
# matches. All cell content is written through textContent / DOM properties,
# never through innerHTML, so titles and names cannot inject markup.
_INDEX_HTML = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>MangaBaka matches</title>
<style>
  body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
  h1 { margin: 0 0 1rem; font-size: 1.4rem; }
  .bar { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
  .bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
  button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
  button.primary { background:#2563eb; border-color:#2563eb; color:white; }
  button.danger { background:#7f1d1d; border-color:#7f1d1d; color:white; }
  button:disabled { opacity:.5; cursor:default; }
  table { border-collapse: collapse; width: 100%; }
  th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
  th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
  th.sortable { cursor: pointer; user-select: none; }
  th.sortable:hover { background:#252525; }
  th .arrow { display:inline-block; width:.8em; color:#9ca3af; }
  tr:nth-child(even) td { background: #161616; }
  td.image img { max-width: 90px; max-height: 130px; display:block; }
  td.id input { width: 14rem; padding: .25rem; background:#222; color:#eee; border:1px solid #444; font-family: monospace; }
  td.title a { color: #60a5fa; text-decoration: none; }
  td.title a:hover { text-decoration: underline; }
  td.actions { white-space: nowrap; }
  .status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
  .dirty td { background: #1f2937 !important; }
  .count { color:#9ca3af; font-size:.9rem; margin-left:.5rem; }
</style>
</head>
<body>
<h1>MangaBaka matches <span id="count" class="count"></span></h1>
<div class="bar">
  <input id="filter" type="search" placeholder="Filter by title…">
  <button id="reload">Reload</button>
  <button id="batchSave" class="primary">Save dirty (0)</button>
  <button id="build">Build all (rescan)</button>
  <button id="move">Start move</button>
  <a href="/perf" style="margin-left:.5rem;color:#60a5fa;">Performance ▸</a>
  <span class="status" id="status"></span>
</div>

<table>
  <thead>
    <tr>
      <th class="sortable" data-col="title">Title <span class="arrow" id="arrow-title"></span></th>
      <th>mangabakaId</th>
      <th>mangabakaName</th>
      <th class="sortable" data-col="firstMatchTime">firstMatchTime <span class="arrow" id="arrow-firstMatchTime"></span></th>
      <th>Image</th>
      <th></th>
    </tr>
  </thead>
  <tbody id="rows"></tbody>
</table>

<script>
const TYPES = "&type=manhwa&type=manhua&type=manga";
let matchesData = {};
let currentSort = { col: "title", asc: true };

function fmtTime(unix) {
  if (!unix) return "";
  const d = new Date(unix * 1000);
  return d.toLocaleString();
}

function searchUrl(title) {
  return "https://mangabaka.org/search?q=" + encodeURIComponent(title) + TYPES;
}

function setStatus(msg) { document.getElementById("status").textContent = msg; }

function updateDirtyCount() {
  const n = document.querySelectorAll("#rows tr.dirty").length;
  const btn = document.getElementById("batchSave");
  btn.textContent = "Save dirty (" + n + ")";
  btn.disabled = n === 0;
}

function makeRow(title, e) {
  const tr = document.createElement("tr");
  tr.dataset.title = title;
  const displayTitle = e.folderTitle || title;
  tr.dataset.folderTitle = displayTitle;

  // Title — link only, not editable; shows folderTitle (original casing)
  const titleTd = document.createElement("td");
  titleTd.className = "title";
  const titleLink = document.createElement("a");
  titleLink.href = searchUrl(displayTitle);
  titleLink.target = "_blank";
  titleLink.rel = "noopener";
  titleLink.textContent = displayTitle;
  titleTd.appendChild(titleLink);
  tr.appendChild(titleTd);

  // mangabakaId — editable
  const idTd = document.createElement("td");
  idTd.className = "id";
  const idInp = document.createElement("input");
  idInp.value = e.mangabakaId || "";
  idInp.dataset.original = e.mangabakaId || "";
  idInp.addEventListener("input", () => {
    if (idInp.value !== idInp.dataset.original) tr.classList.add("dirty");
    else tr.classList.remove("dirty");
    updateDirtyCount();
  });
  idTd.appendChild(idInp);
  tr.appendChild(idTd);

  // mangabakaName — plain text (info only)
  const nameTd = document.createElement("td");
  nameTd.className = "name";
  nameTd.textContent = e.mangabakaName || "";
  tr.appendChild(nameTd);

  // firstMatchTime — plain text
  const timeTd = document.createElement("td");
  timeTd.textContent = fmtTime(e.firstMatchTime);
  tr.appendChild(timeTd);

  // Image
  const imgTd = document.createElement("td");
  imgTd.className = "image";
  const img = document.createElement("img");
  img.src = e.imageUrl || "";
  img.alt = "";
  img.loading = "lazy";
  imgTd.appendChild(img);
  tr.appendChild(imgTd);

  // Actions
  const actTd = document.createElement("td");
  actTd.className = "actions";
  const save = document.createElement("button");
  save.textContent = "Save";
  save.className = "primary";
  save.addEventListener("click", () => saveRow(tr));
  const del = document.createElement("button");
  del.textContent = "Delete";
  del.className = "danger";
  del.style.marginLeft = ".25rem";
  del.addEventListener("click", () => deleteRow(tr));
  actTd.append(save, del);
  tr.appendChild(actTd);

  tr._idInp = idInp;
  tr._nameTd = nameTd;
  tr._img = img;
  return tr;
}

async function saveRow(tr) {
  const title = tr.dataset.title;
  const label = tr.dataset.folderTitle || title;
  const newId = tr._idInp.value.trim();
  setStatus("Saving " + label + "…");
  try {
    const r = await fetch("/api/matches", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ title: title, mangabakaId: newId }),
    });
    if (!r.ok) throw new Error(await r.text());
    const data = await r.json();
    const entry = data.entry || {};
    matchesData[title] = entry;
    tr._idInp.value = entry.mangabakaId || "";
    tr._idInp.dataset.original = entry.mangabakaId || "";
    tr._nameTd.textContent = entry.mangabakaName || "";
    tr._img.src = entry.imageUrl || "";
    tr.classList.remove("dirty");
    updateDirtyCount();
    setStatus("Saved " + label);
    return true;
  } catch (err) {
    setStatus("Save failed (" + label + "): " + err.message);
    return false;
  }
}

async function deleteRow(tr) {
  const title = tr.dataset.title;
  // Show the original-cased folder title to the user; the request still
  // sends the cache key.
  const label = tr.dataset.folderTitle || title;
  if (!confirm("Delete " + label + "?")) return;
  setStatus("Deleting " + label + "…");
  try {
    const r = await fetch("/api/matches/delete", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ title: title }),
    });
    if (!r.ok) throw new Error(await r.text());
    delete matchesData[title];
    tr.remove();
    updateDirtyCount();
    document.getElementById("count").textContent =
      "(" + Object.keys(matchesData).length + " entries)";
    setStatus("Deleted");
  } catch (err) {
    setStatus("Delete failed: " + err.message);
  }
}

async function batchSave() {
  const dirty = Array.from(document.querySelectorAll("#rows tr.dirty"));
  if (dirty.length === 0) return;
  if (!confirm("Save " + dirty.length + " changed row(s)?")) return;
  setStatus("Batch saving " + dirty.length + " rows…");
  let ok = 0, fail = 0;
  for (const tr of dirty) {
    const success = await saveRow(tr);
    if (success) ok++; else fail++;
  }
  setStatus("Batch: " + ok + " ok, " + fail + " failed");
}

function sortedTitles() {
  const titles = Object.keys(matchesData);
  const dir = currentSort.asc ? 1 : -1;
  if (currentSort.col === "title") {
    return titles.sort((a, b) => {
      const fa = (matchesData[a].folderTitle || a).toLowerCase();
      const fb = (matchesData[b].folderTitle || b).toLowerCase();
      return fa.localeCompare(fb) * dir;
    });
  }
  if (currentSort.col === "firstMatchTime") {
    return titles.sort((a, b) => {
      const av = matchesData[a].firstMatchTime || 0;
      const bv = matchesData[b].firstMatchTime || 0;
      return (av - bv) * dir;
    });
  }
  return titles;
}

function updateSortArrows() {
  for (const a of document.querySelectorAll("th .arrow")) a.textContent = "";
  const id = "arrow-" + currentSort.col;
  const el = document.getElementById(id);
  if (el) el.textContent = currentSort.asc ? "▲" : "▼";
}

function render() {
  const tbody = document.getElementById("rows");
  tbody.innerHTML = "";
  for (const t of sortedTitles()) {
    tbody.appendChild(makeRow(t, matchesData[t]));
  }
  updateSortArrows();
  applyFilter();
  updateDirtyCount();
  document.getElementById("count").textContent =
    "(" + Object.keys(matchesData).length + " entries)";
}

async function load() {
  setStatus("Loading…");
  try {
    const r = await fetch("/api/matches");
    // Surface the server's error text instead of a JSON parse error.
    if (!r.ok) throw new Error(await r.text());
    const data = await r.json();
    matchesData = data.matches || {};
    render();
    setStatus(Object.keys(matchesData).length + " entries");
  } catch (err) {
    setStatus("Load failed: " + err.message);
  }
}

function applyFilter() {
  const q = document.getElementById("filter").value.toLowerCase();
  for (const tr of document.querySelectorAll("#rows tr")) {
    const t = (tr.dataset.folderTitle || tr.dataset.title).toLowerCase();
    tr.style.display = t.includes(q) ? "" : "none";
  }
}

document.getElementById("filter").addEventListener("input", applyFilter);
document.getElementById("reload").addEventListener("click", load);
document.getElementById("batchSave").addEventListener("click", batchSave);
document.getElementById("build").addEventListener("click", async () => {
  if (!confirm("Run full scan? This may take several minutes.")) return;
  const btn = document.getElementById("build");
  // Same guard as the move button: no second click while the scan runs.
  btn.disabled = true;
  setStatus("Building… (running on the server)");
  try {
    const r = await fetch("/api/build", { method: "POST" });
    if (!r.ok) throw new Error(await r.text());
    setStatus("Build finished");
    load();
  } catch (err) {
    setStatus("Build failed: " + err.message);
  } finally {
    btn.disabled = false;
  }
});
document.getElementById("move").addEventListener("click", async () => {
  if (!confirm("Start move operation? This will process all series and may take a long time.")) return;
  const btn = document.getElementById("move");
  btn.disabled = true;
  setStatus("Moving… (running on the server)");
  try {
    const r = await fetch("/api/move", { method: "POST" });
    if (!r.ok) throw new Error(await r.text());
    const data = await r.json();
    const total = Object.keys(data.results || {}).length;
    setStatus("Move finished — " + total + " series processed");
  } catch (err) {
    setStatus("Move failed: " + err.message);
  } finally {
    btn.disabled = false;
  }
});
for (const th of document.querySelectorAll("th.sortable")) {
  th.addEventListener("click", () => {
    const col = th.dataset.col;
    if (currentSort.col === col) currentSort.asc = !currentSort.asc;
    else { currentSort.col = col; currentSort.asc = true; }
    render();
  });
}

load();
</script>
</body>
</html>
"""
|
||||||
|
|
||||||
|
|
||||||
|
# Performance page served at "/perf". It renders the runs returned by
# /api/perf by assembling HTML strings, so every value that originates from
# the data (series titles, chapter names, step names) goes through esc()
# before it is concatenated into markup.
_PERF_HTML = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Move performance</title>
<style>
  body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
  h1 { margin: 0 0 1rem; font-size: 1.4rem; }
  h2 { font-size: 1.05rem; margin: 1.4rem 0 .5rem; color:#cbd5e1; }
  a { color:#60a5fa; text-decoration:none; }
  a:hover { text-decoration:underline; }
  .bar { display:flex; gap:.6rem; align-items:center; margin-bottom:1rem; flex-wrap:wrap; }
  select, button { padding:.35rem .6rem; background:#222; color:#eee; border:1px solid #555; }
  .summary { color:#9ca3af; margin:.3rem 0 1rem; }
  table { border-collapse: collapse; width: 100%; margin-bottom:.5rem; }
  th, td { border: 1px solid #333; padding: .35rem .6rem; text-align: left; }
  th { background:#1d1d1d; }
  td.num { text-align:right; font-variant-numeric: tabular-nums; white-space:nowrap; }
  .barcell { position:relative; }
  .barfill { position:absolute; left:0; top:0; bottom:0; background:#2563eb33; z-index:0; }
  .barcell span { position:relative; z-index:1; }
  details { margin:.3rem 0; }
  summary { cursor:pointer; padding:.25rem 0; }
  .chip { color:#9ca3af; font-size:.85rem; }
  .err { color:#f87171; }
</style>
</head>
<body>
<h1>Move performance <a href="/" style="font-size:.9rem;">◂ back to matches</a></h1>
<div class="bar">
  <label>Run: <select id="runSelect"></select></label>
  <button id="reload">Reload</button>
  <span class="summary" id="summary"></span>
</div>

<div id="content"></div>

<script>
let runs = [];

function fmtSecs(s) { return (s || 0).toFixed(2) + "s"; }
function fmtTime(unix) { return unix ? new Date(unix * 1000).toLocaleString() : ""; }

// Escape a value for safe insertion into HTML text or attribute content.
function esc(v) {
  return String(v == null ? "" : v)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

function stepTable(totals, grandTotal) {
  const entries = Object.entries(totals || {}).sort((a, b) => b[1] - a[1]);
  if (!entries.length) return "<p class=chip>(no steps recorded)</p>";
  const max = entries[0][1] || 1;
  let rows = "";
  for (const [name, secs] of entries) {
    const pct = grandTotal ? (secs / grandTotal * 100) : 0;
    const w = (secs / max * 100);
    rows += "<tr><td>" + esc(name) + "</td>"
      + "<td class='num'>" + fmtSecs(secs) + "</td>"
      + "<td class='num'>" + pct.toFixed(1) + "%</td>"
      + "<td class='barcell'><div class='barfill' style='width:" + w + "%'></div>"
      + "<span> </span></td></tr>";
  }
  return "<table><thead><tr><th>Step</th><th class=num>Total</th>"
    + "<th class=num>% of run</th><th> </th></tr></thead><tbody>"
    + rows + "</tbody></table>";
}

function seriesBlock(s) {
  let chapters = "";
  // Chapters sorted slowest first to surface outliers.
  const chs = (s.chapters || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
  for (const c of chs) {
    const steps = Object.entries(c.steps || {}).sort((a, b) => b[1] - a[1])
      .map(([n, v]) => esc(n) + " " + fmtSecs(v)).join(", ");
    chapters += "<tr><td>" + esc(c.chapter) + (c.ok ? "" : " <span class=err>(failed)</span>") + "</td>"
      + "<td class='num'>" + fmtSecs(c.totalSeconds) + "</td>"
      + "<td>" + steps + "</td></tr>";
  }
  const seriesSteps = Object.entries(s.steps || {})
    .map(([n, v]) => esc(n) + " " + fmtSecs(v)).join(", ") || "—";
  return "<details><summary><b>" + esc(s.title) + "</b> "
    + "<span class=chip>" + fmtSecs(s.totalSeconds) + " · "
    + esc(s.chapterCount || 0) + " chapters · " + seriesSteps + "</span></summary>"
    + "<table><thead><tr><th>Chapter</th><th class=num>Total</th>"
    + "<th>Steps</th></tr></thead><tbody>" + chapters + "</tbody></table></details>";
}

function renderRun(run) {
  const c = document.getElementById("content");
  if (!run) {
    // Clear the summary so it does not keep describing a previous run.
    document.getElementById("summary").textContent = "";
    c.innerHTML = "<p class=chip>No runs recorded yet.</p>";
    return;
  }
  document.getElementById("summary").textContent =
    fmtTime(run.startedAt) + " · " + fmtSecs(run.totalSeconds) + " · "
    + run.seriesCount + " series · " + run.chapterCount + " chapters";

  let html = "<h2>Chapter steps (summed over all chapters)</h2>"
    + stepTable(run.stepTotals, run.totalSeconds)
    + "<h2>Series steps (metadata / person sync)</h2>"
    + stepTable(run.seriesStepTotals, run.totalSeconds)
    + "<h2>Series detail</h2>";
  const series = (run.series || []).slice().sort((a, b) => b.totalSeconds - a.totalSeconds);
  html += series.map(seriesBlock).join("");
  c.innerHTML = html;
}

function renderSelect() {
  const sel = document.getElementById("runSelect");
  sel.innerHTML = "";
  runs.forEach((r, i) => {
    const o = document.createElement("option");
    o.value = i;
    o.textContent = fmtTime(r.startedAt) + " (" + fmtSecs(r.totalSeconds) + ")";
    sel.appendChild(o);
  });
}

async function load() {
  try {
    const r = await fetch("/api/perf");
    if (!r.ok) throw new Error(await r.text());
    const data = await r.json();
    runs = data.runs || [];
  } catch (err) {
    // Report the failure on the page instead of leaving an unhandled rejection.
    runs = [];
    renderSelect();
    document.getElementById("content").innerHTML = "";
    document.getElementById("summary").textContent = "Load failed: " + err.message;
    return;
  }
  renderSelect();
  renderRun(runs[0]);
}

document.getElementById("runSelect").addEventListener("change", e => {
  renderRun(runs[e.target.value]);
});
document.getElementById("reload").addEventListener("click", load);
load();
</script>
</body>
</html>
"""
|
||||||
|
|
||||||
|
|
||||||
|
class MatchesWebApp:
|
||||||
|
"""
|
||||||
|
Flask app exposing the MatchesCache. `mover` is required when you want
|
||||||
|
POST /api/matches to resolve a new mangabakaId against MangaBaka (it
|
||||||
|
uses the mover's rate-limited session) and when POST /api/build should
|
||||||
|
work.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, cache: MatchesCache, *,
|
||||||
|
mover=None,
|
||||||
|
perf_stats=None,
|
||||||
|
host: str = "0.0.0.0",
|
||||||
|
port: int = 8080):
|
||||||
|
self._cache = cache
|
||||||
|
self._mover = mover
|
||||||
|
self._perf = perf_stats
|
||||||
|
self._host = host
|
||||||
|
self._port = port
|
||||||
|
self._build_lock = threading.Lock()
|
||||||
|
self._move_lock = threading.Lock()
|
||||||
|
self._app = Flask(__name__)
|
||||||
|
self._thread: "threading.Thread | None" = None
|
||||||
|
self._register_routes()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def app(self) -> Flask:
|
||||||
|
return self._app
|
||||||
|
|
||||||
|
def start(self) -> threading.Thread:
|
||||||
|
"""
|
||||||
|
Starts the Flask server on a background thread and returns it.
|
||||||
|
|
||||||
|
The thread is non-daemon so the process stays alive even when the
|
||||||
|
caller does not explicitly join() — important when this is the
|
||||||
|
only foreground task (e.g. watcher disabled for testing).
|
||||||
|
"""
|
||||||
|
if self._thread is not None and self._thread.is_alive():
|
||||||
|
return self._thread
|
||||||
|
self._thread = threading.Thread(
|
||||||
|
target=self._app.run,
|
||||||
|
kwargs={"host": self._host, "port": self._port,
|
||||||
|
"debug": False, "use_reloader": False,
|
||||||
|
"threaded": True},
|
||||||
|
name="MatchesWebApp",
|
||||||
|
daemon=False,
|
||||||
|
)
|
||||||
|
self._thread.start()
|
||||||
|
print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
|
||||||
|
flush=True)
|
||||||
|
return self._thread
|
||||||
|
|
||||||
|
def wait(self) -> None:
|
||||||
|
"""Blocks until the Flask thread exits."""
|
||||||
|
if self._thread is not None:
|
||||||
|
self._thread.join()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Routes
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
def _register_routes(self) -> None:
|
||||||
|
app = self._app
|
||||||
|
cache = self._cache
|
||||||
|
|
||||||
|
@app.get("/")
|
||||||
|
def index() -> Response:
|
||||||
|
return Response(_INDEX_HTML, mimetype="text/html; charset=utf-8")
|
||||||
|
|
||||||
|
@app.get("/api/matches")
|
||||||
|
def api_list():
|
||||||
|
return jsonify(cache.all())
|
||||||
|
|
||||||
|
@app.post("/api/matches")
|
||||||
|
def api_upsert():
|
||||||
|
body = request.get_json(silent=True) or {}
|
||||||
|
title = (body.get("title") or "").strip()
|
||||||
|
if not title:
|
||||||
|
return Response("title is required", status=400)
|
||||||
|
|
||||||
|
new_id_raw = body.get("mangabakaId")
|
||||||
|
new_id = str(new_id_raw).strip() if new_id_raw is not None else ""
|
||||||
|
if not new_id:
|
||||||
|
return Response("mangabakaId is required", status=400)
|
||||||
|
|
||||||
|
# Resolve the id against MangaBaka so mangabakaName + imageUrl
|
||||||
|
# always reflect what the id actually points to.
|
||||||
|
new_name: "str | None" = None
|
||||||
|
new_image: "str | None" = None
|
||||||
|
if self._mover is not None:
|
||||||
|
try:
|
||||||
|
series = self._mover.fetch_series(new_id)
|
||||||
|
except Exception as exc:
|
||||||
|
return Response(f"resolve failed: {exc}", status=502)
|
||||||
|
if not series:
|
||||||
|
return Response(
|
||||||
|
f"MangaBaka has no series with id {new_id}",
|
||||||
|
status=404)
|
||||||
|
new_name = series.get("title") or ""
|
||||||
|
new_image = _pick_thumbnail_url(series.get("cover")) or ""
|
||||||
|
|
||||||
|
entry = cache.upsert(
|
||||||
|
title,
|
||||||
|
mangabaka_id=new_id,
|
||||||
|
mangabaka_name=new_name,
|
||||||
|
image_url=new_image,
|
||||||
|
)
|
||||||
|
return jsonify({"title": title, "entry": entry})
|
||||||
|
|
||||||
|
@app.post("/api/matches/delete")
|
||||||
|
def api_delete():
|
||||||
|
body = request.get_json(silent=True) or {}
|
||||||
|
title = (body.get("title") or "").strip()
|
||||||
|
if not title:
|
||||||
|
return Response("title is required", status=400)
|
||||||
|
removed = cache.remove(title)
|
||||||
|
return jsonify({"removed": removed, "title": title})
|
||||||
|
|
||||||
|
@app.post("/api/build")
|
||||||
|
def api_build():
|
||||||
|
if self._mover is None:
|
||||||
|
return Response("no mover configured", status=503)
|
||||||
|
if not self._build_lock.acquire(blocking=False):
|
||||||
|
return Response("build already running", status=409)
|
||||||
|
try:
|
||||||
|
result = self._mover.build_matches_only()
|
||||||
|
except Exception as exc:
|
||||||
|
return Response(f"build failed: {exc}", status=500)
|
||||||
|
finally:
|
||||||
|
self._build_lock.release()
|
||||||
|
return jsonify(result)
|
||||||
|
|
||||||
|
@app.post("/api/move")
def api_move():
    """Run a full move through the mover and return ``{"results": ...}``.

    Responds 503 when no mover is configured, 409 while another request
    holds the move lock, and 500 (carrying the exception text) when the
    move itself raises.
    """
    if self._mover is None:
        return Response("no mover configured", status=503)

    # Non-blocking acquire: a concurrent request is rejected, not queued.
    have_lock = self._move_lock.acquire(blocking=False)
    if not have_lock:
        return Response("move already running", status=409)

    try:
        moved = self._mover.process_all()
    except Exception as exc:
        failure = Response(f"move failed: {exc}", status=500)
        return failure
    finally:
        self._move_lock.release()

    return jsonify({"results": moved})
|
||||||
|
|
||||||
|
@app.get("/perf")
def perf_page() -> Response:
    """Serve the static performance dashboard page (``_PERF_HTML``)."""
    # The value already carries a charset, so it must go in ``content_type``.
    # ``mimetype`` is for the bare type only: for text types the response
    # class appends its own charset, which would duplicate the parameter.
    return Response(_PERF_HTML, content_type="text/html; charset=utf-8")
|
||||||
|
|
||||||
|
@app.get("/api/perf")
def api_perf():
    """Return the stored performance runs as JSON.

    Without a configured profiler the response is ``{"runs": []}``.
    """
    payload = {"runs": []} if self._perf is None else self._perf.all()
    return jsonify(payload)
|
||||||
@@ -0,0 +1,242 @@
|
|||||||
|
"""
|
||||||
|
perf_stats.py
|
||||||
|
=============
|
||||||
|
|
||||||
|
Lightweight performance profiler for the Suwayomi -> Kavita move pipeline.
|
||||||
|
|
||||||
|
It records, per move run, how long each step of every chapter takes plus
|
||||||
|
per-series and per-run totals, so a slowdown can be traced to the step
|
||||||
|
responsible (cover download, image-dimension probing, CBZ packing, …).
|
||||||
|
|
||||||
|
Data model (one entry per run, newest first)::
|
||||||
|
|
||||||
|
{
|
||||||
|
"runs": [
|
||||||
|
{
|
||||||
|
"startedAt": 1700000000, # unix seconds
|
||||||
|
"finishedAt": 1700000123,
|
||||||
|
"totalSeconds": 123.4, # wall clock of the whole run
|
||||||
|
"seriesCount": 2,
|
||||||
|
"chapterCount": 31,
|
||||||
|
"stepTotals": { # summed over ALL chapters
|
||||||
|
"cover": 41.2, "image_dimensions": 55.8, "pack_cbz": 18.1, ...
|
||||||
|
},
|
||||||
|
"seriesStepTotals": { # summed over ALL series
|
||||||
|
"fetch_metadata": 2.4, "person_sync": 9.7
|
||||||
|
},
|
||||||
|
"series": [
|
||||||
|
{
|
||||||
|
"title": "Call of the Night",
|
||||||
|
"totalSeconds": 60.2,
|
||||||
|
"chapterCount": 20,
|
||||||
|
"steps": {"fetch_metadata": 1.2, "person_sync": 3.4},
|
||||||
|
"chapters": [
|
||||||
|
{"chapter": "1", "ok": true, "totalSeconds": 11.5,
|
||||||
|
"steps": {"cover": 1.8, "image_dimensions": 4.2, ...}}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
Usage from the mover::
|
||||||
|
|
||||||
|
perf = PerfStats(path) # path=None -> disabled (no-op)
|
||||||
|
run = perf.begin_run()
|
||||||
|
series = run.begin_series("Title")
|
||||||
|
with series.measure("fetch_metadata"):
|
||||||
|
...
|
||||||
|
chap = series.begin_chapter("1")
|
||||||
|
with chap.measure("pack_cbz"):
|
||||||
|
...
|
||||||
|
chap.finish(ok=True)
|
||||||
|
series.finish()
|
||||||
|
run.finish() # persists the run to disk
|
||||||
|
|
||||||
|
When ``path`` is None every recorder is a no-op and nothing is written,
|
||||||
|
so the profiler can be left permanently wired in with negligible cost.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# Keep the JSON small: only the most recent runs are retained on disk.
|
||||||
|
_MAX_RUNS = 30
|
||||||
|
|
||||||
|
|
||||||
|
class _StepTimer:
|
||||||
|
"""
|
||||||
|
Base recorder: accumulates ``{step_name: seconds}`` and tracks its own
|
||||||
|
wall-clock lifetime. ``enabled=False`` turns every method into a no-op.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, enabled: bool = True):
|
||||||
|
self.steps: dict[str, float] = {}
|
||||||
|
self._enabled = enabled
|
||||||
|
self._t0 = time.monotonic()
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def measure(self, name: str):
|
||||||
|
"""Context manager timing a named step (accumulates on repeat use)."""
|
||||||
|
if not self._enabled:
|
||||||
|
yield
|
||||||
|
return
|
||||||
|
start = time.monotonic()
|
||||||
|
try:
|
||||||
|
yield
|
||||||
|
finally:
|
||||||
|
self.steps[name] = round(
|
||||||
|
self.steps.get(name, 0.0) + (time.monotonic() - start), 4)
|
||||||
|
|
||||||
|
def elapsed(self) -> float:
|
||||||
|
return round(time.monotonic() - self._t0, 4)
|
||||||
|
|
||||||
|
|
||||||
|
class ChapterRecorder(_StepTimer):
    """Step timer for a single chapter; reports into its parent series."""

    def __init__(self, series: "SeriesRecorder", chapter: str,
                 enabled: bool = True):
        super().__init__(enabled)
        self._series = series
        self._chapter = chapter
        self._ok = True

    def finish(self, *, ok: bool = True) -> None:
        """Store the outcome and, when enabled, append the chapter record
        (chapter id, ok flag, lifetime, step timings) to the parent series."""
        self._ok = ok
        if self._enabled:
            record = {
                "chapter": self._chapter,
                "ok": ok,
                "totalSeconds": self.elapsed(),
                "steps": self.steps,
            }
            self._series._chapters.append(record)
|
||||||
|
|
||||||
|
|
||||||
|
class SeriesRecorder(_StepTimer):
    """Step timer for one series that also gathers its chapter records."""

    def __init__(self, run: "RunRecorder", title: str, enabled: bool = True):
        super().__init__(enabled)
        self._run = run
        self._title = title
        self._chapters: list[dict] = []

    def begin_chapter(self, chapter: str) -> ChapterRecorder:
        """Create a chapter recorder that inherits this recorder's enabled flag."""
        return ChapterRecorder(self, chapter, enabled=self._enabled)

    def finish(self) -> None:
        """When enabled, append this series' summary (title, lifetime,
        chapter count, step timings, chapter records) to the parent run."""
        if self._enabled:
            summary = {
                "title": self._title,
                "totalSeconds": self.elapsed(),
                "chapterCount": len(self._chapters),
                "steps": self.steps,
                "chapters": self._chapters,
            }
            self._run._series.append(summary)
|
||||||
|
|
||||||
|
|
||||||
|
class RunRecorder:
    """Top-level recorder for one full move run.

    Collects the series summaries appended by ``SeriesRecorder.finish()``
    and, on ``finish()``, aggregates them into a single run dict that is
    handed to the owning ``PerfStats`` for persistence.
    """

    def __init__(self, stats: "PerfStats", enabled: bool = True):
        self._stats = stats
        self._enabled = enabled
        self._series: list[dict] = []
        # Wall-clock start for the report; monotonic start for the duration.
        self._started = time.time()
        self._t0 = time.monotonic()

    def begin_series(self, title: str) -> SeriesRecorder:
        """Create a series recorder that inherits this run's enabled flag."""
        return SeriesRecorder(self, title, enabled=self._enabled)

    def finish(self) -> dict | None:
        """Aggregate the run and persist it.

        Returns the run dict, or None when the recorder is disabled.
        Persistence is best-effort: an ``OSError`` while writing the stats
        file is reported on stdout and swallowed, so profiling can never
        replace the result (or the original exception) of the operation
        being measured when ``finish()`` is called from a ``finally`` block.
        """
        if not self._enabled:
            return None

        step_totals: dict[str, float] = {}
        series_step_totals: dict[str, float] = {}
        chapter_count = 0
        for s in self._series:
            for step, secs in s["steps"].items():
                series_step_totals[step] = round(
                    series_step_totals.get(step, 0.0) + secs, 4)
            for ch in s["chapters"]:
                chapter_count += 1
                for step, secs in ch["steps"].items():
                    step_totals[step] = round(
                        step_totals.get(step, 0.0) + secs, 4)

        run = {
            "startedAt": round(self._started),
            "finishedAt": round(time.time()),
            "totalSeconds": round(time.monotonic() - self._t0, 4),
            "seriesCount": len(self._series),
            "chapterCount": chapter_count,
            "stepTotals": step_totals,
            "seriesStepTotals": series_step_totals,
            "series": self._series,
        }
        try:
            self._stats._append_run(run)
        except OSError as exc:
            # Losing one profile entry is preferable to failing the run.
            print(f"[PerfStats] could not persist run: {exc}")
        return run
|
||||||
|
|
||||||
|
|
||||||
|
class PerfStats:
    """
    Profiler facade + JSON persistence.

    Parameters
    ----------
    path : Destination JSON file. None disables the profiler entirely
           (every recorder becomes a no-op and nothing is written).
    """

    def __init__(self, path=None):
        self._path = Path(path) if path else None
        # Serialises the read-modify-write cycle in _append_run.
        self._lock = threading.Lock()

    @property
    def enabled(self) -> bool:
        """True when a destination path was configured."""
        return self._path is not None

    def begin_run(self) -> RunRecorder:
        """Start recording a new run; a no-op recorder when disabled."""
        return RunRecorder(self, enabled=self.enabled)

    # ------------------------------------------------------------------
    # Read / write
    # ------------------------------------------------------------------
    def all(self) -> dict:
        """Returns the persisted runs ({"runs": [...]}); newest first.

        A missing, unreadable, undecodable or structurally unexpected file
        yields ``{"runs": []}`` instead of raising.
        """
        if not self._path or not self._path.is_file():
            return {"runs": []}
        try:
            with self._path.open("r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError
            # (file with invalid UTF-8); a damaged stats file must not break
            # the endpoint or block every future append.
            return {"runs": []}
        if not isinstance(data, dict) or not isinstance(data.get("runs"), list):
            return {"runs": []}
        return data

    def _append_run(self, run: dict) -> None:
        """Prepend ``run`` to the stored history and rewrite the file.

        The history is capped at ``_MAX_RUNS`` entries. The file is written
        to a sibling ``.tmp`` path first and then moved over the target.
        Does nothing when the profiler is disabled.
        """
        if not self._path:
            return
        with self._lock:
            data = self.all()
            runs = data["runs"]
            runs.insert(0, run)                      # newest first
            del runs[_MAX_RUNS:]                     # cap history
            self._path.parent.mkdir(parents=True, exist_ok=True)
            tmp = self._path.with_suffix(self._path.suffix + ".tmp")
            with tmp.open("w", encoding="utf-8") as f:
                json.dump({"runs": runs}, f, ensure_ascii=False, indent=2)
            tmp.replace(self._path)
|
||||||
@@ -29,7 +29,6 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import queue
|
import queue
|
||||||
import threading
|
import threading
|
||||||
import time
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -43,26 +43,93 @@ Dependencies
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
import sys
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
import zipfile
|
import zipfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from ComicInfoBuilder import ComicInfoBuilder, _pick_cover_url
|
# Shared modules live one level up (src/); needed when a module in this
|
||||||
|
# folder is run directly as a script (the entry points set the path).
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
||||||
|
|
||||||
|
from ComicInfoBuilder import (ComicInfoBuilder, _pick_thumbnail_url,
|
||||||
|
_SEARCH_TYPES, _natural_key)
|
||||||
from MangadexVolumeResolver import MangaDexVolumeResolver
|
from MangadexVolumeResolver import MangaDexVolumeResolver
|
||||||
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
||||||
from MALResolver import MALResolver
|
from MALResolver import MALResolver
|
||||||
from AniListResolver import AniListResolver
|
from AniListResolver import AniListResolver
|
||||||
|
from KavitaClient import KavitaClient
|
||||||
from KavitaPersonUpdater import KavitaPersonUpdater
|
from KavitaPersonUpdater import KavitaPersonUpdater
|
||||||
from MatchesCache import MatchesCache
|
from MatchesCache import MatchesCache
|
||||||
|
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||||||
|
from CoverCache import CoverCache, _IMAGE_EXTS
|
||||||
|
from PerfStats import PerfStats
|
||||||
|
|
||||||
|
|
||||||
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
|
|
||||||
_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
|
_CHAPTER_RE = re.compile(r'[Cc]hapter\s+(\d+(?:\.\d+)?)')
|
||||||
|
|
||||||
|
# JSON file written into each Kavita series folder, listing every chapter
|
||||||
|
# already moved. Avoids opening CBZ archives to determine what is present.
|
||||||
|
# Absence is interpreted as "folder empty" (per spec), not "scan the folder".
|
||||||
|
_CHAPTER_INDEX_FILENAME = "chapter_index.json"
|
||||||
|
|
||||||
|
|
||||||
|
def _normalise_volume_value(value):
|
||||||
|
"""
|
||||||
|
Normalises a volume identifier for storage in chapter_index.json.
|
||||||
|
|
||||||
|
Returns int when the value is a whole number, float for fractional
|
||||||
|
volumes, None when missing. Mirrors how the user wants volumes
|
||||||
|
rendered (``"volume": 1`` rather than ``"volume": "1"``).
|
||||||
|
"""
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
text = str(value).strip()
|
||||||
|
if not text:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
f = float(text)
|
||||||
|
return int(f) if f.is_integer() else f
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def _load_chapter_index(dest_series: Path) -> dict:
    """
    Reads chapter_index.json from a Kavita series folder.

    Returns ``{"chapter": {}}`` when the file is missing, unreadable,
    not valid UTF-8 / JSON, or does not contain a ``"chapter"`` object —
    per the project spec, absence means "no chapters are present yet".
    """
    path = dest_series / _CHAPTER_INDEX_FILENAME
    if not path.is_file():
        return {"chapter": {}}
    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so
        # a file with invalid bytes is also treated as "unreadable" instead
        # of aborting the whole series.
        print(f"  [warn] chapter_index unreadable ({path.name}): {exc} — "
              f"treating folder as empty")
        return {"chapter": {}}
    if not isinstance(data, dict) or not isinstance(data.get("chapter"), dict):
        return {"chapter": {}}
    return data
|
||||||
|
|
||||||
|
|
||||||
|
def _save_chapter_index(dest_series: Path, index: dict) -> None:
    """Writes chapter_index.json atomically into a Kavita series folder.

    The data is written to a sibling ``.tmp`` file which is then moved over
    the real index.  If writing or moving fails, the temporary file is
    removed again (best effort) and the original error is re-raised, so no
    stray ``.tmp`` file is left behind in the library folder.
    """
    path = dest_series / _CHAPTER_INDEX_FILENAME
    tmp = path.with_suffix(path.suffix + ".tmp")
    try:
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(index, f, ensure_ascii=False, indent=2)
        tmp.replace(path)
    except BaseException:
        try:
            tmp.unlink()
        except OSError:
            pass  # nothing to clean up, or cleanup itself is impossible
        raise
|
||||||
|
|
||||||
# Parenthetical source labels that Suwayomi appends to series names.
|
# Parenthetical source labels that Suwayomi appends to series names.
|
||||||
# These are not part of the actual title and confuse MangaBaka searches.
|
# These are not part of the actual title and confuse MangaBaka searches.
|
||||||
_SOURCE_LABEL_RE = re.compile(
|
_SOURCE_LABEL_RE = re.compile(
|
||||||
@@ -75,11 +142,6 @@ _SOURCE_LABEL_RE = re.compile(
|
|||||||
_WIN_ILLEGAL_RE = re.compile(r'[\\/*?"<>|]')
|
_WIN_ILLEGAL_RE = re.compile(r'[\\/*?"<>|]')
|
||||||
|
|
||||||
|
|
||||||
def _natural_key(name: str) -> list:
|
|
||||||
return [int(p) if p.isdigit() else p.lower()
|
|
||||||
for p in re.split(r"(\d+)", name)]
|
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_dirname(name: str) -> str:
|
def _sanitize_dirname(name: str) -> str:
|
||||||
"""
|
"""
|
||||||
Makes a string safe to use as a Windows (or SMB) directory name.
|
Makes a string safe to use as a Windows (or SMB) directory name.
|
||||||
@@ -134,34 +196,6 @@ def _clean_suwayomi_title(title: str) -> str:
|
|||||||
return _SOURCE_LABEL_RE.sub("", title).strip()
|
return _SOURCE_LABEL_RE.sub("", title).strip()
|
||||||
|
|
||||||
|
|
||||||
def _mal_id_from_metadata(md: dict) -> "int | None":
|
|
||||||
"""Extracts the MAL ID from a MangaBaka series dict's source map."""
|
|
||||||
for raw_key, info in (md.get("source") or {}).items():
|
|
||||||
if re.sub(r"[^a-z0-9]", "", raw_key.lower()) in ("myanimelist", "mal"):
|
|
||||||
if isinstance(info, dict):
|
|
||||||
mal_id = info.get("id")
|
|
||||||
if mal_id is not None:
|
|
||||||
try:
|
|
||||||
return int(mal_id)
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
pass
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _al_id_from_metadata(md: dict) -> "int | None":
|
|
||||||
"""Extracts the AniList ID from a MangaBaka series dict's source map."""
|
|
||||||
for raw_key, info in (md.get("source") or {}).items():
|
|
||||||
if re.sub(r"[^a-z0-9]", "", raw_key.lower()) == "anilist":
|
|
||||||
if isinstance(info, dict):
|
|
||||||
al_id = info.get("id")
|
|
||||||
if al_id is not None:
|
|
||||||
try:
|
|
||||||
return int(al_id)
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
pass
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _chapter_image_size(chapter_dir: Path) -> int:
|
def _chapter_image_size(chapter_dir: Path) -> int:
|
||||||
"""Returns the total file size of all images in a chapter folder."""
|
"""Returns the total file size of all images in a chapter folder."""
|
||||||
return sum(
|
return sum(
|
||||||
@@ -278,6 +312,10 @@ class SuwayomiMover:
|
|||||||
language : ComicInfo LanguageISO and SeriesSort language ("en").
|
language : ComicInfo LanguageISO and SeriesSort language ("en").
|
||||||
request_timeout : HTTP timeout in seconds for all API / image requests.
|
request_timeout : HTTP timeout in seconds for all API / image requests.
|
||||||
delete_source : Remove the source chapter folder after successful pack.
|
delete_source : Remove the source chapter folder after successful pack.
|
||||||
|
cover_cache_dir : Directory for the persistent cover cache. None ->
|
||||||
|
temporary cache, deleted at process exit.
|
||||||
|
perf_stats : Optional PerfStats instance for per-step timing. None
|
||||||
|
(default) disables profiling.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@@ -290,7 +328,9 @@ class SuwayomiMover:
|
|||||||
request_timeout: int = 30,
|
request_timeout: int = 30,
|
||||||
delete_source: bool = True,
|
delete_source: bool = True,
|
||||||
matches_cache: "MatchesCache | None" = None,
|
matches_cache: "MatchesCache | None" = None,
|
||||||
api_base_url: str = "https://api.mangabaka.dev/v1"):
|
api_base_url: str = "https://api.mangabaka.dev/v1",
|
||||||
|
cover_cache_dir=None,
|
||||||
|
perf_stats: "PerfStats | None" = None):
|
||||||
self._src = Path(suwayomi_path)
|
self._src = Path(suwayomi_path)
|
||||||
self._dst = Path(kavita_path)
|
self._dst = Path(kavita_path)
|
||||||
self._language = language
|
self._language = language
|
||||||
@@ -298,11 +338,14 @@ class SuwayomiMover:
|
|||||||
self._delete_source = delete_source
|
self._delete_source = delete_source
|
||||||
self._matches_cache = matches_cache
|
self._matches_cache = matches_cache
|
||||||
self._api_base_url = api_base_url.rstrip("/")
|
self._api_base_url = api_base_url.rstrip("/")
|
||||||
|
self._perf = perf_stats or PerfStats(None)
|
||||||
|
|
||||||
# Shared HTTP session and resolvers — reused across all series/chapters
|
# Shared HTTP session and resolvers — reused across all series/chapters
|
||||||
# to maximise cache hits and minimise API round-trips.
|
# to maximise cache hits and minimise API round-trips.
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
session.headers.setdefault("User-Agent", "SuwayomiMover/1.0")
|
session.headers.setdefault("User-Agent", "SuwayomiMover/1.0")
|
||||||
|
# Throttle every call to api.mangabaka.dev (>=1s gap + retry on 429).
|
||||||
|
_apply_mangabaka_rate_limit(session)
|
||||||
self._session = session
|
self._session = session
|
||||||
|
|
||||||
self._mal = MALResolver(request_timeout=request_timeout)
|
self._mal = MALResolver(request_timeout=request_timeout)
|
||||||
@@ -311,14 +354,18 @@ class SuwayomiMover:
|
|||||||
request_timeout=request_timeout, session=session)
|
request_timeout=request_timeout, session=session)
|
||||||
self._works_resolver = MangaBakaWorksResolver(
|
self._works_resolver = MangaBakaWorksResolver(
|
||||||
request_timeout=request_timeout, session=session)
|
request_timeout=request_timeout, session=session)
|
||||||
|
self._cover_cache = CoverCache(
|
||||||
|
cover_cache_dir, session=session, request_timeout=request_timeout)
|
||||||
|
|
||||||
self._person_updater: "KavitaPersonUpdater | None" = None
|
self._person_updater: "KavitaPersonUpdater | None" = None
|
||||||
if kavita_base_url and kavita_api_key:
|
if kavita_base_url and kavita_api_key:
|
||||||
self._person_updater = KavitaPersonUpdater(
|
kavita_client = KavitaClient(
|
||||||
kavita_base_url, kavita_api_key,
|
kavita_base_url, kavita_api_key,
|
||||||
mal_resolver=self._mal,
|
|
||||||
al_resolver=self._al,
|
|
||||||
request_timeout=request_timeout)
|
request_timeout=request_timeout)
|
||||||
|
self._person_updater = KavitaPersonUpdater(
|
||||||
|
kavita_client,
|
||||||
|
mal_resolver=self._mal,
|
||||||
|
al_resolver=self._al)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Public API
|
# Public API
|
||||||
@@ -334,6 +381,8 @@ class SuwayomiMover:
|
|||||||
dict from _process_series_dir.
|
dict from _process_series_dir.
|
||||||
"""
|
"""
|
||||||
results: dict = {}
|
results: dict = {}
|
||||||
|
run = self._perf.begin_run()
|
||||||
|
try:
|
||||||
for source_dir in sorted(self._src.iterdir()):
|
for source_dir in sorted(self._src.iterdir()):
|
||||||
if not source_dir.is_dir():
|
if not source_dir.is_dir():
|
||||||
continue
|
continue
|
||||||
@@ -342,7 +391,9 @@ class SuwayomiMover:
|
|||||||
continue
|
continue
|
||||||
title = manga_dir.name
|
title = manga_dir.name
|
||||||
print(f"[SuwayomiMover] {title}")
|
print(f"[SuwayomiMover] {title}")
|
||||||
results[title] = self._process_series_dir(manga_dir)
|
results[title] = self._process_series_dir(manga_dir, run)
|
||||||
|
finally:
|
||||||
|
run.finish()
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def process_series(self, manga_title: str) -> dict:
|
def process_series(self, manga_title: str) -> dict:
|
||||||
@@ -358,10 +409,26 @@ class SuwayomiMover:
|
|||||||
continue
|
continue
|
||||||
candidate = source_dir / manga_title
|
candidate = source_dir / manga_title
|
||||||
if candidate.is_dir():
|
if candidate.is_dir():
|
||||||
return self._process_series_dir(candidate)
|
run = self._perf.begin_run()
|
||||||
|
try:
|
||||||
|
return self._process_series_dir(candidate, run)
|
||||||
|
finally:
|
||||||
|
run.finish()
|
||||||
raise FileNotFoundError(
|
raise FileNotFoundError(
|
||||||
f"No Suwayomi directory found for '{manga_title}' under {self._src}")
|
f"No Suwayomi directory found for '{manga_title}' under {self._src}")
|
||||||
|
|
||||||
|
def fetch_series(self, series_id) -> "dict | None":
    """
    Fetches a MangaBaka series by id via the shared (rate-limited) session.

    Returns the inner `data` dict, or None if the id is missing/blank, the
    API answers 404, or the response carries no `data`.

    Raises whatever the session / ``raise_for_status`` raise for transport
    errors and for non-404 error statuses.
    """
    from urllib.parse import quote

    sid = "" if series_id is None else str(series_id).strip()
    if not sid:
        return None
    # The id can originate from a request body: escape it fully so it stays
    # a single path segment and cannot redirect the call to another route.
    url = f"{self._api_base_url}/series/{quote(sid, safe='')}"
    resp = self._session.get(url, timeout=self._timeout)
    if resp.status_code == 404:
        # "Not found" is an expected outcome for callers, not a failure.
        return None
    resp.raise_for_status()
    payload = resp.json()
    return payload.get("data") if isinstance(payload, dict) else None
|
||||||
|
|
||||||
def build_matches_only(self) -> dict:
|
def build_matches_only(self) -> dict:
|
||||||
"""
|
"""
|
||||||
Walks every series under the Suwayomi root and resolves each one
|
Walks every series under the Suwayomi root and resolves each one
|
||||||
@@ -410,7 +477,8 @@ class SuwayomiMover:
|
|||||||
try:
|
try:
|
||||||
resp = self._session.get(
|
resp = self._session.get(
|
||||||
search_url,
|
search_url,
|
||||||
params={"q": builder_title, "page": 1, "limit": 1},
|
params={"q": builder_title, "type": _SEARCH_TYPES,
|
||||||
|
"page": 1, "limit": 1},
|
||||||
timeout=self._timeout)
|
timeout=self._timeout)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json().get("data") or []
|
data = resp.json().get("data") or []
|
||||||
@@ -422,7 +490,7 @@ class SuwayomiMover:
|
|||||||
builder_title,
|
builder_title,
|
||||||
mangabaka_id=series.get("id"),
|
mangabaka_id=series.get("id"),
|
||||||
mangabaka_name=series.get("title") or "",
|
mangabaka_name=series.get("title") or "",
|
||||||
image_url=_pick_cover_url(series.get("cover")),
|
image_url=_pick_thumbnail_url(series.get("cover")),
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
print(f" [warn] search failed for {builder_title!r}: {exc}")
|
print(f" [warn] search failed for {builder_title!r}: {exc}")
|
||||||
@@ -432,8 +500,9 @@ class SuwayomiMover:
|
|||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Internal: series
|
# Internal: series
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
def _process_series_dir(self, manga_dir: Path) -> dict:
|
def _process_series_dir(self, manga_dir: Path, run=None) -> dict:
|
||||||
manga_title = manga_dir.name
|
manga_title = manga_dir.name
|
||||||
|
series_rec = (run or self._perf.begin_run()).begin_series(manga_title)
|
||||||
|
|
||||||
chapter_dirs = sorted(
|
chapter_dirs = sorted(
|
||||||
(d for d in manga_dir.iterdir() if d.is_dir()),
|
(d for d in manga_dir.iterdir() if d.is_dir()),
|
||||||
@@ -477,12 +546,14 @@ class SuwayomiMover:
|
|||||||
mal_resolver=self._mal,
|
mal_resolver=self._mal,
|
||||||
al_resolver=self._al,
|
al_resolver=self._al,
|
||||||
matches_cache=self._matches_cache,
|
matches_cache=self._matches_cache,
|
||||||
|
cover_cache=self._cover_cache,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Fetch MangaBaka metadata now to get the canonical title and MAL ID.
|
# Fetch MangaBaka metadata now to get the canonical title and MAL ID.
|
||||||
md: "dict | None" = None
|
md: "dict | None" = None
|
||||||
mangabaka_title = manga_title
|
mangabaka_title = manga_title
|
||||||
try:
|
try:
|
||||||
|
with series_rec.measure("fetch_metadata"):
|
||||||
md = builder.fetch_metadata()
|
md = builder.fetch_metadata()
|
||||||
mangabaka_title = md.get("title") or manga_title
|
mangabaka_title = md.get("title") or manga_title
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
@@ -493,24 +564,50 @@ class SuwayomiMover:
|
|||||||
dest_series = self._dst / _sanitize_dirname(mangabaka_title)
|
dest_series = self._dst / _sanitize_dirname(mangabaka_title)
|
||||||
dest_series.mkdir(parents=True, exist_ok=True)
|
dest_series.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Skip chapters that have already been moved to Kavita. The index
|
||||||
|
# file in the destination folder is the authoritative source — we
|
||||||
|
# never open CBZ archives or stat them individually.
|
||||||
|
chapter_index = _load_chapter_index(dest_series)
|
||||||
|
already_moved = chapter_index["chapter"]
|
||||||
|
skipped: list[tuple[Path, str]] = []
|
||||||
|
pending: list[tuple[Path, dict, str]] = []
|
||||||
|
for item in chapter_items:
|
||||||
|
chapter_dir, _fields, chapter_num = item
|
||||||
|
if chapter_num in already_moved:
|
||||||
|
skipped.append((chapter_dir, chapter_num))
|
||||||
|
else:
|
||||||
|
pending.append(item)
|
||||||
|
|
||||||
|
for chapter_dir, chapter_num in skipped:
|
||||||
|
print(f" Chapter {chapter_num}: skip (already in Kavita)")
|
||||||
|
if self._delete_source:
|
||||||
|
shutil.rmtree(chapter_dir, ignore_errors=True)
|
||||||
|
|
||||||
chapter_results: list[dict] = []
|
chapter_results: list[dict] = []
|
||||||
for chapter_dir, _fields, chapter_num in chapter_items:
|
for chapter_dir, _fields, chapter_num in pending:
|
||||||
result = self._process_chapter(
|
result = self._process_chapter(
|
||||||
builder, chapter_num, chapter_dir, dest_series)
|
builder, chapter_num, chapter_dir, dest_series, series_rec)
|
||||||
chapter_results.append(result)
|
chapter_results.append(result)
|
||||||
status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
|
status = "ok" if result["ok"] else f"ERROR: {result.get('error')}"
|
||||||
print(f" Chapter {chapter_num}: {status}")
|
print(f" Chapter {chapter_num}: {status}")
|
||||||
|
if result["ok"]:
|
||||||
|
already_moved[chapter_num] = {
|
||||||
|
"volume": _normalise_volume_value(result.get("volume")),
|
||||||
|
"archiveName": Path(result["cbz"]).name,
|
||||||
|
}
|
||||||
|
_save_chapter_index(dest_series, chapter_index)
|
||||||
|
|
||||||
# Sync Kavita persons once per series.
|
# Sync Kavita persons once per series.
|
||||||
# Both MAL and AniList IDs come from MangaBaka's source map;
|
# Both MAL and AniList IDs come from MangaBaka's source map;
|
||||||
# AniList is used as fallback when MAL returns no characters/staff.
|
# AniList is used as fallback when MAL returns no characters/staff.
|
||||||
person_result: "dict | None" = None
|
person_result: "dict | None" = None
|
||||||
if self._person_updater:
|
if self._person_updater:
|
||||||
mal_id = (_mal_id_from_metadata(md) if md else None
|
mal_id = ((ComicInfoBuilder._mal_id_from_source(md) if md else None)
|
||||||
or self._mal.find_mal_id(builder_title))
|
or self._mal.find_mal_id(builder_title))
|
||||||
al_id = _al_id_from_metadata(md) if md else None
|
al_id = ComicInfoBuilder._al_id_from_source(md) if md else None
|
||||||
if mal_id or al_id:
|
if mal_id or al_id:
|
||||||
try:
|
try:
|
||||||
|
with series_rec.measure("person_sync"):
|
||||||
person_result = self._person_updater.update_for_manga(
|
person_result = self._person_updater.update_for_manga(
|
||||||
mal_id, al_manga_id=al_id)
|
mal_id, al_manga_id=al_id)
|
||||||
print(f" Persons: chars={person_result['characters'].get('updated')} "
|
print(f" Persons: chars={person_result['characters'].get('updated')} "
|
||||||
@@ -519,6 +616,7 @@ class SuwayomiMover:
|
|||||||
person_result = {"error": str(exc)}
|
person_result = {"error": str(exc)}
|
||||||
print(f" Persons: ERROR {exc}")
|
print(f" Persons: ERROR {exc}")
|
||||||
|
|
||||||
|
series_rec.finish()
|
||||||
return {"chapters": chapter_results, "persons": person_result}
|
return {"chapters": chapter_results, "persons": person_result}
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
@@ -528,7 +626,8 @@ class SuwayomiMover:
|
|||||||
builder: ComicInfoBuilder,
|
builder: ComicInfoBuilder,
|
||||||
chapter_num: str,
|
chapter_num: str,
|
||||||
chapter_dir: Path,
|
chapter_dir: Path,
|
||||||
dest_series: Path) -> dict:
|
dest_series: Path,
|
||||||
|
series_rec=None) -> dict:
|
||||||
"""
|
"""
|
||||||
Generates ComicInfo.xml for one chapter, packs it to CBZ, and
|
Generates ComicInfo.xml for one chapter, packs it to CBZ, and
|
||||||
optionally removes the source folder.
|
optionally removes the source folder.
|
||||||
@@ -538,38 +637,76 @@ class SuwayomiMover:
|
|||||||
<Pages> element correctly points to the front cover).
|
<Pages> element correctly points to the front cover).
|
||||||
"""
|
"""
|
||||||
cbz_path = dest_series / f"{chapter_dir.name}.cbz"
|
cbz_path = dest_series / f"{chapter_dir.name}.cbz"
|
||||||
|
chap_rec = (series_rec or self._perf.begin_run().begin_series("")
|
||||||
|
).begin_chapter(chapter_num)
|
||||||
|
# add_pages_from_folder records its own sub-steps on this recorder.
|
||||||
|
builder.perf = chap_rec
|
||||||
|
ok = False
|
||||||
try:
|
try:
|
||||||
builder.chapter = chapter_num
|
builder.chapter = chapter_num
|
||||||
builder.add_pages_from_folder(chapter_dir, cover_filename="000")
|
builder.add_pages_from_folder(chapter_dir, cover_filename="000")
|
||||||
|
# Resolving the volume here piggy-backs on caches already warmed
|
||||||
|
# by add_pages_from_folder, so it's effectively free. Used by
|
||||||
|
# the chapter index in the Kavita destination folder.
|
||||||
|
try:
|
||||||
|
with chap_rec.measure("volume"):
|
||||||
|
volume = builder._determine_volume()
|
||||||
|
except Exception:
|
||||||
|
volume = None
|
||||||
|
with chap_rec.measure("save_xml"):
|
||||||
builder.save_xml(chapter_dir)
|
builder.save_xml(chapter_dir)
|
||||||
|
with chap_rec.measure("pack_cbz"):
|
||||||
_pack_to_cbz(chapter_dir, cbz_path)
|
_pack_to_cbz(chapter_dir, cbz_path)
|
||||||
if self._delete_source:
|
if self._delete_source:
|
||||||
|
with chap_rec.measure("delete_source"):
|
||||||
shutil.rmtree(chapter_dir)
|
shutil.rmtree(chapter_dir)
|
||||||
return {"chapter": chapter_num, "cbz": str(cbz_path), "ok": True}
|
ok = True
|
||||||
|
return {"chapter": chapter_num, "cbz": str(cbz_path),
|
||||||
|
"ok": True, "volume": volume}
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
return {"chapter": chapter_num, "cbz": str(cbz_path),
|
return {"chapter": chapter_num, "cbz": str(cbz_path),
|
||||||
"ok": False, "error": str(exc)}
|
"ok": False, "error": str(exc)}
|
||||||
|
finally:
|
||||||
|
builder.perf = None
|
||||||
|
chap_rec.finish(ok=ok)
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# Usage example
|
# Usage example
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
SUWAYOMI_PATH = r"\\192.168.2.2\root\Temp\managdl\mangas"
|
import os
|
||||||
|
|
||||||
|
# Local (no-Docker) smoke test. Adjust paths to your environment.
|
||||||
|
# Set the KAVITA_API_KEY env var — never commit API keys to the repo.
|
||||||
|
SUWAYOMI_PATH = r"M:\config\downloads\mangas"
|
||||||
KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
|
KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test"
|
||||||
KAVITA_URL = "http://192.168.2.2:5000"
|
KAVITA_URL = "http://192.168.2.2:5000"
|
||||||
KAVITA_KEY = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA"
|
KAVITA_KEY = os.environ.get("KAVITA_API_KEY", "")
|
||||||
|
|
||||||
|
# matches.json lives next to this script during local testing.
|
||||||
|
MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json"
|
||||||
|
matches_cache = MatchesCache(MATCHES_PATH)
|
||||||
|
|
||||||
mover = SuwayomiMover(
|
mover = SuwayomiMover(
|
||||||
SUWAYOMI_PATH,
|
SUWAYOMI_PATH,
|
||||||
KAVITA_PATH,
|
KAVITA_PATH,
|
||||||
kavita_base_url=KAVITA_URL,
|
kavita_base_url=KAVITA_URL,
|
||||||
kavita_api_key=KAVITA_KEY,
|
kavita_api_key=KAVITA_KEY,
|
||||||
delete_source=False
|
delete_source=False,
|
||||||
|
matches_cache=matches_cache,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Process a single series
|
# ---- Option A: build matches.json only (no moves / no Kavita sync) ----
|
||||||
result = mover.process_series("Yofukashi no Uta")
|
# data = mover.build_matches_only()
|
||||||
|
# matches = data.get("matches", {})
|
||||||
|
# print(f"\n[matches] {len(matches)} entries total — file: {MATCHES_PATH}")
|
||||||
|
# for title, entry in list(matches.items())[:10]:
|
||||||
|
# print(f" {title!r:50s} id={entry.get('mangabakaId')} "
|
||||||
|
# f"name={entry.get('mangabakaName')!r}")
|
||||||
|
|
||||||
|
# ---- Option B: full pipeline for one series (uses the cache too) ----
|
||||||
|
result = mover.process_series("Wistoria - Wand and Sword")
|
||||||
ok = sum(1 for c in result["chapters"] if c["ok"])
|
ok = sum(1 for c in result["chapters"] if c["ok"])
|
||||||
failed = sum(1 for c in result["chapters"] if not c["ok"])
|
failed = sum(1 for c in result["chapters"] if not c["ok"])
|
||||||
print(f"\nDone: {ok} ok, {failed} failed")
|
print(f"\nDone: {ok} ok, {failed} failed")
|
||||||
Reference in New Issue
Block a user