From 79d64d7ed580b023ee4ae8c75674afbf11608925 Mon Sep 17 00:00:00 2001 From: JohannesBOT Date: Tue, 26 May 2026 21:03:37 +0200 Subject: [PATCH] WebApp changes --- main.py | 8 +- src/ComicInfoBuilder.py | 13 +- src/MangaBakaRateLimit.py | 92 ++++++++++++ src/MatchesWebApp.py | 302 +++++++++++++++++++++++++------------- src/SuwayomiMover.py | 54 +++++-- 5 files changed, 353 insertions(+), 116 deletions(-) create mode 100644 src/MangaBakaRateLimit.py diff --git a/main.py b/main.py index 5c89bae..cecb25b 100644 --- a/main.py +++ b/main.py @@ -74,15 +74,15 @@ def _env_bool(name: str, default: bool) -> bool: def main() -> int: - suwayomi_path = _env_str("SUWAYOMI_PATH", "/mnt/suwayomi") + suwayomi_path = _env_str("SUWAYOMI_PATH", r"M:\config\downloads\mangas") kavita_path = _env_str("KAVITA_PATH", "/mnt/kavita") - kavita_url = _env_str("KAVITA_URL", required=True) - kavita_api_key = _env_str("KAVITA_API_KEY", required=True) + kavita_url = _env_str("KAVITA_URL", "http://kavita:5000") + kavita_api_key = _env_str("KAVITA_API_KEY", "") language = _env_str("LANGUAGE", "en") or "en" settle_seconds = _env_int("SETTLE_SECONDS", 600) request_timeout = _env_int("REQUEST_TIMEOUT", 30) delete_source = _env_bool("DELETE_SOURCE", True) - match_path = _env_str("MATCH_PATH", "/config/matches.json") + match_path = _env_str("MATCH_PATH", "matches.json") web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0" web_port = _env_int("WEB_PORT", 8080) diff --git a/src/ComicInfoBuilder.py b/src/ComicInfoBuilder.py index 8fd049f..6f1eb3b 100644 --- a/src/ComicInfoBuilder.py +++ b/src/ComicInfoBuilder.py @@ -49,6 +49,7 @@ from MangaBakaWorksResolver import MangaBakaWorksResolver from MALResolver import MALResolver from AniListResolver import AniListResolver from MatchesCache import MatchesCache +from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit try: from PIL import Image @@ -62,6 +63,12 @@ except ImportError: # -------------------------------------------------------------------------- _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"} +# Series types accepted by the MangaBaka search endpoint. Light/web novels +# are filtered out because this pipeline only handles image-based manga. +# Passed to `requests` as a list so each value becomes its own `&type=...` +# query parameter (MangaBaka's API expects repeated keys, not a CSV list). +_SEARCH_TYPES = ["manga", "manhwa", "manhua"] + _AGE_RATING_MAP = { "safe": "Everyone", "suggestive": "Teen", @@ -184,6 +191,9 @@ class ComicInfoBuilder: self.request_timeout = request_timeout self._session = session or requests.Session() self._session.headers.setdefault("User-Agent", "ComicInfoBuilder/1.0") + # Throttle every call to api.mangabaka.dev (idempotent — safe even + # when the session was already prepared by a parent class). + _apply_mangabaka_rate_limit(self._session) self._volume_resolver = (volume_resolver or MangaDexVolumeResolver( @@ -378,7 +388,8 @@ class ComicInfoBuilder: url = f"{self.api_base_url}/series/search" resp = self._session.get( - url, params={"q": title, "page": 1, "limit": 1}, + url, params={"q": title, "type": _SEARCH_TYPES, + "page": 1, "limit": 1}, timeout=self.request_timeout) resp.raise_for_status() data = resp.json().get("data") or [] diff --git a/src/MangaBakaRateLimit.py b/src/MangaBakaRateLimit.py new file mode 100644 index 0000000..9b34018 --- /dev/null +++ b/src/MangaBakaRateLimit.py @@ -0,0 +1,92 @@ +""" +mangabaka_rate_limit.py +======================= + +Process-wide rate limiter for the MangaBaka API. + +Apply via: + + from MangaBakaRateLimit import apply_to_session + apply_to_session(session) + +This mounts a custom ``requests.adapters.HTTPAdapter`` on the given +``requests.Session`` for the ``api.mangabaka.dev`` host. Every request +going through that adapter is: + + * throttled so that no two requests are dispatched within + ``_MIN_INTERVAL`` seconds of one another, and + * retried on HTTP 429, honouring the ``Retry-After`` header when + present, otherwise exponential backoff capped at ``_MAX_BACKOFF``. + +Throttle state is module-global, so even if several sessions exist in +the same process they share one budget — important because they all hit +the same upstream IP-based limit. +""" + +from __future__ import annotations + +import threading +import time + +from requests.adapters import HTTPAdapter + + +# Tune these if MangaBaka tightens or loosens limits. +_MIN_INTERVAL = 1.1 # seconds between consecutive requests +_MAX_RETRIES = 6 # retries on 429 before giving up +_MAX_BACKOFF = 60.0 # cap on per-attempt backoff sleep + + +# --- shared throttle state -------------------------------------------------- +_state_lock = threading.Lock() +_last_request_time = 0.0 + + +def _wait_for_slot() -> None: + """Block until the next request slot is available, then reserve it.""" + global _last_request_time + while True: + with _state_lock: + now = time.monotonic() + wait = _MIN_INTERVAL - (now - _last_request_time) + if wait <= 0: + _last_request_time = now + return + time.sleep(wait) + + +class _MangaBakaRateLimitAdapter(HTTPAdapter): + def send(self, request, **kwargs): + response = None + for attempt in range(_MAX_RETRIES + 1): + _wait_for_slot() + response = super().send(request, **kwargs) + if response.status_code != 429: + return response + + retry_after = response.headers.get("Retry-After") + try: + wait = (float(retry_after) if retry_after + else min(_MAX_BACKOFF, 2.0 * (2 ** attempt))) + except ValueError: + wait = min(_MAX_BACKOFF, 2.0 * (2 ** attempt)) + + print(f"[MangaBaka] 429 — backing off {wait:.1f}s " + f"(attempt {attempt + 1}/{_MAX_RETRIES})", + flush=True) + response.close() + time.sleep(wait) + + # Retries exhausted — let the caller deal with the last 429. + return response + + +def apply_to_session(session) -> None: + """ + Mount the rate-limit adapter on ``session`` so every MangaBaka call + is automatically throttled. Safe to call multiple times (later mounts + just replace the earlier adapter for the same prefix). + """ + adapter = _MangaBakaRateLimitAdapter() + session.mount("https://api.mangabaka.dev/", adapter) + session.mount("http://api.mangabaka.dev/", adapter) diff --git a/src/MatchesWebApp.py b/src/MatchesWebApp.py index 23b525b..1884da0 100644 --- a/src/MatchesWebApp.py +++ b/src/MatchesWebApp.py @@ -9,25 +9,27 @@ Routes ------ GET / HTML table view (one row per cached match) GET /api/matches JSON dump of the full cache -POST /api/matches Upsert / rename an entry - body: {originalTitle?, title, mangabakaId, - mangabakaName, imageUrl, firstMatchTime?} -POST /api/matches/delete Remove an entry body: {title} -POST /api/build Trigger a full re-scan via SuwayomiMover.build_matches_only - (only available if a mover is wired in) +POST /api/matches Update an entry's mangabakaId + body: {title, mangabakaId} + Server resolves the id against MangaBaka and + refreshes the mangabakaName + imageUrl fields. +POST /api/matches/delete Remove an entry body: {title} +POST /api/build Trigger a full re-scan via + SuwayomiMover.build_matches_only -The Title cell is rendered as a link to MangaBaka's search page, restricted -to the manga / manhwa / manhua types. +The Title cell is rendered as a link to MangaBaka's search page restricted +to the manga / manhwa / manhua types. Only mangabakaId is editable; title +(folder name) and mangabakaName (info only) are read-only. """ from __future__ import annotations import threading -from urllib.parse import quote_plus from flask import Flask, jsonify, request, Response from MatchesCache import MatchesCache +from ComicInfoBuilder import _pick_cover_url _INDEX_HTML = """ @@ -43,35 +45,41 @@ _INDEX_HTML = """ button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; } button.primary { background:#2563eb; border-color:#2563eb; color:white; } button.danger { background:#7f1d1d; border-color:#7f1d1d; color:white; } + button:disabled { opacity:.5; cursor:default; } table { border-collapse: collapse; width: 100%; } th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; } th { background: #1d1d1d; text-align: left; position: sticky; top: 0; } + th.sortable { cursor: pointer; user-select: none; } + th.sortable:hover { background:#252525; } + th .arrow { display:inline-block; width:.8em; color:#9ca3af; } tr:nth-child(even) td { background: #161616; } td.image img { max-width: 90px; max-height: 130px; display:block; } - td input { width: 100%; padding: .25rem; background:#222; color:#eee; border:1px solid #444; } + td.id input { width: 14rem; padding: .25rem; background:#222; color:#eee; border:1px solid #444; font-family: monospace; } td.title a { color: #60a5fa; text-decoration: none; } td.title a:hover { text-decoration: underline; } td.actions { white-space: nowrap; } .status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; } .dirty td { background: #1f2937 !important; } + .count { color:#9ca3af; font-size:.9rem; margin-left:.5rem; } -

MangaBaka matches

+

MangaBaka matches

- + +
- + - + @@ -81,6 +89,8 @@ _INDEX_HTML = """ @@ -250,9 +335,10 @@ load(); class MatchesWebApp: """ - Flask app exposing the MatchesCache. `mover` is optional — if provided, - POST /api/build triggers SuwayomiMover.build_matches_only() on a worker - thread. + Flask app exposing the MatchesCache. `mover` is required when you want + POST /api/matches to resolve a new mangabakaId against MangaBaka (it + uses the mover's rate-limited session) and when POST /api/build should + work. """ def __init__(self, cache: MatchesCache, *, @@ -296,7 +382,7 @@ class MatchesWebApp: return self._thread def wait(self) -> None: - """Blocks until the Flask thread exits (or returns immediately if not started).""" + """Blocks until the Flask thread exits.""" if self._thread is not None: self._thread.join() @@ -321,15 +407,33 @@ class MatchesWebApp: title = (body.get("title") or "").strip() if not title: return Response("title is required", status=400) - original = (body.get("originalTitle") or "").strip() or title - if original != title: - cache.rename(original, title) + + new_id_raw = body.get("mangabakaId") + new_id = str(new_id_raw).strip() if new_id_raw is not None else "" + if not new_id: + return Response("mangabakaId is required", status=400) + + # Resolve the id against MangaBaka so mangabakaName + imageUrl + # always reflect what the id actually points to. + new_name: "str | None" = None + new_image: "str | None" = None + if self._mover is not None: + try: + series = self._mover.fetch_series(new_id) + except Exception as exc: + return Response(f"resolve failed: {exc}", status=502) + if not series: + return Response( + f"MangaBaka has no series with id {new_id}", + status=404) + new_name = series.get("title") or "" + new_image = _pick_cover_url(series.get("cover")) or "" + entry = cache.upsert( title, - mangabaka_id=body.get("mangabakaId"), - mangabaka_name=body.get("mangabakaName"), - image_url=body.get("imageUrl"), - first_match_time=body.get("firstMatchTime"), + mangabaka_id=new_id, + mangabaka_name=new_name, + image_url=new_image, ) return jsonify({"title": title, "entry": entry}) diff --git a/src/SuwayomiMover.py b/src/SuwayomiMover.py index 609bf08..fe44a35 100644 --- a/src/SuwayomiMover.py +++ b/src/SuwayomiMover.py @@ -51,13 +51,14 @@ from pathlib import Path import requests -from ComicInfoBuilder import ComicInfoBuilder, _pick_cover_url +from ComicInfoBuilder import ComicInfoBuilder, _pick_cover_url, _SEARCH_TYPES from MangadexVolumeResolver import MangaDexVolumeResolver from MangaBakaWorksResolver import MangaBakaWorksResolver from MALResolver import MALResolver from AniListResolver import AniListResolver from KavitaPersonUpdater import KavitaPersonUpdater from MatchesCache import MatchesCache +from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"} @@ -303,6 +304,8 @@ class SuwayomiMover: # to maximise cache hits and minimise API round-trips. session = requests.Session() session.headers.setdefault("User-Agent", "SuwayomiMover/1.0") + # Throttle every call to api.mangabaka.dev (>=1s gap + retry on 429). + _apply_mangabaka_rate_limit(session) self._session = session self._mal = MALResolver(request_timeout=request_timeout) @@ -362,6 +365,18 @@ class SuwayomiMover: raise FileNotFoundError( f"No Suwayomi directory found for '{manga_title}' under {self._src}") + def fetch_series(self, series_id) -> "dict | None": + """ + Fetches a MangaBaka series by id via the shared (rate-limited) session. + Returns the inner `data` dict, or None if not found / empty. + """ + if series_id is None or str(series_id).strip() == "": + return None + url = f"{self._api_base_url}/series/{series_id}" + resp = self._session.get(url, timeout=self._timeout) + resp.raise_for_status() + return resp.json().get("data") + def build_matches_only(self) -> dict: """ Walks every series under the Suwayomi root and resolves each one @@ -410,7 +425,8 @@ class SuwayomiMover: try: resp = self._session.get( search_url, - params={"q": builder_title, "page": 1, "limit": 1}, + params={"q": builder_title, "type": _SEARCH_TYPES, + "page": 1, "limit": 1}, timeout=self._timeout) resp.raise_for_status() data = resp.json().get("data") or [] @@ -555,27 +571,41 @@ class SuwayomiMover: # Usage example # -------------------------------------------------------------------------- if __name__ == "__main__": - SUWAYOMI_PATH = r"\\192.168.2.2\root\Temp\managdl\mangas" + # Local (no-Docker) smoke test. Adjust paths to your environment. + SUWAYOMI_PATH = r"M:\config\downloads\mangas" KAVITA_PATH = r"\\192.168.2.2\root\ServerData\Kavita\test" KAVITA_URL = "http://192.168.2.2:5000" KAVITA_KEY = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA" + # matches.json lives next to this script during local testing. + MATCHES_PATH = Path(__file__).resolve().parent.parent / "matches.json" + matches_cache = MatchesCache(MATCHES_PATH) + mover = SuwayomiMover( SUWAYOMI_PATH, KAVITA_PATH, kavita_base_url=KAVITA_URL, kavita_api_key=KAVITA_KEY, - delete_source=False + delete_source=False, + matches_cache=matches_cache, ) - # Process a single series - result = mover.process_series("Yofukashi no Uta") - ok = sum(1 for c in result["chapters"] if c["ok"]) - failed = sum(1 for c in result["chapters"] if not c["ok"]) - print(f"\nDone: {ok} ok, {failed} failed") - for c in result["chapters"]: - if not c["ok"]: - print(f" Chapter {c['chapter']}: {c['error']}") + # ---- Option A: build matches.json only (no moves / no Kavita sync) ---- + data = mover.build_matches_only() + matches = data.get("matches", {}) + print(f"\n[matches] {len(matches)} entries total — file: {MATCHES_PATH}") + for title, entry in list(matches.items())[:10]: + print(f" {title!r:50s} id={entry.get('mangabakaId')} " + f"name={entry.get('mangabakaName')!r}") + + # ---- Option B: full pipeline for one series (uses the cache too) ---- + # result = mover.process_series("Yofukashi no Uta") + # ok = sum(1 for c in result["chapters"] if c["ok"]) + # failed = sum(1 for c in result["chapters"] if not c["ok"]) + # print(f"\nDone: {ok} ok, {failed} failed") + # for c in result["chapters"]: + # if not c["ok"]: + # print(f" Chapter {c['chapter']}: {c['error']}") # Or process everything at once: # results = mover.process_all()
TitleTitle mangabakaId mangabakaNamefirstMatchTimefirstMatchTime Image