diff --git a/main.py b/main.py index 5c89bae..cecb25b 100644 --- a/main.py +++ b/main.py @@ -74,15 +74,15 @@ def _env_bool(name: str, default: bool) -> bool: def main() -> int: - suwayomi_path = _env_str("SUWAYOMI_PATH", "/mnt/suwayomi") + suwayomi_path = _env_str("SUWAYOMI_PATH", r"M:\config\downloads\mangas") kavita_path = _env_str("KAVITA_PATH", "/mnt/kavita") - kavita_url = _env_str("KAVITA_URL", required=True) - kavita_api_key = _env_str("KAVITA_API_KEY", required=True) + kavita_url = _env_str("KAVITA_URL", "http://kavita:5000") + kavita_api_key = _env_str("KAVITA_API_KEY", "") language = _env_str("LANGUAGE", "en") or "en" settle_seconds = _env_int("SETTLE_SECONDS", 600) request_timeout = _env_int("REQUEST_TIMEOUT", 30) delete_source = _env_bool("DELETE_SOURCE", True) - match_path = _env_str("MATCH_PATH", "/config/matches.json") + match_path = _env_str("MATCH_PATH", "matches.json") web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0" web_port = _env_int("WEB_PORT", 8080) diff --git a/src/ComicInfoBuilder.py b/src/ComicInfoBuilder.py index 8fd049f..6f1eb3b 100644 --- a/src/ComicInfoBuilder.py +++ b/src/ComicInfoBuilder.py @@ -49,6 +49,7 @@ from MangaBakaWorksResolver import MangaBakaWorksResolver from MALResolver import MALResolver from AniListResolver import AniListResolver from MatchesCache import MatchesCache +from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit try: from PIL import Image @@ -62,6 +63,12 @@ except ImportError: # -------------------------------------------------------------------------- _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"} +# Series types accepted by the MangaBaka search endpoint. Light/web novels +# are filtered out because this pipeline only handles image-based manga. +# Passed to `requests` as a list so each value becomes its own `&type=...` +# query parameter (MangaBaka's API expects repeated keys, not a CSV list). 
+_SEARCH_TYPES = ["manga", "manhwa", "manhua"] + _AGE_RATING_MAP = { "safe": "Everyone", "suggestive": "Teen", @@ -184,6 +191,9 @@ class ComicInfoBuilder: self.request_timeout = request_timeout self._session = session or requests.Session() self._session.headers.setdefault("User-Agent", "ComicInfoBuilder/1.0") + # Throttle every call to api.mangabaka.dev (idempotent — safe even + # when the session was already prepared by a parent class). + _apply_mangabaka_rate_limit(self._session) self._volume_resolver = (volume_resolver or MangaDexVolumeResolver( @@ -378,7 +388,8 @@ class ComicInfoBuilder: url = f"{self.api_base_url}/series/search" resp = self._session.get( - url, params={"q": title, "page": 1, "limit": 1}, + url, params={"q": title, "type": _SEARCH_TYPES, + "page": 1, "limit": 1}, timeout=self.request_timeout) resp.raise_for_status() data = resp.json().get("data") or []
diff --git a/src/MangaBakaRateLimit.py b/src/MangaBakaRateLimit.py
new file mode 100644
index 0000000..9b34018
--- /dev/null
+++ b/src/MangaBakaRateLimit.py
@@ -0,0 +1,136 @@
+"""
+MangaBakaRateLimit.py
+=====================
+
+Process-wide rate limiter for the MangaBaka API.
+
+Apply via:
+
+    from MangaBakaRateLimit import apply_to_session
+    apply_to_session(session)
+
+This mounts a custom ``requests.adapters.HTTPAdapter`` on the given
+``requests.Session`` for the ``api.mangabaka.dev`` host. Every request
+going through that adapter is:
+
+  * throttled so that no two requests are dispatched within
+    ``_MIN_INTERVAL`` seconds of one another, and
+  * retried on HTTP 429, honouring the ``Retry-After`` header when
+    it holds a usable number of seconds, otherwise exponential
+    backoff capped at ``_MAX_BACKOFF``.
+
+Throttle state is module-global, so even if several sessions exist in
+the same process they share one budget — important because they all hit
+the same upstream IP-based limit.
+"""
+
+from __future__ import annotations
+
+import math
+import threading
+import time
+
+from requests.adapters import HTTPAdapter
+
+
+# Tune these if MangaBaka tightens or loosens limits.
+_MIN_INTERVAL = 1.1   # seconds between consecutive requests
+_MAX_RETRIES = 6      # retries on 429 before giving up
+_MAX_BACKOFF = 60.0   # cap on per-attempt backoff sleep
+
+# Both schemes are covered so plain-HTTP calls are throttled too.
+_MOUNT_PREFIXES = ("https://api.mangabaka.dev/", "http://api.mangabaka.dev/")
+
+
+# --- shared throttle state --------------------------------------------------
+_state_lock = threading.Lock()
+_last_request_time = 0.0
+
+
+def _wait_for_slot() -> None:
+    """Block until the next request slot is available, then reserve it."""
+    global _last_request_time
+    while True:
+        with _state_lock:
+            now = time.monotonic()
+            wait = _MIN_INTERVAL - (now - _last_request_time)
+            if wait <= 0:
+                _last_request_time = now
+                return
+        # Sleep with the lock released so other threads can re-check.
+        time.sleep(wait)
+
+
+def _backoff_delay(retry_after, attempt: int) -> float:
+    """
+    Return how many seconds to sleep before retrying after a 429.
+
+    ``retry_after`` is the raw ``Retry-After`` header value (or ``None``).
+    It is honoured when it parses as a finite, non-negative number of
+    seconds. A missing, HTTP-date, negative, NaN or infinite value falls
+    back to exponential backoff capped at ``_MAX_BACKOFF``, because
+    ``time.sleep`` rejects negative and non-finite durations.
+    """
+    fallback = min(_MAX_BACKOFF, 2.0 * (2 ** attempt))
+    if not retry_after:
+        return fallback
+    try:
+        wait = float(retry_after)
+    except ValueError:
+        return fallback
+    if not math.isfinite(wait) or wait < 0:
+        return fallback
+    return wait
+
+
+class _MangaBakaRateLimitAdapter(HTTPAdapter):
+    """Transport adapter that throttles requests and retries HTTP 429."""
+
+    def send(self, request, **kwargs):
+        """
+        Send ``request`` through the shared throttle, retrying on 429.
+
+        Returns the first non-429 response, or the last 429 response
+        (left open for the caller) once ``_MAX_RETRIES`` retries are spent.
+        """
+        response = None
+        for attempt in range(_MAX_RETRIES + 1):
+            _wait_for_slot()
+            response = super().send(request, **kwargs)
+            if response.status_code != 429:
+                return response
+            if attempt == _MAX_RETRIES:
+                # Out of retries: return this 429 as-is. Closing it or
+                # sleeping first would only delay the caller and hand
+                # back a response whose connection is already released.
+                break
+
+            wait = _backoff_delay(response.headers.get("Retry-After"),
+                                  attempt)
+            print(f"[MangaBaka] 429 — backing off {wait:.1f}s "
+                  f"(attempt {attempt + 1}/{_MAX_RETRIES})",
+                  flush=True)
+            # Release the connection before sleeping; this response is
+            # discarded and replaced by the next attempt.
+            response.close()
+            time.sleep(wait)
+
+        # Retries exhausted — let the caller deal with the last 429.
+        return response
+
+
+def apply_to_session(session) -> None:
+    """
+    Mount the rate-limit adapter on ``session`` so every MangaBaka call
+    is automatically throttled. Safe to call multiple times: a session
+    that already carries the adapter on both prefixes is left as-is, so
+    its pooled connections are not thrown away by a redundant remount.
+    """
+    mounted = getattr(session, "adapters", None) or {}
+    if all(isinstance(mounted.get(prefix), _MangaBakaRateLimitAdapter)
+           for prefix in _MOUNT_PREFIXES):
+        return
+
+    adapter = _MangaBakaRateLimitAdapter()
+    for prefix in _MOUNT_PREFIXES:
+        session.mount(prefix, adapter)
diff --git a/src/MatchesWebApp.py b/src/MatchesWebApp.py index 23b525b..1884da0 100644 --- a/src/MatchesWebApp.py +++ b/src/MatchesWebApp.py @@ -9,25 +9,27 @@ Routes ------ GET / HTML table view (one row per cached match) GET /api/matches JSON dump of the full cache -POST /api/matches Upsert / rename an entry - body: {originalTitle?, title, mangabakaId, - mangabakaName, imageUrl, firstMatchTime?} -POST /api/matches/delete Remove an entry body: {title} -POST /api/build Trigger a full re-scan via SuwayomiMover.build_matches_only - (only available if a mover is wired in) +POST /api/matches Update an entry's mangabakaId + body: {title, mangabakaId} + Server resolves the id against MangaBaka and + refreshes the mangabakaName + imageUrl fields. +POST /api/matches/delete Remove an entry body: {title} +POST /api/build Trigger a full re-scan via + SuwayomiMover.build_matches_only -The Title cell is rendered as a link to MangaBaka's search page, restricted -to the manga / manhwa / manhua types. +The Title cell is rendered as a link to MangaBaka's search page restricted +to the manga / manhwa / manhua types. Only mangabakaId is editable; title +(folder name) and mangabakaName (info only) are read-only.
""" from __future__ import annotations import threading -from urllib.parse import quote_plus from flask import Flask, jsonify, request, Response from MatchesCache import MatchesCache +from ComicInfoBuilder import _pick_cover_url _INDEX_HTML = """ @@ -43,35 +45,41 @@ _INDEX_HTML = """ button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; } button.primary { background:#2563eb; border-color:#2563eb; color:white; } button.danger { background:#7f1d1d; border-color:#7f1d1d; color:white; } + button:disabled { opacity:.5; cursor:default; } table { border-collapse: collapse; width: 100%; } th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; } th { background: #1d1d1d; text-align: left; position: sticky; top: 0; } + th.sortable { cursor: pointer; user-select: none; } + th.sortable:hover { background:#252525; } + th .arrow { display:inline-block; width:.8em; color:#9ca3af; } tr:nth-child(even) td { background: #161616; } td.image img { max-width: 90px; max-height: 130px; display:block; } - td input { width: 100%; padding: .25rem; background:#222; color:#eee; border:1px solid #444; } + td.id input { width: 14rem; padding: .25rem; background:#222; color:#eee; border:1px solid #444; font-family: monospace; } td.title a { color: #60a5fa; text-decoration: none; } td.title a:hover { text-decoration: underline; } td.actions { white-space: nowrap; } .status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; } .dirty td { background: #1f2937 !important; } + .count { color:#9ca3af; font-size:.9rem; margin-left:.5rem; }
-| Title | +Title | mangabakaId | mangabakaName | -firstMatchTime | +firstMatchTime | Image |
|---|