manga matching and WebApp
Build and Deploy / build (push) Successful in 32s
Build and Deploy / deploy (push) Successful in 25s

This commit is contained in:
2026-05-26 20:20:24 +02:00
parent 12edb8a5d7
commit 615bd1b468
9 changed files with 665 additions and 56 deletions
+40 -40
View File
@@ -23,43 +23,43 @@ jobs:
- name: Push Image - name: Push Image
run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest
# deploy: deploy:
# needs: build needs: build
# runs-on: ubuntu-latest runs-on: ubuntu-latest
# steps: steps:
# - name: Checkout - name: Checkout
# uses: actions/checkout@v4 uses: actions/checkout@v4
#
# - name: Create deployment directory - name: Create deployment directory
# uses: appleboy/ssh-action@v1.0.3 uses: appleboy/ssh-action@v1.0.3
# with: with:
# host: ${{ secrets.SSH_HOST }} host: ${{ secrets.SSH_HOST }}
# username: ${{ secrets.SSH_USER }} username: ${{ secrets.SSH_USER }}
# password: ${{ secrets.SSH_PASSWORD }} password: ${{ secrets.SSH_PASSWORD }}
# port: ${{ secrets.SSH_PORT || 22 }} port: ${{ secrets.SSH_PORT || 22 }}
# script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
#
# - name: Copy docker-compose via SCP - name: Copy docker-compose via SCP
# uses: appleboy/scp-action@v0.1.7 uses: appleboy/scp-action@v0.1.7
# with: with:
# host: ${{ secrets.SSH_HOST }} host: ${{ secrets.SSH_HOST }}
# username: ${{ secrets.SSH_USER }} username: ${{ secrets.SSH_USER }}
# password: ${{ secrets.SSH_PASSWORD }} password: ${{ secrets.SSH_PASSWORD }}
# port: ${{ secrets.SSH_PORT || 22 }} port: ${{ secrets.SSH_PORT || 22 }}
# source: "docker-compose.prod.yml" source: "docker-compose.prod.yml"
# target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector" target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector"
#
# - name: Deploy via SSH - name: Deploy via SSH
# uses: appleboy/ssh-action@v1.0.3 uses: appleboy/ssh-action@v1.0.3
# with: with:
# host: ${{ secrets.SSH_HOST }} host: ${{ secrets.SSH_HOST }}
# username: ${{ secrets.SSH_USER }} username: ${{ secrets.SSH_USER }}
# password: ${{ secrets.SSH_PASSWORD }} password: ${{ secrets.SSH_PASSWORD }}
# port: ${{ secrets.SSH_PORT || 22 }} port: ${{ secrets.SSH_PORT || 22 }}
# script: | script: |
# cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
# mv docker-compose.prod.yml docker-compose.yml mv docker-compose.prod.yml docker-compose.yml
# echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
# sudo docker compose pull sudo docker compose pull
# sudo docker compose up -d --remove-orphans sudo docker compose up -d --remove-orphans
# sudo docker image prune -f sudo docker image prune -f
+3 -1
View File
@@ -18,6 +18,8 @@ ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 PYTHONDONTWRITEBYTECODE=1
# Mount points used by main.py defaults # Mount points used by main.py defaults
VOLUME ["/mnt/suwayomi", "/mnt/kavita"] VOLUME ["/mnt/suwayomi", "/mnt/kavita", "/config"]
EXPOSE 8080
CMD ["python", "/app/main.py"] CMD ["python", "/app/main.py"]
+5
View File
@@ -9,6 +9,11 @@ services:
LANGUAGE: "${LANGUAGE:-en}" LANGUAGE: "${LANGUAGE:-en}"
SETTLE_SECONDS: "${SETTLE_SECONDS:-600}" SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
DELETE_SOURCE: "${DELETE_SOURCE:-true}" DELETE_SOURCE: "${DELETE_SOURCE:-true}"
MATCH_PATH: "${MATCH_PATH:-/config/matches.json}"
WEB_PORT: "${WEB_PORT:-8080}"
ports:
- "${WEB_PORT:-8080}:8080"
volumes: volumes:
- "${HOST_SUWAYOMI_PATH}:/mnt/suwayomi" - "${HOST_SUWAYOMI_PATH}:/mnt/suwayomi"
- "${HOST_KAVITA_PATH}:/mnt/kavita" - "${HOST_KAVITA_PATH}:/mnt/kavita"
- "${HOST_CONFIG_PATH}:/config"
+25 -10
View File
@@ -24,6 +24,9 @@ Environment variables
SETTLE_SECONDS default 600 (10-minute quiet window) SETTLE_SECONDS default 600 (10-minute quiet window)
REQUEST_TIMEOUT default 30 REQUEST_TIMEOUT default 30
DELETE_SOURCE default true (delete source folders after pack) DELETE_SOURCE default true (delete source folders after pack)
MATCH_PATH default /config/matches.json
WEB_PORT default 8080 (Flask web UI for matches.json)
WEB_HOST default 0.0.0.0
""" """
from __future__ import annotations from __future__ import annotations
@@ -38,6 +41,8 @@ sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
from src.SuwayomiMover import SuwayomiMover # noqa: E402 from src.SuwayomiMover import SuwayomiMover # noqa: E402
from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402 from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402
from src.MatchesCache import MatchesCache # noqa: E402
from src.MatchesWebApp import MatchesWebApp # noqa: E402
def _env_str(name: str, default: "str | None" = None, def _env_str(name: str, default: "str | None" = None,
@@ -77,6 +82,9 @@ def main() -> int:
settle_seconds = _env_int("SETTLE_SECONDS", 600) settle_seconds = _env_int("SETTLE_SECONDS", 600)
request_timeout = _env_int("REQUEST_TIMEOUT", 30) request_timeout = _env_int("REQUEST_TIMEOUT", 30)
delete_source = _env_bool("DELETE_SOURCE", True) delete_source = _env_bool("DELETE_SOURCE", True)
match_path = _env_str("MATCH_PATH", "/config/matches.json")
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
web_port = _env_int("WEB_PORT", 8080)
print(f"[main] suwayomi = {suwayomi_path}", flush=True) print(f"[main] suwayomi = {suwayomi_path}", flush=True)
print(f"[main] kavita = {kavita_path}", flush=True) print(f"[main] kavita = {kavita_path}", flush=True)
@@ -84,6 +92,10 @@ def main() -> int:
print(f"[main] settle = {settle_seconds}s", flush=True) print(f"[main] settle = {settle_seconds}s", flush=True)
print(f"[main] language = {language}", flush=True) print(f"[main] language = {language}", flush=True)
print(f"[main] delete src= {delete_source}", flush=True) print(f"[main] delete src= {delete_source}", flush=True)
print(f"[main] match path= {match_path}", flush=True)
print(f"[main] web = {web_host}:{web_port}", flush=True)
matches_cache = MatchesCache(match_path)
mover = SuwayomiMover( mover = SuwayomiMover(
suwayomi_path, kavita_path, suwayomi_path, kavita_path,
@@ -92,20 +104,23 @@ def main() -> int:
language=language, language=language,
request_timeout=request_timeout, request_timeout=request_timeout,
delete_source=delete_source, delete_source=delete_source,
matches_cache=matches_cache,
) )
watcher = SuwayomiFolderWatcher( # watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
suwayomi_path, mover, settle_seconds=settle_seconds)
def shutdown(signum, _frame): web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port)
print(f"[main] received signal {signum}", flush=True) web_app.start()
watcher.stop()
signal.signal(signal.SIGTERM, shutdown) # def shutdown(signum, _frame):
signal.signal(signal.SIGINT, shutdown) # print(f"[main] received signal {signum}", flush=True)
# watcher.stop()
watcher.start() #
watcher.wait() # blocks until stop() is called via a signal # signal.signal(signal.SIGTERM, shutdown)
# signal.signal(signal.SIGINT, shutdown)
#
# watcher.start()
# watcher.wait() # blocks until stop() is called via a signal
return 0 return 0
+1
View File
@@ -1,3 +1,4 @@
requests>=2.31 requests>=2.31
Pillow>=10.0 Pillow>=10.0
watchdog>=4.0 watchdog>=4.0
Flask>=3.0
+35 -3
View File
@@ -48,6 +48,7 @@ from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver from MALResolver import MALResolver
from AniListResolver import AniListResolver from AniListResolver import AniListResolver
from MatchesCache import MatchesCache
try: try:
from PIL import Image from PIL import Image
@@ -170,7 +171,8 @@ class ComicInfoBuilder:
volume_resolver: "MangaDexVolumeResolver | None" = None, volume_resolver: "MangaDexVolumeResolver | None" = None,
works_resolver: "MangaBakaWorksResolver | None" = None, works_resolver: "MangaBakaWorksResolver | None" = None,
mal_resolver: "MALResolver | None" = None, mal_resolver: "MALResolver | None" = None,
al_resolver: "AniListResolver | None" = None): al_resolver: "AniListResolver | None" = None,
matches_cache: "MatchesCache | None" = None):
if not manga_title or not str(manga_title).strip(): if not manga_title or not str(manga_title).strip():
raise ValueError("manga_title must not be empty.") raise ValueError("manga_title must not be empty.")
@@ -197,6 +199,7 @@ class ComicInfoBuilder:
request_timeout=request_timeout) request_timeout=request_timeout)
self._al_resolver = al_resolver or AniListResolver( self._al_resolver = al_resolver or AniListResolver(
request_timeout=request_timeout) request_timeout=request_timeout)
self._matches_cache = matches_cache
self._metadata: "dict | None" = None self._metadata: "dict | None" = None
self._pages: list[dict] = [] self._pages: list[dict] = []
@@ -353,14 +356,43 @@ class ComicInfoBuilder:
return series return series
def _search_best_series(self, title: str): def _search_best_series(self, title: str):
"""Searches for `title` and returns the best matching series entry.""" """
Resolves `title` to a MangaBaka series.
Lookup order:
1. matches.json cache (if attached) — uses the stored series ID
to fetch the full series, skipping the search step entirely.
2. Fresh MangaBaka search — top hit. The match is persisted to
matches.json before being returned so it survives a crash.
"""
if self._matches_cache is not None:
cached = self._matches_cache.get(title)
if cached and cached.get("mangabakaId"):
try:
return self._fetch_series_by_id(cached["mangabakaId"])
except Exception as exc:
print(f"[ComicInfoBuilder] cached id "
f"{cached['mangabakaId']} for {title!r} failed "
f"({exc}); falling back to fresh search",
flush=True)
url = f"{self.api_base_url}/series/search" url = f"{self.api_base_url}/series/search"
resp = self._session.get( resp = self._session.get(
url, params={"q": title, "page": 1, "limit": 1}, url, params={"q": title, "page": 1, "limit": 1},
timeout=self.request_timeout) timeout=self.request_timeout)
resp.raise_for_status() resp.raise_for_status()
data = resp.json().get("data") or [] data = resp.json().get("data") or []
return data[0] if data else None series = data[0] if data else None
if series and self._matches_cache is not None:
self._matches_cache.add(
title,
mangabaka_id=series.get("id"),
mangabaka_name=series.get("title") or "",
image_url=_pick_cover_url(series.get("cover")),
)
return series
def _fetch_series_by_id(self, series_id) -> dict: def _fetch_series_by_id(self, series_id) -> dict:
url = f"{self.api_base_url}/series/{series_id}" url = f"{self.api_base_url}/series/{series_id}"
+139
View File
@@ -0,0 +1,139 @@
"""
MatchesCache.py
===============
Persistent JSON cache that maps a Suwayomi/series search title to the
MangaBaka series it was matched against.
Structure on disk::
{
"matches": {
"<search title>": {
"mangabakaId": "12345",
"mangabakaName": "One-Punch Man",
"imageUrl": "https://.../cover.jpg",
"firstMatchTime": 1700000000
},
...
}
}
The cache is consulted by ComicInfoBuilder before issuing a MangaBaka
search request, and is written back to disk on every mutation so a crash
does not lose matches that were resolved in the current run.
"""
from __future__ import annotations
import json
import threading
import time
from pathlib import Path
class MatchesCache:
    """
    Thread-safe, JSON-backed map from a search title to its MangaBaka match.

    The whole document is kept in memory as ``{"matches": {title: entry}}``
    and rewritten to ``path`` after every mutation. All public methods return
    copies of entries, so callers cannot modify the cache by accident.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON file. It does not need to exist yet; the parent
        directory is created on the first save.
    """

    def __init__(self, path):
        self._path = Path(path)
        # Re-entrant so a public method may call another one while locked.
        self._lock = threading.RLock()
        self._data: dict = {"matches": {}}
        self._load()

    # ------------------------------------------------------------------
    # Public lookup / mutation API
    # ------------------------------------------------------------------

    def get(self, title: str) -> "dict | None":
        """Return a copy of the entry stored for `title`, or None."""
        with self._lock:
            entry = self._data["matches"].get(title)
            return dict(entry) if entry else None

    def add(self, title: str, *,
            mangabaka_id,
            mangabaka_name: str,
            image_url: "str | None") -> dict:
        """
        Store a fresh match for `title`, replacing any existing entry
        (including its ``firstMatchTime``), and persist it.

        Returns a copy of the stored entry.
        """
        entry = {
            "mangabakaId": str(mangabaka_id) if mangabaka_id is not None else "",
            "mangabakaName": mangabaka_name or "",
            "imageUrl": image_url or "",
            "firstMatchTime": int(time.time()),
        }
        with self._lock:
            self._data["matches"][title] = entry
            self._save_unlocked()
            return dict(entry)

    def upsert(self, title: str, *,
               mangabaka_id=None,
               mangabaka_name=None,
               image_url=None,
               first_match_time=None) -> dict:
        """
        Create the entry for `title` if missing, then overwrite only the
        fields whose argument is not None, and persist the result.

        Text fields are coerced to ``str`` because this method is fed from
        untrusted JSON request bodies. A `first_match_time` that cannot be
        converted to ``int`` is ignored. Returns a copy of the stored entry.
        """
        with self._lock:
            entry = self._data["matches"].get(title)
            if entry is None:
                entry = {
                    "mangabakaId": "",
                    "mangabakaName": "",
                    "imageUrl": "",
                    "firstMatchTime": int(time.time()),
                }
                self._data["matches"][title] = entry
            if mangabaka_id is not None:
                entry["mangabakaId"] = str(mangabaka_id)
            if mangabaka_name is not None:
                entry["mangabakaName"] = str(mangabaka_name)
            if image_url is not None:
                entry["imageUrl"] = str(image_url)
            if first_match_time is not None:
                try:
                    entry["firstMatchTime"] = int(first_match_time)
                except (TypeError, ValueError):
                    # Keep the previous timestamp rather than failing the
                    # whole update over one malformed field.
                    pass
            self._save_unlocked()
            return dict(entry)

    def rename(self, old_title: str, new_title: str) -> bool:
        """
        Move the entry stored under `old_title` to `new_title`.

        An entry already stored under `new_title` is overwritten. Returns
        False (and changes nothing) when `new_title` is empty, equals
        `old_title`, or `old_title` is not in the cache.
        """
        if not new_title or old_title == new_title:
            return False
        with self._lock:
            entry = self._data["matches"].pop(old_title, None)
            if entry is None:
                return False
            self._data["matches"][new_title] = entry
            self._save_unlocked()
            return True

    def remove(self, title: str) -> bool:
        """Delete the entry for `title`; return whether it existed."""
        with self._lock:
            existed = title in self._data["matches"]
            if existed:
                del self._data["matches"][title]
                self._save_unlocked()
            return existed

    def all(self) -> dict:
        """Return a copy of the cache as ``{"matches": {title: entry}}``."""
        with self._lock:
            return {"matches": {k: dict(v)
                                for k, v in self._data["matches"].items()}}

    # ------------------------------------------------------------------
    # Internal IO
    # ------------------------------------------------------------------

    def _load(self) -> None:
        """
        Read the JSON file into memory if it exists and is well-formed.

        An unreadable, undecodable or structurally unexpected file is
        reported and ignored, leaving the cache empty.
        """
        if not self._path.is_file():
            return
        try:
            with self._path.open("r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers both json.JSONDecodeError and the
            # UnicodeDecodeError raised for files that are not valid UTF-8.
            print(f"[MatchesCache] failed to load {self._path}: {exc}",
                  flush=True)
            return
        if not (isinstance(loaded, dict)
                and isinstance(loaded.get("matches"), dict)):
            print(f"[MatchesCache] ignoring {self._path}: "
                  f"unexpected structure", flush=True)
            return
        # Entries that are not objects would make get()/all() raise later.
        valid = {title: entry
                 for title, entry in loaded["matches"].items()
                 if isinstance(entry, dict)}
        dropped = len(loaded["matches"]) - len(valid)
        if dropped:
            print(f"[MatchesCache] ignoring {dropped} malformed "
                  f"entries in {self._path}", flush=True)
        loaded["matches"] = valid
        self._data = loaded

    def _save_unlocked(self) -> None:
        """
        Write the in-memory document to disk. Caller must hold the lock.

        The data is written to a sibling ``.tmp`` file that then replaces the
        target, so a crash mid-write cannot leave a truncated cache file.
        """
        self._path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self._path.with_suffix(self._path.suffix + ".tmp")
        try:
            with tmp.open("w", encoding="utf-8") as f:
                json.dump(self._data, f, ensure_ascii=False, indent=2)
            tmp.replace(self._path)
        except BaseException:
            # Do not leave a half-written temp file next to the cache.
            try:
                tmp.unlink()
            except OSError:
                pass
            raise
+341
View File
@@ -0,0 +1,341 @@
"""
MatchesWebApp.py
================
Flask web UI for inspecting and editing the matches.json file produced by
MatchesCache.
Routes
------
GET / HTML table view (one row per cached match)
GET /api/matches JSON dump of the full cache
POST /api/matches Upsert / rename an entry
body: {originalTitle?, title, mangabakaId,
mangabakaName, imageUrl, firstMatchTime?}
POST /api/matches/delete Remove an entry body: {title}
POST /api/build Trigger a full re-scan via SuwayomiMover.build_matches_only
(only available if a mover is wired in)
The Title cell is rendered as a link to MangaBaka's search page, restricted
to the manga / manhwa / manhua types.
"""
from __future__ import annotations
import threading
from urllib.parse import quote_plus
from flask import Flask, jsonify, request, Response
from MatchesCache import MatchesCache
# Single-page HTML/CSS/JS document returned verbatim by the index route.
# The embedded script renders one editable table row per cached match and
# talks to the JSON endpoints of this app: GET /api/matches to load,
# POST /api/matches to save a row, POST /api/matches/delete to remove one,
# and POST /api/build to trigger a rescan. The text below is runtime data:
# any change to it changes what the browser receives.
_INDEX_HTML = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>MangaBaka matches</title>
<style>
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
.bar { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
.bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
button.primary { background:#2563eb; border-color:#2563eb; color:white; }
button.danger { background:#7f1d1d; border-color:#7f1d1d; color:white; }
table { border-collapse: collapse; width: 100%; }
th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
tr:nth-child(even) td { background: #161616; }
td.image img { max-width: 90px; max-height: 130px; display:block; }
td input { width: 100%; padding: .25rem; background:#222; color:#eee; border:1px solid #444; }
td.title a { color: #60a5fa; text-decoration: none; }
td.title a:hover { text-decoration: underline; }
td.actions { white-space: nowrap; }
.status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
.dirty td { background: #1f2937 !important; }
</style>
</head>
<body>
<h1>MangaBaka matches</h1>
<div class="bar">
<input id="filter" type="search" placeholder="Filter by title…">
<button id="reload">Reload</button>
<button id="build" class="primary">Build all (rescan)</button>
<span class="status" id="status"></span>
</div>
<table>
<thead>
<tr>
<th>Title</th>
<th>mangabakaId</th>
<th>mangabakaName</th>
<th>firstMatchTime</th>
<th>Image</th>
<th></th>
</tr>
</thead>
<tbody id="rows"></tbody>
</table>
<script>
const TYPES = "&type=manhwa&type=manhua&type=manga";
function fmtTime(unix) {
if (!unix) return "";
const d = new Date(unix * 1000);
return d.toLocaleString();
}
function searchUrl(title) {
return "https://mangabaka.org/search?q=" + encodeURIComponent(title) + TYPES;
}
function setStatus(msg) { document.getElementById("status").textContent = msg; }
function makeRow(title, e) {
const tr = document.createElement("tr");
tr.dataset.originalTitle = title;
const titleTd = document.createElement("td");
titleTd.className = "title";
const titleLink = document.createElement("a");
titleLink.href = searchUrl(title);
titleLink.target = "_blank";
titleLink.rel = "noopener";
titleLink.textContent = title;
const titleInput = document.createElement("input");
titleInput.value = title;
titleInput.style.marginTop = ".25rem";
titleInput.addEventListener("input", () => {
titleLink.textContent = titleInput.value;
titleLink.href = searchUrl(titleInput.value);
tr.classList.add("dirty");
});
titleTd.append(titleLink, titleInput);
tr.appendChild(titleTd);
function field(value) {
const td = document.createElement("td");
const inp = document.createElement("input");
inp.value = value || "";
inp.addEventListener("input", () => tr.classList.add("dirty"));
td.appendChild(inp);
return [td, inp];
}
const [idTd, idInp] = field(e.mangabakaId);
const [nameTd, nameInp] = field(e.mangabakaName);
const [urlTd, urlInp] = field(e.imageUrl);
tr.appendChild(idTd);
tr.appendChild(nameTd);
const timeTd = document.createElement("td");
timeTd.textContent = fmtTime(e.firstMatchTime);
tr.appendChild(timeTd);
const imgTd = document.createElement("td");
imgTd.className = "image";
const img = document.createElement("img");
img.src = e.imageUrl || "";
img.alt = "";
img.loading = "lazy";
urlInp.addEventListener("input", () => { img.src = urlInp.value; });
imgTd.append(img, urlInp);
tr.appendChild(imgTd);
const actTd = document.createElement("td");
actTd.className = "actions";
const save = document.createElement("button");
save.textContent = "Save";
save.className = "primary";
save.addEventListener("click", async () => {
save.disabled = true;
setStatus("Saving " + titleInput.value + "");
const body = {
originalTitle: tr.dataset.originalTitle,
title: titleInput.value,
mangabakaId: idInp.value,
mangabakaName: nameInp.value,
imageUrl: urlInp.value,
};
try {
const r = await fetch("/api/matches", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(body),
});
if (!r.ok) throw new Error(await r.text());
tr.dataset.originalTitle = titleInput.value;
tr.classList.remove("dirty");
setStatus("Saved " + titleInput.value);
} catch (err) {
setStatus("Save failed: " + err.message);
} finally {
save.disabled = false;
}
});
const del = document.createElement("button");
del.textContent = "Delete";
del.className = "danger";
del.style.marginLeft = ".25rem";
del.addEventListener("click", async () => {
if (!confirm("Delete " + tr.dataset.originalTitle + "?")) return;
setStatus("Deleting " + tr.dataset.originalTitle + "");
try {
const r = await fetch("/api/matches/delete", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ title: tr.dataset.originalTitle }),
});
if (!r.ok) throw new Error(await r.text());
tr.remove();
setStatus("Deleted");
} catch (err) {
setStatus("Delete failed: " + err.message);
}
});
actTd.append(save, del);
tr.appendChild(actTd);
return tr;
}
async function load() {
setStatus("Loading…");
const tbody = document.getElementById("rows");
tbody.innerHTML = "";
try {
const r = await fetch("/api/matches");
const data = await r.json();
const matches = data.matches || {};
const titles = Object.keys(matches).sort((a,b)=>a.localeCompare(b));
for (const t of titles) tbody.appendChild(makeRow(t, matches[t]));
setStatus(titles.length + " entries");
applyFilter();
} catch (err) {
setStatus("Load failed: " + err.message);
}
}
function applyFilter() {
const q = document.getElementById("filter").value.toLowerCase();
for (const tr of document.querySelectorAll("#rows tr")) {
const t = tr.dataset.originalTitle.toLowerCase();
tr.style.display = t.includes(q) ? "" : "none";
}
}
document.getElementById("filter").addEventListener("input", applyFilter);
document.getElementById("reload").addEventListener("click", load);
document.getElementById("build").addEventListener("click", async () => {
if (!confirm("Run full scan? This may take several minutes.")) return;
setStatus("Building… (running on the server)");
try {
const r = await fetch("/api/build", { method: "POST" });
if (!r.ok) throw new Error(await r.text());
setStatus("Build finished");
load();
} catch (err) {
setStatus("Build failed: " + err.message);
}
});
load();
</script>
</body>
</html>
"""
class MatchesWebApp:
    """
    Flask app exposing a MatchesCache over HTTP.

    Parameters
    ----------
    cache : MatchesCache
        The cache that the JSON endpoints read and modify.
    mover : optional
        Object providing ``build_matches_only()``. When given, POST
        /api/build runs it synchronously inside the request handler and
        responds once the scan has finished; without it the endpoint
        answers 503.
    host, port :
        Address the server binds to when `start()` is called.
    """

    def __init__(self, cache: MatchesCache, *,
                 mover=None,
                 host: str = "0.0.0.0",
                 port: int = 8080):
        self._cache = cache
        self._mover = mover
        self._host = host
        self._port = port
        # Held for the duration of a build so only one scan runs at a time.
        self._build_lock = threading.Lock()
        self._app = Flask(__name__)
        self._register_routes()

    @property
    def app(self) -> Flask:
        """The underlying Flask application."""
        return self._app

    def start(self) -> threading.Thread:
        """
        Starts the Flask server on a daemon thread and returns it.

        Being a daemon thread, the server stops as soon as the main thread
        exits; the caller is responsible for keeping the process alive.
        """
        thread = threading.Thread(
            target=self._app.run,
            kwargs={"host": self._host, "port": self._port,
                    "debug": False, "use_reloader": False,
                    "threaded": True},
            name="MatchesWebApp",
            daemon=True,
        )
        thread.start()
        print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
              flush=True)
        return thread

    # ------------------------------------------------------------------
    # Request helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _json_object() -> dict:
        """Return the request's JSON body if it is an object, else {}."""
        payload = request.get_json(silent=True)
        return payload if isinstance(payload, dict) else {}

    @staticmethod
    def _text(value) -> str:
        """Return `value` stripped if it is a string, else ''."""
        return value.strip() if isinstance(value, str) else ""

    # ------------------------------------------------------------------
    # Routes
    # ------------------------------------------------------------------

    def _register_routes(self) -> None:
        """Attach all URL handlers to the Flask app."""
        app = self._app
        cache = self._cache

        @app.get("/")
        def index() -> Response:
            # Pass the bare media type: Werkzeug appends the charset
            # parameter to text/* types itself, so including it here would
            # emit it twice in the Content-Type header.
            return Response(_INDEX_HTML, mimetype="text/html")

        @app.get("/api/matches")
        def api_list():
            return jsonify(cache.all())

        @app.post("/api/matches")
        def api_upsert():
            body = self._json_object()
            title = self._text(body.get("title"))
            if not title:
                return Response("title is required", status=400)
            # A differing originalTitle means the row was renamed in the UI.
            original = self._text(body.get("originalTitle")) or title
            if original != title:
                cache.rename(original, title)
            entry = cache.upsert(
                title,
                mangabaka_id=body.get("mangabakaId"),
                mangabaka_name=body.get("mangabakaName"),
                image_url=body.get("imageUrl"),
                first_match_time=body.get("firstMatchTime"),
            )
            return jsonify({"title": title, "entry": entry})

        @app.post("/api/matches/delete")
        def api_delete():
            body = self._json_object()
            title = self._text(body.get("title"))
            if not title:
                return Response("title is required", status=400)
            removed = cache.remove(title)
            return jsonify({"removed": removed, "title": title})

        @app.post("/api/build")
        def api_build():
            if self._mover is None:
                return Response("no mover configured", status=503)
            # Non-blocking acquire: a second request is rejected instead of
            # queueing behind a scan that is already in progress.
            if not self._build_lock.acquire(blocking=False):
                return Response("build already running", status=409)
            try:
                result = self._mover.build_matches_only()
            finally:
                self._build_lock.release()
            return jsonify(result)
+76 -2
View File
@@ -51,12 +51,13 @@ from pathlib import Path
import requests import requests
from ComicInfoBuilder import ComicInfoBuilder from ComicInfoBuilder import ComicInfoBuilder, _pick_cover_url
from MangadexVolumeResolver import MangaDexVolumeResolver from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver from MALResolver import MALResolver
from AniListResolver import AniListResolver from AniListResolver import AniListResolver
from KavitaPersonUpdater import KavitaPersonUpdater from KavitaPersonUpdater import KavitaPersonUpdater
from MatchesCache import MatchesCache
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"} _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
@@ -287,12 +288,16 @@ class SuwayomiMover:
kavita_api_key: "str | None" = None, kavita_api_key: "str | None" = None,
language: str = "en", language: str = "en",
request_timeout: int = 30, request_timeout: int = 30,
delete_source: bool = True): delete_source: bool = True,
matches_cache: "MatchesCache | None" = None,
api_base_url: str = "https://api.mangabaka.dev/v1"):
self._src = Path(suwayomi_path) self._src = Path(suwayomi_path)
self._dst = Path(kavita_path) self._dst = Path(kavita_path)
self._language = language self._language = language
self._timeout = request_timeout self._timeout = request_timeout
self._delete_source = delete_source self._delete_source = delete_source
self._matches_cache = matches_cache
self._api_base_url = api_base_url.rstrip("/")
# Shared HTTP session and resolvers — reused across all series/chapters # Shared HTTP session and resolvers — reused across all series/chapters
# to maximise cache hits and minimise API round-trips. # to maximise cache hits and minimise API round-trips.
@@ -357,6 +362,73 @@ class SuwayomiMover:
raise FileNotFoundError( raise FileNotFoundError(
f"No Suwayomi directory found for '{manga_title}' under {self._src}") f"No Suwayomi directory found for '{manga_title}' under {self._src}")
def build_matches_only(self) -> dict:
    """
    Walks every series under the Suwayomi root and resolves each one
    to a MangaBaka match — nothing else.

    For every ``<root>/<source>/<manga>`` directory:
      - Reads the first chapter directory (in chapter order) to obtain
        the Series name; falls back to the manga folder name when that
        chapter provides none.
      - Cleans the name with `_clean_suwayomi_title`.
      - Skips titles that clean down to nothing: they cannot be searched
        meaningfully and the web API rejects empty titles, so such an
        entry could never be edited or removed there.
      - Skips titles that are already in the matches cache.
      - Otherwise issues a MangaBaka search and adds the top hit to the
        cache (which is persisted to disk immediately).

    A failure while resolving one series is logged and does not abort
    the scan.

    Returns the full cache contents as a Python dict.

    Raises RuntimeError if no MatchesCache was supplied to the mover.
    """
    if self._matches_cache is None:
        raise RuntimeError(
            "build_matches_only requires a MatchesCache instance")

    search_url = f"{self._api_base_url}/series/search"

    for source_dir in sorted(self._src.iterdir()):
        if not source_dir.is_dir():
            continue
        for manga_dir in sorted(source_dir.iterdir()):
            if not manga_dir.is_dir():
                continue

            raw_series = manga_dir.name
            for chapter_dir in sorted(manga_dir.iterdir(),
                                      key=lambda p: _chapter_sort_key(p.name)):
                if chapter_dir.is_dir():
                    fields = _read_suwayomi_fields(chapter_dir)
                    if fields.get("Series"):
                        raw_series = fields["Series"]
                    # Only the first chapter directory is consulted.
                    break

            builder_title = _clean_suwayomi_title(raw_series)
            if not str(builder_title or "").strip():
                print(f"[matches] {manga_dir} — empty title, skipped",
                      flush=True)
                continue

            if self._matches_cache.get(builder_title):
                print(f"[matches] {builder_title} — cached", flush=True)
                continue

            print(f"[matches] {builder_title} — searching", flush=True)
            try:
                resp = self._session.get(
                    search_url,
                    params={"q": builder_title, "page": 1, "limit": 1},
                    timeout=self._timeout)
                resp.raise_for_status()
                data = resp.json().get("data") or []
                if not data:
                    print(f"  [warn] no MangaBaka match for {builder_title!r}",
                          flush=True)
                    continue
                series = data[0]
                self._matches_cache.add(
                    builder_title,
                    mangabaka_id=series.get("id"),
                    mangabaka_name=series.get("title") or "",
                    image_url=_pick_cover_url(series.get("cover")),
                )
            except Exception as exc:
                # Best effort: one unreachable or malformed result must not
                # stop the remaining series from being resolved.
                print(f"  [warn] search failed for {builder_title!r}: {exc}",
                      flush=True)

    return self._matches_cache.all()
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Internal: series # Internal: series
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -396,6 +468,7 @@ class SuwayomiMover:
# One builder per series — metadata fetched once, reused for all chapters. # One builder per series — metadata fetched once, reused for all chapters.
builder = ComicInfoBuilder( builder = ComicInfoBuilder(
builder_title, chapter=1, builder_title, chapter=1,
api_base_url=self._api_base_url,
language=self._language, language=self._language,
request_timeout=self._timeout, request_timeout=self._timeout,
session=self._session, session=self._session,
@@ -403,6 +476,7 @@ class SuwayomiMover:
works_resolver=self._works_resolver, works_resolver=self._works_resolver,
mal_resolver=self._mal, mal_resolver=self._mal,
al_resolver=self._al, al_resolver=self._al,
matches_cache=self._matches_cache,
) )
# Fetch MangaBaka metadata now to get the canonical title and MAL ID. # Fetch MangaBaka metadata now to get the canonical title and MAL ID.