manga matching and WebApp
Build and Deploy / build (push) Successful in 32s
Build and Deploy / deploy (push) Successful in 25s

This commit is contained in:
2026-05-26 20:20:24 +02:00
parent 12edb8a5d7
commit 615bd1b468
9 changed files with 665 additions and 56 deletions
+40 -40
View File
@@ -23,43 +23,43 @@ jobs:
- name: Push Image
run: docker push gitea.johannesbot.de/johannesbot/manga-mover-and-metadata-collector:latest
# deploy:
# needs: build
# runs-on: ubuntu-latest
# steps:
# - name: Checkout
# uses: actions/checkout@v4
#
# - name: Create deployment directory
# uses: appleboy/ssh-action@v1.0.3
# with:
# host: ${{ secrets.SSH_HOST }}
# username: ${{ secrets.SSH_USER }}
# password: ${{ secrets.SSH_PASSWORD }}
# port: ${{ secrets.SSH_PORT || 22 }}
# script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
#
# - name: Copy docker-compose via SCP
# uses: appleboy/scp-action@v0.1.7
# with:
# host: ${{ secrets.SSH_HOST }}
# username: ${{ secrets.SSH_USER }}
# password: ${{ secrets.SSH_PASSWORD }}
# port: ${{ secrets.SSH_PORT || 22 }}
# source: "docker-compose.prod.yml"
# target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector"
#
# - name: Deploy via SSH
# uses: appleboy/ssh-action@v1.0.3
# with:
# host: ${{ secrets.SSH_HOST }}
# username: ${{ secrets.SSH_USER }}
# password: ${{ secrets.SSH_PASSWORD }}
# port: ${{ secrets.SSH_PORT || 22 }}
# script: |
# cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
# mv docker-compose.prod.yml docker-compose.yml
# echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
# sudo docker compose pull
# sudo docker compose up -d --remove-orphans
# sudo docker image prune -f
# Deployment job: runs after the `build` job and rolls the compose stack on
# the remote host. All connection details come from repository secrets; the
# SSH port falls back to 22 when SSH_PORT is not set.
deploy:
  needs: build
  runs-on: ubuntu-latest
  steps:
    # The checkout provides docker-compose.prod.yml for the SCP step below.
    - name: Checkout
      uses: actions/checkout@v4
    # `mkdir -p` keeps this step safe to repeat on every deployment.
    - name: Create deployment directory
      uses: appleboy/ssh-action@v1.0.3
      with:
        host: ${{ secrets.SSH_HOST }}
        username: ${{ secrets.SSH_USER }}
        password: ${{ secrets.SSH_PASSWORD }}
        port: ${{ secrets.SSH_PORT || 22 }}
        script: mkdir -p /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
    # Upload the production compose file into the directory created above.
    - name: Copy docker-compose via SCP
      uses: appleboy/scp-action@v0.1.7
      with:
        host: ${{ secrets.SSH_HOST }}
        username: ${{ secrets.SSH_USER }}
        password: ${{ secrets.SSH_PASSWORD }}
        port: ${{ secrets.SSH_PORT || 22 }}
        source: "docker-compose.prod.yml"
        target: "/home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector"
    # Rename the uploaded file to the default compose name, log in to the
    # registry, pull, restart the stack, then prune dangling images.
    - name: Deploy via SSH
      uses: appleboy/ssh-action@v1.0.3
      with:
        host: ${{ secrets.SSH_HOST }}
        username: ${{ secrets.SSH_USER }}
        password: ${{ secrets.SSH_PASSWORD }}
        port: ${{ secrets.SSH_PORT || 22 }}
        script: |
          cd /home/${{ secrets.SSH_USER }}/manga-mover-and-metadata-collector
          mv docker-compose.prod.yml docker-compose.yml
          echo "${{ secrets.REGISTRY_PASSWORD }}" | sudo docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
          sudo docker compose pull
          sudo docker compose up -d --remove-orphans
          sudo docker image prune -f
+3 -1
View File
@@ -18,6 +18,8 @@ ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1
# Mount points used by main.py defaults
VOLUME ["/mnt/suwayomi", "/mnt/kavita"]
VOLUME ["/mnt/suwayomi", "/mnt/kavita", "/config"]
EXPOSE 8080
CMD ["python", "/app/main.py"]
+5
View File
@@ -9,6 +9,11 @@ services:
LANGUAGE: "${LANGUAGE:-en}"
SETTLE_SECONDS: "${SETTLE_SECONDS:-600}"
DELETE_SOURCE: "${DELETE_SOURCE:-true}"
MATCH_PATH: "${MATCH_PATH:-/config/matches.json}"
WEB_PORT: "${WEB_PORT:-8080}"
ports:
- "${WEB_PORT:-8080}:8080"
volumes:
- "${HOST_SUWAYOMI_PATH}:/mnt/suwayomi"
- "${HOST_KAVITA_PATH}:/mnt/kavita"
- "${HOST_CONFIG_PATH}:/config"
+25 -10
View File
@@ -24,6 +24,9 @@ Environment variables
SETTLE_SECONDS default 600 (10-minute quiet window)
REQUEST_TIMEOUT default 30
DELETE_SOURCE default true (delete source folders after pack)
MATCH_PATH default /config/matches.json
WEB_PORT default 8080 (Flask web UI for matches.json)
WEB_HOST default 0.0.0.0
"""
from __future__ import annotations
@@ -38,6 +41,8 @@ sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
from src.SuwayomiMover import SuwayomiMover # noqa: E402
from src.SuwayomiFolderWatcher import SuwayomiFolderWatcher # noqa: E402
from src.MatchesCache import MatchesCache # noqa: E402
from src.MatchesWebApp import MatchesWebApp # noqa: E402
def _env_str(name: str, default: "str | None" = None,
@@ -77,6 +82,9 @@ def main() -> int:
settle_seconds = _env_int("SETTLE_SECONDS", 600)
request_timeout = _env_int("REQUEST_TIMEOUT", 30)
delete_source = _env_bool("DELETE_SOURCE", True)
match_path = _env_str("MATCH_PATH", "/config/matches.json")
web_host = _env_str("WEB_HOST", "0.0.0.0") or "0.0.0.0"
web_port = _env_int("WEB_PORT", 8080)
print(f"[main] suwayomi = {suwayomi_path}", flush=True)
print(f"[main] kavita = {kavita_path}", flush=True)
@@ -84,6 +92,10 @@ def main() -> int:
print(f"[main] settle = {settle_seconds}s", flush=True)
print(f"[main] language = {language}", flush=True)
print(f"[main] delete src= {delete_source}", flush=True)
print(f"[main] match path= {match_path}", flush=True)
print(f"[main] web = {web_host}:{web_port}", flush=True)
matches_cache = MatchesCache(match_path)
mover = SuwayomiMover(
suwayomi_path, kavita_path,
@@ -92,20 +104,23 @@ def main() -> int:
language=language,
request_timeout=request_timeout,
delete_source=delete_source,
matches_cache=matches_cache,
)
watcher = SuwayomiFolderWatcher(
suwayomi_path, mover, settle_seconds=settle_seconds)
# watcher = SuwayomiFolderWatcher(suwayomi_path, mover, settle_seconds=settle_seconds)
def shutdown(signum, _frame):
print(f"[main] received signal {signum}", flush=True)
watcher.stop()
web_app = MatchesWebApp(matches_cache, mover=mover, host=web_host, port=web_port)
web_app.start()
signal.signal(signal.SIGTERM, shutdown)
signal.signal(signal.SIGINT, shutdown)
watcher.start()
watcher.wait() # blocks until stop() is called via a signal
# def shutdown(signum, _frame):
# print(f"[main] received signal {signum}", flush=True)
# watcher.stop()
#
# signal.signal(signal.SIGTERM, shutdown)
# signal.signal(signal.SIGINT, shutdown)
#
# watcher.start()
# watcher.wait() # blocks until stop() is called via a signal
return 0
+1
View File
@@ -1,3 +1,4 @@
requests>=2.31
Pillow>=10.0
watchdog>=4.0
Flask>=3.0
+35 -3
View File
@@ -48,6 +48,7 @@ from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver
from AniListResolver import AniListResolver
from MatchesCache import MatchesCache
try:
from PIL import Image
@@ -170,7 +171,8 @@ class ComicInfoBuilder:
volume_resolver: "MangaDexVolumeResolver | None" = None,
works_resolver: "MangaBakaWorksResolver | None" = None,
mal_resolver: "MALResolver | None" = None,
al_resolver: "AniListResolver | None" = None):
al_resolver: "AniListResolver | None" = None,
matches_cache: "MatchesCache | None" = None):
if not manga_title or not str(manga_title).strip():
raise ValueError("manga_title must not be empty.")
@@ -197,6 +199,7 @@ class ComicInfoBuilder:
request_timeout=request_timeout)
self._al_resolver = al_resolver or AniListResolver(
request_timeout=request_timeout)
self._matches_cache = matches_cache
self._metadata: "dict | None" = None
self._pages: list[dict] = []
@@ -353,14 +356,43 @@ class ComicInfoBuilder:
return series
def _search_best_series(self, title: str):
    """
    Resolves `title` to a MangaBaka series.

    Lookup order:
      1. matches.json cache (if attached) — uses the stored series ID
         to fetch the full series, skipping the search step entirely.
      2. Fresh MangaBaka search — top hit. The match is persisted to
         matches.json before being returned so it survives a crash.

    Returns the series dict, or None when the search yields no results.
    Errors from the search request itself (including non-2xx responses,
    via `raise_for_status`) propagate to the caller.
    """
    if self._matches_cache is not None:
        cached = self._matches_cache.get(title)
        if cached and cached.get("mangabakaId"):
            try:
                return self._fetch_series_by_id(cached["mangabakaId"])
            except Exception as exc:
                # Best effort: a failing cached ID must not block the
                # series, so log it and fall through to a normal search.
                print(f"[ComicInfoBuilder] cached id "
                      f"{cached['mangabakaId']} for {title!r} failed "
                      f"({exc}); falling back to fresh search",
                      flush=True)

    url = f"{self.api_base_url}/series/search"
    resp = self._session.get(
        url, params={"q": title, "page": 1, "limit": 1},
        timeout=self.request_timeout)
    resp.raise_for_status()
    data = resp.json().get("data") or []

    # Only the top hit is requested (limit=1); an empty list means no match.
    series = data[0] if data else None
    if series and self._matches_cache is not None:
        # Persist before returning so the match is on disk even if a later
        # step of the pipeline fails.
        self._matches_cache.add(
            title,
            mangabaka_id=series.get("id"),
            mangabaka_name=series.get("title") or "",
            image_url=_pick_cover_url(series.get("cover")),
        )
    return series
def _fetch_series_by_id(self, series_id) -> dict:
url = f"{self.api_base_url}/series/{series_id}"
+139
View File
@@ -0,0 +1,139 @@
"""
MatchesCache.py
===============
Persistent JSON cache that maps a Suwayomi/series search title to the
MangaBaka series it was matched against.
Structure on disk::
{
"matches": {
"<search title>": {
"mangabakaId": "12345",
"mangabakaName": "One-Punch Man",
"imageUrl": "https://.../cover.jpg",
"firstMatchTime": 1700000000
},
...
}
}
The cache is consulted by ComicInfoBuilder before issuing a MangaBaka
search request, and is written back to disk on every mutation so a crash
does not lose matches that were resolved in the current run.
"""
from __future__ import annotations
import json
import threading
import time
from pathlib import Path
class MatchesCache:
    """
    Lock-protected, JSON-backed map from a search title to its MangaBaka match.

    Every entry is a dict with the keys ``mangabakaId``, ``mangabakaName``,
    ``imageUrl`` and ``firstMatchTime`` (unix seconds). All public methods
    take an internal re-entrant lock, return copies of the stored entries,
    and every mutation is written to disk before the method returns.
    """

    def __init__(self, path):
        """Binds the cache to `path` and loads it if the file exists."""
        self._path = Path(path)
        self._lock = threading.RLock()
        self._data: dict = {"matches": {}}
        self._load()

    # ------------------------------------------------------------------
    # Public lookup / mutation API
    # ------------------------------------------------------------------

    def get(self, title: str) -> "dict | None":
        """Returns a copy of the entry for `title`, or None if absent/empty."""
        with self._lock:
            entry = self._data["matches"].get(title)
            return dict(entry) if entry else None

    def add(self, title: str, *,
            mangabaka_id,
            mangabaka_name: str,
            image_url: "str | None") -> dict:
        """
        Stores a fresh match for `title`, replacing any existing entry
        (``firstMatchTime`` is reset to the current time).

        Returns a copy of the stored entry.
        """
        entry = {
            "mangabakaId": str(mangabaka_id) if mangabaka_id is not None else "",
            "mangabakaName": mangabaka_name or "",
            "imageUrl": image_url or "",
            "firstMatchTime": int(time.time()),
        }
        with self._lock:
            self._data["matches"][title] = entry
            self._save_unlocked()
            return dict(entry)

    def upsert(self, title: str, *,
               mangabaka_id=None,
               mangabaka_name=None,
               image_url=None,
               first_match_time=None) -> dict:
        """
        Creates the entry for `title` if needed, then overwrites only the
        fields that were passed (arguments left as None are not touched).

        A `first_match_time` that cannot be converted to int is ignored.
        Returns a copy of the resulting entry.
        """
        with self._lock:
            entry = self._data["matches"].get(title)
            if entry is None:
                entry = {
                    "mangabakaId": "",
                    "mangabakaName": "",
                    "imageUrl": "",
                    "firstMatchTime": int(time.time()),
                }
                self._data["matches"][title] = entry
            if mangabaka_id is not None:
                entry["mangabakaId"] = str(mangabaka_id)
            if mangabaka_name is not None:
                entry["mangabakaName"] = mangabaka_name
            if image_url is not None:
                entry["imageUrl"] = image_url
            if first_match_time is not None:
                try:
                    entry["firstMatchTime"] = int(first_match_time)
                except (TypeError, ValueError):
                    # Deliberate best effort: keep the existing timestamp.
                    pass
            self._save_unlocked()
            return dict(entry)

    def rename(self, old_title: str, new_title: str) -> bool:
        """
        Moves the entry stored under `old_title` to `new_title`.

        Returns False (and changes nothing) when `new_title` is empty, equal
        to `old_title`, or `old_title` is not cached. An entry already stored
        under `new_title` is overwritten.
        """
        if not new_title or old_title == new_title:
            return False
        with self._lock:
            entry = self._data["matches"].pop(old_title, None)
            if entry is None:
                return False
            self._data["matches"][new_title] = entry
            self._save_unlocked()
            return True

    def remove(self, title: str) -> bool:
        """Deletes the entry for `title`; returns whether it existed."""
        with self._lock:
            existed = title in self._data["matches"]
            if existed:
                del self._data["matches"][title]
                self._save_unlocked()
            return existed

    def all(self) -> dict:
        """Returns ``{"matches": {...}}`` with a copy of every entry."""
        with self._lock:
            return {"matches": {k: dict(v)
                                for k, v in self._data["matches"].items()}}

    # ------------------------------------------------------------------
    # Internal IO
    # ------------------------------------------------------------------

    def _load(self) -> None:
        """
        Reads the cache file if present. An unreadable, undecodable or
        malformed file is logged and ignored so start-up never fails on it.
        """
        if not self._path.is_file():
            return
        try:
            with self._path.open("r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers both json.JSONDecodeError and the
            # UnicodeDecodeError raised for a file that is not valid UTF-8.
            print(f"[MatchesCache] failed to load {self._path}: {exc}",
                  flush=True)
            return

        matches = loaded.get("matches") if isinstance(loaded, dict) else None
        if not isinstance(matches, dict):
            print(f"[MatchesCache] ignoring {self._path}: "
                  f"no \"matches\" object found", flush=True)
            return

        # Entries must be dicts: get()/all() copy them with dict(...), which
        # would raise on a hand-edited scalar or list value.
        valid = {k: v for k, v in matches.items() if isinstance(v, dict)}
        if len(valid) != len(matches):
            print(f"[MatchesCache] dropped {len(matches) - len(valid)} "
                  f"malformed entries from {self._path}", flush=True)
        loaded["matches"] = valid
        self._data = loaded

    def _save_unlocked(self) -> None:
        """
        Writes the cache via a temp file followed by a rename onto the real
        path. The caller must already hold ``self._lock``.
        """
        self._path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self._path.with_suffix(self._path.suffix + ".tmp")
        try:
            with tmp.open("w", encoding="utf-8") as f:
                json.dump(self._data, f, ensure_ascii=False, indent=2)
            tmp.replace(self._path)
        except BaseException:
            # Do not leave a half-written temp file next to the cache.
            try:
                tmp.unlink()
            except OSError:
                pass
            raise
+341
View File
@@ -0,0 +1,341 @@
"""
MatchesWebApp.py
================
Flask web UI for inspecting and editing the matches.json file produced by
MatchesCache.
Routes
------
GET / HTML table view (one row per cached match)
GET /api/matches JSON dump of the full cache
POST /api/matches Upsert / rename an entry
body: {originalTitle?, title, mangabakaId,
mangabakaName, imageUrl, firstMatchTime?}
POST /api/matches/delete Remove an entry body: {title}
POST /api/build Trigger a full re-scan via SuwayomiMover.build_matches_only
(only available if a mover is wired in)
The Title cell is rendered as a link to MangaBaka's search page, restricted
to the manga / manhwa / manhua types.
"""
from __future__ import annotations
import threading
from urllib.parse import quote_plus
from flask import Flask, jsonify, request, Response
from MatchesCache import MatchesCache
_INDEX_HTML = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>MangaBaka matches</title>
<style>
body { font-family: system-ui, sans-serif; margin: 1.5rem; background: #111; color: #eee; }
h1 { margin: 0 0 1rem; font-size: 1.4rem; }
.bar { display: flex; gap: .5rem; align-items: center; margin-bottom: 1rem; flex-wrap: wrap; }
.bar input[type=search] { padding: .3rem .5rem; min-width: 18rem; background:#222; color:#eee; border:1px solid #444; }
button { padding: .35rem .7rem; cursor: pointer; background:#2a2a2a; color:#eee; border:1px solid #555; }
button.primary { background:#2563eb; border-color:#2563eb; color:white; }
button.danger { background:#7f1d1d; border-color:#7f1d1d; color:white; }
table { border-collapse: collapse; width: 100%; }
th, td { border: 1px solid #333; padding: .4rem .6rem; vertical-align: top; }
th { background: #1d1d1d; text-align: left; position: sticky; top: 0; }
tr:nth-child(even) td { background: #161616; }
td.image img { max-width: 90px; max-height: 130px; display:block; }
td input { width: 100%; padding: .25rem; background:#222; color:#eee; border:1px solid #444; }
td.title a { color: #60a5fa; text-decoration: none; }
td.title a:hover { text-decoration: underline; }
td.actions { white-space: nowrap; }
.status { margin-left: .5rem; color:#9ca3af; font-size: .9rem; }
.dirty td { background: #1f2937 !important; }
</style>
</head>
<body>
<h1>MangaBaka matches</h1>
<div class="bar">
<input id="filter" type="search" placeholder="Filter by title…">
<button id="reload">Reload</button>
<button id="build" class="primary">Build all (rescan)</button>
<span class="status" id="status"></span>
</div>
<table>
<thead>
<tr>
<th>Title</th>
<th>mangabakaId</th>
<th>mangabakaName</th>
<th>firstMatchTime</th>
<th>Image</th>
<th></th>
</tr>
</thead>
<tbody id="rows"></tbody>
</table>
<script>
const TYPES = "&type=manhwa&type=manhua&type=manga";
function fmtTime(unix) {
if (!unix) return "";
const d = new Date(unix * 1000);
return d.toLocaleString();
}
function searchUrl(title) {
return "https://mangabaka.org/search?q=" + encodeURIComponent(title) + TYPES;
}
function setStatus(msg) { document.getElementById("status").textContent = msg; }
function makeRow(title, e) {
const tr = document.createElement("tr");
tr.dataset.originalTitle = title;
const titleTd = document.createElement("td");
titleTd.className = "title";
const titleLink = document.createElement("a");
titleLink.href = searchUrl(title);
titleLink.target = "_blank";
titleLink.rel = "noopener";
titleLink.textContent = title;
const titleInput = document.createElement("input");
titleInput.value = title;
titleInput.style.marginTop = ".25rem";
titleInput.addEventListener("input", () => {
titleLink.textContent = titleInput.value;
titleLink.href = searchUrl(titleInput.value);
tr.classList.add("dirty");
});
titleTd.append(titleLink, titleInput);
tr.appendChild(titleTd);
function field(value) {
const td = document.createElement("td");
const inp = document.createElement("input");
inp.value = value || "";
inp.addEventListener("input", () => tr.classList.add("dirty"));
td.appendChild(inp);
return [td, inp];
}
const [idTd, idInp] = field(e.mangabakaId);
const [nameTd, nameInp] = field(e.mangabakaName);
const [urlTd, urlInp] = field(e.imageUrl);
tr.appendChild(idTd);
tr.appendChild(nameTd);
const timeTd = document.createElement("td");
timeTd.textContent = fmtTime(e.firstMatchTime);
tr.appendChild(timeTd);
const imgTd = document.createElement("td");
imgTd.className = "image";
const img = document.createElement("img");
img.src = e.imageUrl || "";
img.alt = "";
img.loading = "lazy";
urlInp.addEventListener("input", () => { img.src = urlInp.value; });
imgTd.append(img, urlInp);
tr.appendChild(imgTd);
const actTd = document.createElement("td");
actTd.className = "actions";
const save = document.createElement("button");
save.textContent = "Save";
save.className = "primary";
save.addEventListener("click", async () => {
save.disabled = true;
setStatus("Saving " + titleInput.value + "");
const body = {
originalTitle: tr.dataset.originalTitle,
title: titleInput.value,
mangabakaId: idInp.value,
mangabakaName: nameInp.value,
imageUrl: urlInp.value,
};
try {
const r = await fetch("/api/matches", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(body),
});
if (!r.ok) throw new Error(await r.text());
tr.dataset.originalTitle = titleInput.value;
tr.classList.remove("dirty");
setStatus("Saved " + titleInput.value);
} catch (err) {
setStatus("Save failed: " + err.message);
} finally {
save.disabled = false;
}
});
const del = document.createElement("button");
del.textContent = "Delete";
del.className = "danger";
del.style.marginLeft = ".25rem";
del.addEventListener("click", async () => {
if (!confirm("Delete " + tr.dataset.originalTitle + "?")) return;
setStatus("Deleting " + tr.dataset.originalTitle + "");
try {
const r = await fetch("/api/matches/delete", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ title: tr.dataset.originalTitle }),
});
if (!r.ok) throw new Error(await r.text());
tr.remove();
setStatus("Deleted");
} catch (err) {
setStatus("Delete failed: " + err.message);
}
});
actTd.append(save, del);
tr.appendChild(actTd);
return tr;
}
async function load() {
setStatus("Loading…");
const tbody = document.getElementById("rows");
tbody.innerHTML = "";
try {
const r = await fetch("/api/matches");
const data = await r.json();
const matches = data.matches || {};
const titles = Object.keys(matches).sort((a,b)=>a.localeCompare(b));
for (const t of titles) tbody.appendChild(makeRow(t, matches[t]));
setStatus(titles.length + " entries");
applyFilter();
} catch (err) {
setStatus("Load failed: " + err.message);
}
}
function applyFilter() {
const q = document.getElementById("filter").value.toLowerCase();
for (const tr of document.querySelectorAll("#rows tr")) {
const t = tr.dataset.originalTitle.toLowerCase();
tr.style.display = t.includes(q) ? "" : "none";
}
}
document.getElementById("filter").addEventListener("input", applyFilter);
document.getElementById("reload").addEventListener("click", load);
document.getElementById("build").addEventListener("click", async () => {
if (!confirm("Run full scan? This may take several minutes.")) return;
setStatus("Building… (running on the server)");
try {
const r = await fetch("/api/build", { method: "POST" });
if (!r.ok) throw new Error(await r.text());
setStatus("Build finished");
load();
} catch (err) {
setStatus("Build failed: " + err.message);
}
});
load();
</script>
</body>
</html>
"""
class MatchesWebApp:
    """
    Flask app exposing the MatchesCache.

    `mover` is optional. If provided, POST /api/build calls
    ``mover.build_matches_only()`` synchronously inside the request handler,
    so the HTTP response is only sent once the scan has finished. A lock
    rejects overlapping builds with 409. Without a mover the route answers
    503.
    """

    def __init__(self, cache: MatchesCache, *,
                 mover=None,
                 host: str = "0.0.0.0",
                 port: int = 8080):
        self._cache = cache
        self._mover = mover
        self._host = host
        self._port = port
        # The server runs with threaded=True, so two build requests can
        # arrive concurrently; this lock lets only one scan run at a time.
        self._build_lock = threading.Lock()
        self._app = Flask(__name__)
        self._register_routes()

    @property
    def app(self) -> Flask:
        """The underlying Flask application object."""
        return self._app

    def start(self) -> threading.Thread:
        """Starts the Flask server on a daemon thread and returns it."""
        thread = threading.Thread(
            target=self._app.run,
            kwargs={"host": self._host, "port": self._port,
                    "debug": False, "use_reloader": False,
                    "threaded": True},
            name="MatchesWebApp",
            daemon=True,
        )
        thread.start()
        print(f"[MatchesWebApp] listening on {self._host}:{self._port}",
              flush=True)
        return thread

    # ------------------------------------------------------------------
    # Routes
    # ------------------------------------------------------------------

    def _register_routes(self) -> None:
        """Attaches all HTTP routes to the Flask app."""
        app = self._app
        cache = self._cache

        def json_object() -> dict:
            # Anything that is not a JSON object (missing body, invalid JSON,
            # list, string, number) is treated as an empty object so the
            # handlers answer 400 instead of failing with a 500.
            body = request.get_json(silent=True)
            return body if isinstance(body, dict) else {}

        def text(value) -> str:
            # Non-string values count as missing.
            return value.strip() if isinstance(value, str) else ""

        @app.get("/")
        def index() -> Response:
            return Response(_INDEX_HTML, mimetype="text/html; charset=utf-8")

        @app.get("/api/matches")
        def api_list():
            return jsonify(cache.all())

        @app.post("/api/matches")
        def api_upsert():
            body = json_object()
            title = text(body.get("title"))
            if not title:
                return Response("title is required", status=400)

            # A differing originalTitle means the row was renamed in the UI:
            # move the entry first, then apply the field updates to it.
            original = text(body.get("originalTitle")) or title
            if original != title:
                cache.rename(original, title)

            entry = cache.upsert(
                title,
                mangabaka_id=body.get("mangabakaId"),
                mangabaka_name=body.get("mangabakaName"),
                image_url=body.get("imageUrl"),
                first_match_time=body.get("firstMatchTime"),
            )
            return jsonify({"title": title, "entry": entry})

        @app.post("/api/matches/delete")
        def api_delete():
            body = json_object()
            title = text(body.get("title"))
            if not title:
                return Response("title is required", status=400)
            removed = cache.remove(title)
            return jsonify({"removed": removed, "title": title})

        @app.post("/api/build")
        def api_build():
            if self._mover is None:
                return Response("no mover configured", status=503)
            # Non-blocking acquire: a second request is rejected right away
            # instead of queueing behind the running scan.
            if not self._build_lock.acquire(blocking=False):
                return Response("build already running", status=409)
            try:
                result = self._mover.build_matches_only()
            finally:
                self._build_lock.release()
            return jsonify(result)
+76 -2
View File
@@ -51,12 +51,13 @@ from pathlib import Path
import requests
from ComicInfoBuilder import ComicInfoBuilder
from ComicInfoBuilder import ComicInfoBuilder, _pick_cover_url
from MangadexVolumeResolver import MangaDexVolumeResolver
from MangaBakaWorksResolver import MangaBakaWorksResolver
from MALResolver import MALResolver
from AniListResolver import AniListResolver
from KavitaPersonUpdater import KavitaPersonUpdater
from MatchesCache import MatchesCache
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}
@@ -287,12 +288,16 @@ class SuwayomiMover:
kavita_api_key: "str | None" = None,
language: str = "en",
request_timeout: int = 30,
delete_source: bool = True):
delete_source: bool = True,
matches_cache: "MatchesCache | None" = None,
api_base_url: str = "https://api.mangabaka.dev/v1"):
self._src = Path(suwayomi_path)
self._dst = Path(kavita_path)
self._language = language
self._timeout = request_timeout
self._delete_source = delete_source
self._matches_cache = matches_cache
self._api_base_url = api_base_url.rstrip("/")
# Shared HTTP session and resolvers — reused across all series/chapters
# to maximise cache hits and minimise API round-trips.
@@ -357,6 +362,73 @@ class SuwayomiMover:
raise FileNotFoundError(
f"No Suwayomi directory found for '{manga_title}' under {self._src}")
def build_matches_only(self) -> dict:
    """
    Walks every series under the Suwayomi root and resolves each one
    to a MangaBaka match — nothing else.

    The root is traversed as <source>/<manga>/<chapter>; non-directories
    are skipped at every level.

    For every series:
      - Reads the first chapter's ComicInfo.xml to obtain the canonical
        Series name (falls back to the folder name).
      - Cleans the name (strips source labels) the same way the real
        move pipeline does.
      - If the title is already in the matches cache, skips it.
      - Otherwise issues a MangaBaka search and adds the top hit to
        the cache (which is persisted to disk immediately).

    A failing search or cache write for one series is logged and does not
    stop the scan.

    Returns the full cache contents as a Python dict.

    Raises RuntimeError when the mover was created without a MatchesCache.
    """
    if self._matches_cache is None:
        raise RuntimeError(
            "build_matches_only requires a MatchesCache instance")

    search_url = f"{self._api_base_url}/series/search"

    for source_dir in sorted(self._src.iterdir()):
        if not source_dir.is_dir():
            continue
        for manga_dir in sorted(source_dir.iterdir()):
            if not manga_dir.is_dir():
                continue

            # Default to the folder name; overridden below when the chapter
            # metadata carries a Series field.
            raw_series = manga_dir.name
            for chapter_dir in sorted(manga_dir.iterdir(),
                                      key=lambda p: _chapter_sort_key(p.name)):
                if chapter_dir.is_dir():
                    fields = _read_suwayomi_fields(chapter_dir)
                    if fields.get("Series"):
                        raw_series = fields["Series"]
                    # Only the first chapter directory is consulted, as the
                    # docstring describes.
                    # NOTE(review): nesting of this `break` was reconstructed
                    # from the docstring — confirm against the original file.
                    break

            builder_title = _clean_suwayomi_title(raw_series)

            # Any stored entry counts as "already matched".
            if self._matches_cache.get(builder_title):
                print(f"[matches] {builder_title} — cached")
                continue

            print(f"[matches] {builder_title} — searching")
            try:
                # limit=1: only the top search hit is ever used.
                resp = self._session.get(
                    search_url,
                    params={"q": builder_title, "page": 1, "limit": 1},
                    timeout=self._timeout)
                resp.raise_for_status()
                data = resp.json().get("data") or []
                if not data:
                    print(f" [warn] no MangaBaka match for {builder_title!r}")
                    continue
                series = data[0]
                self._matches_cache.add(
                    builder_title,
                    mangabaka_id=series.get("id"),
                    mangabaka_name=series.get("title") or "",
                    image_url=_pick_cover_url(series.get("cover")),
                )
            except Exception as exc:
                # Best effort per series: log and move on to the next one.
                print(f" [warn] search failed for {builder_title!r}: {exc}")

    return self._matches_cache.all()
# ------------------------------------------------------------------
# Internal: series
# ------------------------------------------------------------------
@@ -396,6 +468,7 @@ class SuwayomiMover:
# One builder per series — metadata fetched once, reused for all chapters.
builder = ComicInfoBuilder(
builder_title, chapter=1,
api_base_url=self._api_base_url,
language=self._language,
request_timeout=self._timeout,
session=self._session,
@@ -403,6 +476,7 @@ class SuwayomiMover:
works_resolver=self._works_resolver,
mal_resolver=self._mal,
al_resolver=self._al,
matches_cache=self._matches_cache,
)
# Fetch MangaBaka metadata now to get the canonical title and MAL ID.