From 06d6354d29ae42aebb57d53ae91e8da273c4f9601eaff42d9ba9b6ebcca3b18b Mon Sep 17 00:00:00 2001
From: JohannesBOT <maxb12032005@gmail.com>
Date: Fri, 12 Jun 2026 09:53:25 +0200
Subject: [PATCH] Centralize Kavita HTTP calls and text helpers; add release workflow

---
 .gitea/workflows/release.yml     |  34 ++++++
 main.py                          |  11 +-
 src/AniListResolver.py           |  19 +--
 src/KavitaClient.py              |  61 ++++++++--
 src/KavitaPersonUpdater.py       | 201 +++++--------------------------
 src/LightNovelMetadataBuilder.py |  47 ++++----
 src/LightNovelOrchestrator.py    |   3 +-
 src/MALResolver.py               |  19 +--
 src/MatchesCache.py              |  28 ++---
 src/MatchesWebApp.py             |  25 ++--
 src/TextUtils.py                 |  45 +++++++
 11 files changed, 234 insertions(+), 259 deletions(-)
 create mode 100644 .gitea/workflows/release.yml
 create mode 100644 src/TextUtils.py

diff --git a/.gitea/workflows/release.yml b/.gitea/workflows/release.yml
new file mode 100644
index 0000000..83c6d07
--- /dev/null
+++ b/.gitea/workflows/release.yml
@@ -0,0 +1,34 @@
+name: Release
+
+on:
+  push:
+    tags:
+      - "v[0-9]+.[0-9]+.[0-9]+"
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        run: |
+          git clone ${{ github.server_url }}/${{ github.repository }}.git .
+          git checkout ${{ github.sha }}
+
+      - name: Login to Gitea Registry
+        run: |
+          echo "${{ secrets.REGISTRY_PASSWORD }}" | \
+          docker login https://gitea.johannesbot.de -u ${{ secrets.REGISTRY_USER }} --password-stdin
+
+      - name: Build Image
+        run: |
+          VERSION="${GITHUB_REF_NAME#v}"
+          docker build \
+            -t gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:${VERSION} \
+            -t gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:${GITHUB_REF_NAME} \
+            .
+
+      - name: Push Image
+        run: |
+          VERSION="${GITHUB_REF_NAME#v}"
+          docker push gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:${VERSION}
+          docker push gitea.johannesbot.de/johannesbot/kavita-lightnovel-metadata-fetcher:${GITHUB_REF_NAME}
diff --git a/main.py b/main.py
index 5fdf5d0..584c53d 100644
--- a/main.py
+++ b/main.py
@@ -38,12 +38,15 @@ try:
 except ImportError:
     pass
 
-# Make src/ importable when running as `python main.py`.
+# Make src/ importable when running as `python main.py`.  Import the
+# modules by their plain names (not `src.X`) so they are the same module
+# objects the src-internal imports resolve to — `src.X` would load every
+# module twice under two names.
 sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
 
-from src.MatchesCache import MatchesCache                       # noqa: E402
-from src.LightNovelOrchestrator import LightNovelOrchestrator   # noqa: E402
-from src.MatchesWebApp import MatchesWebApp                     # noqa: E402
+from MatchesCache import MatchesCache                       # noqa: E402
+from LightNovelOrchestrator import LightNovelOrchestrator   # noqa: E402
+from MatchesWebApp import MatchesWebApp                     # noqa: E402
 
 
 def _env_str(name: str, default: "str | None" = None,
diff --git a/src/AniListResolver.py b/src/AniListResolver.py
index 10591c8..9725e60 100644
--- a/src/AniListResolver.py
+++ b/src/AniListResolver.py
@@ -32,12 +32,12 @@ Dependencies
 from __future__ import annotations
 
 import datetime
-import difflib
 import time
 
 import requests
 
 from MediaResolver import MediaResolver
+from TextUtils import best_similarity
 
 
 # --------------------------------------------------------------------------
@@ -469,18 +469,11 @@ class AniListResolver(MediaResolver):
 def _score_title(query: str, entry: dict) -> float:
     """Returns the best title-similarity score for an AniList media entry."""
     title_obj = entry.get("title") or {}
-    candidates = [
-        title_obj.get("romaji") or "",
-        title_obj.get("english") or "",
-        title_obj.get("native") or "",
-    ]
-    best = 0.0
-    q = query.lower()
-    for t in candidates:
-        if t:
-            ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
-            best = max(best, ratio)
-    return best
+    return best_similarity(query, (
+        title_obj.get("romaji"),
+        title_obj.get("english"),
+        title_obj.get("native"),
+    ))
 
 
 # --------------------------------------------------------------------------
diff --git a/src/KavitaClient.py b/src/KavitaClient.py
index fd0e77a..11463a1 100644
--- a/src/KavitaClient.py
+++ b/src/KavitaClient.py
@@ -194,25 +194,56 @@ class KavitaClient:
             return {}
 
     # ------------------------------------------------------------------
-    # Series cover upload
+    # Persons
+    # ------------------------------------------------------------------
+    def search_persons(self, name: str) -> list[dict]:
+        """Returns PersonDto entries matching `name` (Kavita's own search)."""
+        r = self._session.get(
+            f"{self._base}/api/Person/search",
+            params={"queryString": name}, timeout=self._timeout)
+        r.raise_for_status()
+        return r.json() or []
+
+    def update_person(self, payload: dict) -> None:
+        """Writes a person record (malId, aniListId, description, …)."""
+        r = self._session.post(f"{self._base}/api/Person/update",
+                               json=payload, timeout=self._timeout)
+        r.raise_for_status()
+
+    # ------------------------------------------------------------------
+    # Cover uploads
     # ------------------------------------------------------------------
     def upload_series_cover(self, series_id: int, image_url: str, *,
                             lock: bool = False) -> None:
-        """
-        Downloads an external image and uploads it as the series cover.
+        """Downloads an external image and uploads it as the series cover."""
+        self._upload_cover("/api/Upload/series", series_id, image_url, lock)
 
-        Mirrors the cover-upload trick used in KavitaPersonUpdater:
-        Kavita's `/api/Upload/series` accepts a raw base64 blob (no
-        ``data:`` prefix) in the ``url`` field.
+    def upload_person_cover(self, person_id: int, image_url: str, *,
+                            lock: bool = False) -> None:
+        """Downloads an external image and uploads it as a person cover."""
+        self._upload_cover("/api/Upload/person", person_id, image_url, lock)
+
+    def _upload_cover(self, endpoint: str, entity_id: int,
+                      image_url: str, lock: bool) -> None:
+        """
+        Shared cover-upload path.  Kavita's upload endpoints accept a raw
+        base64 blob (no ``data:`` prefix) in the ``url`` field — a data
+        URI or the two-step upload-by-url flow is rejected with HTTP 400
+        (verified against Kavita 0.9.0.2).
         """
         img = self._image_session.get(image_url, timeout=self._timeout)
         img.raise_for_status()
         b64 = base64.b64encode(img.content).decode()
         r = self._session.post(
-            f"{self._base}/api/Upload/series",
-            json={"id": series_id, "url": b64, "lockCover": lock},
+            f"{self._base}{endpoint}",
+            json={"id": entity_id, "url": b64, "lockCover": lock},
             timeout=self._timeout)
-        r.raise_for_status()
+        if r.status_code >= 400:
+            # Include the body excerpt — Kavita's upload errors carry the
+            # actual reason there, not in the status line.
+            raise requests.HTTPError(
+                f"{endpoint} HTTP {r.status_code}: {_short_body(r)}",
+                response=r)
 
     # ------------------------------------------------------------------
     # Generic GET helper (used by callers that need a response object)
@@ -227,3 +258,15 @@ class KavitaClient:
         return self._session.post(f"{self._base}{path}",
                                   json=json, params=params,
                                   timeout=self._timeout)
+
+
+def _short_body(resp: requests.Response, limit: int = 400) -> str:
+    """Returns the response body trimmed to `limit` chars for error messages."""
+    try:
+        text = resp.text or ""
+    except Exception:
+        return "<unreadable response body>"
+    text = text.strip().replace("\n", " ").replace("\r", " ")
+    if len(text) > limit:
+        text = text[:limit] + "…"
+    return text or "<empty body>"
diff --git a/src/KavitaPersonUpdater.py b/src/KavitaPersonUpdater.py
index a1fbee1..1a65692 100644
--- a/src/KavitaPersonUpdater.py
+++ b/src/KavitaPersonUpdater.py
@@ -15,46 +15,22 @@ the updater:
      an 'about' text (requires an extra Jikan request per character; only
      performed when update_descriptions=True).
 
-Kavita API version
-------------------
+All HTTP traffic to Kavita goes through the shared :class:`KavitaClient`
+(`/api/Person/search`, `/api/Person/update`, `/api/Upload/person`).
+
 Tested against Kavita 0.9.0.2.
-
-Authentication
---------------
-Uses the `x-api-key` header (API key from Kavita user settings).
-No JWT login is required.
-
-Relevant endpoints (Kavita 0.9.0.2)
--------------------------------------
-  GET  /api/Person/search       find persons by name / alias
-  POST /api/Person/update       write metadata (malId, description, …)
-  POST /api/Upload/person       set cover image (base64 data URI)
-  POST /api/Upload/upload-by-url  download an external URL to temp storage
-                                  (used as an alternative upload path)
-
-Cover upload flow
------------------
-The image is downloaded locally, base64-encoded, and sent as a data URI
-to POST /api/Upload/person.  This is more reliable than the
-upload-by-url → upload/person two-step because it avoids Kavita's temp
-file handling (which had known issues in 0.8.x – 0.9.x, GitHub #3900).
-
-Dependencies
-------------
-    requests    ->  pip install requests
 """
 
 from __future__ import annotations
 
-import base64
 import datetime
-import difflib
-import re
 
 import requests
 
+from KavitaClient import KavitaClient
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
+from TextUtils import best_similarity, paragraphs_to_html
 
 
 class KavitaPersonUpdater:
@@ -63,41 +39,22 @@ class KavitaPersonUpdater:
 
     Parameters
     ----------
-    kavita_base_url : Base URL of the Kavita server, e.g. "http://192.168.2.2:5000"
-    api_key         : Kavita API key (Settings → User → API key)
+    client          : Shared KavitaClient (session, auth, cover uploads)
     mal_resolver    : Shared MALResolver singleton (created automatically if omitted)
-    request_timeout : HTTP timeout in seconds for both Kavita and image requests
+    al_resolver     : Shared AniListResolver singleton (created automatically if omitted)
     min_name_score  : Minimum difflib similarity ratio (0–1) required to accept a
                       Kavita person as a match for a MAL name.  Default 0.80.
     """
 
-    def __init__(self, kavita_base_url: str, api_key: str, *,
+    def __init__(self, client: KavitaClient, *,
                  mal_resolver: "MALResolver | None" = None,
                  al_resolver: "AniListResolver | None" = None,
-                 request_timeout: int = 30,
                  min_name_score: float = 0.80):
-        self._base = kavita_base_url.rstrip("/")
-        self._timeout = request_timeout
+        self._client = client
         self._min_score = min_name_score
         self._mal = mal_resolver or MALResolver()
         self._al  = al_resolver  or AniListResolver()
 
-        # Session used for Kavita API calls.
-        self._session = requests.Session()
-        self._session.headers.update({
-            "x-api-key": api_key,
-            "Content-Type": "application/json",
-            "Accept": "application/json",
-        })
-
-        # Plain session used to download external images (MAL CDN etc.).
-        # Must NOT carry the Kavita API headers — Accept: application/json
-        # would prevent MAL CDN from returning the image bytes.
-        self._image_session = requests.Session()
-        self._image_session.headers.update({
-            "User-Agent": "KavitaPersonUpdater/1.0",
-        })
-
         # Cache: normalised name -> list of PersonDto dicts (best matches first)
         self._person_search_cache: dict[str, list[dict]] = {}
 
@@ -230,29 +187,17 @@ class KavitaPersonUpdater:
             return self._person_search_cache[key]
 
         try:
-            resp = self._session.get(
-                f"{self._base}/api/Person/search",
-                params={"queryString": name},
-                timeout=self._timeout,
-            )
-            resp.raise_for_status()
-            persons: list[dict] = resp.json() or []
+            persons = self._client.search_persons(name)
         except requests.RequestException:
             self._person_search_cache[key] = []
             return []
 
-        def score(p: dict) -> float:
-            candidates = [p.get("name") or ""]
-            candidates += [a for a in (p.get("aliases") or []) if a]
-            best = 0.0
-            q = key
-            for c in candidates:
-                r = difflib.SequenceMatcher(None, q, c.lower()).ratio()
-                best = max(best, r)
-            return best
-
-        ranked = sorted(persons, key=score, reverse=True)
-        filtered = [p for p in ranked if score(p) >= self._min_score]
+        scored = []
+        for p in persons:
+            candidates = [p.get("name")] + list(p.get("aliases") or [])
+            scored.append((best_similarity(key, candidates), p))
+        scored.sort(key=lambda pair: pair[0], reverse=True)
+        filtered = [p for score, p in scored if score >= self._min_score]
         self._person_search_cache[key] = filtered
         return filtered
 
@@ -323,12 +268,7 @@ class KavitaPersonUpdater:
                 "aniListId": al_id if needs_al_id  else (current_al_id  or None),
             }
             try:
-                resp = self._session.post(
-                    f"{self._base}/api/Person/update",
-                    json=payload,
-                    timeout=self._timeout,
-                )
-                resp.raise_for_status()
+                self._client.update_person(payload)
                 changed = True
             except requests.RequestException as e:
                 if errors is not None:
@@ -350,88 +290,21 @@ class KavitaPersonUpdater:
                 and bool(person.get("coverImage"))
             )
             if image_url and not already_uploaded:
-                if self._upload_cover(person_id, image_url,
-                                      person_name=person_name,
-                                      errors=errors):
+                try:
+                    self._client.upload_person_cover(person_id, image_url)
                     changed = True
+                except requests.RequestException as e:
+                    if errors is not None:
+                        errors.append(
+                            f"cover upload failed for #{person_id} "
+                            f"'{person_name}' ({image_url}): {e}")
 
         return changed
 
-    # ------------------------------------------------------------------
-    # Internal: cover upload
-    # ------------------------------------------------------------------
-    def _upload_cover(self, person_id: int, image_url: str,
-                      lock: bool = False, *,
-                      person_name: str = "",
-                      errors: "list | None" = None) -> bool:
-        """
-        Uploads a cover image to a Kavita person.
-
-        The image is downloaded with the plain (header-less) image session
-        and posted to `POST /api/Upload/person` as a raw base64 string in
-        the `url` field.
-
-        Notes on protocol quirks discovered against Kavita 0.9.0.2:
-          - The two-step `upload-by-url` -> `Upload/person` flow returns
-            "Unable to save cover image to Person" (HTTP 400).
-          - A `data:image/jpeg;base64,...` data URI is rejected with the
-            same error.
-          - Only the raw base64 blob (no prefix) is accepted.
-        """
-        label = (f"#{person_id} '{person_name}'"
-                 if person_name else f"#{person_id}")
-
-        # 1) Download the image with a clean session — the Kavita session's
-        #    `Accept: application/json` header makes some CDNs refuse to
-        #    return image bytes.
-        try:
-            img_resp = self._image_session.get(image_url,
-                                               timeout=self._timeout)
-            img_resp.raise_for_status()
-        except requests.RequestException as e:
-            if errors is not None:
-                errors.append(
-                    f"image download failed for {label} ({image_url}): {e}")
-            return False
-
-        b64 = base64.b64encode(img_resp.content).decode()
-
-        # 2) POST the raw base64 blob.
-        try:
-            resp = self._session.post(
-                f"{self._base}/api/Upload/person",
-                json={"id": person_id, "url": b64, "lockCover": lock},
-                timeout=self._timeout,
-            )
-            if resp.status_code >= 400:
-                if errors is not None:
-                    errors.append(
-                        f"Upload/person HTTP {resp.status_code} for {label}: "
-                        f"{_short_body(resp)}")
-                return False
-            return True
-        except requests.RequestException as e:
-            if errors is not None:
-                errors.append(
-                    f"Upload/person failed for {label}: {e}")
-            return False
-
 
 # --------------------------------------------------------------------------
 # Module helpers: description builders
 # --------------------------------------------------------------------------
-def _plain_to_html(text: str) -> str:
-    """Converts plain text with paragraph breaks to compact HTML (no raw \\n)."""
-    if not text:
-        return ""
-    parts: list[str] = []
-    for para in re.split(r"\n{2,}", text.strip()):
-        para = para.strip()
-        if para:
-            parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
-    return "".join(parts)
-
-
 def _format_birthday(birthday: str) -> str:
     """Converts an ISO 8601 birthday string to "D Month YYYY"."""
     if not birthday:
@@ -457,7 +330,7 @@ def _build_character_description(details: dict) -> str:
         parts.append(f'<p><a href="{url}" target="_blank">Favorites: {favorites:,}</a></p>')
     about = (details.get("about") or "").strip()
     if about:
-        parts.append(_plain_to_html(about))
+        parts.append(paragraphs_to_html(about))
     return "<br>".join(parts)
 
 
@@ -501,33 +374,19 @@ def _build_person_description(details: dict) -> str:
         parts.append(f'<table>{"".join(rows)}</table>')
     about = (details.get("about") or "").strip()
     if about:
-        parts.append(_plain_to_html(about))
+        parts.append(paragraphs_to_html(about))
     return "<br>".join(parts)
 
 
-# --------------------------------------------------------------------------
-# Module helper
-# --------------------------------------------------------------------------
-def _short_body(resp: requests.Response, limit: int = 400) -> str:
-    """Returns the response body trimmed to `limit` chars for error logging."""
-    try:
-        text = resp.text or ""
-    except Exception:
-        return "<unreadable response body>"
-    text = text.strip().replace("\n", " ").replace("\r", " ")
-    if len(text) > limit:
-        text = text[:limit] + "…"
-    return text or "<empty body>"
-
-
 # --------------------------------------------------------------------------
 # Usage example
 # --------------------------------------------------------------------------
 if __name__ == "__main__":
-    KAVITA_URL = "http://192.168.2.2:5000"
-    KAVITA_KEY = "Sq4a3hcV171dn3gzCl0K4eN7hZNk4sOA"
+    import os
 
-    updater = KavitaPersonUpdater(KAVITA_URL, KAVITA_KEY)
+    client = KavitaClient(os.environ["KAVITA_URL"],
+                          os.environ["KAVITA_API_KEY"])
+    updater = KavitaPersonUpdater(client)
 
     mal = MALResolver()
     mal_id = mal.find_mal_id("よふかしのうた")
diff --git a/src/LightNovelMetadataBuilder.py b/src/LightNovelMetadataBuilder.py
index 9ba677f..3a89e02 100644
--- a/src/LightNovelMetadataBuilder.py
+++ b/src/LightNovelMetadataBuilder.py
@@ -26,6 +26,7 @@ from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
 from MALResolver import MALResolver
 from AniListResolver import AniListResolver
 from MatchesCache import MatchesCache
+from TextUtils import paragraphs_to_html
 
 
 # MangaBaka series type for the search endpoint.
@@ -92,12 +93,7 @@ def _md_to_html(text: str) -> str:
     )
     text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text, flags=re.DOTALL)
     text = re.sub(r'\*(.+?)\*',     r'<em>\1</em>',         text, flags=re.DOTALL)
-    parts: list[str] = []
-    for para in re.split(r'\n{2,}', text.strip()):
-        para = para.strip()
-        if para:
-            parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
-    return "".join(parts)
+    return paragraphs_to_html(text)
 
 
 def pick_cover_url(cover) -> "str | None":
@@ -220,16 +216,25 @@ class LightNovelMetadataBuilder:
         return data[0] if data else None
 
     def fetch_series(self, series_id) -> "dict | None":
-        """Returns the full MangaBaka series dict for the given id."""
+        """
+        Returns the full MangaBaka series dict for the given id, following
+        ``merged_with`` redirects; a merge cycle (seen-set check) returns None.
+        """
         if series_id is None or str(series_id).strip() == "":
             return None
-        url = f"{self.api_base_url}/series/{series_id}"
-        resp = self._session.get(url, timeout=self.request_timeout)
-        resp.raise_for_status()
-        data = resp.json().get("data")
-        if data and data.get("state") == "merged" and data.get("merged_with"):
-            return self.fetch_series(data["merged_with"])
-        return data
+        seen: set[str] = set()
+        current = series_id
+        while str(current) not in seen:
+            seen.add(str(current))
+            url = f"{self.api_base_url}/series/{current}"
+            resp = self._session.get(url, timeout=self.request_timeout)
+            resp.raise_for_status()
+            data = resp.json().get("data")
+            if data and data.get("state") == "merged" and data.get("merged_with"):
+                current = data["merged_with"]
+                continue
+            return data
+        return None
 
     # ------------------------------------------------------------------
     # Resolve title -> MangaBaka series (caches the match)
@@ -316,14 +321,12 @@ class LightNovelMetadataBuilder:
         # text-only novels).
         cover_artists = list(md.get("artists") or [])
 
-        # Publisher: prefer English licence, else original
-        publishers = self._publishers_by_type(md, "English") \
-                     or self._publishers_by_type(md, "Original")
-        imprint = None
-        if self._publishers_by_type(md, "English") and \
-           self._publishers_by_type(md, "Original"):
-            imprint = self._publishers_by_type(md, "Original")[0] if \
-                      self._publishers_by_type(md, "Original") else None
+        # Publisher: prefer English licence, else original.  When both
+        # exist, the original publisher becomes the imprint.
+        english_pubs  = self._publishers_by_type(md, "English")
+        original_pubs = self._publishers_by_type(md, "Original")
+        publishers = english_pubs or original_pubs
+        imprint = original_pubs[0] if english_pubs and original_pubs else None
 
         # Release year
         release_year = None
diff --git a/src/LightNovelOrchestrator.py b/src/LightNovelOrchestrator.py
index 376c4c4..bbb10e3 100644
--- a/src/LightNovelOrchestrator.py
+++ b/src/LightNovelOrchestrator.py
@@ -71,10 +71,9 @@ class LightNovelOrchestrator:
         )
         self._series_updater = KavitaSeriesUpdater(self._client)
         self._person_updater = KavitaPersonUpdater(
-            kavita_url, kavita_api_key,
+            self._client,
             mal_resolver=self._mal,
             al_resolver=self._al,
-            request_timeout=request_timeout,
         )
         self._relation_sync = RelationshipSync(
             self._client, matches_cache, builder=self._builder)
diff --git a/src/MALResolver.py b/src/MALResolver.py
index b038704..5934823 100644
--- a/src/MALResolver.py
+++ b/src/MALResolver.py
@@ -30,12 +30,12 @@ Dependencies
 from __future__ import annotations
 
 import datetime
-import difflib
 import time
 
 import requests
 
 from MediaResolver import MediaResolver
+from TextUtils import best_similarity
 
 
 class MALResolver(MediaResolver):
@@ -404,19 +404,12 @@ def _clean_mal_name(name: str) -> str:
 def _score_title(query: str, entry: dict) -> float:
     """Returns the best title-similarity score for a Jikan manga entry."""
     candidates = [
-        entry.get("title") or "",
-        entry.get("title_english") or "",
-        entry.get("title_japanese") or "",
+        entry.get("title"),
+        entry.get("title_english"),
+        entry.get("title_japanese"),
     ]
-    for alt in (entry.get("titles") or []):
-        candidates.append(alt.get("title") or "")
-    best = 0.0
-    q = query.lower()
-    for t in candidates:
-        if t:
-            ratio = difflib.SequenceMatcher(None, q, t.lower()).ratio()
-            best = max(best, ratio)
-    return best
+    candidates += [alt.get("title") for alt in (entry.get("titles") or [])]
+    return best_similarity(query, candidates)
 
 
 # --------------------------------------------------------------------------
diff --git a/src/MatchesCache.py b/src/MatchesCache.py
index ff79cae..c7f677f 100644
--- a/src/MatchesCache.py
+++ b/src/MatchesCache.py
@@ -36,6 +36,14 @@ import time
 from pathlib import Path
 
 
+def _set_int(entry: dict, key: str, value) -> None:
+    """Sets entry[key] = int(value); ignores values that don't coerce."""
+    try:
+        entry[key] = int(value)
+    except (TypeError, ValueError):
+        pass
+
+
 class MatchesCache:
     def __init__(self, path):
         self._path = Path(path)
@@ -100,25 +108,13 @@ class MatchesCache:
             if image_url is not None:
                 entry["imageUrl"] = image_url
             if kavita_series_id is not None:
-                try:
-                    entry["kavitaSeriesId"] = int(kavita_series_id)
-                except (TypeError, ValueError):
-                    pass
+                _set_int(entry, "kavitaSeriesId", kavita_series_id)
             if library_id is not None:
-                try:
-                    entry["libraryId"] = int(library_id)
-                except (TypeError, ValueError):
-                    pass
+                _set_int(entry, "libraryId", library_id)
             if first_match_time is not None:
-                try:
-                    entry["firstMatchTime"] = int(first_match_time)
-                except (TypeError, ValueError):
-                    pass
+                _set_int(entry, "firstMatchTime", first_match_time)
             if last_update_time is not None:
-                try:
-                    entry["lastUpdateTime"] = int(last_update_time)
-                except (TypeError, ValueError):
-                    pass
+                _set_int(entry, "lastUpdateTime", last_update_time)
             self._save_unlocked()
             return dict(entry)
 
diff --git a/src/MatchesWebApp.py b/src/MatchesWebApp.py
index 49cbcab..cb70ef1 100644
--- a/src/MatchesWebApp.py
+++ b/src/MatchesWebApp.py
@@ -39,6 +39,19 @@ from MatchesCache import MatchesCache
 from LightNovelMetadataBuilder import pick_thumbnail_url
 
 
+def _int_list(values) -> list[int]:
+    """Coerces an iterable of mixed values to a list of positive ints."""
+    out: list[int] = []
+    for v in (values or []):
+        try:
+            n = int(v)
+        except (TypeError, ValueError):
+            continue
+        if n > 0:
+            out.append(n)
+    return out
+
+
 _INDEX_HTML = r"""<!doctype html>
 <html lang="en">
 <head>
@@ -628,7 +641,7 @@ class MatchesWebApp:
         @app.get("/api/matches")
         def api_list():
             raw = request.args.get("libraryIds") or ""
-            lib_ids = [int(p) for p in raw.split(",") if p.strip().isdigit()]
+            lib_ids = _int_list(raw.split(","))
             if lib_ids:
                 return jsonify(cache.all_in_libraries(lib_ids))
             return jsonify(cache.all())
@@ -680,8 +693,7 @@ class MatchesWebApp:
             if self._orchestrator is None:
                 return Response("no orchestrator configured", status=503)
             body = request.get_json(silent=True) or {}
-            library_ids = [int(i) for i in (body.get("libraryIds") or [])
-                           if str(i).strip().lstrip("-").isdigit()]
+            library_ids = _int_list(body.get("libraryIds"))
             if not library_ids:
                 return Response("libraryIds required", status=400)
 
@@ -720,12 +732,7 @@ class MatchesWebApp:
                 return Response("no orchestrator configured", status=503)
             body = request.get_json(silent=True) or {}
             raw = body.get("libraryIds")
-            library_ids: "list[int] | None"
-            if raw is None:
-                library_ids = None
-            else:
-                library_ids = [int(i) for i in raw
-                               if str(i).strip().lstrip("-").isdigit()]
+            library_ids = None if raw is None else _int_list(raw)
 
             label = ("update all (every library)" if library_ids is None
                      else f"update all in libraries {library_ids}")
diff --git a/src/TextUtils.py b/src/TextUtils.py
new file mode 100644
index 0000000..1924b71
--- /dev/null
+++ b/src/TextUtils.py
@@ -0,0 +1,45 @@
+"""
+TextUtils.py
+============
+
+Small text helpers shared across modules:
+
+* ``paragraphs_to_html`` — converts plain text with blank-line paragraph
+  breaks into compact HTML (used for Kavita summary / description fields,
+  which must not contain raw newlines).
+* ``best_similarity`` — best difflib ratio between a query string and a
+  list of candidate strings (used for title / person-name matching).
+"""
+
+from __future__ import annotations
+
+import difflib
+import re
+from typing import Iterable
+
+
+def paragraphs_to_html(text: str) -> str:
+    """Converts plain text with paragraph breaks to compact HTML (no raw \\n)."""
+    if not text:
+        return ""
+    parts: list[str] = []
+    for para in re.split(r"\n{2,}", text.strip()):
+        para = para.strip()
+        if para:
+            parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
+    return "".join(parts)
+
+
+def best_similarity(query: str, candidates: Iterable[str]) -> float:
+    """
+    Returns the best case-insensitive difflib similarity ratio (0..1)
+    between `query` and any candidate; None/empty ones are skipped.
+    """
+    q = (query or "").lower()
+    best = 0.0
+    for candidate in candidates:
+        if candidate:
+            ratio = difflib.SequenceMatcher(
+                None, q, str(candidate).lower()).ratio()
+            best = max(best, ratio)
+    return best