""" text_utils.py ============= Small text helpers shared across modules: * ``paragraphs_to_html`` — converts plain text with blank-line paragraph breaks into compact HTML (used for Kavita summary / description fields, which must not contain raw newlines). * ``best_similarity`` — best difflib ratio between a query string and a list of candidate strings (used for title / person-name matching). """ from __future__ import annotations import difflib import re from typing import Iterable def paragraphs_to_html(text: str) -> str: """Converts plain text with paragraph breaks to compact HTML (no raw \\n).""" if not text: return "" parts: list[str] = [] for para in re.split(r"\n{2,}", text.strip()): para = para.strip() if para: parts.append(f"

{para.replace(chr(10), '
')}

") return "".join(parts) def best_similarity(query: str, candidates: Iterable[str]) -> float: """ Returns the best case-insensitive difflib similarity ratio (0..1) between `query` and any non-empty candidate. """ q = (query or "").lower() best = 0.0 for candidate in candidates: if candidate: ratio = difflib.SequenceMatcher( None, q, str(candidate).lower()).ratio() best = max(best, ratio) return best def person_name_with_id(name: str, *, mal_id: "int | None" = None, al_id: "int | None" = None) -> str: """ Disambiguates a character name with its tracker id: "Rem (MAL 118737)". Kavita Person records are global and keyed by name only, so two different characters who share a name would collapse into one record. Suffixing the tracker *character* id keeps them apart while still sharing the record across the manga and light-novel version of the same series (MAL/AniList character ids are per character, not per medium). MAL is preferred; AniList ids get an "AL" marker so the two id spaces cannot collide. Without any id the name is returned as-is. The format must stay in sync with the manga project so both tools address the same Kavita person records. """ name = (name or "").strip() if not name: return name if mal_id: return f"{name} (MAL {mal_id})" if al_id: return f"{name} (AL {al_id})" return name # Matches the suffix produced by person_name_with_id at the end of a name. _TRACKER_ID_RE = re.compile(r"\s*\((MAL|AL)\s+(\d+)\)\s*$", re.IGNORECASE) def parse_person_tracker_id(name: str) -> "tuple[str, int] | None": """ Inverse of person_name_with_id: extracts the tracker id from a disambiguated Kavita person name. "Rem (MAL 118737)" -> ("mal", 118737) "Subaru (AL 88311)" -> ("al", 88311) "Kotoyama" -> None (no id suffix — e.g. an author/staff record) Returns ("mal" | "al", id) or None. """ if not name: return None m = _TRACKER_ID_RE.search(name) if not m: return None source = "mal" if m.group(1).upper() == "MAL" else "al" try: return source, int(m.group(2)) except ValueError: return None