73 lines
2.3 KiB
Python
73 lines
2.3 KiB
Python
"""
|
|
text_utils.py
|
|
=============
|
|
|
|
Small text helpers shared across modules:
|
|
|
|
* ``paragraphs_to_html`` — converts plain text with blank-line paragraph
|
|
breaks into compact HTML (used for Kavita summary / description fields,
|
|
which must not contain raw newlines).
|
|
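* ``best_similarity`` — best difflib ratio between a query string and a
  list of candidate strings (used for title / person-name matching).
* ``person_name_with_id`` — suffixes a character name with its tracker id
  (e.g. "Rem (MAL 118737)") so that different characters sharing a name
  do not collapse into one Kavita person record.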
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import difflib
|
|
import re
|
|
from typing import Iterable
|
|
|
|
|
|
def paragraphs_to_html(text: str) -> str:
    """Convert plain text with blank-line paragraph breaks to compact HTML.

    Every paragraph becomes one ``<p>...</p>`` element and each single line
    break inside a paragraph becomes ``<br>``. The elements are concatenated
    without separators, so the result contains no raw LF or CR characters.

    Line endings are normalised first (CRLF and lone CR become LF), and a
    line holding only whitespace counts as a blank line, so text coming from
    Windows sources or with stray trailing spaces is split correctly.

    The text itself is inserted as-is (it is NOT HTML-escaped).

    Returns an empty string for empty, ``None`` or whitespace-only input.
    """
    if not text:
        return ""
    # Normalise CRLF / CR so that paragraph detection works and no raw "\r"
    # leaks into the output.
    normalized = re.sub(r"\r\n?", "\n", text).strip()
    # A paragraph break is two line feeds with nothing but whitespace
    # (including further line feeds) in between.
    paragraphs = (chunk.strip() for chunk in re.split(r"\n\s*\n", normalized))
    return "".join(
        f"<p>{para.replace(chr(10), '<br>')}</p>"
        for para in paragraphs
        if para
    )
|
|
|
|
|
|
def best_similarity(query: str, candidates: Iterable[str]) -> float:
    """
    Highest case-insensitive difflib ratio (0..1) of `query` against the
    given candidates.

    Falsy candidates (empty strings, ``None``) are skipped. When there is
    nothing left to compare against, the result is 0.0.
    """
    needle = (query or "").lower()
    # The query stays the first sequence: SequenceMatcher ratios are not
    # guaranteed to be symmetric, so the argument order matters.
    scores = (
        difflib.SequenceMatcher(None, needle, str(option).lower()).ratio()
        for option in candidates
        if option
    )
    return max(scores, default=0.0)
|
|
|
|
|
|
def person_name_with_id(name: str, *,
                        mal_id: "int | None" = None,
                        al_id: "int | None" = None) -> str:
    """
    Appends a tracker character id to a character name,
    e.g. "Rem (MAL 118737)".

    Why: Kavita Person records are global and looked up by name alone, so
    two unrelated characters with the same name would end up merged into a
    single record. The tracker *character* id keeps them distinct, while the
    manga and the light-novel edition of one series still share the record
    (MAL/AniList character ids belong to the character, not to the medium).

    Precedence: a MAL id wins when present; otherwise an AniList id is used
    and tagged "AL" so the two id spaces can never clash. If neither id is
    given, the stripped name is returned unchanged. An empty or blank name
    yields an empty string.

    Keep this format identical to the one used by the manga project so that
    both tools point at the same Kavita person records.
    """
    label = (name or "").strip()
    if label and mal_id:
        return f"{label} (MAL {mal_id})"
    if label and al_id:
        return f"{label} (AL {al_id})"
    # Either the name is blank or no usable id was supplied.
    return label
|