1083 lines
43 KiB
Python
1083 lines
43 KiB
Python
"""
|
||
comicinfo_builder.py
|
||
====================
|
||
|
||
Generates a ComicInfo.xml (compatible with Kavita v0.9.0.2 / ComicInfo v2.1)
|
||
from series metadata provided by the MangaBaka API, enriched with data from
|
||
MangaDex (volume mapping), MangaBaka works (volume covers / ISBN / dates),
|
||
and MyAnimeList / Jikan (statistics and characters).
|
||
|
||
Dependencies
|
||
------------
|
||
requests (required -> API calls / cover download)
|
||
Pillow (PIL) (optional -> image dimensions for <Page> entries)
|
||
|
||
pip install requests pillow
|
||
|
||
The modules MangadexVolumeResolver, MangaBakaWorksResolver, MALResolver,
AniListResolver, MatchesCache and MangaBakaRateLimit must reside in the
same directory.
|
||
|
||
API address note
|
||
----------------
|
||
The official MangaBaka API is hosted at https://api.mangabaka.dev/v1
|
||
(domain ".dev", not ".org"). Use the `api_base_url` constructor parameter
|
||
to override this if needed.
|
||
|
||
Data source notes
|
||
-----------------
|
||
* Volume assignment per chapter is resolved via MangaDex
|
||
(MangaDexVolumeResolver). Chapters missing from MangaDex are estimated
|
||
from neighbouring volume boundaries and MangaBaka page-count data.
|
||
* Volume-specific covers, ISBNs and publication dates come from MangaBaka
|
||
works (MangaBakaWorksResolver). If no volume is assigned the series
|
||
cover is used instead.
|
||
* MAL statistics and character names are fetched via the Jikan API
|
||
(MALResolver).
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import difflib
|
||
import re
|
||
import xml.etree.ElementTree as ET
|
||
from pathlib import Path
|
||
|
||
import requests
|
||
|
||
from MangadexVolumeResolver import MangaDexVolumeResolver
|
||
from MangaBakaWorksResolver import MangaBakaWorksResolver
|
||
from MALResolver import MALResolver
|
||
from AniListResolver import AniListResolver
|
||
from MatchesCache import MatchesCache
|
||
from MangaBakaRateLimit import apply_to_session as _apply_mangabaka_rate_limit
|
||
|
||
try:
|
||
from PIL import Image
|
||
_HAS_PIL = True
|
||
except ImportError:
|
||
_HAS_PIL = False
|
||
|
||
|
||
# --------------------------------------------------------------------------
|
||
# Constants
|
||
# --------------------------------------------------------------------------
|
||
# File suffixes (compared lower-cased, including the leading dot) that
# add_pages_from_folder accepts as page images.
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".avif"}

# Series types accepted by the MangaBaka search endpoint. Light/web novels
# are filtered out because this pipeline only handles image-based manga.
# Passed to `requests` as a list so each value becomes its own `&type=...`
# query parameter (MangaBaka's API expects repeated keys, not a CSV list).
_SEARCH_TYPES = ["manga", "manhwa", "manhua"]

# Maps the series' `content_rating` value to the text written to <AgeRating>;
# values missing from this map are emitted as "Unknown" by the XML builder.
_AGE_RATING_MAP = {
    "safe": "Everyone",
    "suggestive": "Teen",
    "erotica": "Mature 17+",
    "pornographic": "Adults Only 18+",
}

# URL templates per tracker. `{id}` is a placeholder — presumably filled with
# the tracker's series id by the web-link collector (not visible here; verify).
_TRACKER_URL_TEMPLATES = {
    # Keys are normalised via _normalise_key (alphanumeric only, lowercase),
    # so e.g. the source key "anime_news_network" matches "animenewsnetwork".
    "anilist": "https://anilist.co/manga/{id}",
    "myanimelist": "https://myanimelist.net/manga/{id}",
    "mal": "https://myanimelist.net/manga/{id}",
    "mangaupdates": "https://www.mangaupdates.com/series.html?id={id}",
    "mangadex": "https://mangadex.org/title/{id}",
    "kitsu": "https://kitsu.app/manga/{id}",
    "animenewsnetwork": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
    "ann": "https://www.animenewsnetwork.com/encyclopedia/manga.php?id={id}",
    "animeplanet": "https://www.anime-planet.com/manga/{id}",
    "shikimori": "https://shikimori.one/mangas/{id}",
}

# MangaDex relationship types that indicate child works (spin-offs, sequels …)
_CHILD_RELATION_TYPES = {"side_story", "spin_off", "sequel", "prequel",
                         "doujinshi", "adapted_from", "alternative_story",
                         "alternative_version"}
|
||
|
||
|
||
# --------------------------------------------------------------------------
|
||
# Module helpers
|
||
# --------------------------------------------------------------------------
|
||
def _natural_key(name: str):
|
||
return [int(p) if p.isdigit() else p.lower()
|
||
for p in re.split(r"(\d+)", name)]
|
||
|
||
|
||
def _normalise_key(key) -> str:
|
||
return re.sub(r"[^a-z0-9]", "", str(key).lower())
|
||
|
||
|
||
def _format_term(value: str) -> str:
|
||
"""Converts a MangaBaka genre slug ('slice_of_life') to display form."""
|
||
return str(value).replace("_", " ").strip().title() if value else ""
|
||
|
||
|
||
# Markdown backslash escape sequences recognised by CommonMark (e.g. \- → -)
|
||
_MD_ESCAPE_RE = re.compile(r'\\([\\`*_{}\[\]()\#+\-.!|~])')
|
||
|
||
|
||
def _md_to_html(text: str) -> str:
|
||
"""
|
||
Converts a subset of Markdown (as produced by MangaBaka) to HTML.
|
||
|
||
Handles: backslash escapes, [text](url) links, **bold**, *italic*,
|
||
blank-line paragraph splits, and single-newline line breaks.
|
||
Produces compact HTML with no raw newline characters — Kavita renders
|
||
every bare \\n as a <br>, so all line-breaks must be explicit.
|
||
"""
|
||
if not text:
|
||
return ""
|
||
# Unescape Markdown backslash sequences (\- → -, \* → *, …)
|
||
text = _MD_ESCAPE_RE.sub(r'\1', text)
|
||
# [text](url) → <a href="url">text</a>
|
||
text = re.sub(
|
||
r'\[([^\]]+)\]\(([^)]+)\)',
|
||
lambda m: f'<a href="{m.group(2)}">{m.group(1)}</a>',
|
||
text,
|
||
)
|
||
# **bold** before *italic* so ** is not mistaken for two *
|
||
text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text, flags=re.DOTALL)
|
||
text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text, flags=re.DOTALL)
|
||
# Split on blank lines → <p> blocks; single newlines → <br>
|
||
parts: list[str] = []
|
||
for para in re.split(r'\n{2,}', text.strip()):
|
||
para = para.strip()
|
||
if para:
|
||
parts.append(f"<p>{para.replace(chr(10), '<br>')}</p>")
|
||
return "".join(parts) # no raw \n — every \n becomes a <br> in Kavita
|
||
|
||
|
||
|
||
# --------------------------------------------------------------------------
|
||
# Main class
|
||
# --------------------------------------------------------------------------
|
||
class ComicInfoBuilder:
|
||
"""
|
||
Builds a ComicInfo.xml for a single manga chapter.
|
||
|
||
Constructor arguments
|
||
---------------------
|
||
manga_title : Title of the manga (used for the API search).
|
||
chapter : Chapter number (int, float, or str — e.g. "10.5").
|
||
|
||
Setter behaviour
|
||
----------------
|
||
* Changing `manga_title` discards both the cached API metadata
|
||
AND the current results (pages / cover).
|
||
* Changing `chapter` discards only the current results;
|
||
the API metadata is kept.
|
||
"""
|
||
|
||
def __init__(self, manga_title, chapter, *,
             api_base_url: str = "https://api.mangabaka.dev/v1",
             language: str = "en",
             request_timeout: int = 30,
             session: "requests.Session | None" = None,
             volume_resolver: "MangaDexVolumeResolver | None" = None,
             works_resolver: "MangaBakaWorksResolver | None" = None,
             mal_resolver: "MALResolver | None" = None,
             al_resolver: "AniListResolver | None" = None,
             matches_cache: "MatchesCache | None" = None):
    """
    Creates a builder for one chapter of one manga.

    manga_title     : Title used for the MangaBaka search; must not be
                      empty or whitespace-only (ValueError otherwise).
    chapter         : Chapter number, stored as given.
    api_base_url    : MangaBaka API root; trailing slashes are removed.
    language        : Value written to <LanguageISO> and used to pick the
                      sort title.
    request_timeout : Timeout passed to every HTTP request made here and to
                      the resolvers created here.
    session         : Optional shared `requests.Session`; a new one is
                      created when omitted.
    volume_resolver, works_resolver, mal_resolver, al_resolver :
                      Optional pre-built resolvers; defaults are created
                      when omitted.
    matches_cache   : Optional title → series-id cache consulted before a
                      fresh search.
    """
    if not manga_title or not str(manga_title).strip():
        raise ValueError("manga_title must not be empty.")

    self._manga_title = str(manga_title).strip()
    self._chapter = chapter

    self.api_base_url = api_base_url.rstrip("/")
    self.language = language
    self.request_timeout = request_timeout
    self._session = requests.Session() if session is None else session
    self._session.headers.setdefault("User-Agent", "ComicInfoBuilder/1.0")
    # Throttle every call to api.mangabaka.dev (idempotent — safe even
    # when the session was already prepared by a parent class).
    _apply_mangabaka_rate_limit(self._session)

    if volume_resolver is None:
        volume_resolver = MangaDexVolumeResolver(
            request_timeout=request_timeout,
            session=self._session)
    self._volume_resolver = volume_resolver

    if works_resolver is None:
        # Hand over the normalised base URL so the resolver and this class
        # build their endpoints from the same string (no trailing slash).
        works_resolver = MangaBakaWorksResolver(
            api_base_url=self.api_base_url,
            request_timeout=request_timeout,
            session=self._session)
    self._works_resolver = works_resolver

    # Both resolvers are Singletons — they manage their own sessions/caches.
    if mal_resolver is None:
        mal_resolver = MALResolver(request_timeout=request_timeout)
    self._mal_resolver = mal_resolver
    if al_resolver is None:
        al_resolver = AniListResolver(request_timeout=request_timeout)
    self._al_resolver = al_resolver
    self._matches_cache = matches_cache

    # Cached series metadata; None until the first successful fetch.
    self._metadata: "dict | None" = None
    # Per-chapter results, reset by _clear_results().
    self._pages: list[dict] = []
    self._cover_path: "Path | None" = None
    self._suwayomi_data: dict = {}
|
||
|
||
# ----- Repr -----------------------------------------------------------
|
||
def __repr__(self) -> str:
|
||
return (f"ComicInfoBuilder(manga_title={self._manga_title!r}, "
|
||
f"chapter={self._chapter!r})")
|
||
|
||
# ======================================================================
|
||
# Properties / setters
|
||
# ======================================================================
|
||
@property
|
||
def manga_title(self) -> str:
|
||
return self._manga_title
|
||
|
||
@manga_title.setter
|
||
def manga_title(self, value):
|
||
value = str(value).strip()
|
||
if not value:
|
||
raise ValueError("manga_title must not be empty.")
|
||
if value == self._manga_title:
|
||
return
|
||
self._manga_title = value
|
||
self._metadata = None
|
||
self._clear_results()
|
||
|
||
@property
|
||
def chapter(self):
|
||
return self._chapter
|
||
|
||
@chapter.setter
|
||
def chapter(self, value):
|
||
if value == self._chapter:
|
||
return
|
||
self._chapter = value
|
||
self._clear_results()
|
||
|
||
def _clear_results(self) -> None:
|
||
self._pages = []
|
||
self._cover_path = None
|
||
self._suwayomi_data = {}
|
||
|
||
# ======================================================================
|
||
# Public XML functions
|
||
# ======================================================================
|
||
def to_xml_string(self, *, pretty: bool = True) -> str:
|
||
"""Returns the ComicInfo.xml as a string."""
|
||
tree = self._build_tree()
|
||
if pretty:
|
||
try:
|
||
ET.indent(tree, space=" ")
|
||
except AttributeError:
|
||
pass
|
||
body = ET.tostring(tree.getroot(), encoding="unicode")
|
||
return '<?xml version="1.0" encoding="UTF-8"?>\n' + body
|
||
|
||
def save_xml(self, path) -> Path:
|
||
"""
|
||
Writes the ComicInfo.xml to `path`.
|
||
If a directory is passed, ComicInfo.xml is created inside it.
|
||
Returns the actual file path used.
|
||
"""
|
||
path = Path(path)
|
||
if path.is_dir():
|
||
path = path / "ComicInfo.xml"
|
||
path.parent.mkdir(parents=True, exist_ok=True)
|
||
path.write_text(self.to_xml_string(), encoding="utf-8")
|
||
return path
|
||
|
||
# ======================================================================
|
||
# Optional: analyse an image folder
|
||
# ======================================================================
|
||
def add_pages_from_folder(self, folder, *,
|
||
download_cover: bool = True,
|
||
cover_filename: str = "cover") -> dict:
|
||
"""
|
||
Scans a chapter image folder and populates <Page> entries.
|
||
Reads an existing Suwayomi ComicInfo.xml for supplementary fields.
|
||
Downloads the cover (volume-specific if a volume is found, otherwise
|
||
the series default cover).
|
||
"""
|
||
folder = Path(folder)
|
||
if not folder.is_dir():
|
||
raise NotADirectoryError(f"Folder not found: {folder}")
|
||
|
||
self._suwayomi_data = self._read_existing_comicinfo(folder)
|
||
|
||
self._cover_path = None
|
||
if download_cover:
|
||
self._cover_path = self._download_cover(folder, cover_filename)
|
||
|
||
cover_resolved = self._cover_path.resolve() if self._cover_path else None
|
||
story_images: list[Path] = []
|
||
for entry in folder.iterdir():
|
||
if not entry.is_file():
|
||
continue
|
||
if entry.suffix.lower() not in _IMAGE_EXTS:
|
||
continue
|
||
if cover_resolved and entry.resolve() == cover_resolved:
|
||
continue
|
||
story_images.append(entry)
|
||
story_images.sort(key=lambda p: _natural_key(p.name))
|
||
|
||
ordered: list[tuple[Path, str]] = []
|
||
if self._cover_path:
|
||
ordered.append((self._cover_path, "FrontCover"))
|
||
ordered.extend((img, "Story") for img in story_images)
|
||
|
||
self._pages = []
|
||
for index, (img_path, page_type) in enumerate(ordered):
|
||
width, height = self._image_dimensions(img_path)
|
||
try:
|
||
size = img_path.stat().st_size
|
||
except OSError:
|
||
size = None
|
||
self._pages.append({
|
||
"image": index,
|
||
"type": page_type,
|
||
"width": width,
|
||
"height": height,
|
||
"size": size,
|
||
"double": bool(width and height and width > height),
|
||
})
|
||
|
||
return {
|
||
"page_count": len(self._pages),
|
||
"cover": str(self._cover_path) if self._cover_path else None,
|
||
"suwayomi_fields": dict(self._suwayomi_data),
|
||
}
|
||
|
||
# ======================================================================
|
||
# Metadata retrieval (MangaBaka API)
|
||
# ======================================================================
|
||
def fetch_metadata(self, *, force: bool = False) -> dict:
    """
    Fetches (and caches) the series metadata. Pass force=True to refresh.

    Public entry point that delegates to `_get_metadata`; any exception
    raised there propagates unchanged.
    """
    return self._get_metadata(force=force)
|
||
|
||
def _get_metadata(self, *, force: bool = False) -> dict:
|
||
if self._metadata is not None and not force:
|
||
return self._metadata
|
||
|
||
series = self._search_best_series(self._manga_title)
|
||
if series is None:
|
||
raise RuntimeError(
|
||
f"No series found for '{self._manga_title}' on MangaBaka.")
|
||
|
||
if series.get("state") == "merged" and series.get("merged_with"):
|
||
series = self._fetch_series_by_id(series["merged_with"])
|
||
|
||
self._metadata = series
|
||
return series
|
||
|
||
def _search_best_series(self, title: str):
|
||
"""
|
||
Resolves `title` to a MangaBaka series.
|
||
|
||
Lookup order:
|
||
1. matches.json cache (if attached) — uses the stored series ID
|
||
to fetch the full series, skipping the search step entirely.
|
||
2. Fresh MangaBaka search — top hit. The match is persisted to
|
||
matches.json before being returned so it survives a crash.
|
||
"""
|
||
if self._matches_cache is not None:
|
||
cached = self._matches_cache.get(title)
|
||
if cached and cached.get("mangabakaId"):
|
||
try:
|
||
return self._fetch_series_by_id(cached["mangabakaId"])
|
||
except Exception as exc:
|
||
print(f"[ComicInfoBuilder] cached id "
|
||
f"{cached['mangabakaId']} for {title!r} failed "
|
||
f"({exc}); falling back to fresh search",
|
||
flush=True)
|
||
|
||
url = f"{self.api_base_url}/series/search"
|
||
resp = self._session.get(
|
||
url, params={"q": title, "type": _SEARCH_TYPES,
|
||
"page": 1, "limit": 1},
|
||
timeout=self.request_timeout)
|
||
resp.raise_for_status()
|
||
data = resp.json().get("data") or []
|
||
series = data[0] if data else None
|
||
|
||
if series and self._matches_cache is not None:
|
||
self._matches_cache.add(
|
||
title,
|
||
mangabaka_id=series.get("id"),
|
||
mangabaka_name=series.get("title") or "",
|
||
image_url=_pick_cover_url(series.get("cover")),
|
||
)
|
||
|
||
return series
|
||
|
||
def _fetch_series_by_id(self, series_id) -> dict:
|
||
url = f"{self.api_base_url}/series/{series_id}"
|
||
resp = self._session.get(url, timeout=self.request_timeout)
|
||
resp.raise_for_status()
|
||
data = resp.json().get("data")
|
||
if not data:
|
||
raise RuntimeError(f"Series with ID {series_id} not found.")
|
||
return data
|
||
|
||
# ======================================================================
|
||
# XML construction
|
||
# ======================================================================
|
||
def _build_tree(self) -> "ET.ElementTree":
    """
    Assembles the complete <ComicInfo> element tree for the current chapter.

    Sources combined here: the cached series metadata (`md`), the fields
    read from an existing ComicInfo.xml (`sd`), the resolved volume and its
    work entry, MAL statistics, MAL/AniList characters and the pages
    recorded by add_pages_from_folder.  Values that are None or blank are
    skipped, so no empty elements are written.

    Changes over the previous version:
      * Year/Month/Day are taken from ONE source.  When the volume work
        provides a date, only its parts are used; before, a volume year
        could be combined with the chapter's month/day, producing a date
        that belongs to neither.
      * The GTIN lookup tolerates identifier entries that are not dicts
        instead of raising AttributeError.
    """
    md = self._get_metadata()
    sd = self._suwayomi_data

    volume = self._determine_volume()
    work = self._get_work_for_volume(md, volume) if volume else None

    root = ET.Element("ComicInfo", {
        "xmlns:xsd": "http://www.w3.org/2001/XMLSchema",
        "xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
    })

    def add(tag: str, value) -> None:
        # Append <tag>value</tag> unless the value is None or blank.
        if value is None:
            return
        text = str(value).strip()
        if text:
            ET.SubElement(root, tag).text = text

    # ----- Title / Series -----------------------------------------------
    add("Title", sd.get("Title") or f"Chapter {self._chapter}")
    add("Series", md.get("title") or self._manga_title)
    add("LocalizedSeries",
        md.get("native_title") or md.get("romanized_title"))
    add("SeriesSort", self._get_sort_title(md))
    add("Number", sd.get("Number") or self._chapter)
    add("Count", md.get("total_chapters"))
    add("Volume", volume)

    # ----- Description with MAL stats -----------------------------------
    # Prefer the MAL ID from MangaBaka's source map — avoids an extra
    # Jikan title-search request and is more reliable than fuzzy matching.
    mal_id = (self._mal_id_from_source(md)
              or self._mal_resolver.find_mal_id(
                  md.get("title") or self._manga_title))
    al_id = self._al_id_from_source(md)

    mal_stats = self._mal_resolver.get_stats(mal_id)
    add("Summary", self._build_summary(md, sd, mal_stats))

    # ----- Release date -------------------------------------------------
    # Volume publication date takes precedence over the chapter date.
    # The three parts always come from the same source so that the
    # emitted date is never a blend of two unrelated dates.
    vol_year, vol_month, vol_day = self._parse_work_date(work)
    if vol_year:
        year, month, day = vol_year, vol_month, vol_day
    else:
        year = sd.get("Year") or md.get("year")
        month, day = sd.get("Month"), sd.get("Day")
    add("Year", year)
    add("Month", month)
    add("Day", day)

    # ----- Contributors -------------------------------------------------
    add("Writer", ", ".join(md.get("authors") or []))
    add("Penciller", ", ".join(md.get("artists") or []))
    add("Translator", sd.get("Translator"))

    # ----- Publisher ----------------------------------------------------
    eng_pub = self._publishers_by_type(md, "English")
    orig_pub = self._publishers_by_type(md, "Original")
    add("Publisher", eng_pub or orig_pub)
    if eng_pub and orig_pub:
        add("Imprint", orig_pub)

    # ----- Genres / Tags ------------------------------------------------
    # Genres come back as lowercase snake_case ("slice_of_life"); convert
    # to display form ("Slice Of Life") so Kavita / readers show them
    # consistently with the (already-titled-cased) Tags field.
    add("Genre", ", ".join(_format_term(g) for g in (md.get("genres") or [])))
    add("Tags", ", ".join(_format_term(t) for t in (md.get("tags") or [])))

    # ----- Characters — MAL first, AniList fallback ---------------------
    characters = self._mal_resolver.get_characters(mal_id)
    if not characters and al_id:
        characters = self._al_resolver.get_characters(al_id)
    add("Characters", ", ".join(characters) if characters else None)

    # ----- Web links ----------------------------------------------------
    add("Web", " ".join(self._collect_web_links(md, sd)))

    # ----- Miscellaneous ------------------------------------------------
    add("LanguageISO", self.language)
    add("Manga", self._manga_flag(md))
    add("AgeRating", _AGE_RATING_MAP.get(md.get("content_rating"), "Unknown"))

    if md.get("rating") is not None:
        try:
            # MangaBaka rating is on a 0..100 scale -> ComicInfo
            # CommunityRating uses 0..5.
            add("CommunityRating", round(float(md["rating"]) / 20, 1))
        except (TypeError, ValueError):
            pass

    # ----- ISBN (GTIN) from volume work ---------------------------------
    identifiers = (work or {}).get("identifiers") or []
    first_identifier = (identifiers[0]
                        if isinstance(identifiers, (list, tuple)) and identifiers
                        else None)
    isbn = (first_identifier.get("id")
            if isinstance(first_identifier, dict) else None)
    add("GTIN", isbn)

    # ----- SeriesGroup from related works -------------------------------
    add("SeriesGroup", self._determine_series_group(md))

    # ----- Alternate title notes ----------------------------------------
    add("Notes", self._build_notes(md))

    # ----- Pages --------------------------------------------------------
    if self._pages:
        add("PageCount", len(self._pages))
        pages_el = ET.SubElement(root, "Pages")
        for page in self._pages:
            attrs = {"Image": str(page["image"]), "Type": page["type"]}
            if page.get("size") is not None:
                attrs["ImageSize"] = str(page["size"])
            if page.get("width"):
                attrs["ImageWidth"] = str(page["width"])
            if page.get("height"):
                attrs["ImageHeight"] = str(page["height"])
            if page.get("double"):
                attrs["DoublePage"] = "true"
            ET.SubElement(pages_el, "Page", attrs)

    return ET.ElementTree(root)
|
||
|
||
# ======================================================================
|
||
# Volume determination
|
||
# ======================================================================
|
||
def _determine_volume(self) -> "str | None":
|
||
"""
|
||
Resolves the volume for the current chapter via MangaDex.
|
||
Falls back to estimation when the chapter is absent from MangaDex.
|
||
Returns None if no volume can be determined.
|
||
"""
|
||
md = self._get_metadata()
|
||
try:
|
||
manga_id = self._mangadex_id_from_source(md)
|
||
if not manga_id:
|
||
manga_id = self._volume_resolver.find_manga_id(
|
||
md.get("native_title") or self._manga_title)
|
||
if not manga_id:
|
||
return None
|
||
|
||
series_id = str(md.get("id") or "")
|
||
page_counts = {}
|
||
if series_id:
|
||
page_counts = self._works_resolver.get_page_counts(series_id)
|
||
|
||
return self._volume_resolver.volume_for_chapter(
|
||
manga_id, self._chapter,
|
||
volume_page_counts=page_counts or None)
|
||
except Exception:
|
||
return None
|
||
|
||
def _get_work_for_volume(self, md: dict,
|
||
volume: "str | None") -> "dict | None":
|
||
"""Returns the MangaBaka work dict for the current volume, or None."""
|
||
if not volume:
|
||
return None
|
||
series_id = str(md.get("id") or "")
|
||
if not series_id:
|
||
return None
|
||
try:
|
||
return self._works_resolver.get_work_for_volume(series_id, volume)
|
||
except Exception:
|
||
return None
|
||
|
||
# ======================================================================
|
||
# Cover download
|
||
# ======================================================================
|
||
def _download_cover(self, folder: Path, cover_filename: str) -> "Path | None":
    """
    Downloads the cover image for the current chapter into `folder`.

    A volume-specific cover from the works resolver is preferred when a
    volume is known; otherwise the URL chosen by `_pick_cover_url` from the
    series' `cover` field is used.  The file is stored as
    `<cover_filename><ext>`.

    Returns the written path, or None when no cover URL is available or
    the HTTP request fails.
    """
    md = self._get_metadata()
    volume = self._determine_volume()

    cover_url: "str | None" = None
    series_id = str(md.get("id") or "") if volume else ""
    if series_id:
        try:
            cover_url = self._works_resolver.get_cover_for_volume(
                series_id, volume)
        except Exception:
            # Best-effort — fall back to the series cover below.
            pass

    cover_url = cover_url or _pick_cover_url(md.get("cover"))
    if not cover_url:
        return None

    try:
        resp = self._session.get(cover_url, timeout=self.request_timeout)
        resp.raise_for_status()
    except requests.RequestException:
        return None

    content_type = resp.headers.get("Content-Type", "")
    suffix = _guess_extension(cover_url, content_type)
    target = folder / f"{cover_filename}{suffix}"
    target.write_bytes(resp.content)
    return target
|
||
|
||
# ======================================================================
|
||
# Series group
|
||
# ======================================================================
|
||
def _determine_series_group(self, md: dict) -> "str | None":
    """
    Determines SeriesGroup from MangaBaka's relationships_v2 field.

    - If the series has a 'parent' relationship entry → fetch the parent
      series and return its MangaBaka title (so arcs/sequels appear under
      the root series in Kavita).
    - Otherwise → return the series' own title (it is the root, or a
      standalone series with no parent).

    The own title (or, failing that, the builder's manga title) is also
    returned when the parent entry has no `to_series_id`, the parent fetch
    raises, or the parent has no title.
    """
    for rel in (md.get("relationships_v2") or []):
        if rel.get("relation_type") == "parent":
            parent_id = rel.get("to_series_id")
            if parent_id is not None:
                try:
                    parent_md = self._fetch_series_by_id(parent_id)
                    parent_title = parent_md.get("title")
                    if parent_title:
                        return parent_title
                except Exception:
                    # Best-effort: a failed lookup falls back to the own title.
                    pass
            # NOTE(review): the original indentation was lost in this copy;
            # `break` is assumed to end the search after the first 'parent'
            # entry — confirm the nesting against the real file.
            break

    return md.get("title") or self._manga_title
|
||
|
||
# ======================================================================
|
||
# Title helpers
|
||
# ======================================================================
|
||
def _get_sort_title(self, md: dict) -> "str | None":
|
||
"""
|
||
Returns the SeriesSort title in the configured language.
|
||
Looks for an alt-title with matching language code first;
|
||
falls back to the primary title.
|
||
"""
|
||
lang = self.language.lower()
|
||
alt_titles = self._collect_alt_titles(md)
|
||
if lang in alt_titles:
|
||
return alt_titles[lang]
|
||
# For 'en' the primary MangaBaka title is usually already English
|
||
return md.get("title") or self._manga_title
|
||
|
||
def _collect_alt_titles(self, md: dict) -> "dict[str, str]":
|
||
"""
|
||
Returns {lang_code: title} for EN, DE, JP kanji and JP romaji.
|
||
|
||
MangaBaka stores alt-titles in the `titles` list, where each entry is
|
||
a dict {language, title, traits, is_primary, note}.
|
||
Important caveats observed against the real API:
|
||
* `romanized_title` is the romanization of whatever the series'
|
||
native script is — for a Japanese manga with a Korean licence it
|
||
can hold the Korean romanization, NOT the Japanese romaji.
|
||
Always prefer `titles[language="ja-Latn"]` for romaji instead.
|
||
* `native_title` holds the kanji form for Japanese manga, but
|
||
`titles[language="ja", traits contains "native"]` is more
|
||
reliable when present.
|
||
* Each language can have several entries; primary + official
|
||
traits win over generic ones.
|
||
"""
|
||
titles = md.get("titles") or md.get("alt_titles") or []
|
||
|
||
def pick(language_codes: tuple, prefer_trait: "str | None" = None
|
||
) -> "str | None":
|
||
"""Picks the best title entry for any of the given language codes."""
|
||
if not isinstance(titles, list):
|
||
return None
|
||
best_score = -1
|
||
best_title: "str | None" = None
|
||
for entry in titles:
|
||
if not isinstance(entry, dict):
|
||
continue
|
||
lang = (entry.get("language") or entry.get("lang") or "").lower()
|
||
if lang not in language_codes:
|
||
continue
|
||
title = entry.get("title")
|
||
if not title:
|
||
continue
|
||
traits = entry.get("traits") or []
|
||
score = 0
|
||
if prefer_trait and prefer_trait in traits:
|
||
score += 4
|
||
if "official" in traits:
|
||
score += 2
|
||
if entry.get("is_primary"):
|
||
score += 1
|
||
if score > best_score:
|
||
best_score, best_title = score, title
|
||
return best_title
|
||
|
||
result: dict[str, str] = {}
|
||
|
||
# JP kanji (prefer entry with "native" trait, fall back to native_title)
|
||
kanji = pick(("ja",), prefer_trait="native") or md.get("native_title")
|
||
if kanji:
|
||
result["jp"] = kanji
|
||
|
||
# JP romaji — explicitly from "ja-Latn" entries. Do NOT fall back to
|
||
# `romanized_title` blindly; that field can hold a non-Japanese
|
||
# romanization (e.g. Korean) for the same series.
|
||
romaji = pick(("ja-latn", "ja-romaji"))
|
||
if not romaji:
|
||
# Heuristic fallback only when romanized_title looks Latin
|
||
rt = md.get("romanized_title") or ""
|
||
if rt and all(ord(c) < 128 for c in rt):
|
||
romaji = rt
|
||
if romaji:
|
||
result["romaji"] = romaji
|
||
|
||
# English (prefer official + primary)
|
||
en = pick(("en",))
|
||
if not en:
|
||
en = md.get("title") if md.get("title") else None
|
||
if en:
|
||
result["en"] = en
|
||
|
||
# German
|
||
de = pick(("de",))
|
||
if de:
|
||
result["de"] = de
|
||
|
||
return result
|
||
|
||
def _collect_all_alt_titles(self, md: dict) -> "dict[str, list[str]]":
|
||
"""
|
||
Returns all known title variants grouped by language/script.
|
||
|
||
Groups collected (skipped when empty):
|
||
"en" – English (language = "en")
|
||
"de" – German (language = "de")
|
||
"ja" – Japanese native kanji (language = "ja")
|
||
"ja-romaji" – Japanese romanized (language = "ja-Latn" / "ja-romaji")
|
||
"ko" – Korean native (language = "ko")
|
||
"ko-romaji" – Korean romanized (language = "ko-Latn" / "ko-romaji")
|
||
"zh" – Chinese native (language = "zh" / "zh-hk" / "zh-tw" / …)
|
||
"zh-romaji" – Chinese romanized (language = "zh-Latn")
|
||
|
||
All variants are included (not just primary), preserving API order.
|
||
Duplicates within a group are removed.
|
||
"""
|
||
_GROUPS: "dict[str, tuple]" = {
|
||
"en": ("en",),
|
||
"de": ("de",),
|
||
"ja": ("ja",),
|
||
"ja-romaji": ("ja-latn", "ja-romaji"),
|
||
"ko": ("ko",),
|
||
"ko-romaji": ("ko-latn", "ko-romaji"),
|
||
"zh": ("zh", "zh-hk", "zh-tw", "zh-hans", "zh-hant"),
|
||
"zh-romaji": ("zh-latn",),
|
||
}
|
||
|
||
# Pre-build a flat lang → group mapping for O(1) lookup
|
||
lang_to_group: "dict[str, str]" = {
|
||
lang: group
|
||
for group, langs in _GROUPS.items()
|
||
for lang in langs
|
||
}
|
||
|
||
result: "dict[str, list[str]]" = {}
|
||
seen: "dict[str, set[str]]" = {}
|
||
|
||
for entry in (md.get("titles") or md.get("alt_titles") or []):
|
||
if not isinstance(entry, dict):
|
||
continue
|
||
lang = (entry.get("language") or entry.get("lang") or "").lower()
|
||
group = lang_to_group.get(lang)
|
||
if not group:
|
||
continue
|
||
title = (entry.get("title") or "").strip()
|
||
if not title:
|
||
continue
|
||
if group not in result:
|
||
result[group] = []
|
||
seen[group] = set()
|
||
if title not in seen[group]:
|
||
result[group].append(title)
|
||
seen[group].add(title)
|
||
|
||
return result
|
||
|
||
# ======================================================================
|
||
# Summary / notes
|
||
# ======================================================================
|
||
def _build_summary(self, md: dict, sd: dict,
|
||
mal_stats: "dict | None") -> "str | None":
|
||
"""
|
||
Builds <Summary> as HTML (Kavita supports HTML in this field).
|
||
|
||
Structure (top → bottom):
|
||
1. MAL statistics — HTML link + table with padded columns
|
||
2. Series description — Markdown converted to HTML
|
||
3. Alternate titles — HTML table
|
||
"""
|
||
# Inline style applied to label cells for readable column spacing.
|
||
_TD = 'style="padding-right:1.5em"'
|
||
|
||
parts: list[str] = []
|
||
|
||
# 1. MAL stats table (top) ----------------------------------------
|
||
if mal_stats:
|
||
url = mal_stats.get("url", "")
|
||
as_of = mal_stats.get("as_of", "")
|
||
score = mal_stats.get("score")
|
||
rank = mal_stats.get("rank")
|
||
scored = mal_stats.get("scored_by")
|
||
pop = mal_stats.get("popularity")
|
||
members = mal_stats.get("members")
|
||
favs = mal_stats.get("favorites")
|
||
|
||
rows: list[str] = []
|
||
if score is not None: rows.append(f"<tr><td {_TD}>Score</td><td>{score}</td></tr>")
|
||
if rank is not None: rows.append(f"<tr><td {_TD}>Ranked</td><td>#{rank}</td></tr>")
|
||
if scored is not None: rows.append(f"<tr><td {_TD}>Scored by</td><td>{scored:,} users</td></tr>")
|
||
if pop is not None: rows.append(f"<tr><td {_TD}>Popularity</td><td>#{pop}</td></tr>")
|
||
if members is not None: rows.append(f"<tr><td {_TD}>Members</td><td>{members:,}</td></tr>")
|
||
if favs is not None: rows.append(f"<tr><td {_TD}>Favorites</td><td>{favs:,}</td></tr>")
|
||
|
||
if rows:
|
||
link = f'<a href="{url}" target="_blank">MyAnimeList</a>' if url else "MyAnimeList"
|
||
parts.append(f"<p>{link} stats as of {as_of}:</p><table>{''.join(rows)}</table>")
|
||
|
||
# 2. Description — Markdown → HTML (middle) -----------------------
|
||
desc_raw = (md.get("description") or sd.get("Summary") or "").strip()
|
||
if desc_raw:
|
||
parts.append(_md_to_html(desc_raw))
|
||
|
||
# 3. Alternate titles table (bottom) — all variants per language ------
|
||
all_alt = self._collect_all_alt_titles(md)
|
||
if all_alt:
|
||
label_map = {
|
||
"en": "EN",
|
||
"de": "DE",
|
||
"ja": "JA",
|
||
"ja-romaji": "JA Romaji",
|
||
"ko": "KO",
|
||
"ko-romaji": "KO Romaji",
|
||
"zh": "ZH",
|
||
"zh-romaji": "ZH Romaji",
|
||
}
|
||
alt_rows: list[str] = []
|
||
for group in ("en", "de", "ja", "ja-romaji",
|
||
"ko", "ko-romaji", "zh", "zh-romaji"):
|
||
titles = all_alt.get(group)
|
||
if not titles:
|
||
continue
|
||
label = label_map[group]
|
||
cell = "<br>".join(titles)
|
||
alt_rows.append(f"<tr><td {_TD}>{label}</td><td>{cell}</td></tr>")
|
||
if alt_rows:
|
||
parts.append(f"<table>{''.join(alt_rows)}</table>")
|
||
|
||
return "<br>".join(parts) if parts else None
|
||
|
||
def _build_notes(self, md: dict) -> "str | None":
|
||
"""Builds the <Notes> field with the MangaBaka metadata source URL."""
|
||
series_id = str(md.get("id") or "")
|
||
return f"Metadata source: https://mangabaka.org/{series_id}" if series_id else None
|
||
|
||
# ======================================================================
# Static helpers
# ======================================================================
|
||
@staticmethod
|
||
def _parse_work_date(work: "dict | None") -> tuple:
|
||
"""Returns (year, month, day) strings from a MangaBaka work dict."""
|
||
if not work:
|
||
return (None, None, None)
|
||
raw = (work.get("release_date") or work.get("publication_date") or "")
|
||
if not raw:
|
||
return (None, None, None)
|
||
parts = str(raw).split("-")
|
||
year = parts[0] if len(parts) > 0 and parts[0] else None
|
||
month = parts[1] if len(parts) > 1 and parts[1] else None
|
||
day = parts[2] if len(parts) > 2 and parts[2] else None
|
||
return (year, month, day)
|
||
|
||
@staticmethod
|
||
def _mangadex_id_from_source(md: dict) -> "str | None":
|
||
for raw_key, info in (md.get("source") or {}).items():
|
||
if _normalise_key(raw_key) in ("mangadex", "mangadexorg", "md"):
|
||
if isinstance(info, dict) and info.get("id") is not None:
|
||
return str(info["id"])
|
||
return None
|
||
|
||
@staticmethod
|
||
def _mal_id_from_source(md: dict) -> "int | None":
|
||
for raw_key, info in (md.get("source") or {}).items():
|
||
if _normalise_key(raw_key) in ("myanimelist", "mal"):
|
||
if isinstance(info, dict):
|
||
mid = info.get("id")
|
||
if mid is not None:
|
||
try:
|
||
return int(mid)
|
||
except (TypeError, ValueError):
|
||
pass
|
||
return None
|
||
|
||
@staticmethod
|
||
def _al_id_from_source(md: dict) -> "int | None":
|
||
for raw_key, info in (md.get("source") or {}).items():
|
||
if _normalise_key(raw_key) == "anilist":
|
||
if isinstance(info, dict):
|
||
mid = info.get("id")
|
||
if mid is not None:
|
||
try:
|
||
return int(mid)
|
||
except (TypeError, ValueError):
|
||
pass
|
||
return None
|
||
|
||
@staticmethod
|
||
def _publishers_by_type(md: dict, ptype: str) -> "str | None":
|
||
names = [p.get("name") for p in (md.get("publishers") or [])
|
||
if p.get("type") == ptype and p.get("name")]
|
||
return ", ".join(names) if names else None
|
||
|
||
@staticmethod
|
||
def _manga_flag(md: dict) -> str:
|
||
mtype = (md.get("type") or "").lower()
|
||
if mtype == "manga":
|
||
return "YesAndRightToLeft"
|
||
if mtype in ("manhwa", "manhua", "oel"):
|
||
return "Yes"
|
||
return "Unknown"
|
||
|
||
def _collect_web_links(self, md: dict, sd: dict) -> list[str]:
|
||
links: list[str] = []
|
||
|
||
links.extend(l for l in (md.get("links") or []) if l)
|
||
|
||
for raw_key, info in (md.get("source") or {}).items():
|
||
template = _TRACKER_URL_TEMPLATES.get(_normalise_key(raw_key))
|
||
if not template or not isinstance(info, dict):
|
||
continue
|
||
source_id = info.get("id")
|
||
if source_id is not None:
|
||
links.append(template.format(id=source_id))
|
||
|
||
if sd.get("Web"):
|
||
links.extend(str(sd["Web"]).split())
|
||
|
||
seen: set[str] = set()
|
||
unique: list[str] = []
|
||
for link in links:
|
||
if link not in seen:
|
||
seen.add(link)
|
||
unique.append(link)
|
||
return unique
|
||
|
||
@staticmethod
|
||
def _read_existing_comicinfo(folder: Path) -> dict:
|
||
xml_path = folder / "ComicInfo.xml"
|
||
if not xml_path.is_file():
|
||
return {}
|
||
try:
|
||
root = ET.parse(xml_path).getroot()
|
||
except ET.ParseError:
|
||
return {}
|
||
|
||
wanted = {"Title", "Series", "Number", "Summary", "Writer",
|
||
"Penciller", "Translator", "Genre", "Web",
|
||
"Year", "Month", "Day"}
|
||
data: dict = {}
|
||
for child in root:
|
||
tag = child.tag.split("}")[-1]
|
||
if tag in wanted and child.text and child.text.strip():
|
||
data[tag] = child.text.strip()
|
||
return data
|
||
|
||
@staticmethod
def _image_dimensions(path: Path):
    """
    Return the ``size`` of the image at *path* as reported by Pillow.

    Returns ``(None, None)`` when Pillow is unavailable (``_HAS_PIL`` is
    false) or when opening/reading the image raises any exception.
    """
    unknown = (None, None)
    if not _HAS_PIL:
        return unknown
    try:
        with Image.open(path) as picture:
            size = picture.size
    except Exception:
        # Best effort: a broken or unsupported image yields no dimensions.
        return unknown
    return size
|
||
|
||
|
||
# --------------------------------------------------------------------------
# Module-level helpers (shared with MangaBakaWorksResolver logic)
# --------------------------------------------------------------------------
|
||
def _pick_cover_url(cover) -> "str | None":
|
||
"""
|
||
Selects the best cover URL from a MangaBaka cover object.
|
||
|
||
Real API shape (from `GET /v1/series/{id}` and `/works`):
|
||
{
|
||
"raw": {"url": "...", "size": ..., "height": ..., "width": ...},
|
||
"x150": {"x1": "...", "x2": "...", "x3": "..."},
|
||
"x250": {"x1": "...", "x2": "...", "x3": "..."},
|
||
"x350": {"x1": "...", "x2": "...", "x3": "..."}
|
||
}
|
||
|
||
Order of preference: raw original > x350@x3 > x250@x3 > x150@x3
|
||
(falling through to lower densities and sizes as needed).
|
||
"""
|
||
if not cover:
|
||
return None
|
||
if isinstance(cover, str):
|
||
return cover
|
||
if not isinstance(cover, dict):
|
||
return None
|
||
|
||
# 1) Preferred: the unscaled "raw" image
|
||
raw = cover.get("raw")
|
||
if isinstance(raw, dict):
|
||
url = raw.get("url")
|
||
if isinstance(url, str) and url:
|
||
return url
|
||
elif isinstance(raw, str) and raw:
|
||
return raw
|
||
|
||
# 2) Fallback: size-keyed variants, largest first, highest density first
|
||
for size_key in ("x350", "x250", "x150"):
|
||
variant = cover.get(size_key)
|
||
if isinstance(variant, dict):
|
||
for density in ("x3", "x2", "x1"):
|
||
url = variant.get(density)
|
||
if isinstance(url, str) and url:
|
||
return url
|
||
elif isinstance(variant, str) and variant:
|
||
return variant
|
||
|
||
# 3) Last-ditch fallback: any http URL anywhere in the structure
|
||
for val in cover.values():
|
||
if isinstance(val, str) and val.startswith("http"):
|
||
return val
|
||
if isinstance(val, dict):
|
||
for sub in val.values():
|
||
if isinstance(sub, str) and sub.startswith("http"):
|
||
return sub
|
||
return None
|
||
|
||
|
||
def _guess_extension(url: str, content_type: str) -> str:
    """
    Decide which file extension to use for a downloaded image.

    The suffix of the URL path (query string removed, lower-cased) is
    used when it is listed in ``_IMAGE_EXTS``. Otherwise the content
    type is searched, case-insensitively, for "png", "webp" and "gif" in
    that order. The default is ".jpg".
    """
    path_part = url.partition("?")[0]
    suffix = Path(path_part).suffix.lower()
    if suffix in _IMAGE_EXTS:
        return suffix

    mime = (content_type or "").lower()
    for token in ("png", "webp", "gif"):
        if token in mime:
            return "." + token
    return ".jpg"
|
||
|
||
|
||
# --------------------------------------------------------------------------
# Usage example
# --------------------------------------------------------------------------
|
||
if __name__ == "__main__":
    # Demo run: build the ComicInfo.xml for one chapter folder and write it
    # into that same folder.
    chapter_dir = (
        r"\\192.168.2.2\root\Temp\managdl\mangas\ComicK Fanmade (EN)"
        r"\Yofukashi no Uta\Official_Chapter 66"
    )

    builder = ComicInfoBuilder("Yofukashi no Uta", 66)
    builder.add_pages_from_folder(chapter_dir)
    builder.save_xml(chapter_dir + r"\ComicInfo.xml")

    # Setter behaviour:
    # builder.chapter = 2          # only results discarded, metadata is kept
    # builder.manga_title = "X"    # metadata + results discarded
|