manga-mover-and-metadata-co…/src/AniListResolver.py

"""
AniListResolver.py
==================

Fetches and caches AniList manga metadata (statistics, characters, staff)
using the public AniList GraphQL API.

AniList API: https://graphql.anilist.co  (no authentication required)
Rate limit: 90 req/min  ->  a 700 ms guard between calls is applied.
On HTTP 429 (rate-limit exceeded) the response Retry-After header is
honoured; the request is retried once automatically.

Singleton
---------
Only one instance of this class exists per process.  Subsequent calls to
AniListResolver() return the same object with its warm caches intact.

Provided features
-----------------
- Title-based AniList ID lookup with best-match scoring
- Manga statistics: score (0–10), rank, popularity, members, favorites
- Character list for a manga (names only — for <Characters> XML tag)
- Detailed character list: name, AniList character ID, image URL, role
- Detailed staff list: name, AniList person ID, image URL, positions
- Lazy full-detail fetches per character / person (for descriptions)

Dependencies
------------
    requests    ->  pip install requests
"""

from __future__ import annotations

import datetime
import time

import requests

from MediaResolver import MediaResolver
from TextUtils import best_similarity


# --------------------------------------------------------------------------
# GraphQL query strings
# --------------------------------------------------------------------------
# AniList models both manga and light novels as type MANGA; the format
# clause decides which of the two a search returns.  The placeholder is
# substituted at construction time (see `media_format`).
#
# Title search used by AniListResolver.find_id(): asks for the first page
# with at most 5 candidates; the caller picks the best title match itself.
# The literal token __FORMAT_CLAUSE__ is NOT valid GraphQL -- it must be
# replaced with one of the _FORMAT_CLAUSES values before the query is sent.
_SEARCH_MANGA_TEMPLATE = """
query ($search: String) {
  Page(page: 1, perPage: 5) {
    media(search: $search, type: MANGA, __FORMAT_CLAUSE__) {
      id title { romaji english native } siteUrl
    }
  }
}
"""

# media_format value -> GraphQL argument spliced into _SEARCH_MANGA_TEMPLATE.
# The keys double as the set of accepted `media_format` constructor values.
_FORMAT_CLAUSES = {
    "manga": "format_not_in: [NOVEL]",
    "novel": "format_in: [NOVEL]",
}

# Per-media statistics, consumed by AniListResolver.get_stats().
# `rankings` is a list; get_stats() keeps only entries flagged `allTime`.
_MANGA_STATS = """
query ($id: Int) {
  Media(id: $id, type: MANGA) {
    id title { romaji english native }
    meanScore popularity favourites
    rankings { rank type allTime }
    siteUrl
  }
}
"""

# Up to 25 characters of a media entry, consumed by
# AniListResolver.get_characters_detailed().  `nodes` and `edges` are
# requested as two separate lists and are paired by position by the caller.
_MANGA_CHARACTERS = """
query ($id: Int) {
  Media(id: $id, type: MANGA) {
    characters(sort: [ROLE, RELEVANCE], perPage: 25) {
      nodes { id name { full } image { large } siteUrl }
      edges { role }
    }
  }
}
"""

# Up to 25 staff members of a media entry, consumed by
# AniListResolver.get_staff_detailed().  Same nodes/edges layout as the
# character query; no explicit sort is requested here.
_MANGA_STAFF = """
query ($id: Int) {
  Media(id: $id, type: MANGA) {
    staff(perPage: 25) {
      nodes { id name { full } image { large } siteUrl }
      edges { role }
    }
  }
}
"""

# Full record of one character, consumed by
# AniListResolver.get_character_details().  The description is requested
# with asHtml: false.
_CHARACTER_DETAILS = """
query ($id: Int) {
  Character(id: $id) {
    id name { full } image { large }
    description(asHtml: false)
    favourites siteUrl
  }
}
"""

# Full record of one staff member, consumed by
# AniListResolver.get_person_details().
# NOTE(review): `name.native`, `primaryOccupations` and `homeTown` are
# fetched but get_person_details() does not read them at present.
_PERSON_DETAILS = """
query ($id: Int) {
  Staff(id: $id) {
    id name { full native } image { large }
    description(asHtml: false)
    favourites siteUrl
    dateOfBirth { year month day }
    primaryOccupations
    homeTown
  }
}
"""

# Single GraphQL endpoint every query above is POSTed to.
_ANILIST_GQL = "https://graphql.anilist.co"


class AniListResolver(MediaResolver):
    """
    Singleton: fetches and caches AniList manga data via GraphQL API.

    The first call to AniListResolver() creates and initialises the instance;
    all subsequent calls return the same object (their keyword arguments are
    ignored, the warm caches are kept).

    Error contract of the public methods: a ``requests.RequestException``
    raised while talking to AniList is swallowed and reported as ``None``
    (single-object getters) or ``[]`` (list getters).  Failed or empty
    answers are not cached, so a later call tries again.
    """

    _instance: "AniListResolver | None" = None

    # Minimum spacing between two HTTP requests, in seconds.  AniList allows
    # 90 req/min (~667 ms per request); 700 ms leaves a small safety margin.
    _MIN_REQUEST_INTERVAL = 0.7
    # Seconds to wait after HTTP 429 when Retry-After is missing or unusable.
    _DEFAULT_RETRY_AFTER = 60.0

    # ------------------------------------------------------------------
    # Singleton machinery
    # ------------------------------------------------------------------
    def __new__(cls, **kwargs):
        """Returns the process-wide instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # __init__ runs on EVERY AniListResolver() call; this flag lets
            # it detect the first one and skip all the others.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self, *, request_timeout: int = 30,
                 media_format: str = "manga"):
        """
        request_timeout : per-request timeout in seconds handed to requests.
        media_format    : "manga" (excludes novels) or "novel" (novels only).
                          Only the FIRST construction in the process sets it
                          (singleton); construct the resolver with the correct
                          format in the entry point / orchestrator.

        Raises ValueError for an unknown media_format.
        """
        if self._initialized:
            return

        if media_format not in _FORMAT_CLAUSES:
            raise ValueError(f"media_format must be one of "
                             f"{sorted(_FORMAT_CLAUSES)}, got {media_format!r}")
        self.media_format = media_format
        self._search_query = _SEARCH_MANGA_TEMPLATE.replace(
            "__FORMAT_CLAUSE__", _FORMAT_CLAUSES[media_format])

        self.request_timeout = request_timeout

        self._session = requests.Session()
        self._session.headers.update({
            "User-Agent":   "AniListResolver/1.0",
            "Content-Type": "application/json",
            "Accept":       "application/json",
        })

        # title_lower -> al_id
        self._id_cache: dict[str, "int | None"] = {}
        # al_id -> stats dict
        self._stats_cache: dict[int, dict] = {}
        # manga_al_id -> [name_str, ...]
        self._char_names_cache: dict[int, list[str]] = {}
        # manga_al_id -> [{al_id, name, image_url, role}]
        self._char_detailed_cache: dict[int, list[dict]] = {}
        # manga_al_id -> [{al_id, name, image_url, positions}]
        self._staff_detailed_cache: dict[int, list[dict]] = {}
        # char_al_id -> {al_id, name, image_url, about, favorites, url}
        self._char_info_cache: dict[int, dict] = {}
        # person_al_id -> {al_id, name, image_url, about, favorites, url, ...}
        self._person_info_cache: dict[int, dict] = {}

        # time.monotonic() stamp of the most recent HTTP attempt.
        self._last_request_at: float = 0.0
        self._initialized = True

    # ------------------------------------------------------------------
    # Public: ID lookup
    # ------------------------------------------------------------------
    def find_id(self, title: str) -> "int | None":
        """
        Searches AniList for a manga by title and returns the best-matching
        AniList ID.  Returns None on failure or when no result is found.

        "No result" is cached (per stripped, lower-cased title); a network
        failure is not, so the lookup is retried on the next call.
        """
        if not title or not title.strip():
            return None

        wanted = title.strip()
        key = wanted.lower()
        if key in self._id_cache:
            return self._id_cache[key]

        try:
            data = self._gql(self._search_query, {"search": wanted})
        except requests.RequestException:
            return None

        media = self._dig(data, "data", "Page").get("media") or []
        candidates = [m for m in media if isinstance(m, dict)]

        al_id = None
        if candidates:
            # max() keeps the first of several equally good candidates,
            # i.e. AniList's own ordering breaks ties.
            best = max(candidates, key=lambda m: _score_title(wanted, m))
            al_id = best.get("id")

        self._id_cache[key] = al_id
        return al_id

    # ------------------------------------------------------------------
    # Public: statistics
    # ------------------------------------------------------------------
    def get_stats(self, tracker_id: "int | None") -> "dict | None":
        """
        Returns a statistics dict for the given AniList manga ID:

            {score, rank, scored_by, popularity, members, favorites,
             url, title, as_of (DD-MM-YYYY)}

        Returns None if tracker_id is None, on network failure, or when the
        response carries no Media object.  Only real results are cached.
        """
        if tracker_id is None:
            return None
        if tracker_id in self._stats_cache:
            return self._stats_cache[tracker_id]

        try:
            data = self._gql(_MANGA_STATS, {"id": tracker_id})
        except requests.RequestException:
            return None

        entry = self._dig(data, "data", "Media")
        if not entry:
            # Nothing usable came back; caching an all-None dict would hide
            # the entry for the rest of the process.
            return None

        stats = self._build_stats(entry, tracker_id)
        self._stats_cache[tracker_id] = stats
        return stats

    # ------------------------------------------------------------------
    # Public: character names (for ComicInfo <Characters> tag)
    # ------------------------------------------------------------------
    def get_characters(self, tracker_id: "int | None") -> list[str]:
        """
        Returns a flat list of character names for the manga.

        Derived from get_characters_detailed(); an empty result is returned
        as [] and not cached.
        """
        if tracker_id is None:
            return []
        if tracker_id in self._char_names_cache:
            return self._char_names_cache[tracker_id]

        detailed = self.get_characters_detailed(tracker_id)
        names = [e["name"] for e in detailed if e.get("name")]
        if names:
            self._char_names_cache[tracker_id] = names
        return names

    # ------------------------------------------------------------------
    # Public: detailed character data
    # ------------------------------------------------------------------
    def get_characters_detailed(self, tracker_id: "int | None") -> list[dict]:
        """
        Returns detailed character entries for a manga:
            [{al_id, mal_id=None, name, raw_name, image_url, role,
              about=None}, ...]

        Characters without a full name are skipped; a missing role defaults
        to "SUPPORTING".  Returns [] if tracker_id is None, on network
        failure, or when nothing is listed (empty results are not cached).
        """
        if tracker_id is None:
            return []
        if tracker_id in self._char_detailed_cache:
            return self._char_detailed_cache[tracker_id]

        try:
            data = self._gql(_MANGA_CHARACTERS, {"id": tracker_id})
        except requests.RequestException:
            return []

        connection = self._dig(data, "data", "Media", "characters")
        results = []
        for node, edge in self._node_edge_pairs(connection):
            name = (node.get("name") or {}).get("full") or ""
            if not name:
                continue
            results.append({
                "al_id":     node.get("id"),
                "mal_id":    None,
                "name":      name,
                "raw_name":  name,
                "image_url": (node.get("image") or {}).get("large"),
                "role":      edge.get("role") or "SUPPORTING",
                "about":     None,
            })

        if results:
            self._char_detailed_cache[tracker_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: detailed staff data
    # ------------------------------------------------------------------
    def get_staff_detailed(self, tracker_id: "int | None") -> list[dict]:
        """
        Returns detailed staff entries for a manga:
            [{al_id, mal_id=None, name, raw_name, image_url, positions,
              about=None}, ...]

        `positions` holds the role of that one staff edge (or is empty when
        the edge has none).  Staff without a full name are skipped.
        Returns [] if tracker_id is None, on network failure, or when nothing
        is listed (empty results are not cached).
        """
        if tracker_id is None:
            return []
        if tracker_id in self._staff_detailed_cache:
            return self._staff_detailed_cache[tracker_id]

        try:
            data = self._gql(_MANGA_STAFF, {"id": tracker_id})
        except requests.RequestException:
            return []

        connection = self._dig(data, "data", "Media", "staff")
        results = []
        for node, edge in self._node_edge_pairs(connection):
            name = (node.get("name") or {}).get("full") or ""
            if not name:
                continue
            role = edge.get("role")
            results.append({
                "al_id":     node.get("id"),
                "mal_id":    None,
                "name":      name,
                "raw_name":  name,
                "image_url": (node.get("image") or {}).get("large"),
                "positions": [role] if role else [],
                "about":     None,
            })

        if results:
            self._staff_detailed_cache[tracker_id] = results
        return results

    # ------------------------------------------------------------------
    # Public: individual character / person details
    # ------------------------------------------------------------------
    def get_character_details(self, char_id: "int | None") -> "dict | None":
        """
        Returns full details for a single AniList character:
            {al_id, mal_id=None, name, image_url, about, favorites, url}

        Returns None if char_id is None, on network failure, or when the
        response carries no Character object (not cached in those cases).
        """
        if char_id is None:
            return None
        if char_id in self._char_info_cache:
            return self._char_info_cache[char_id]

        try:
            data = self._gql(_CHARACTER_DETAILS, {"id": char_id})
        except requests.RequestException:
            return None

        entry = self._dig(data, "data", "Character")
        if not entry:
            return None

        result = {
            "al_id":     entry.get("id"),
            "mal_id":    None,
            "name":      (entry.get("name") or {}).get("full") or "",
            "image_url": (entry.get("image") or {}).get("large"),
            "about":     entry.get("description"),
            "favorites": entry.get("favourites"),
            "url":       entry.get("siteUrl") or f"https://anilist.co/character/{char_id}",
        }
        self._char_info_cache[char_id] = result
        return result

    def get_person_details(self, person_id: "int | None") -> "dict | None":
        """
        Returns full details for a single AniList staff person:
            {al_id, mal_id=None, name, given_name=None, family_name=None,
             birthday (YYYY-MM-DD or None), image_url, about, favorites,
             website_url=None, url}

        Returns None if person_id is None, on network failure, or when the
        response carries no Staff object (not cached in those cases).
        """
        if person_id is None:
            return None
        if person_id in self._person_info_cache:
            return self._person_info_cache[person_id]

        try:
            data = self._gql(_PERSON_DETAILS, {"id": person_id})
        except requests.RequestException:
            return None

        entry = self._dig(data, "data", "Staff")
        if not entry:
            return None

        name_obj = entry.get("name") or {}
        result = {
            "al_id":       entry.get("id"),
            "mal_id":      None,
            "name":        name_obj.get("full") or "",
            "given_name":  None,      # AniList does not break names into given/family
            "family_name": None,
            "birthday":    self._iso_birthday(entry.get("dateOfBirth")),
            "image_url":   (entry.get("image") or {}).get("large"),
            "about":       entry.get("description"),
            "favorites":   entry.get("favourites"),
            "website_url": None,      # not exposed by AniList public API
            "url":         entry.get("siteUrl") or f"https://anilist.co/staff/{person_id}",
        }
        self._person_info_cache[person_id] = result
        return result

    # ------------------------------------------------------------------
    # Public: cache management
    # ------------------------------------------------------------------
    def clear_cache(self) -> None:
        """Clears all internal caches (the Singleton instance is retained)."""
        for cache in (
            self._id_cache,
            self._stats_cache,
            self._char_names_cache,
            self._char_detailed_cache,
            self._staff_detailed_cache,
            self._char_info_cache,
            self._person_info_cache,
        ):
            cache.clear()

    # ------------------------------------------------------------------
    # Internal: pure helpers (no I/O, no instance state)
    # ------------------------------------------------------------------
    @staticmethod
    def _dig(payload, *keys) -> dict:
        """
        Null-safe nested dict lookup: follows *keys* through *payload* and
        returns the dict found there, or {} as soon as any level is missing,
        null or not a dict.  (GraphQL reports absent objects as an explicit
        null, which a plain ``.get(key, {})`` would pass through as None.)
        """
        node = payload
        for key in keys:
            if not isinstance(node, dict):
                return {}
            node = node.get(key)
        return node if isinstance(node, dict) else {}

    @staticmethod
    def _node_edge_pairs(connection: dict) -> "list[tuple[dict, dict]]":
        """
        Pairs the `nodes` and `edges` lists of a connection by position.
        Pairs whose node is not a dict are dropped; an edge that is not a
        dict is replaced by {} so callers can always use ``.get``.
        """
        nodes = connection.get("nodes") or []
        edges = connection.get("edges") or []
        return [
            (node, edge if isinstance(edge, dict) else {})
            for node, edge in zip(nodes, edges)
            if isinstance(node, dict)
        ]

    @staticmethod
    def _build_stats(entry: dict, tracker_id: int) -> dict:
        """Converts a raw `Media` object into the public stats dict."""
        title_obj = entry.get("title") or {}
        title = (title_obj.get("romaji")
                 or title_obj.get("english")
                 or title_obj.get("native") or "")

        # AniList meanScore is 0–100; normalise to 0.0–10.0 for consistency
        # with the MALResolver stats dict shape.
        raw_score = entry.get("meanScore")
        score = round(raw_score / 10, 1) if raw_score is not None else None

        # Ranked and popularity ranks are in the rankings array; only the
        # first all-time entry of each type is used.
        rated_rank = None
        popular_rank = None
        for ranking in entry.get("rankings") or []:
            if not isinstance(ranking, dict) or not ranking.get("allTime"):
                continue
            kind = ranking.get("type")
            if kind == "RATED" and rated_rank is None:
                rated_rank = ranking.get("rank")
            elif kind == "POPULAR" and popular_rank is None:
                popular_rank = ranking.get("rank")

        return {
            "score":      score,
            "rank":       rated_rank,
            "scored_by":  None,            # not exposed by AniList API
            "popularity": popular_rank,
            "members":    entry.get("popularity"),   # AniList's popularity = member count
            "favorites":  entry.get("favourites"),
            "url":        entry.get("siteUrl") or f"https://anilist.co/manga/{tracker_id}",
            "title":      title,
            "as_of":      datetime.date.today().strftime("%d-%m-%Y"),
        }

    @staticmethod
    def _iso_birthday(dob: "dict | None") -> "str | None":
        """
        Turns AniList's {year, month, day} into "YYYY-MM-DD".  Returns None
        without a year; a missing month or day is filled in with 1.
        """
        dob = dob or {}
        if not dob.get("year"):
            return None
        month = dob.get("month") or 1
        day = dob.get("day") or 1
        return f"{dob['year']}-{month:02d}-{day:02d}"

    @classmethod
    def _retry_after_seconds(cls, header_value: "str | None") -> float:
        """
        Parses a Retry-After header given as whole seconds.  Anything else
        (missing header, HTTP-date form, garbage) falls back to
        _DEFAULT_RETRY_AFTER; negative values are clamped to 0 because
        time.sleep() rejects them.
        """
        if header_value is None:
            return cls._DEFAULT_RETRY_AFTER
        try:
            seconds = int(str(header_value).strip())
        except ValueError:
            return cls._DEFAULT_RETRY_AFTER
        return float(max(seconds, 0))

    # ------------------------------------------------------------------
    # Internal: rate-limited GraphQL POST
    # ------------------------------------------------------------------
    def _post(self, payload: dict):
        """
        Sends one throttled POST to the AniList endpoint and returns the
        response.  The request timestamp is recorded even when the POST
        raises, so a failed attempt still counts towards the rate guard.
        """
        wait = self._MIN_REQUEST_INTERVAL - (
            time.monotonic() - self._last_request_at)
        if wait > 0:
            time.sleep(wait)
        try:
            return self._session.post(
                _ANILIST_GQL, json=payload, timeout=self.request_timeout)
        finally:
            self._last_request_at = time.monotonic()

    def _gql(self, query: str, variables: "dict | None" = None) -> dict:
        """
        Rate-limited GraphQL POST request (respects AniList's 90 req/min limit).

        On HTTP 429 the Retry-After header is honoured and the request is
        retried once.

        Returns the decoded JSON body.  Raises requests.RequestException
        (or a subclass) on transport errors, on a non-2xx status, and on a
        body that is not valid JSON.
        """
        payload: dict = {"query": query}
        if variables:
            payload["variables"] = variables

        resp = self._post(payload)
        if resp.status_code == 429:
            time.sleep(self._retry_after_seconds(
                resp.headers.get("Retry-After")))
            resp = self._post(payload)

        resp.raise_for_status()
        try:
            return resp.json()
        except requests.RequestException:
            # Newer requests releases already raise a RequestException here.
            raise
        except ValueError as exc:
            # Older releases raise a bare ValueError; wrap it so the callers'
            # `except requests.RequestException` also covers a non-JSON body.
            raise requests.RequestException(
                f"AniList returned a non-JSON body: {exc}") from exc


# --------------------------------------------------------------------------
# Module helpers
# --------------------------------------------------------------------------
def _score_title(query: str, entry: dict) -> float:
    """
    Scores how well *query* matches an AniList media entry.

    The entry's romaji, english and native titles (any of which may be
    None) are handed to best_similarity() as a 3-tuple, in that order, and
    its result is returned unchanged.
    """
    titles = entry.get("title") or {}
    candidates = tuple(
        titles.get(variant) for variant in ("romaji", "english", "native")
    )
    return best_similarity(query, candidates)


# --------------------------------------------------------------------------
# Usage example
# --------------------------------------------------------------------------
if __name__ == "__main__":
    # Smoke test against the live API: constructing twice must hand back
    # the very same object.
    resolver = AniListResolver()
    same_resolver = AniListResolver()
    assert resolver is same_resolver, "AniListResolver must be a Singleton"

    al_id = resolver.find_id("Yofukashi no Uta")
    print("AniList ID   :", al_id)

    stats = resolver.get_stats(al_id)
    if stats:
        # (label, stats key) pairs, printed in this order.
        for label, field in (
            ("Score        :", "score"),
            ("Rank         :", "rank"),
            ("Members      :", "members"),
        ):
            print(label, stats[field])

    chars = resolver.get_characters_detailed(al_id)
    first_three = [entry["name"] for entry in chars[:3]]
    print("Characters (first 3):", first_three)

    staff = resolver.get_staff_detailed(al_id)
    staff_names = [entry["name"] for entry in staff]
    print("Staff        :", staff_names)