# j-novel-scrapper-translator/src/EpubHandler.py

import json
import os.path
from enum import unique
from os import scandir, listdir

import natsort
from ebooklib import epub
from bs4 import BeautifulSoup
import aspose.words as aw
from pprint import pprint

from sympy import false

from src.functions import makeDir, readFromFile, readFromJsonFile


class EpubHandler:
    """Convert between a directory of HTML chapter files and EPUB books.

    Two independent workflows share the same working directories:

    * ``convertHtmlToEpub`` builds an EPUB from ``.html`` chapter files that
      each have a ``.json`` metadata sidecar next to them.
    * ``epub_to_html`` / ``html_to_epub`` unpack an existing EPUB into plain
      files and pack such a directory into an EPUB again.

    Attributes are plain paths: ``htmlPath`` is the chapter working
    directory, ``epubPath`` the directory finished books are written to.
    """

    # Extensions html_to_epub accepts as chapter documents. epub_to_html keeps
    # each document's original file name, which is commonly ``.xhtml``, so
    # accepting only ``.html`` would drop those chapters on the way back.
    _CHAPTER_EXTENSIONS = (".html", ".xhtml", ".htm")

    # Extension (lower-case, without the dot) -> media type for embedded images.
    _MIME_TYPES = {
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "gif": "image/gif",
        "svg": "image/svg+xml",
        "webp": "image/webp",
    }
    _DEFAULT_MIME = "application/octet-stream"

    def __init__(self, htmlPath, epubPath):
        """Store both working directories and pass each one to ``makeDir``.

        Args:
            htmlPath: Directory holding the HTML chapter files.
            epubPath: Directory the generated EPUB files are written to.
        """
        self.htmlPath = htmlPath
        self.epubPath = epubPath
        # NOTE(review): makeDir is a project helper; presumably it creates the
        # directory when it is missing - confirm in src.functions.
        makeDir(self.htmlPath)
        makeDir(self.epubPath)

    @staticmethod
    def _sidecar_path(htmlFile):
        """Return the ``.json`` path that sits next to *htmlFile*.

        Only the final extension is swapped, so a ``.html`` that happens to
        appear in a directory name is left alone.
        """
        return os.path.splitext(htmlFile)[0] + ".json"

    def convertHtmlToEpub(self, language: str, coverImagePath=None):
        """Build one EPUB from the ``.html`` chapters in ``self.htmlPath``.

        Every ``<name>.html`` needs a ``<name>.json`` sidecar. Keys read from
        it: ``seriesTitle`` and ``currentLanguage`` (book level, taken from
        the first chapter file in natural sort order) and ``chapterTitle``
        and ``chapter`` (per chapter). Chapters appear in the book ordered by
        the integer value of ``chapter``; a chapter whose number cannot be
        converted to ``int`` is printed and left out. The book is written to
        ``<epubPath>/<seriesTitle>.epub``.

        Args:
            language: Language code passed as ``lang`` to every chapter
                document. The book-level language comes from the sidecar.
            coverImagePath: Optional path of an image to embed as
                ``cover.jpg``.

        Raises:
            FileNotFoundError: If ``self.htmlPath`` contains no ``.html`` file.
        """
        # Only regular .html files are chapters; this skips the .json sidecars
        # as well as sub-directories such as the "images" folder.
        with scandir(self.htmlPath) as entries:
            htmlFiles = natsort.os_sorted([
                entry.path
                for entry in entries
                if entry.is_file() and entry.name.lower().endswith(".html")
            ])
        if not htmlFiles:
            raise FileNotFoundError(
                f"no .html chapter files found in {self.htmlPath!r}"
            )

        # Book-level metadata comes from a deterministic file (the first
        # chapter) instead of whatever the directory listing returns first.
        bookInfos = readFromJsonFile(self._sidecar_path(htmlFiles[0]))

        book = epub.EpubBook()
        book.set_title(bookInfos["seriesTitle"])
        book.set_language(bookInfos["currentLanguage"])
        if coverImagePath:
            book.set_cover("cover.jpg", readFromFile("rb", coverImagePath))

        chaptersByNumber = {}
        for htmlFile in htmlFiles:
            chapterInfos = readFromJsonFile(self._sidecar_path(htmlFile))

            # int() signals bad input with ValueError/TypeError; report the
            # offending value and carry on with the remaining chapters.
            try:
                chapterNumber = int(chapterInfos["chapter"])
            except (TypeError, ValueError):
                pprint(chapterInfos["chapter"])
                continue

            chapter = epub.EpubHtml(
                title=chapterInfos["chapterTitle"],
                file_name=f"chapter{chapterInfos['chapter']}.xhtml",
                lang=language,
            )
            chapter.content = readFromFile("r", htmlFile)
            book.add_item(chapter)
            chaptersByNumber[chapterNumber] = chapter

        # Reading order and table of contents both follow the chapter number.
        orderedChapters = [chaptersByNumber[number] for number in sorted(chaptersByNumber)]

        book.toc = [
            epub.Link(chapter.file_name, chapter.title, chapter.title)
            for chapter in orderedChapters
        ]
        book.spine = ["nav", *orderedChapters]
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        epub.write_epub(os.path.join(self.epubPath, f"{book.title}.epub"), book)

    # ======================================================
    # EPUB -> HTML
    # ======================================================
    def epub_to_html(self, epub_file):
        """Unpack *epub_file* into ``self.htmlPath``.

        Documents are written in spine (reading) order as
        ``NNN_<original name>``, where ``NNN`` is a zero-padded running
        number. Images, including the cover image, are written to the
        ``images`` sub-directory under their base name, so two images with
        the same base name in different EPUB folders overwrite each other.

        Args:
            epub_file: Path of the EPUB to read.
        """
        # Imported locally: the ITEM_* type constants are defined in the
        # top-level ``ebooklib`` package, not in ``ebooklib.epub``.
        import ebooklib

        book = epub.read_epub(epub_file)

        images_dir = os.path.join(self.htmlPath, "images")
        makeDir(images_dir)

        # Chapters: walk the spine so the numbering matches the reading order.
        chapter_index = 1
        for item_id, _ in book.spine:
            item = book.get_item_with_id(item_id)
            # A spine entry may point at an id missing from the manifest.
            if item is None or item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue

            filename = f"{chapter_index:03d}_{os.path.basename(item.file_name)}"
            with open(os.path.join(self.htmlPath, filename), "wb") as f:
                f.write(item.get_content())
            chapter_index += 1

        # Images: they are manifest items but never spine entries, so they
        # have to be collected from the full item list.
        image_types = (ebooklib.ITEM_IMAGE, ebooklib.ITEM_COVER)
        for item in book.get_items():
            if item.get_type() not in image_types:
                continue
            image_path = os.path.join(images_dir, os.path.basename(item.file_name))
            with open(image_path, "wb") as f:
                f.write(item.get_content())

        print(f"✔ EPUB nach HTML exportiert ({chapter_index - 1} Kapitel)")

    # ======================================================
    # HTML -> EPUB
    # ======================================================
    def html_to_epub(self, output_epub, title="Translated Book", lang="de"):
        """Pack the chapter files in ``self.htmlPath`` into a new EPUB.

        Chapter files (``.html``, ``.xhtml``, ``.htm``) are added in
        lexicographic file-name order, which equals reading order for the
        zero-padded names ``epub_to_html`` produces. Every regular file in
        the ``images`` sub-directory is embedded as ``images/<name>``.

        Args:
            output_epub: Path of the EPUB file to write.
            title: Book title.
            lang: Book language code.
        """
        book = epub.EpubBook()
        book.set_title(title)
        book.set_language(lang)

        # Load chapters (sorted!).
        html_files = sorted(
            name for name in os.listdir(self.htmlPath)
            if name.lower().endswith(self._CHAPTER_EXTENSIONS)
            and os.path.isfile(os.path.join(self.htmlPath, name))
        )

        chapters = []
        for html_file in html_files:
            with open(os.path.join(self.htmlPath, html_file), "r", encoding="utf-8") as f:
                content = f.read()

            chapter = epub.EpubHtml(
                title=html_file,
                file_name=html_file,
                content=content,
            )
            book.add_item(chapter)
            chapters.append(chapter)

        # Re-embed the images.
        images_dir = os.path.join(self.htmlPath, "images")
        if os.path.isdir(images_dir):
            for img in sorted(os.listdir(images_dir)):
                img_path = os.path.join(images_dir, img)
                # Nested directories cannot be opened as image files.
                if not os.path.isfile(img_path):
                    continue
                with open(img_path, "rb") as f:
                    data = f.read()
                book.add_item(
                    epub.EpubItem(
                        uid=img,
                        file_name=f"images/{img}",
                        media_type=self._guess_mime(img),
                        content=data,
                    )
                )

        # Without a toc the generated navigation documents would be empty.
        book.toc = chapters
        book.spine = ["nav", *chapters]
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        epub.write_epub(output_epub, book)
        print("✔ EPUB neu erstellt")

    # ======================================================
    # MIME helper
    # ======================================================
    def _guess_mime(self, filename):
        """Return the media type for *filename* based on its extension.

        The extension is matched case-insensitively. Unknown extensions and
        names without any dot yield ``application/octet-stream``.
        """
        _, dot, extension = filename.lower().rpartition(".")
        if not dot:
            # No dot at all: the whole name must not be read as an extension.
            return self._DEFAULT_MIME
        return self._MIME_TYPES.get(extension, self._DEFAULT_MIME)