Files
j-novel-scrapper-translator/src/EpubHandler.py
T
2026-02-09 19:46:13 +01:00

163 lines
5.4 KiB
Python

import json
import os.path
from enum import unique
from os import scandir, listdir
from pprint import pprint

import aspose.words as aw
import ebooklib
import natsort
from bs4 import BeautifulSoup
from ebooklib import epub
from sympy import false

from src.functions import makeDir, readFromFile, readFromJsonFile
class EpubHandler:
    """Convert between a directory of HTML chapter files and EPUB books.

    Two independent workflows are offered:

    * ``convertHtmlToEpub`` builds an EPUB from scraped ``<name>.html``
      chapters, each accompanied by a ``<name>.json`` metadata file.
    * ``epub_to_html`` / ``html_to_epub`` unpack an existing EPUB into
      ``htmlPath`` and pack such a directory back into an EPUB.
    """

    # File extension (lower-case, without dot) -> MIME type for embedded images.
    _MIME_TYPES = {
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "gif": "image/gif",
        "svg": "image/svg+xml",
        "webp": "image/webp",
    }
    _DEFAULT_MIME = "application/octet-stream"

    # Extensions html_to_epub treats as chapters. EPUB documents are usually
    # stored as .xhtml, so epub_to_html typically produces those.
    _CHAPTER_EXTENSIONS = (".html", ".xhtml", ".htm")

    def __init__(self, htmlPath, epubPath):
        """Remember the working directories and hand both to ``makeDir``.

        Args:
            htmlPath: Directory holding the HTML chapter files (and JSON
                metadata / ``images`` sub-directory, depending on workflow).
            epubPath: Directory that ``convertHtmlToEpub`` writes EPUBs into.
        """
        self.htmlPath = htmlPath
        self.epubPath = epubPath
        # makeDir is a project helper; presumably it creates the directory
        # when it does not exist yet.
        makeDir(self.htmlPath)
        makeDir(self.epubPath)

    @staticmethod
    def _json_path_for(html_file):
        """Return the metadata JSON path that sits next to *html_file*.

        Only the file extension is swapped; a plain ``str.replace`` would also
        rewrite ``.html`` occurring in a directory name.
        """
        return os.path.splitext(html_file)[0] + ".json"

    def convertHtmlToEpub(self, language: str, coverImagePath=None):
        """Build ``<seriesTitle>.epub`` in ``epubPath`` from the scraped chapters.

        Every ``<name>.html`` in ``htmlPath`` must have a sibling
        ``<name>.json`` providing ``chapterTitle`` and ``chapter``; the JSON of
        the first chapter (natural sort order) additionally provides
        ``seriesTitle`` and ``currentLanguage`` for the book itself.

        Args:
            language: Language code set on every chapter document.
            coverImagePath: Optional path of an image embedded as
                ``cover.jpg``.

        Raises:
            FileNotFoundError: If ``htmlPath`` contains no ``.html`` file.
        """
        # Only real .html files are chapters; this keeps the "images"
        # directory and the JSON side-cars out of the chapter list.
        with scandir(self.htmlPath) as entries:
            candidates = [
                entry.path
                for entry in entries
                if entry.is_file() and entry.name.endswith(".html")
            ]
        htmlFiles = natsort.os_sorted(candidates)
        if not htmlFiles:
            raise FileNotFoundError(
                f"No .html chapter files found in {self.htmlPath!r}"
            )

        # Book-level metadata comes from a deterministic file (the first
        # chapter) rather than from whatever listdir() happens to return first.
        seriesInfo = readFromJsonFile(self._json_path_for(htmlFiles[0]))
        book = epub.EpubBook()
        book.set_title(seriesInfo["seriesTitle"])
        book.set_language(seriesInfo["currentLanguage"])
        if coverImagePath:
            book.set_cover("cover.jpg", readFromFile("rb", coverImagePath))

        chaptersByNumber = {}
        for htmlFile in htmlFiles:
            info = readFromJsonFile(self._json_path_for(htmlFile))
            # Validate the chapter number before touching the book so that a
            # skipped chapter is not left in the manifest without a spine entry.
            try:
                chapterNumber = int(info["chapter"])
            except (TypeError, ValueError):
                # Best effort: report the unusable value and carry on.
                pprint(info["chapter"])
                continue
            chapter = epub.EpubHtml(
                title=info["chapterTitle"],
                file_name=f"chapter{info['chapter']}.xhtml",
                lang=language,
            )
            chapter.content = readFromFile("r", htmlFile)
            book.add_item(chapter)
            chaptersByNumber[chapterNumber] = chapter

        # Order chapters by their numeric key for both the TOC and the spine.
        orderedChapters = [chaptersByNumber[number] for number in sorted(chaptersByNumber)]
        book.toc = [
            epub.Link(chapter.file_name, chapter.title, chapter.title)
            for chapter in orderedChapters
        ]
        book.spine = ["nav", *orderedChapters]
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        epub.write_epub(os.path.join(self.epubPath, f"{book.title}.epub"), book)

    # ======================================================
    # EPUB → HTML
    # ======================================================
    def epub_to_html(self, epub_file):
        """Unpack *epub_file* into ``htmlPath``.

        Documents are written in spine (reading) order as
        ``NNN_<original basename>``; images, including the cover image, are
        written flat into ``htmlPath/images``.

        Args:
            epub_file: Path of the EPUB to read.
        """
        book = epub.read_epub(epub_file)
        images_dir = os.path.join(self.htmlPath, "images")
        makeDir(images_dir)

        # Chapters: the spine defines the reading order.
        chapter_count = 0
        for item_id, _ in book.spine:
            item = book.get_item_with_id(item_id)
            # A spine entry may reference an id that has no manifest item.
            if item is None or item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue
            chapter_count += 1
            filename = f"{chapter_count:03d}_{os.path.basename(item.file_name)}"
            with open(os.path.join(self.htmlPath, filename), "wb") as f:
                f.write(item.get_content())

        # Images: they are never part of the spine, so they need their own
        # pass over all items of the book.
        # NOTE(review): images are stored by basename only; chapters that
        # reference them via a different relative path will need their links
        # adjusted - confirm against the translation workflow.
        image_types = (ebooklib.ITEM_IMAGE, ebooklib.ITEM_COVER)
        for item in book.get_items():
            if item.get_type() not in image_types:
                continue
            image_path = os.path.join(images_dir, os.path.basename(item.file_name))
            with open(image_path, "wb") as f:
                f.write(item.get_content())

        print(f"✔ EPUB nach HTML exportiert ({chapter_count} Kapitel)")

    # ======================================================
    # HTML → EPUB
    # ======================================================
    def html_to_epub(self, output_epub, title="Translated Book", lang="de"):
        """Pack the chapter files in ``htmlPath`` into a new EPUB.

        Args:
            output_epub: Path of the EPUB file to write.
            title: Book title.
            lang: Book language code.
        """
        book = epub.EpubBook()
        book.set_title(title)
        book.set_language(lang)

        # Load chapters in natural order (so "2_x" sorts before "10_x").
        html_files = natsort.os_sorted(
            [
                name
                for name in os.listdir(self.htmlPath)
                if name.lower().endswith(self._CHAPTER_EXTENSIONS)
            ]
        )

        chapters = []
        for html_file in html_files:
            with open(os.path.join(self.htmlPath, html_file), "r", encoding="utf-8") as f:
                content = f.read()
            chapter = epub.EpubHtml(
                title=html_file,
                file_name=html_file,
                content=content,
            )
            book.add_item(chapter)
            chapters.append(chapter)

        # Re-embed the images exported next to the chapters.
        images_dir = os.path.join(self.htmlPath, "images")
        if os.path.isdir(images_dir):
            for img in sorted(os.listdir(images_dir)):
                img_path = os.path.join(images_dir, img)
                if not os.path.isfile(img_path):
                    # Sub-directories cannot be embedded as a single item.
                    continue
                with open(img_path, "rb") as f:
                    data = f.read()
                image = epub.EpubItem(
                    uid=img,
                    file_name=f"images/{img}",
                    media_type=self._guess_mime(img),
                    content=data,
                )
                book.add_item(image)

        # Without a TOC the generated navigation documents would be empty.
        book.toc = chapters
        book.spine = ["nav", *chapters]
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        epub.write_epub(output_epub, book)
        print("✔ EPUB neu erstellt")

    # ======================================================
    # MIME helper
    # ======================================================
    def _guess_mime(self, filename):
        """Return the MIME type for *filename* based on its extension.

        The lookup is case-insensitive. Unknown extensions and names without
        any dot yield ``application/octet-stream``.
        """
        _, dot, extension = filename.lower().rpartition(".")
        if not dot:
            # No extension at all: do not mistake the whole name for one.
            return self._DEFAULT_MIME
        return self._MIME_TYPES.get(extension, self._DEFAULT_MIME)