163 lines
5.4 KiB
Python
163 lines
5.4 KiB
Python
import json
|
|
import os.path
|
|
from enum import unique
|
|
from os import scandir, listdir
|
|
|
|
import natsort
|
|
from ebooklib import epub
|
|
from bs4 import BeautifulSoup
|
|
import aspose.words as aw
|
|
from pprint import pprint
|
|
|
|
from sympy import false
|
|
|
|
from src.functions import makeDir, readFromFile, readFromJsonFile
|
|
|
|
|
|
class EpubHandler:
    """Convert between a directory of HTML chapter files and EPUB books.

    ``htmlPath`` is the working directory that holds the chapter files (and
    an ``images`` sub-directory used by :meth:`epub_to_html` and
    :meth:`html_to_epub`). ``epubPath`` is the directory into which
    :meth:`convertHtmlToEpub` writes its result. Both paths are handed to the
    project helper ``makeDir`` on construction (presumably creating them if
    missing -- confirm against ``src.functions``).
    """

    # File extensions html_to_epub accepts as chapters. epub_to_html keeps the
    # original extension of every spine document, which is usually ``.xhtml``,
    # so accepting only ``.html`` would make the round trip lose all chapters.
    _CHAPTER_EXTENSIONS = (".html", ".xhtml", ".htm")

    # Lower-case file extension -> media type used when re-embedding images.
    _IMAGE_MIME_TYPES = {
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "gif": "image/gif",
        "svg": "image/svg+xml",
        "webp": "image/webp",
    }

    def __init__(self, htmlPath, epubPath):
        """Store both directories and pass each one to ``makeDir``."""
        self.htmlPath = htmlPath
        self.epubPath = epubPath
        makeDir(self.htmlPath)
        makeDir(self.epubPath)

    @staticmethod
    def _sidecar_json(htmlFile):
        """Return the path of the ``.json`` metadata file next to *htmlFile*.

        Only the final extension is swapped, so directory names that happen
        to contain ``.html`` are left alone.
        """
        return os.path.splitext(htmlFile)[0] + ".json"

    @classmethod
    def _is_chapter_file(cls, name):
        """Return True if *name* ends in one of the chapter extensions."""
        return name.lower().endswith(cls._CHAPTER_EXTENSIONS)

    def convertHtmlToEpub(self, language: str, coverImagePath=None):
        """Build one EPUB from the ``.html``/``.json`` pairs in ``htmlPath``.

        Every ``<name>.html`` chapter needs a sibling ``<name>.json`` whose
        keys ``chapter`` and ``chapterTitle`` are read here; the sidecar of
        the first chapter (in natural sort order) additionally supplies
        ``seriesTitle`` and ``currentLanguage`` for the book itself. Chapters
        are ordered by the integer value of ``chapter``. The book is written
        to ``<epubPath>/<seriesTitle>.epub``.

        Args:
            language: Language code set on every chapter document.
            coverImagePath: Optional path of an image embedded as
                ``cover.jpg``.

        Raises:
            FileNotFoundError: If ``htmlPath`` contains no ``.html`` file.
        """
        # Only regular .html files are chapters; directories (e.g. "images")
        # and the .json sidecars themselves are skipped.
        with scandir(self.htmlPath) as entries:
            htmlFiles = natsort.os_sorted(
                [
                    entry.path
                    for entry in entries
                    if entry.is_file() and entry.name.endswith(".html")
                ]
            )
        if not htmlFiles:
            raise FileNotFoundError(
                f"No .html chapter files found in {self.htmlPath!r}"
            )

        seriesInfo = readFromJsonFile(self._sidecar_json(htmlFiles[0]))

        book = epub.EpubBook()
        book.set_title(seriesInfo["seriesTitle"])
        book.set_language(seriesInfo["currentLanguage"])
        if coverImagePath:
            book.set_cover("cover.jpg", readFromFile("rb", coverImagePath))

        chaptersByNumber = {}
        for htmlFile in htmlFiles:
            chapterInfo = readFromJsonFile(self._sidecar_json(htmlFile))

            # Validate the chapter number before touching the book so that an
            # unusable entry is reported and skipped instead of ending up in
            # the manifest without a place in the spine.
            try:
                chapterNumber = int(chapterInfo["chapter"])
            except (TypeError, ValueError):
                pprint(chapterInfo["chapter"])
                continue

            chapter = epub.EpubHtml(
                title=chapterInfo["chapterTitle"],
                file_name=f"chapter{chapterInfo['chapter']}.xhtml",
                lang=language,
            )
            chapter.content = readFromFile("r", htmlFile)
            book.add_item(chapter)
            # NOTE(review): two files with the same chapter number overwrite
            # each other here; only the last one reaches spine and TOC.
            chaptersByNumber[chapterNumber] = chapter

        # Add the chapters to TOC and spine in ascending chapter order.
        toc = []
        spine = ["nav"]
        for chapterNumber in sorted(chaptersByNumber):
            chapter = chaptersByNumber[chapterNumber]
            toc.append(epub.Link(chapter.file_name, chapter.title, chapter.title))
            spine.append(chapter)

        book.toc = toc
        book.spine = spine
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        epub.write_epub(os.path.join(self.epubPath, f"{book.title}.epub"), book)

    # ======================================================
    # EPUB -> HTML
    # ======================================================
    def epub_to_html(self, epub_file):
        """Unpack *epub_file* into ``htmlPath``.

        Spine documents are written in reading order as
        ``NNN_<original basename>`` (``NNN`` is a 1-based, zero-padded
        counter). All image items of the book, including the cover image,
        are written to ``<htmlPath>/images/<basename>``.

        Args:
            epub_file: Path of the EPUB to read.
        """
        # The item-type constants are defined in the ebooklib package root.
        import ebooklib

        book = epub.read_epub(epub_file)

        images_dir = os.path.join(self.htmlPath, "images")
        makeDir(images_dir)

        # Chapters: follow the spine so the numbering matches reading order.
        chapter_count = 0
        for item_id, _linear in book.spine:
            item = book.get_item_with_id(item_id)
            # A spine entry may reference an id that is missing from the
            # manifest; skip it instead of failing on ``None``.
            if item is None or item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue

            chapter_count += 1
            filename = f"{chapter_count:03d}_{os.path.basename(item.file_name)}"
            with open(os.path.join(self.htmlPath, filename), "wb") as f:
                f.write(item.get_content())

        # Images: they are never part of the spine, so they have to be
        # collected from the book's item list separately.
        for image_type in (ebooklib.ITEM_IMAGE, ebooklib.ITEM_COVER):
            for image in book.get_items_of_type(image_type):
                image_path = os.path.join(
                    images_dir, os.path.basename(image.file_name)
                )
                with open(image_path, "wb") as f:
                    f.write(image.get_content())

        print(f"✔ EPUB nach HTML exportiert ({chapter_count} Kapitel)")

    # ======================================================
    # HTML -> EPUB
    # ======================================================
    def html_to_epub(self, output_epub, title="Translated Book", lang="de"):
        """Assemble the chapter files in ``htmlPath`` into a new EPUB.

        Chapter files (``.html``, ``.xhtml``, ``.htm``) are added in natural
        sort order, each under its own file name, and listed in the table of
        contents. Every regular file in ``<htmlPath>/images`` is embedded as
        ``images/<name>``.

        Args:
            output_epub: Path of the EPUB file to write.
            title: Book title.
            lang: Language code for the book and its chapters.
        """
        book = epub.EpubBook()
        book.set_title(title)
        book.set_language(lang)

        # Load the chapters in natural order ("2" before "10"), matching the
        # ordering used by convertHtmlToEpub.
        html_files = natsort.natsorted(
            [
                name
                for name in os.listdir(self.htmlPath)
                if self._is_chapter_file(name)
                and os.path.isfile(os.path.join(self.htmlPath, name))
            ]
        )

        chapters = []
        for html_file in html_files:
            # Read raw bytes: files written by epub_to_html start with an XML
            # encoding declaration, which is best left for the parser to
            # interpret rather than decoding to ``str`` first.
            with open(os.path.join(self.htmlPath, html_file), "rb") as f:
                content = f.read()

            chapter = epub.EpubHtml(
                title=html_file,
                file_name=html_file,
                content=content,
                lang=lang,
            )
            book.add_item(chapter)
            chapters.append(chapter)

        # Re-embed the images.
        images_dir = os.path.join(self.htmlPath, "images")
        if os.path.isdir(images_dir):
            for img in os.listdir(images_dir):
                img_path = os.path.join(images_dir, img)
                if not os.path.isfile(img_path):
                    continue
                with open(img_path, "rb") as f:
                    data = f.read()
                book.add_item(
                    epub.EpubItem(
                        uid=img,
                        file_name=f"images/{img}",
                        media_type=self._guess_mime(img),
                        content=data,
                    )
                )

        book.toc = chapters
        book.spine = ["nav", *chapters]
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        epub.write_epub(output_epub, book)
        print("✔ EPUB neu erstellt")

    # ======================================================
    # MIME helper
    # ======================================================
    def _guess_mime(self, filename):
        """Return the media type for *filename* based on its extension.

        The text after the last ``.`` (or the whole name if there is no dot)
        is looked up case-insensitively; unknown values yield
        ``application/octet-stream``.
        """
        ext = filename.lower().rpartition(".")[2]
        return self._IMAGE_MIME_TYPES.get(ext, "application/octet-stream")
|