fix

2026-05-21 10:53:56 +02:00
parent 6c09053ff0
commit 2bee0bc362
3 changed files with 28 additions and 24 deletions
@@ -1,12 +1,8 @@
-from scr.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper
+from src.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper
-from scr.WebScrapper.FenrirealmWebScrapper import FenrirealmWeb
+from src.WebScrapper.WebScrapper import WebScrapper
 from scr.WebScrapper.WebScrapper import WebScrapper
 from scr.WebScrapper.FanmtlWebScrapper import FanmtlWebScrapper
 from scr.WebScrapper.SyosetuWebScrapper import SyosetuWebScrapper
-
+scrapper = OniichanyameteWebScrapper(r"https://oniichanyamete.moe/index/bunnygirl/", "html", "en")
 scrapper = WebScrapper.Get(r"https://oniichanyamete.moe/index/bunnygirl/", "html", "en")
 for chapterNumber, link in scrapper._getChapterLinks(1, None):
    print(chapterNumber, link)
@@ -1,6 +1,9 @@
 import re
 import time
 from urllib.parse import urljoin
 import requests
 from pprint import pprint
 from bs4 import BeautifulSoup, NavigableString
 from src.WebScrapper.WebScrapper import WebScrapper
@@ -13,7 +16,7 @@ class OniichanyameteWebScrapper(WebScrapper):
    def _getChapterContent(self, soup:BeautifulSoup) -> str:
-        return soup.select("div", {"class": "chapter-content"})
+        return soup.select("div", {"class": "chapter-content"})[0].prettify()
    def _addChapterTitle(self, soup:BeautifulSoup, content:BeautifulSoup|NavigableString, infoDict:dict) -> str:
@@ -21,15 +24,15 @@ class OniichanyameteWebScrapper(WebScrapper):
        infoDict["chapterTitle"] = ""
        infoDict["author"] = ""
-        return super()._addChapterTitle(soup, content, chapterNumber, infoDict)
+        return super()._addChapterTitle(soup, content, infoDict)
    def _getChapterLinks(self, fromChapter:int, toChapter:int, uriWithFormat:str=None, soup:BeautifulSoup=None):
-        soup = BeautifulSoup(requests.get(self.baseLink, headers=headers).content, "html.parser")
+        soup = BeautifulSoup(requests.get(self.baseLink).content, "html.parser")
        links =soup.select("p[style*='padding-left:60px'] > a")
-        pprint (links)
+        pprint(links)
        for link in links:
@@ -5,6 +5,11 @@ from urllib.parse import urljoin
 from pprint import pprint
 import requests
 from bs4 import BeautifulSoup, NavigableString
 # from src.WebScrapper.FanmtlWebScrapper import FanmtlWebScrapper
 # from src.WebScrapper.FenrirealmWebScrapper import FenrirealmWebScrapper
 # from src.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper
 # from src.WebScrapper.SyosetuWebScrapper import SyosetuWebScrapper
 from src.functions import writeToFile, makeDir, writeToJsonFile
@@ -17,18 +22,18 @@ class WebScrapper:
        self.currentLanguage = currentLanguage
        makeDir(self.htmlFolderPath)
-    @staticmethod
+    # @staticmethod
-    def Get(baseLink:str, htmlFolderPath:str, currentLanguage:str) -> self:
+    # def Get(baseLink:str, htmlFolderPath:str, currentLanguage:str):
-        if "fanmtl.com" in baseLink:
+    #     if "fanmtl.com" in baseLink:
-            return FanmtlWebScrapper(baseLink, htmlFolderPath, currentLanguage)
+    #         return FanmtlWebScrapper(baseLink, htmlFolderPath, currentLanguage)
-        elif "syosetu.com" in baseLink:
+    #     elif "syosetu.com" in baseLink:
-            return SyosetuWebScrapper(baseLink, htmlFolderPath, currentLanguage)
+    #         return SyosetuWebScrapper(baseLink, htmlFolderPath, currentLanguage)
-        elif "fenrirealm.com" in baseLink:
+    #     elif "fenrirealm.com" in baseLink:
-            return FenrirealmWebScrapper(baseLink, htmlFolderPath, currentLanguage)
+    #         return FenrirealmWebScrapper(baseLink, htmlFolderPath, currentLanguage)
-        elif "oniichanyamete.moe" in baseLink:
+    #     elif "oniichanyamete.moe" in baseLink:
-            return OniichanyameteWebScrapper(baseLink, htmlFolderPath, currentLanguage)
+    #         return OniichanyameteWebScrapper(baseLink, htmlFolderPath, currentLanguage)
-        else:
+    #     else:
-            raise Exception(f"Website {baseLink} is not supported for scraping yet.")
+    #         raise Exception(f"Website {baseLink} is not supported for scraping yet.")
    @staticmethod
@@ -138,7 +143,7 @@ class WebScrapper:
        currentChapter = fromChapter
        while currentChapter <= toChapter:
-            yield currentChapter, urljoin(self.baseLink, currentChapter)
+            yield currentChapter, urljoin(self.baseLink, str(currentChapter))
            currentChapter += 1