fix: correct src package imports, scraper construction, and chapter link/content handling

2026-05-21 10:53:56 +02:00
parent 6c09053ff0
commit 2bee0bc362
3 changed files with 28 additions and 24 deletions
@@ -1,12 +1,8 @@
-from scr.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper
-from scr.WebScrapper.FenrirealmWebScrapper import FenrirealmWeb
-from scr.WebScrapper.WebScrapper import WebScrapper
-from scr.WebScrapper.FanmtlWebScrapper import FanmtlWebScrapper
-from scr.WebScrapper.SyosetuWebScrapper import SyosetuWebScrapper
+from src.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper
+from src.WebScrapper.WebScrapper import WebScrapper


-
-scrapper = WebScrapper.Get(r"https://oniichanyamete.moe/index/bunnygirl/", "html", "en")
+scrapper = OniichanyameteWebScrapper(r"https://oniichanyamete.moe/index/bunnygirl/", "html", "en")
 for chapterNumber, link in scrapper._getChapterLinks(1, None):
    print(chapterNumber, link)

@@ -1,6 +1,9 @@
 import re
 import time
 from urllib.parse import urljoin
+
+import requests
+from pprint import pprint
 from bs4 import BeautifulSoup, NavigableString
 from src.WebScrapper.WebScrapper import WebScrapper

@@ -13,7 +16,7 @@ class OniichanyameteWebScrapper(WebScrapper):


    def _getChapterContent(self, soup:BeautifulSoup) -> str:
-        return soup.select("div", {"class": "chapter-content"})
+        return soup.select("div", {"class": "chapter-content"})[0].prettify()


    def _addChapterTitle(self, soup:BeautifulSoup, content:BeautifulSoup|NavigableString, infoDict:dict) -> str:
@@ -21,15 +24,15 @@ class OniichanyameteWebScrapper(WebScrapper):
        infoDict["chapterTitle"] = ""
        infoDict["author"] = ""

-        return super()._addChapterTitle(soup, content, chapterNumber, infoDict)
+        return super()._addChapterTitle(soup, content, infoDict)



    def _getChapterLinks(self, fromChapter:int, toChapter:int, uriWithFormat:str=None, soup:BeautifulSoup=None):
-        soup = BeautifulSoup(requests.get(self.baseLink, headers=headers).content, "html.parser")
+        soup = BeautifulSoup(requests.get(self.baseLink).content, "html.parser")

        links =soup.select("p[style*='padding-left:60px'] > a")
-        pprint (links)
+        pprint(links)
        
        
        for link in links:
@@ -5,6 +5,11 @@ from urllib.parse import urljoin
 from pprint import pprint
 import requests
 from bs4 import BeautifulSoup, NavigableString
+
+# from src.WebScrapper.FanmtlWebScrapper import FanmtlWebScrapper
+# from src.WebScrapper.FenrirealmWebScrapper import FenrirealmWebScrapper
+# from src.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper
+# from src.WebScrapper.SyosetuWebScrapper import SyosetuWebScrapper
 from src.functions import writeToFile, makeDir, writeToJsonFile


@@ -17,18 +22,18 @@ class WebScrapper:
        self.currentLanguage = currentLanguage
        makeDir(self.htmlFolderPath)

-    @staticmethod
-    def Get(baseLink:str, htmlFolderPath:str, currentLanguage:str) -> self:
-        if "fanmtl.com" in baseLink:
-            return FanmtlWebScrapper(baseLink, htmlFolderPath, currentLanguage)
-        elif "syosetu.com" in baseLink:
-            return SyosetuWebScrapper(baseLink, htmlFolderPath, currentLanguage)
-        elif "fenrirealm.com" in baseLink:
-            return FenrirealmWebScrapper(baseLink, htmlFolderPath, currentLanguage)
-        elif "oniichanyamete.moe" in baseLink:
-            return OniichanyameteWebScrapper(baseLink, htmlFolderPath, currentLanguage)
-        else:
-            raise Exception(f"Website {baseLink} is not supported for scraping yet.")
+    # @staticmethod
+    # def Get(baseLink:str, htmlFolderPath:str, currentLanguage:str):
+    #     if "fanmtl.com" in baseLink:
+    #         return FanmtlWebScrapper(baseLink, htmlFolderPath, currentLanguage)
+    #     elif "syosetu.com" in baseLink:
+    #         return SyosetuWebScrapper(baseLink, htmlFolderPath, currentLanguage)
+    #     elif "fenrirealm.com" in baseLink:
+    #         return FenrirealmWebScrapper(baseLink, htmlFolderPath, currentLanguage)
+    #     elif "oniichanyamete.moe" in baseLink:
+    #         return OniichanyameteWebScrapper(baseLink, htmlFolderPath, currentLanguage)
+    #     else:
+    #         raise Exception(f"Website {baseLink} is not supported for scraping yet.")
    

    @staticmethod
@@ -138,7 +143,7 @@ class WebScrapper:
        currentChapter = fromChapter
        
        while currentChapter <= toChapter:
-            yield currentChapter, urljoin(self.baseLink, currentChapter)
+            yield currentChapter, urljoin(self.baseLink, str(currentChapter))
            currentChapter += 1