From 2bee0bc3623ec4d4df9202825769c0b53b3ac0d8 Mon Sep 17 00:00:00 2001 From: JohannesBOT Date: Thu, 21 May 2026 10:53:56 +0200 Subject: [PATCH] fix --- scrapperTest.py | 10 ++----- src/WebScrapper/OniichanyameteWebScrapper.py | 11 ++++--- src/WebScrapper/WebScrapper.py | 31 ++++++++++++-------- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/scrapperTest.py b/scrapperTest.py index af324ec..8d7d86b 100644 --- a/scrapperTest.py +++ b/scrapperTest.py @@ -1,12 +1,8 @@ -from scr.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper -from scr.WebScrapper.FenrirealmWebScrapper import FenrirealmWeb -from scr.WebScrapper.WebScrapper import WebScrapper -from scr.WebScrapper.FanmtlWebScrapper import FanmtlWebScrapper -from scr.WebScrapper.SyosetuWebScrapper import SyosetuWebScrapper +from src.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper +from src.WebScrapper.WebScrapper import WebScrapper - -scrapper = WebScrapper.Get(r"https://oniichanyamete.moe/index/bunnygirl/", "html", "en") +scrapper = OniichanyameteWebScrapper(r"https://oniichanyamete.moe/index/bunnygirl/", "html", "en") for chapterNumber, link in scrapper._getChapterLinks(1, None): print(chapterNumber, link) diff --git a/src/WebScrapper/OniichanyameteWebScrapper.py b/src/WebScrapper/OniichanyameteWebScrapper.py index 1d28b78..e30ce7d 100644 --- a/src/WebScrapper/OniichanyameteWebScrapper.py +++ b/src/WebScrapper/OniichanyameteWebScrapper.py @@ -1,6 +1,9 @@ import re import time from urllib.parse import urljoin + +import requests +from pprint import pprint from bs4 import BeautifulSoup, NavigableString from src.WebScrapper.WebScrapper import WebScrapper @@ -13,7 +16,7 @@ class OniichanyameteWebScrapper(WebScrapper): def _getChapterContent(self, soup:BeautifulSoup) -> str: - return soup.select("div", {"class": "chapter-content"}) + return soup.select("div", {"class": "chapter-content"})[0].prettify() def _addChapterTitle(self, soup:BeautifulSoup, content:BeautifulSoup|NavigableString, infoDict:dict) -> str: @@ -21,15 +24,15 @@ class OniichanyameteWebScrapper(WebScrapper): infoDict["chapterTitle"] = "" infoDict["author"] = "" - return super()._addChapterTitle(soup, content, chapterNumber, infoDict) + return super()._addChapterTitle(soup, content, infoDict) def _getChapterLinks(self, fromChapter:int, toChapter:int, uriWithFormat:str=None, soup:BeautifulSoup=None): - soup = BeautifulSoup(requests.get(self.baseLink, headers=headers).content, "html.parser") + soup = BeautifulSoup(requests.get(self.baseLink).content, "html.parser") links =soup.select("p[style*='padding-left:60px'] > a") - pprint (links) + pprint(links) for link in links: diff --git a/src/WebScrapper/WebScrapper.py b/src/WebScrapper/WebScrapper.py index 226831c..2724691 100644 --- a/src/WebScrapper/WebScrapper.py +++ b/src/WebScrapper/WebScrapper.py @@ -5,6 +5,11 @@ from urllib.parse import urljoin from pprint import pprint import requests from bs4 import BeautifulSoup, NavigableString + +# from src.WebScrapper.FanmtlWebScrapper import FanmtlWebScrapper +# from src.WebScrapper.FenrirealmWebScrapper import FenrirealmWebScrapper +# from src.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper +# from src.WebScrapper.SyosetuWebScrapper import SyosetuWebScrapper from src.functions import writeToFile, makeDir, writeToJsonFile @@ -17,18 +22,18 @@ class WebScrapper: self.currentLanguage = currentLanguage makeDir(self.htmlFolderPath) - @staticmethod - def Get(baseLink:str, htmlFolderPath:str, currentLanguage:str) -> self: - if "fanmtl.com" in baseLink: - return FanmtlWebScrapper(baseLink, htmlFolderPath, currentLanguage) - elif "syosetu.com" in baseLink: - return SyosetuWebScrapper(baseLink, htmlFolderPath, currentLanguage) - elif "fenrirealm.com" in baseLink: - return FenrirealmWebScrapper(baseLink, htmlFolderPath, currentLanguage) - elif "oniichanyamete.moe" in baseLink: - return OniichanyameteWebScrapper(baseLink, htmlFolderPath, currentLanguage) - else: - raise Exception(f"Website {baseLink} is not supported for scraping yet.") + # @staticmethod + # def Get(baseLink:str, htmlFolderPath:str, currentLanguage:str): + # if "fanmtl.com" in baseLink: + # return FanmtlWebScrapper(baseLink, htmlFolderPath, currentLanguage) + # elif "syosetu.com" in baseLink: + # return SyosetuWebScrapper(baseLink, htmlFolderPath, currentLanguage) + # elif "fenrirealm.com" in baseLink: + # return FenrirealmWebScrapper(baseLink, htmlFolderPath, currentLanguage) + # elif "oniichanyamete.moe" in baseLink: + # return OniichanyameteWebScrapper(baseLink, htmlFolderPath, currentLanguage) + # else: + # raise Exception(f"Website {baseLink} is not supported for scraping yet.") @staticmethod @@ -138,7 +143,7 @@ class WebScrapper: currentChapter = fromChapter while currentChapter <= toChapter: - yield currentChapter, urljoin(self.baseLink, currentChapter) + yield currentChapter, urljoin(self.baseLink, str(currentChapter)) currentChapter += 1