This commit is contained in:
2026-05-21 10:53:56 +02:00
parent 6c09053ff0
commit 2bee0bc362
3 changed files with 28 additions and 24 deletions
+3 -7
View File
@@ -1,12 +1,8 @@
from scr.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper
from scr.WebScrapper.FenrirealmWebScrapper import FenrirealmWeb
from scr.WebScrapper.WebScrapper import WebScrapper
from scr.WebScrapper.FanmtlWebScrapper import FanmtlWebScrapper
from scr.WebScrapper.SyosetuWebScrapper import SyosetuWebScrapper
from src.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper
from src.WebScrapper.WebScrapper import WebScrapper
# Manual smoke test: list every chapter link found on the index page.
# The site-specific scraper is instantiated once, directly; building a second
# instance for the same URL only to overwrite it would repeat the constructor's
# side effects for no benefit.
scrapper = OniichanyameteWebScrapper(r"https://oniichanyamete.moe/index/bunnygirl/", "html", "en")

# fromChapter=1, toChapter=None is passed through unchanged to the scraper.
for chapterNumber, link in scrapper._getChapterLinks(1, None):
    print(chapterNumber, link)
+7 -4
View File
@@ -1,6 +1,9 @@
import re
import time
from urllib.parse import urljoin
import requests
from pprint import pprint
from bs4 import BeautifulSoup, NavigableString
from src.WebScrapper.WebScrapper import WebScrapper
@@ -13,7 +16,7 @@ class OniichanyameteWebScrapper(WebScrapper):
def _getChapterContent(self, soup:BeautifulSoup) -> str:
return soup.select("div", {"class": "chapter-content"})
return soup.select("div", {"class": "chapter-content"})[0].prettify()
def _addChapterTitle(self, soup:BeautifulSoup, content:BeautifulSoup|NavigableString, infoDict:dict) -> str:
@@ -21,15 +24,15 @@ class OniichanyameteWebScrapper(WebScrapper):
infoDict["chapterTitle"] = ""
infoDict["author"] = ""
return super()._addChapterTitle(soup, content, chapterNumber, infoDict)
return super()._addChapterTitle(soup, content, infoDict)
def _getChapterLinks(self, fromChapter:int, toChapter:int, uriWithFormat:str=None, soup:BeautifulSoup=None):
soup = BeautifulSoup(requests.get(self.baseLink, headers=headers).content, "html.parser")
soup = BeautifulSoup(requests.get(self.baseLink).content, "html.parser")
links =soup.select("p[style*='padding-left:60px'] > a")
pprint (links)
pprint(links)
for link in links:
+18 -13
View File
@@ -5,6 +5,11 @@ from urllib.parse import urljoin
from pprint import pprint
import requests
from bs4 import BeautifulSoup, NavigableString
# from src.WebScrapper.FanmtlWebScrapper import FanmtlWebScrapper
# from src.WebScrapper.FenrirealmWebScrapper import FenrirealmWebScrapper
# from src.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper
# from src.WebScrapper.SyosetuWebScrapper import SyosetuWebScrapper
from src.functions import writeToFile, makeDir, writeToJsonFile
@@ -17,18 +22,18 @@ class WebScrapper:
self.currentLanguage = currentLanguage
makeDir(self.htmlFolderPath)
@staticmethod
def Get(baseLink:str, htmlFolderPath:str, currentLanguage:str) -> self:
if "fanmtl.com" in baseLink:
return FanmtlWebScrapper(baseLink, htmlFolderPath, currentLanguage)
elif "syosetu.com" in baseLink:
return SyosetuWebScrapper(baseLink, htmlFolderPath, currentLanguage)
elif "fenrirealm.com" in baseLink:
return FenrirealmWebScrapper(baseLink, htmlFolderPath, currentLanguage)
elif "oniichanyamete.moe" in baseLink:
return OniichanyameteWebScrapper(baseLink, htmlFolderPath, currentLanguage)
else:
raise Exception(f"Website {baseLink} is not supported for scraping yet.")
# @staticmethod
# def Get(baseLink:str, htmlFolderPath:str, currentLanguage:str):
# if "fanmtl.com" in baseLink:
# return FanmtlWebScrapper(baseLink, htmlFolderPath, currentLanguage)
# elif "syosetu.com" in baseLink:
# return SyosetuWebScrapper(baseLink, htmlFolderPath, currentLanguage)
# elif "fenrirealm.com" in baseLink:
# return FenrirealmWebScrapper(baseLink, htmlFolderPath, currentLanguage)
# elif "oniichanyamete.moe" in baseLink:
# return OniichanyameteWebScrapper(baseLink, htmlFolderPath, currentLanguage)
# else:
# raise Exception(f"Website {baseLink} is not supported for scraping yet.")
@staticmethod
@@ -138,7 +143,7 @@ class WebScrapper:
currentChapter = fromChapter
while currentChapter <= toChapter:
yield currentChapter, urljoin(self.baseLink, currentChapter)
yield currentChapter, urljoin(self.baseLink, str(currentChapter))
currentChapter += 1