fix
This commit is contained in:
+3
-7
@@ -1,12 +1,8 @@
|
||||
from scr.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper
|
||||
from scr.WebScrapper.FenrirealmWebScrapper import FenrirealmWeb
|
||||
from scr.WebScrapper.WebScrapper import WebScrapper
|
||||
from scr.WebScrapper.FanmtlWebScrapper import FanmtlWebScrapper
|
||||
from scr.WebScrapper.SyosetuWebScrapper import SyosetuWebScrapper
|
||||
from src.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper
|
||||
from src.WebScrapper.WebScrapper import WebScrapper
|
||||
|
||||
|
||||
|
||||
scrapper = WebScrapper.Get(r"https://oniichanyamete.moe/index/bunnygirl/", "html", "en")
|
||||
scrapper = OniichanyameteWebScrapper(r"https://oniichanyamete.moe/index/bunnygirl/", "html", "en")
|
||||
for chapterNumber, link in scrapper._getChapterLinks(1, None):
|
||||
print(chapterNumber, link)
|
||||
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import re
|
||||
import time
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
from pprint import pprint
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
from src.WebScrapper.WebScrapper import WebScrapper
|
||||
|
||||
@@ -13,7 +16,7 @@ class OniichanyameteWebScrapper(WebScrapper):
|
||||
|
||||
|
||||
def _getChapterContent(self, soup:BeautifulSoup) -> str:
|
||||
return soup.select("div", {"class": "chapter-content"})
|
||||
return soup.select("div", {"class": "chapter-content"})[0].prettify()
|
||||
|
||||
|
||||
def _addChapterTitle(self, soup:BeautifulSoup, content:BeautifulSoup|NavigableString, infoDict:dict) -> str:
|
||||
@@ -21,15 +24,15 @@ class OniichanyameteWebScrapper(WebScrapper):
|
||||
infoDict["chapterTitle"] = ""
|
||||
infoDict["author"] = ""
|
||||
|
||||
return super()._addChapterTitle(soup, content, chapterNumber, infoDict)
|
||||
return super()._addChapterTitle(soup, content, infoDict)
|
||||
|
||||
|
||||
|
||||
def _getChapterLinks(self, fromChapter:int, toChapter:int, uriWithFormat:str=None, soup:BeautifulSoup=None):
|
||||
soup = BeautifulSoup(requests.get(self.baseLink, headers=headers).content, "html.parser")
|
||||
soup = BeautifulSoup(requests.get(self.baseLink).content, "html.parser")
|
||||
|
||||
links =soup.select("p[style*='padding-left:60px'] > a")
|
||||
pprint (links)
|
||||
pprint(links)
|
||||
|
||||
|
||||
for link in links:
|
||||
|
||||
@@ -5,6 +5,11 @@ from urllib.parse import urljoin
|
||||
from pprint import pprint
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
|
||||
# from src.WebScrapper.FanmtlWebScrapper import FanmtlWebScrapper
|
||||
# from src.WebScrapper.FenrirealmWebScrapper import FenrirealmWebScrapper
|
||||
# from src.WebScrapper.OniichanyameteWebScrapper import OniichanyameteWebScrapper
|
||||
# from src.WebScrapper.SyosetuWebScrapper import SyosetuWebScrapper
|
||||
from src.functions import writeToFile, makeDir, writeToJsonFile
|
||||
|
||||
|
||||
@@ -17,18 +22,18 @@ class WebScrapper:
|
||||
self.currentLanguage = currentLanguage
|
||||
makeDir(self.htmlFolderPath)
|
||||
|
||||
@staticmethod
|
||||
def Get(baseLink:str, htmlFolderPath:str, currentLanguage:str) -> self:
|
||||
if "fanmtl.com" in baseLink:
|
||||
return FanmtlWebScrapper(baseLink, htmlFolderPath, currentLanguage)
|
||||
elif "syosetu.com" in baseLink:
|
||||
return SyosetuWebScrapper(baseLink, htmlFolderPath, currentLanguage)
|
||||
elif "fenrirealm.com" in baseLink:
|
||||
return FenrirealmWebScrapper(baseLink, htmlFolderPath, currentLanguage)
|
||||
elif "oniichanyamete.moe" in baseLink:
|
||||
return OniichanyameteWebScrapper(baseLink, htmlFolderPath, currentLanguage)
|
||||
else:
|
||||
raise Exception(f"Website {baseLink} is not supported for scraping yet.")
|
||||
# @staticmethod
|
||||
# def Get(baseLink:str, htmlFolderPath:str, currentLanguage:str):
|
||||
# if "fanmtl.com" in baseLink:
|
||||
# return FanmtlWebScrapper(baseLink, htmlFolderPath, currentLanguage)
|
||||
# elif "syosetu.com" in baseLink:
|
||||
# return SyosetuWebScrapper(baseLink, htmlFolderPath, currentLanguage)
|
||||
# elif "fenrirealm.com" in baseLink:
|
||||
# return FenrirealmWebScrapper(baseLink, htmlFolderPath, currentLanguage)
|
||||
# elif "oniichanyamete.moe" in baseLink:
|
||||
# return OniichanyameteWebScrapper(baseLink, htmlFolderPath, currentLanguage)
|
||||
# else:
|
||||
# raise Exception(f"Website {baseLink} is not supported for scraping yet.")
|
||||
|
||||
|
||||
@staticmethod
|
||||
@@ -138,7 +143,7 @@ class WebScrapper:
|
||||
currentChapter = fromChapter
|
||||
|
||||
while currentChapter <= toChapter:
|
||||
yield currentChapter, urljoin(self.baseLink, currentChapter)
|
||||
yield currentChapter, urljoin(self.baseLink, str(currentChapter))
|
||||
currentChapter += 1
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user