Compare commits
2 Commits
7aff871577
...
79123c8780
| Author | SHA1 | Date | |
|---|---|---|---|
| 79123c8780 | |||
| c6e7aa0da6 |
2
main.py
2
main.py
@@ -5,4 +5,4 @@ if __name__ == "__main__":
|
|||||||
sprit = Spritpreise(location="Linnich", radius=30)
|
sprit = Spritpreise(location="Linnich", radius=30)
|
||||||
sprit.setDbConnection(dbHost, dbPort, dbUser, dbPassword, dbType)
|
sprit.setDbConnection(dbHost, dbPort, dbUser, dbPassword, dbType)
|
||||||
sprit.createDb()
|
sprit.createDb()
|
||||||
#sprit.getAllPricesSchedule()
|
sprit.getAllPrices()
|
||||||
146
src/webScraper/DbEntity.py
Normal file
146
src/webScraper/DbEntity.py
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
import mysql.connector
|
||||||
|
import sqlite3
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
|
||||||
|
class DbEntity:
    """Database plumbing shared by the scraper: a local SQLite file plus a remote DB.

    The SQLite database lives in a file called ``Spritpreise.db`` in the same
    folder as this module.  The remote database (MySQL or PostgreSQL) is
    configured through :meth:`setDbConnection`.
    """

    # Every DDL statement is kept as its own string and executed one at a time:
    #  * sqlite3's ``cursor.execute`` refuses strings containing several statements;
    #  * the previous backslash-continued strings collapsed into a single line, so
    #    each ``-- comment`` commented out all SQL that followed it.
    _SQLITE_HEADER_DDL = (
        """
        CREATE TABLE IF NOT EXISTS "spritpreis_header" (
            "id" INTEGER PRIMARY KEY AUTOINCREMENT,
            "name" TEXT NOT NULL,
            "street" TEXT NOT NULL,
            "houseNumber" TEXT NOT NULL,
            "city" TEXT NOT NULL,
            "zipCode" TEXT NOT NULL
        )
        """,
        'CREATE UNIQUE INDEX IF NOT EXISTS "spritpreise_header_pkey" ON "spritpreis_header" ("id")',
        'CREATE INDEX IF NOT EXISTS "idx" ON "spritpreis_header" ("city")',
    )

    # The per-price-column indices (time, diesel, e10, e5, super plus) were
    # commented out in the original SQLite DDL and are intentionally not created.
    _SQLITE_POSITION_DDL = (
        """
        CREATE TABLE IF NOT EXISTS "spritpreis_position" (
            "id" INTEGER PRIMARY KEY AUTOINCREMENT,
            "of_spritpreis_header" INTEGER NOT NULL,
            "time" TEXT NOT NULL,
            "diesel_price" REAL,
            "e10_price" REAL,
            "e5_price" REAL,
            "super_plus_price" REAL,
            CONSTRAINT "foreign_idx" FOREIGN KEY ("of_spritpreis_header")
                REFERENCES "spritpreis_header"("id") ON DELETE CASCADE
        )
        """,
        'CREATE INDEX IF NOT EXISTS "fki_F" ON "spritpreis_position" ("of_spritpreis_header")',
    )

    # PostgreSQL-specific DDL (sequences, ``::regclass`` casts, btree storage parameters).
    _PGSQL_DDL = (
        "CREATE SEQUENCE IF NOT EXISTS spritpreise_header_id_seq",
        """
        CREATE TABLE IF NOT EXISTS "public"."spritpreis_header" (
            "id" int4 NOT NULL DEFAULT nextval('spritpreise_header_id_seq'::regclass),
            "name" text NOT NULL,
            "street" text NOT NULL,
            "houseNumber" text,
            "city" text NOT NULL,
            "zipCode" text,
            PRIMARY KEY ("id")
        )
        """,
        "CREATE UNIQUE INDEX IF NOT EXISTS spritpreise_header_pkey"
        " ON public.spritpreis_header USING btree (id)",
        "CREATE INDEX IF NOT EXISTS idx ON public.spritpreis_header"
        " USING btree (city) INCLUDE (city, street) WITH (deduplicate_items='false')",
        "CREATE SEQUENCE IF NOT EXISTS spritpreis_position_id_seq",
        """
        CREATE TABLE IF NOT EXISTS "public"."spritpreis_position" (
            "id" int4 NOT NULL DEFAULT nextval('spritpreis_position_id_seq'::regclass),
            "of_spritpreis_header" int4 NOT NULL,
            "time" timestamptz NOT NULL,
            "diesel_price" float4,
            "e10_price" float4,
            "e5_price" float4,
            "super_plus_price" float4,
            CONSTRAINT "foreign_idx" FOREIGN KEY ("of_spritpreis_header")
                REFERENCES "public"."spritpreis_header"("id") ON DELETE CASCADE,
            PRIMARY KEY ("id")
        )
        """,
        'CREATE INDEX IF NOT EXISTS "fki_F" ON public.spritpreis_position'
        " USING btree (of_spritpreis_header)",
        'CREATE INDEX IF NOT EXISTS idx_time ON public.spritpreis_position'
        ' USING btree ("time") WITH (deduplicate_items=\'true\')',
        'CREATE INDEX IF NOT EXISTS idx_diesel ON public.spritpreis_position'
        ' USING btree ("diesel_price") INCLUDE ("diesel_price") WITH (deduplicate_items=\'true\')',
        'CREATE INDEX IF NOT EXISTS idx_e10 ON public.spritpreis_position'
        ' USING btree ("e10_price") WITH (deduplicate_items=\'true\')',
        'CREATE INDEX IF NOT EXISTS idx_e5 ON public.spritpreis_position'
        ' USING btree ("e5_price") WITH (deduplicate_items=\'true\')',
        'CREATE INDEX IF NOT EXISTS idx_super_plus ON public.spritpreis_position'
        ' USING btree ("super_plus_price") WITH (deduplicate_items=\'true\')',
    )

    def __init__(self):
        """Initialise with the module folder as SQLite location and no remote DB configured."""
        self.currentFolder = os.path.dirname(os.path.realpath(__file__))
        self.dbType = ""
        self.dbName = ""
        self.dbHost = ""
        self.dbPort = ""
        self.dbUser = ""
        self.dbPassword = ""

    def setDbConnection(self, dbHost: str, dbPort: str, dbUser, dbPassword: str, dbType: Literal["mysql", "pgsql"]):
        """Store the connection settings for the remote database.

        The database name is always set to ``"Spritpreise"``.
        """
        self.dbType = dbType
        self.dbName = "Spritpreise"
        self.dbHost = dbHost
        self.dbPort = dbPort
        self.dbUser = dbUser
        self.dbPassword = dbPassword

    def _getDbConnection(self):
        """Open a connection to the configured remote database.

        Returns:
            A ``(connection, cursor)`` tuple.

        Raises:
            ValueError: if ``dbType`` is neither ``"mysql"`` nor ``"pgsql"``.
        """
        if self.dbType == "mysql":
            connection = mysql.connector.connect(host=self.dbHost, user=self.dbUser, password=self.dbPassword,
                                                 database=self.dbName, port=self.dbPort)
        elif self.dbType == "pgsql":
            connection = psycopg2.connect(dbname=self.dbName, user=self.dbUser, password=self.dbPassword,
                                          host=self.dbHost, port=self.dbPort)
        else:
            raise ValueError("Wrong DB type")

        cursor = connection.cursor()
        return connection, cursor

    def _getSqlLiteConnection(self):
        """Open the local SQLite database file and return ``(connection, cursor)``."""
        connection = sqlite3.connect(os.path.join(self.currentFolder, "Spritpreise.db"))
        cursor = connection.cursor()
        return connection, cursor

    @staticmethod
    def _executeAll(cursor, statements):
        """Run each DDL statement through ``cursor.execute``, in order."""
        for statement in statements:
            cursor.execute(statement)

    def _createSqliteSchema(self):
        """Create the header/position tables and their indices in the SQLite file."""
        if not os.path.isfile(os.path.join(self.currentFolder, "Spritpreise.db")):
            print("Creating sqllite database")

        # Single-underscore helper: a double-underscore call would be name-mangled
        # to _DbEntity__getSqlLiteConnection, which does not exist.
        connection, cursor = self._getSqlLiteConnection()
        try:
            self._executeAll(cursor, self._SQLITE_HEADER_DDL)
            print("created spritpreis_header for sqlite")

            self._executeAll(cursor, self._SQLITE_POSITION_DDL)
            print("created spritpreis_position for sqlite")

            connection.commit()
        finally:
            cursor.close()
            connection.close()

    def _createRemoteSchema(self):
        """Create the schema in the remote database (PostgreSQL only)."""
        if self.dbType != "pgsql":
            # The remote DDL is written in PostgreSQL syntax; do not send it elsewhere.
            print(f"Skipping remote schema creation: only implemented for pgsql (dbType={self.dbType!r})")
            return

        connection, cursor = self._getDbConnection()
        try:
            self._executeAll(cursor, self._PGSQL_DDL)
            connection.commit()
        finally:
            cursor.close()
            connection.close()

    def createDb(self):
        """Create the database schema locally (SQLite) and remotely (PostgreSQL).

        Best-effort: a failure is reported on the console instead of being
        raised, so callers never have to handle an exception from this method.
        """
        try:
            self._createSqliteSchema()
            self._createRemoteSchema()
        except Exception as e:
            # Previously swallowed with ``pass``, which hid that nothing was created.
            print(f"Database setup failed: {e}")
|
||||||
|
|
||||||
|
|
||||||
@@ -1,21 +1,22 @@
|
|||||||
import json
|
import json
|
||||||
|
import re
|
||||||
from datetime import datetime, timezone, timedelta
|
from datetime import datetime, timezone, timedelta
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
import os
|
import os
|
||||||
import mysql.connector
|
|
||||||
import bs4
|
import bs4
|
||||||
import pytz
|
import pytz
|
||||||
import requests
|
import requests
|
||||||
import psycopg2
|
|
||||||
import time
|
import time
|
||||||
import threading
|
import threading
|
||||||
import sched
|
import sched
|
||||||
|
|
||||||
from mysql.connector.aio.charsets import charsets
|
from mysql.connector.aio.charsets import charsets
|
||||||
|
from src.webScraper.DbEntity import DbEntity
|
||||||
|
|
||||||
|
|
||||||
class Spritpreise:
|
class Spritpreise(DbEntity):
|
||||||
def __init__(self, location:str, radius:int, fuelType:Literal["diesel", "E5", "E10", "super plus"] = "E10"):
|
def __init__(self, location:str, radius:int, fuelType:Literal["diesel", "E5", "E10", "super plus"] = "E10"):
|
||||||
self.location = location
|
self.location = location
|
||||||
self.radius = radius
|
self.radius = radius
|
||||||
@@ -28,40 +29,14 @@ class Spritpreise:
|
|||||||
"super E5": 7,
|
"super E5": 7,
|
||||||
}
|
}
|
||||||
self.fuelInfos = {}
|
self.fuelInfos = {}
|
||||||
self.currentFolder = os.path.dirname(os.path.realpath(__file__))
|
super().__init__()
|
||||||
self.dbType = ""
|
|
||||||
self.dbName = ""
|
|
||||||
self.dbHost = ""
|
|
||||||
self.dbPort = ""
|
|
||||||
self.dbUser = ""
|
|
||||||
self.dbPassword = ""
|
|
||||||
|
|
||||||
|
|
||||||
def getCurrentTime(self):
|
def getCurrentTime(self):
|
||||||
return datetime.now().strftime("%Y/%m/%d %H:%M:%S")
|
return datetime.now().strftime("%Y/%m/%d %H:%M:%S")
|
||||||
|
|
||||||
|
|
||||||
def setDbConnection(self, dbHost:str, dbPort:str, dbUser, dbPassword:str, dbType:Literal["mysql", "pgsql"]):
|
|
||||||
self.dbType = dbType
|
|
||||||
self.dbName = "Spritpreise"
|
|
||||||
self.dbHost = dbHost
|
|
||||||
self.dbPort = dbPort
|
|
||||||
self.dbUser = dbUser
|
|
||||||
self.dbPassword = dbPassword
|
|
||||||
|
|
||||||
def __getDbConnection(self):
|
|
||||||
if self.dbType == "mysql":
|
|
||||||
connection = mysql.connector.connect(host=self.dbHost, user=self.dbUser, password=self.dbPassword,
|
|
||||||
database=self.dbName, port=self.dbPort)
|
|
||||||
elif self.dbType == "pgsql":
|
|
||||||
connection = psycopg2.connect(dbname=self.dbName, user=self.dbUser, password=self.dbPassword,
|
|
||||||
host=self.dbHost, port=self.dbPort)
|
|
||||||
|
|
||||||
cursor = connection.cursor()
|
|
||||||
|
|
||||||
return connection, cursor
|
|
||||||
|
|
||||||
|
|
||||||
def __writeLog(self, text: str, printOnConsole=True):
|
def __writeLog(self, text: str, printOnConsole=True):
|
||||||
now = ""
|
now = ""
|
||||||
try:
|
try:
|
||||||
@@ -74,6 +49,7 @@ class Spritpreise:
|
|||||||
if printOnConsole:
|
if printOnConsole:
|
||||||
print(f"[{now}] {text}\n")
|
print(f"[{now}] {text}\n")
|
||||||
|
|
||||||
|
|
||||||
def convertType(self, fuelType:str):
|
def convertType(self, fuelType:str):
|
||||||
if isinstance(fuelType, int):
|
if isinstance(fuelType, int):
|
||||||
return fuelType
|
return fuelType
|
||||||
@@ -111,8 +87,14 @@ class Spritpreise:
|
|||||||
entryName = f'{location["city"]} {location["street"]}'
|
entryName = f'{location["city"]} {location["street"]}'
|
||||||
if entryName not in self.fuelInfos:
|
if entryName not in self.fuelInfos:
|
||||||
self.fuelInfos[entryName] = {"3": None, "5": None, "6": None, "7": None}
|
self.fuelInfos[entryName] = {"3": None, "5": None, "6": None, "7": None}
|
||||||
self.fuelInfos[entryName]["street"] = location["street"]
|
|
||||||
self.fuelInfos[entryName]["city"] = location["city"]
|
city = re.match("(\d{5}) (.*?)", location["city"])
|
||||||
|
address = re.match(r"(.+?)\s+(\d+[a-zA-Z]?)$", location["street"])
|
||||||
|
|
||||||
|
self.fuelInfos[entryName]["street"] = address.group(1)
|
||||||
|
self.fuelInfos[entryName]["houseNumber"] = address.group(2)
|
||||||
|
self.fuelInfos[entryName]["zipCode"] = city.group(1)
|
||||||
|
self.fuelInfos[entryName]["city"] = city.group(2)
|
||||||
self.fuelInfos[entryName]["name"] = location["name"]
|
self.fuelInfos[entryName]["name"] = location["name"]
|
||||||
self.fuelInfos[entryName]["time"] = datetime.now(pytz.timezone('Europe/Berlin')).strftime('%Y-%m-%d %H:%M:%S%z')[:-2]
|
self.fuelInfos[entryName]["time"] = datetime.now(pytz.timezone('Europe/Berlin')).strftime('%Y-%m-%d %H:%M:%S%z')[:-2]
|
||||||
self.fuelInfos[entryName][str(self.convertType(fuelType))] = price if price.replace(".", "").isnumeric() else None
|
self.fuelInfos[entryName][str(self.convertType(fuelType))] = price if price.replace(".", "").isnumeric() else None
|
||||||
@@ -121,6 +103,7 @@ class Spritpreise:
|
|||||||
self.__writeLog(f"Error occurred: {e}")
|
self.__writeLog(f"Error occurred: {e}")
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
|
|
||||||
def getAllPrices(self):
|
def getAllPrices(self):
|
||||||
start = time.time()
|
start = time.time()
|
||||||
"""
|
"""
|
||||||
@@ -163,24 +146,22 @@ class Spritpreise:
|
|||||||
time.sleep(sleepTime)
|
time.sleep(sleepTime)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def insertIntoDb(self):
|
def insertIntoDb(self):
|
||||||
connection, cursor = self.__getDbConnection()
|
connection, cursor = self._getSqlLiteConnection()
|
||||||
|
|
||||||
for key, value in self.fuelInfos.items():
|
for key, value in self.fuelInfos.items():
|
||||||
cursor.execute('SELECT id FROM spritpreis_header WHERE city = %s and street = %s', (value['city'], value["street"]))
|
|
||||||
|
cursor.execute('SELECT id FROM spritpreis_header WHERE city = ? and street = ?', (value['city'], value["street"]))
|
||||||
result = cursor.fetchone()
|
result = cursor.fetchone()
|
||||||
|
|
||||||
if result is None:
|
if result is None:
|
||||||
cursor.execute("INSERT INTO spritpreis_header (city, street, name) VALUES (%s, %s, %s)", (value['city'], value["street"], value["name"]))
|
cursor.execute("INSERT INTO spritpreis_header (city, street, name, houseNumber, zipCode) VALUES (?, ?, ?, ?, ?)", (value['city'], value["street"], value["name"], value["houseNumber"], value["zipCode"]))
|
||||||
cursor.execute('SELECT id FROM spritpreis_header WHERE city = %s and street = %s', (value['city'], value["street"]))
|
cursor.execute('SELECT id FROM spritpreis_header WHERE city = ? and street = ?', (value['city'], value["street"]))
|
||||||
result = cursor.fetchone()
|
result = cursor.fetchone()
|
||||||
print(result)
|
print(result)
|
||||||
|
|
||||||
id = result[0]
|
cursor.execute("INSERT INTO spritpreis_position (of_spritpreis_header, time, diesel_price, e10_price, e5_price, super_plus_price) VALUES (?, ?, ?, ?, ?, ?)",
|
||||||
|
(result[0], value["time"], value["3"], value["5"], value["7"], value["6"]))
|
||||||
cursor.execute("INSERT INTO spritpreis_position (of_spritpreis_header, time, diesel_price, e10_price, e5_price, super_plus_price) VALUES (%s, %s, %s, %s, %s, %s)",
|
|
||||||
(id, value["time"], value["3"], value["5"], value["7"], value["6"]))
|
|
||||||
|
|
||||||
cursor.close()
|
cursor.close()
|
||||||
connection.commit()
|
connection.commit()
|
||||||
@@ -189,7 +170,7 @@ class Spritpreise:
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
def exportAsJson(self, outputFile):
|
def exportAsJson(self, outputFile:str):
|
||||||
start = time.time()
|
start = time.time()
|
||||||
with open(outputFile, "w", encoding="utf-8") as f:
|
with open(outputFile, "w", encoding="utf-8") as f:
|
||||||
json.dump(self.fuelInfos, f, indent=4)
|
json.dump(self.fuelInfos, f, indent=4)
|
||||||
@@ -198,7 +179,7 @@ class Spritpreise:
|
|||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def getDictFromJson(self, inputFile):
|
def getDictFromJson(self, inputFile:str):
|
||||||
with open(inputFile, "r", encoding="utf-8") as f:
|
with open(inputFile, "r", encoding="utf-8") as f:
|
||||||
self.fuelInfos = json.load(f)
|
self.fuelInfos = json.load(f)
|
||||||
return self
|
return self
|
||||||
|
|||||||
Reference in New Issue
Block a user