diff --git a/main.py b/main.py
index c833cd7..1f440cd 100644
--- a/main.py
+++ b/main.py
@@ -4,4 +4,5 @@ from src.db import *
 if __name__ == "__main__":
     sprit = Spritpreise(location="Linnich", radius=30)
     sprit.setDbConnection(dbHost, dbPort, dbUser, dbPassword, dbType)
-    #sprit.getAllPricesSchedule()
\ No newline at end of file
+    sprit.createDb()
+    sprit.getAllPrices()
\ No newline at end of file
diff --git a/src/webScraper/DbEntity.py b/src/webScraper/DbEntity.py
new file mode 100644
index 0000000..9c6e630
--- /dev/null
+++ b/src/webScraper/DbEntity.py
@@ -0,0 +1,177 @@
+"""Database helpers for the Spritpreise scraper: local SQLite file plus optional remote DB."""
+import os
+
+import psycopg2
+import mysql.connector
+import sqlite3
+from typing import Literal
+
+
+# The schema scripts are triple-quoted on purpose: every statement keeps its own
+# line, so a "--" comment cannot swallow the statements that follow it (which is
+# what happens when the script is joined into one line by backslash continuations).
+_SQLITE_HEADER_SCHEMA = """
+CREATE TABLE IF NOT EXISTS "spritpreis_header" (
+    "id" INTEGER PRIMARY KEY AUTOINCREMENT,
+    "name" TEXT NOT NULL,
+    "street" TEXT NOT NULL,
+    "houseNumber" TEXT NOT NULL,
+    "city" TEXT NOT NULL,
+    "zipCode" TEXT NOT NULL
+);
+-- Indices
+CREATE UNIQUE INDEX IF NOT EXISTS "spritpreise_header_pkey" ON "spritpreis_header" ("id");
+CREATE INDEX IF NOT EXISTS "idx" ON "spritpreis_header" ("city");
+"""
+
+_SQLITE_POSITION_SCHEMA = """
+CREATE TABLE IF NOT EXISTS "spritpreis_position" (
+    "id" INTEGER PRIMARY KEY AUTOINCREMENT,
+    "of_spritpreis_header" INTEGER NOT NULL,
+    "time" TEXT NOT NULL,
+    "diesel_price" REAL,
+    "e10_price" REAL,
+    "e5_price" REAL,
+    "super_plus_price" REAL,
+    CONSTRAINT "foreign_idx" FOREIGN KEY ("of_spritpreis_header") REFERENCES "spritpreis_header"("id") ON DELETE CASCADE
+);
+-- Indices
+CREATE INDEX IF NOT EXISTS "fki_F" ON "spritpreis_position" ("of_spritpreis_header");
+"""
+
+_PGSQL_HEADER_SCHEMA = """
+CREATE SEQUENCE IF NOT EXISTS spritpreise_header_id_seq;
+-- Table Definition
+CREATE TABLE IF NOT EXISTS "public"."spritpreis_header" (
+    "id" int4 NOT NULL DEFAULT nextval('spritpreise_header_id_seq'::regclass),
+    "name" text NOT NULL,
+    "street" text NOT NULL,
+    "houseNumber" text,
+    "city" text NOT NULL,
+    "zipCode" text,
+    PRIMARY KEY ("id")
+);
+-- Indices
+CREATE UNIQUE INDEX IF NOT EXISTS spritpreise_header_pkey ON public.spritpreis_header USING btree (id);
+CREATE INDEX IF NOT EXISTS idx ON public.spritpreis_header USING btree (city) INCLUDE (city, street) WITH (deduplicate_items='false');
+"""
+
+_PGSQL_POSITION_SCHEMA = """
+CREATE SEQUENCE IF NOT EXISTS spritpreis_position_id_seq;
+-- Table Definition
+CREATE TABLE IF NOT EXISTS "public"."spritpreis_position" (
+    "id" int4 NOT NULL DEFAULT nextval('spritpreis_position_id_seq'::regclass),
+    "of_spritpreis_header" int4 NOT NULL,
+    "time" timestamptz NOT NULL,
+    "diesel_price" float4,
+    "e10_price" float4,
+    "e5_price" float4,
+    "super_plus_price" float4,
+    CONSTRAINT "foreign_idx" FOREIGN KEY ("of_spritpreis_header") REFERENCES "public"."spritpreis_header"("id") ON DELETE CASCADE,
+    PRIMARY KEY ("id")
+);
+-- Indices
+CREATE INDEX IF NOT EXISTS "fki_F" ON public.spritpreis_position USING btree (of_spritpreis_header);
+CREATE INDEX IF NOT EXISTS idx_time ON public.spritpreis_position USING btree ("time") WITH (deduplicate_items='true');
+CREATE INDEX IF NOT EXISTS idx_diesel ON public.spritpreis_position USING btree ("diesel_price") INCLUDE ("diesel_price") WITH (deduplicate_items='true');
+CREATE INDEX IF NOT EXISTS idx_e10 ON public.spritpreis_position USING btree ("e10_price") WITH (deduplicate_items='true');
+CREATE INDEX IF NOT EXISTS idx_e5 ON public.spritpreis_position USING btree ("e5_price") WITH (deduplicate_items='true');
+CREATE INDEX IF NOT EXISTS idx_super_plus ON public.spritpreis_position USING btree ("super_plus_price") WITH (deduplicate_items='true');
+"""
+
+
+class DbEntity:
+    """Store DB connection settings and create the Spritpreise schema."""
+
+    def __init__(self):
+        # Folder of this module; the SQLite file "Spritpreise.db" lives next to it.
+        self.currentFolder = os.path.dirname(os.path.realpath(__file__))
+        self.dbType = ""
+        self.dbName = ""
+        self.dbHost = ""
+        self.dbPort = ""
+        self.dbUser = ""
+        self.dbPassword = ""
+
+    def setDbConnection(self, dbHost:str, dbPort:str, dbUser, dbPassword:str, dbType:Literal["mysql", "pgsql"]):
+        """Remember the remote connection settings; the database name is always "Spritpreise"."""
+        self.dbType = dbType
+        self.dbName = "Spritpreise"
+        self.dbHost = dbHost
+        self.dbPort = dbPort
+        self.dbUser = dbUser
+        self.dbPassword = dbPassword
+
+    def _getDbConnection(self):
+        """Open the remote database selected by ``dbType``.
+
+        Returns:
+            A ``(connection, cursor)`` tuple; the caller closes both.
+
+        Raises:
+            ValueError: if ``dbType`` is neither "mysql" nor "pgsql".
+        """
+        if self.dbType == "mysql":
+            connection = mysql.connector.connect(host=self.dbHost, user=self.dbUser, password=self.dbPassword,
+                                                 database=self.dbName, port=self.dbPort)
+        elif self.dbType == "pgsql":
+            connection = psycopg2.connect(dbname=self.dbName, user=self.dbUser, password=self.dbPassword,
+                                          host=self.dbHost, port=self.dbPort)
+        else:
+            # ValueError is still an Exception, so existing handlers keep working.
+            raise ValueError("Wrong DB type")
+
+        return connection, connection.cursor()
+
+    def _getSqlLiteConnection(self):
+        """Open ``Spritpreise.db`` next to this module and return ``(connection, cursor)``."""
+        connection = sqlite3.connect(os.path.join(self.currentFolder, "Spritpreise.db"))
+        return connection, connection.cursor()
+
+    def _createSqliteSchema(self):
+        """Create the header and position tables (and indices) in the SQLite file."""
+        if not os.path.isfile(os.path.join(self.currentFolder, "Spritpreise.db")):
+            print("Creating sqllite database")
+
+        connection, cursor = self._getSqlLiteConnection()
+        try:
+            # executescript() is required: sqlite3's execute() accepts one statement only.
+            cursor.executescript(_SQLITE_HEADER_SCHEMA)
+            print("created spritpreis_header for sqlite")
+            cursor.executescript(_SQLITE_POSITION_SCHEMA)
+            print("created spritpreis_position for sqlite")
+            connection.commit()
+        finally:
+            cursor.close()
+            connection.close()
+
+    def _createPgsqlSchema(self):
+        """Create the header and position tables (and indices) in PostgreSQL."""
+        connection, cursor = self._getDbConnection()
+        try:
+            cursor.execute(_PGSQL_HEADER_SCHEMA)
+            cursor.execute(_PGSQL_POSITION_SCHEMA)
+            connection.commit()
+        finally:
+            cursor.close()
+            connection.close()
+
+    def createDb(self):
+        """Create missing tables in SQLite and, when ``dbType`` is "pgsql", remotely.
+
+        Best effort: a failing backend is reported on the console, never raised.
+        """
+        try:
+            self._createSqliteSchema()
+        except Exception as e:
+            print(f"Could not create sqlite schema: {e}")
+
+        if self.dbType != "pgsql":
+            # The remote DDL is PostgreSQL-specific (sequences, "public" schema, btree options).
+            print(f"Skipping remote schema creation for db type '{self.dbType}'")
+            return
+
+        try:
+            self._createPgsqlSchema()
+        except Exception as e:
+            print(f"Could not create pgsql schema: {e}")
diff 
--git a/src/webScraper/Spritpreise.py b/src/webScraper/Spritpreise.py index 11caf7b..22dd15c 100644 --- a/src/webScraper/Spritpreise.py +++ b/src/webScraper/Spritpreise.py @@ -1,21 +1,22 @@ import json +import re from datetime import datetime, timezone, timedelta from pprint import pprint from typing import Literal import os -import mysql.connector import bs4 import pytz import requests -import psycopg2 + import time import threading import sched from mysql.connector.aio.charsets import charsets +from src.webScraper.DbEntity import DbEntity -class Spritpreise: +class Spritpreise(DbEntity): def __init__(self, location:str, radius:int, fuelType:Literal["diesel", "E5", "E10", "super plus"] = "E10"): self.location = location self.radius = radius @@ -28,40 +29,14 @@ class Spritpreise: "super E5": 7, } self.fuelInfos = {} - self.currentFolder = os.path.dirname(os.path.realpath(__file__)) - self.dbType = "" - self.dbName = "" - self.dbHost = "" - self.dbPort = "" - self.dbUser = "" - self.dbPassword = "" + super().__init__() + def getCurrentTime(self): return datetime.now().strftime("%Y/%m/%d %H:%M:%S") - def setDbConnection(self, dbHost:str, dbPort:str, dbUser, dbPassword:str, dbType:Literal["mysql", "pgsql"]): - self.dbType = dbType - self.dbName = "Spritpreise" - self.dbHost = dbHost - self.dbPort = dbPort - self.dbUser = dbUser - self.dbPassword = dbPassword - - def __getDbConnection(self): - if self.dbType == "mysql": - connection = mysql.connector.connect(host=self.dbHost, user=self.dbUser, password=self.dbPassword, - database=self.dbName, port=self.dbPort) - elif self.dbType == "pgsql": - connection = psycopg2.connect(dbname=self.dbName, user=self.dbUser, password=self.dbPassword, - host=self.dbHost, port=self.dbPort) - - cursor = connection.cursor() - - return connection, cursor - - def __writeLog(self, text: str, printOnConsole=True): now = "" try: @@ -74,6 +49,7 @@ class Spritpreise: if printOnConsole: print(f"[{now}] {text}\n") + def convertType(self, 
fuelType:str): if isinstance(fuelType, int): return fuelType @@ -111,8 +87,14 @@ class Spritpreise: entryName = f'{location["city"]} {location["street"]}' if entryName not in self.fuelInfos: self.fuelInfos[entryName] = {"3": None, "5": None, "6": None, "7": None} - self.fuelInfos[entryName]["street"] = location["street"] - self.fuelInfos[entryName]["city"] = location["city"] + + city = re.match("(\d{5}) (.*?)", location["city"]) + address = re.match(r"(.+?)\s+(\d+[a-zA-Z]?)$", location["street"]) + + self.fuelInfos[entryName]["street"] = address.group(1) + self.fuelInfos[entryName]["houseNumber"] = address.group(2) + self.fuelInfos[entryName]["zipCode"] = city.group(1) + self.fuelInfos[entryName]["city"] = city.group(2) self.fuelInfos[entryName]["name"] = location["name"] self.fuelInfos[entryName]["time"] = datetime.now(pytz.timezone('Europe/Berlin')).strftime('%Y-%m-%d %H:%M:%S%z')[:-2] self.fuelInfos[entryName][str(self.convertType(fuelType))] = price if price.replace(".", "").isnumeric() else None @@ -121,6 +103,7 @@ class Spritpreise: self.__writeLog(f"Error occurred: {e}") time.sleep(5) + def getAllPrices(self): start = time.time() """ @@ -163,24 +146,22 @@ class Spritpreise: time.sleep(sleepTime) - def insertIntoDb(self): - connection, cursor = self.__getDbConnection() + connection, cursor = self._getSqlLiteConnection() for key, value in self.fuelInfos.items(): - cursor.execute('SELECT id FROM spritpreis_header WHERE city = %s and street = %s', (value['city'], value["street"])) + + cursor.execute('SELECT id FROM spritpreis_header WHERE city = ? 
and street = ?', (value['city'], value["street"])) result = cursor.fetchone() if result is None: - cursor.execute("INSERT INTO spritpreis_header (city, street, name) VALUES (%s, %s, %s)", (value['city'], value["street"], value["name"])) - cursor.execute('SELECT id FROM spritpreis_header WHERE city = %s and street = %s', (value['city'], value["street"])) + cursor.execute("INSERT INTO spritpreis_header (city, street, name, houseNumber, zipCode) VALUES (?, ?, ?, ?, ?)", (value['city'], value["street"], value["name"], value["houseNumber"], value["zipCode"])) + cursor.execute('SELECT id FROM spritpreis_header WHERE city = ? and street = ?', (value['city'], value["street"])) result = cursor.fetchone() print(result) - id = result[0] - - cursor.execute("INSERT INTO spritpreis_position (of_spritpreis_header, time, diesel_price, e10_price, e5_price, super_plus_price) VALUES (%s, %s, %s, %s, %s, %s)", - (id, value["time"], value["3"], value["5"], value["7"], value["6"])) + cursor.execute("INSERT INTO spritpreis_position (of_spritpreis_header, time, diesel_price, e10_price, e5_price, super_plus_price) VALUES (?, ?, ?, ?, ?, ?)", + (result[0], value["time"], value["3"], value["5"], value["7"], value["6"])) cursor.close() connection.commit() @@ -189,7 +170,7 @@ class Spritpreise: return self - def exportAsJson(self, outputFile): + def exportAsJson(self, outputFile:str): start = time.time() with open(outputFile, "w", encoding="utf-8") as f: json.dump(self.fuelInfos, f, indent=4) @@ -198,7 +179,7 @@ class Spritpreise: return self - def getDictFromJson(self, inputFile): + def getDictFromJson(self, inputFile:str): with open(inputFile, "r", encoding="utf-8") as f: self.fuelInfos = json.load(f) return self