diff --git a/db.py b/db.py
new file mode 100644
index 0000000..eaf8737
--- /dev/null
+++ b/db.py
@@ -0,0 +1,60 @@
+"""Thin wrapper around a MySQL connection for the schedule importer."""
+
+import sys
+
+import mysql.connector
+
+import log
+
+
+class DbHandler:
+    """Hold one MySQL connection and one cursor opened on construction."""
+
+    def __init__(self, host, user, pswd, db_name):
+        """Store the connection settings and connect immediately."""
+        self.host = host
+        self.user = user
+        self.pswd = pswd
+        self.db_name = db_name
+        self.conn_open()
+
+    def conn_open(self):
+        """Open the connection and cursor; log and exit on any failure."""
+        try:
+            self.conn = mysql.connector.connect(host=self.host,
+                                                user=self.user,
+                                                passwd=self.pswd,
+                                                db=self.db_name)
+            self.cur = self.conn.cursor()
+        except Exception as e:
+            log.log(log.LOG_LEVEL_ERROR, "connection error: {}", str(e))
+            # sys.exit(1) rather than quit(): quit() is injected by the
+            # site module and exits with status 0, which would hide the
+            # failure from whatever launched this script.
+            sys.exit(1)
+
+    def select(self, query_text, *args):
+        """Run a query and return every row of its result.
+
+        Extra positional arguments are forwarded to cursor.execute(), so
+        pass the parameter tuple as one argument.
+        """
+        self.cur.execute(query_text, *args)
+        return self.cur.fetchall()
+
+    def query(self, query_text, *args):
+        """Run a statement and commit it.
+
+        Extra positional arguments are forwarded to cursor.execute(), so
+        pass the parameter tuple as one argument.
+        """
+        self.cur.execute(query_text, *args)
+        self.conn.commit()
+
+    def close(self):
+        """Close the cursor, then the connection."""
+        try:
+            self.cur.close()
+        finally:
+            # Release the connection even if closing the cursor raised.
+            self.conn.close()
diff --git a/log.py b/log.py
new file mode 100644
index 0000000..a6cc1cb
--- /dev/null
+++ b/log.py
@@ -0,0 +1,26 @@
+"""Minimal console/file logger for the schedule importer."""
+
+import datetime
+
+LOG_LEVEL_INFO = 0
+LOG_LEVEL_ERROR = 1
+
+
+def log(log_level, format_string, *args, out_file=None):
+    """Print a timestamped message and optionally append it to a file.
+
+    log_level: LOG_LEVEL_INFO selects the '[info] ' prefix; any other
+        value selects '[error] '.
+    format_string: str.format() template that is filled with *args.
+    out_file: optional path; when given, the line is appended to it too.
+    """
+    prefix = '[info] ' if log_level == LOG_LEVEL_INFO else '[error] '
+    date = datetime.datetime.now().strftime('[%Y-%m-%d %H:%M]')
+
+    output = ''.join((date, prefix, format_string.format(*args)))
+    print(output)
+
+    if out_file is not None:
+        # Explicit encoding so the log file does not depend on the locale.
+        with open(out_file, 'a', encoding='utf-8') as f:
+            f.write(output + '\n')
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..53e2881
--- /dev/null
+++ b/main.py
@@ -0,0 +1,73 @@
+"""Refresh the schedule table from the three airport timetables."""
+
+import os
+
+from parse import parse_domodedovo, parse_sheremetyevo, parse_vnukovo
+from db import DbHandler
+import log
+
+logfile = 'log.txt'
+
+# SECURITY: credentials do not belong in source control. Environment
+# variables take precedence; the literals remain only as a backward-
+# compatible fallback and should be removed once the password is rotated.
+DB_HOST = os.environ.get('DB_HOST', '77.73.65.40')
+DB_USER = os.environ.get('DB_USER', 'root')
+DB_PASSWORD = os.environ.get('DB_PASSWORD', 'lfrjnf1961')
+DB_NAME = os.environ.get('DB_NAME', 'ourloc')
+
+# (airport_id, name used in log messages, parser)
+AIRPORTS = (
+    (1, 'domodedovo', parse_domodedovo),
+    (2, 'sheremetyevo', parse_sheremetyevo),
+    (3, 'vnukovo', parse_vnukovo),
+)
+
+# `rank` is backtick-quoted because RANK is a reserved word in MySQL 8.
+query = '''\
+INSERT INTO schedule(airport_id, flight_num, direction, planned_time,\
+status, descr, `rank`) values (%s, %s, %s, %s, %s, %s, %s)'''
+
+
+def replace_flights(db, airport_id, name, flights):
+    """Swap one airport's stored flights for a freshly parsed list.
+
+    Current rows are flagged oldnew=0, the parsed flights are inserted and
+    the flagged rows are then deleted. NOTE(review): this relies on new
+    rows not getting oldnew=0 by default - confirm against the schema.
+    """
+    db.query('UPDATE schedule SET oldnew=0 WHERE airport_id=%s AND oldnew=1',
+             (airport_id,))
+    # enumerate() numbers the flights; a counter initialised inside the
+    # loop body would store rank 0 for every row.
+    for idx, flight in enumerate(flights):
+        db.query(query, (airport_id, flight['flight_id'],
+                         flight['from_city'], flight['time'],
+                         # Domodedovo flights have no 'terminal' key.
+                         flight['status'], flight.get('terminal', ''), idx))
+    db.query('DELETE FROM schedule WHERE airport_id=%s and oldnew=0',
+             (airport_id,))
+    log.log(log.LOG_LEVEL_INFO, "inserted {} flights", name, out_file=logfile)
+
+
+def main():
+    """Parse every airport first, then rewrite their rows in the database."""
+    db = DbHandler(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)
+    log.log(log.LOG_LEVEL_INFO, 'succesfully connected to the database')
+    try:
+        parsed = []
+        for airport_id, name, parser in AIRPORTS:
+            flights = parser()
+            log.log(log.LOG_LEVEL_INFO, "parsed {} flights for {}",
+                    len(flights), name, out_file=logfile)
+            parsed.append((airport_id, name, flights))
+
+        for airport_id, name, flights in parsed:
+            replace_flights(db, airport_id, name, flights)
+    finally:
+        # Close the connection even when parsing or a query fails.
+        db.close()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/parse.py b/parse.py
new file mode 100644
index 0000000..ac83c97
--- /dev/null
+++ b/parse.py
@@ -0,0 +1,101 @@
+"""Scrapers for the Domodedovo, Sheremetyevo and Vnukovo timetables."""
+
+import requests
+from bs4 import BeautifulSoup
+
+DOMODEDOVO_URL = 'http://www.domodedovo.ru/passengers/flight/live-board/'
+SHEREMETYEVO_URL = 'http://www.svo.aero/ru/timetable/today/#arrival'
+VNUKOVO_URL = 'http://www.vnukovo.ru/flights/online-timetable/'
+
+# requests waits indefinitely unless a timeout is given; bound every call
+# so a stalled site cannot hang the importer.
+REQUEST_TIMEOUT = 30
+
+
+def _fetch_soup(url, encoding=None):
+    """Download url and return it parsed by BeautifulSoup with lxml.
+
+    encoding: when given, replaces the encoding requests detected.
+    """
+    response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    if encoding is not None:
+        response.encoding = encoding
+    return BeautifulSoup(response.text, 'lxml')
+
+
+def parse_domodedovo():
+    """Collect flights from every page of the Domodedovo live board.
+
+    Pages are requested as ?page=0, 1, ... until one has no data rows.
+    Returns a list of dicts with the keys 'time', 'flight_id',
+    'from_city' and 'status'.
+    """
+    flights = []
+    page = 0
+
+    while True:
+        soup = _fetch_soup(DOMODEDOVO_URL + "?page={}".format(page))
+        table = soup.find('table', id='table')
+        rows = table.find_all('tr')[2:]  # two leading rows (presumably headers)
+        if not rows:
+            break
+
+        for row in rows:
+            cols = row.find_all('td')
+            # find_all() returns a (possibly empty) list, never None, so
+            # a cell without li items yields an empty status string.
+            statuses = cols[5].find_all('li')
+            flights.append({
+                'time': cols[1].text.strip(),
+                'flight_id': cols[3].div.a.text.strip(),
+                'from_city': cols[4].div.text.strip(),
+                'status': ', '.join(s.text.strip() for s in statuses),
+            })
+        page += 1
+
+    return flights
+
+
+def parse_sheremetyevo():
+    """Collect today's flights from the Sheremetyevo timetable page.
+
+    Returns a list of dicts with the keys 'time', 'flight_id',
+    'from_city', 'terminal' and 'status'.
+    """
+    soup = _fetch_soup(SHEREMETYEVO_URL)
+    table = soup.find('div', class_='table')
+    flights = []
+    for row in table.find_all('tr')[1:]:  # skip one leading row
+        cols = row.find_all('td')
+        flights.append({
+            'time': ' '.join((cols[0].text, cols[1].text)),
+            'flight_id': cols[3].a.text,
+            'from_city': cols[5].a.text,
+            'terminal': cols[6].a.text,
+            'status': cols[7].text,
+        })
+
+    return flights
+
+
+def parse_vnukovo():
+    """Collect flights from the Vnukovo online timetable.
+
+    Returns a list of dicts with the keys 'time', 'flight_id',
+    'from_city', 'terminal' and 'status'.
+    """
+    # The page is decoded as UTF-8 regardless of what requests detects.
+    soup = _fetch_soup(VNUKOVO_URL, encoding='utf-8')
+    table = soup.find('tbody')
+    flights = []
+    for row in table.find_all('tr')[:-1]:  # skip the last row
+        cols = row.find_all('td')
+        flights.append({
+            'time': cols[0].text.strip(),
+            'flight_id': cols[1].a.text,
+            'from_city': cols[3].text,
+            'terminal': cols[4].text,
+            'status': cols[5].text,
+        })
+
+    return flights