Initial upload

This commit is contained in:
himidori 2018-03-24 17:11:40 +03:00
parent c2af138b45
commit 1d5e172181
4 changed files with 191 additions and 0 deletions

35
db.py Normal file
View File

@ -0,0 +1,35 @@
import mysql.connector
import log
class DbHandler:
    """Thin wrapper around one MySQL connection and one cursor.

    The connection is opened eagerly by the constructor.  If it cannot be
    established, the error is logged and the process exits with a non-zero
    status.

    Instances can be used as context managers; the cursor and connection
    are closed when the ``with`` block exits.
    """

    def __init__(self, host, user, pswd, db_name):
        """Store the connection settings and open the connection.

        Args:
            host: MySQL server host.
            user: Login user name.
            pswd: Login password.
            db_name: Name of the database to select.
        """
        self.host = host
        self.user = user
        self.pswd = pswd
        self.db_name = db_name
        self.conn_open()

    def __enter__(self):
        """Return the handler itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the cursor and connection; never suppress exceptions."""
        self.close()
        return False

    def conn_open(self):
        """Connect to the server and create the cursor used by all queries.

        Raises:
            SystemExit: with status 1 if connecting or creating the cursor
                fails; the error is logged first.
        """
        try:
            self.conn = mysql.connector.connect(host=self.host,
                                                user=self.user,
                                                passwd=self.pswd,
                                                db=self.db_name)
            self.cur = self.conn.cursor()
        except Exception as e:
            log.log(log.LOG_LEVEL_ERROR, "connection error: {}", str(e))
            # quit() exits with status 0, which would report a failed run
            # as a success to the shell / cron; exit with an error status.
            raise SystemExit(1)

    def select(self, query_text, *args):
        """Execute a statement and return every row of its result.

        Args:
            query_text: SQL text, optionally with placeholders.
            *args: Forwarded positionally to ``cursor.execute``; pass the
                statement parameters as a single tuple or dict.

        Returns:
            Whatever ``cursor.fetchall()`` returns for the statement.
        """
        self.cur.execute(query_text, *args)
        return self.cur.fetchall()

    def query(self, query_text, *args):
        """Execute a statement and commit immediately.

        Args:
            query_text: SQL text, optionally with placeholders.
            *args: Forwarded positionally to ``cursor.execute``; pass the
                statement parameters as a single tuple or dict.
        """
        self.cur.execute(query_text, *args)
        self.conn.commit()

    def close(self):
        """Close the cursor and then the connection.

        The connection is closed even if closing the cursor raises.
        """
        try:
            self.cur.close()
        finally:
            self.conn.close()

27
log.py Normal file
View File

@ -0,0 +1,27 @@
import datetime
# Severity levels accepted by log().
LOG_LEVEL_INFO = 0
LOG_LEVEL_ERROR = 1


def log(log_level, format_string, *args, out_file=None):
    """Print a timestamped message and optionally append it to a file.

    Args:
        log_level: LOG_LEVEL_INFO selects the '[info] ' tag; any other
            value selects the '[error] ' tag.
        format_string: ``str.format`` template for the message.
        *args: Positional values substituted into ``format_string``.
        out_file: Path of a file to append the same line to, or None to
            only print.
    """
    tag = '[info] ' if log_level == LOG_LEVEL_INFO else '[error] '

    moment = datetime.datetime.now()
    stamp = '[{:d}-{:02d}-{:02d} {:02d}:{:02d}]'.format(
        moment.year, moment.month, moment.day, moment.hour, moment.minute
    )

    line = stamp + tag + format_string.format(*args)
    print(line)

    if out_file is None:
        return
    with open(out_file, 'a') as sink:
        print(line, file=sink)

52
main.py Normal file
View File

@ -0,0 +1,52 @@
from parse import *
from db import DbHandler
import log
# Script: scrape the three airport boards and replace each airport's rows
# in the `schedule` table with the freshly parsed flights.
logfile = 'log.txt'

# NOTE(review): the database host and root credentials are hard-coded in
# source control; they should be rotated and loaded from configuration.
db = DbHandler('77.73.65.40', 'root', 'lfrjnf1961', 'ourloc')
log.log(log.LOG_LEVEL_INFO, 'succesfully connected to the database')

# `rank` is back-quoted because RANK is a reserved word from MySQL 8.0.2 on.
query = (
    'INSERT INTO schedule(airport_id, flight_num, direction, planned_time, '
    'status, descr, `rank`) VALUES (%s, %s, %s, %s, %s, %s, %s)'
)


def _replace_schedule(airport_id, airport_name, flights):
    """Swap the stored schedule of one airport for freshly parsed flights.

    Current rows are first flagged as old (oldnew=0), the new flights are
    inserted, and the flagged rows are then deleted.

    Args:
        airport_id: Value stored in schedule.airport_id for this airport.
        airport_name: Name used in the log message.
        flights: Parsed flight dicts; 'terminal' is optional and stored
            as an empty description when absent.
    """
    db.query('UPDATE schedule SET oldnew=0 WHERE airport_id=%s AND oldnew=1',
             (airport_id,))
    # presumably inserted rows get oldnew=1 from a column default, so the
    # DELETE below leaves them in place -- confirm against the schema.
    # enumerate() gives each flight its position as rank; the previous
    # code reset the counter on every iteration, so rank was always 0.
    for idx, flight in enumerate(flights):
        db.query(query, (airport_id, flight['flight_id'],
                         flight['from_city'], flight['time'],
                         flight['status'], flight.get('terminal', ""), idx))
    db.query('DELETE FROM schedule WHERE airport_id=%s AND oldnew=0',
             (airport_id,))
    log.log(log.LOG_LEVEL_INFO, "inserted {} flights", airport_name,
            out_file=logfile)


try:
    df = parse_domodedovo()
    log.log(log.LOG_LEVEL_INFO, "parsed {} flights for domodedovo", len(df), out_file=logfile)

    sf = parse_sheremetyevo()
    log.log(log.LOG_LEVEL_INFO, "parsed {} flights for sheremetyevo", len(sf), out_file=logfile)

    vf = parse_vnukovo()
    log.log(log.LOG_LEVEL_INFO, "parsed {} flights for vnukovo", len(vf), out_file=logfile)

    _replace_schedule(1, 'domodedovo', df)
    _replace_schedule(2, 'sheremetyevo', sf)
    _replace_schedule(3, 'vnukovo', vf)
finally:
    # Release the cursor and connection even when parsing or a query fails.
    db.close()

77
parse.py Normal file
View File

@ -0,0 +1,77 @@
import requests
from bs4 import BeautifulSoup
# Pages fetched by the parse_* functions in this module, one per airport.
# parse_domodedovo appends a "?page=N" query string to its URL.
DOMODEDOVO_URL = 'http://www.domodedovo.ru/passengers/flight/live-board/'
# NOTE(review): the '#arrival' fragment is not part of the HTTP request;
# presumably it records which tab of the page is scraped -- confirm.
SHEREMETYEVO_URL = 'http://www.svo.aero/ru/timetable/today/#arrival'
VNUKOVO_URL = 'http://www.vnukovo.ru/flights/online-timetable/'
def parse_domodedovo(timeout=30):
    """Scrape every page of the Domodedovo board into a list of flights.

    Pages are requested as ``?page=0``, ``?page=1``, ... until a page's
    table has no rows left after its first two rows are skipped.

    Args:
        timeout: Seconds to wait for each HTTP response; without it a
            stalled server would hang the request indefinitely.

    Returns:
        A list of dicts with the keys 'time', 'flight_id', 'from_city'
        and 'status'.  'status' joins the text of all ``<li>`` items in
        the sixth cell with ', ' and is '' when there are none.

    Raises:
        AttributeError: if a page has no ``<table id="table">`` or a row
            lacks the expected nested tags.
        IndexError: if a row has fewer than six cells.
    """
    flights = []
    page = 0

    while True:
        src = requests.get(DOMODEDOVO_URL + "?page={}".format(page),
                           timeout=timeout).text
        soup = BeautifulSoup(src, 'lxml')
        table = soup.find('table', id='table')
        rows = table.find_all('tr')[2:]

        if not rows:
            break

        for row in rows:
            cols = row.find_all('td')
            # find_all() always returns a list, never None, so no None
            # check is needed: an empty list simply joins to ''.
            status = ', '.join(s.text.strip()
                               for s in cols[5].find_all('li'))

            flights.append({
                'time': cols[1].text.strip(),
                'flight_id': cols[3].div.a.text.strip(),
                'from_city': cols[4].div.text.strip(),
                'status': status
            })

        page += 1

    return flights
def parse_sheremetyevo(timeout=30):
    """Scrape the Sheremetyevo timetable page into a list of flights.

    Every row of the ``<div class="table">`` element except the first is
    read.  All cell text is stripped of surrounding whitespace, matching
    parse_domodedovo.

    Args:
        timeout: Seconds to wait for the HTTP response; without it a
            stalled server would hang the request indefinitely.

    Returns:
        A list of dicts with the keys 'time' (first two cells joined by a
        space), 'flight_id', 'from_city', 'terminal' and 'status'.

    Raises:
        AttributeError: if the page has no ``<div class="table">`` or a
            cell lacks the expected ``<a>`` tag.
        IndexError: if a row has fewer than eight cells.
    """
    flights = []

    src = requests.get(SHEREMETYEVO_URL, timeout=timeout).text
    soup = BeautifulSoup(src, 'lxml')
    table = soup.find('div', class_='table')
    rows = table.find_all('tr')[1:]

    for row in rows:
        cols = row.find_all('td')
        flights.append({
            'time': ' '.join((cols[0].text.strip(), cols[1].text.strip())),
            'flight_id': cols[3].a.text.strip(),
            'from_city': cols[5].a.text.strip(),
            'terminal': cols[6].a.text.strip(),
            'status': cols[7].text.strip()
        })

    return flights
def parse_vnukovo(timeout=30):
    """Scrape the Vnukovo timetable page into a list of flights.

    The response is decoded as UTF-8 and every row of the first
    ``<tbody>`` except the last is read.  All cell text is stripped of
    surrounding whitespace; previously only 'time' was.

    Args:
        timeout: Seconds to wait for the HTTP response; without it a
            stalled server would hang the request indefinitely.

    Returns:
        A list of dicts with the keys 'time', 'flight_id', 'from_city',
        'terminal' and 'status'.

    Raises:
        AttributeError: if the page has no ``<tbody>`` or the second cell
            of a row has no ``<a>`` tag.
        IndexError: if a row has fewer than six cells.
    """
    flights = []

    src = requests.get(VNUKOVO_URL, timeout=timeout)
    # Force UTF-8 before reading .text, overriding the detected encoding.
    src.encoding = 'utf-8'
    soup = BeautifulSoup(src.text, 'lxml')
    table = soup.find('tbody')
    rows = table.find_all('tr')[:-1]

    for row in rows:
        cols = row.find_all('td')
        flights.append({
            'time': cols[0].text.strip(),
            'flight_id': cols[1].a.text.strip(),
            'from_city': cols[3].text.strip(),
            'terminal': cols[4].text.strip(),
            'status': cols[5].text.strip()
        })

    return flights