ourloc/parse.py
2018-03-24 17:11:40 +03:00

78 lines
2.1 KiB
Python

import requests
from bs4 import BeautifulSoup
# Page fetched by parse_domodedovo(); a "?page=N" query string is appended
# to it to walk through the paginated board.
DOMODEDOVO_URL = 'http://www.domodedovo.ru/passengers/flight/live-board/'
# Page fetched by parse_sheremetyevo() in a single request.
SHEREMETYEVO_URL = 'http://www.svo.aero/ru/timetable/today/#arrival'
# Page fetched by parse_vnukovo() in a single request (decoded as UTF-8 there).
VNUKOVO_URL = 'http://www.vnukovo.ru/flights/online-timetable/'
def parse_domodedovo():
    """Scrape the Domodedovo board page by page and return its flights.

    Pages are requested one after another (``?page=0``, ``?page=1``, ...)
    until a page yields no data rows (or has no ``<table id="table">`` at
    all).

    Returns:
        list[dict]: one dict per data row with the keys ``'time'``,
        ``'flight_id'``, ``'from_city'`` and ``'status'``.  ``'status'`` is
        the comma-separated, stripped text of the ``<li>`` items found in the
        sixth cell, or ``None`` when that cell contains no ``<li>`` items.

    Raises:
        IndexError / AttributeError: if a data row does not have the
            expected cells or nested ``div``/``a`` tags.
    """
    flights = []
    page = 0
    while True:
        src = requests.get(DOMODEDOVO_URL + "?page={}".format(page)).text
        soup = BeautifulSoup(src, 'lxml')
        table = soup.find('table', id='table')
        # find() returns None when the table is missing; treat that the same
        # as a page without rows instead of failing with AttributeError.
        # The first two <tr> elements are skipped (presumably header rows --
        # TODO confirm against the live markup).
        rows = table.find_all('tr')[2:] if table is not None else []
        if not rows:
            break
        for row in rows:
            cols = row.find_all('td')
            statuses = cols[5].find_all('li')
            # find_all() always returns a list (possibly empty), never None,
            # so emptiness has to be tested via truthiness.
            if statuses:
                status = ', '.join(s.text.strip() for s in statuses)
            else:
                status = None
            flights.append({
                'time': cols[1].text.strip(),
                'flight_id': cols[3].div.a.text.strip(),
                'from_city': cols[4].div.text.strip(),
                'status': status,
            })
        page += 1
    return flights
def parse_sheremetyevo():
    """Scrape the Sheremetyevo timetable page and return its flights.

    The page is fetched once; rows are taken from the first
    ``<div class="table">`` element, skipping its first ``<tr>``
    (presumably a header row -- TODO confirm against the live markup).

    Returns:
        list[dict]: one dict per row with the keys ``'time'``,
        ``'flight_id'``, ``'from_city'``, ``'terminal'`` and ``'status'``.
        Cell text is returned as-is, without stripping whitespace.
    """
    response = requests.get(SHEREMETYEVO_URL)
    document = BeautifulSoup(response.text, 'lxml')
    board = document.find('div', class_='table')
    data_rows = board.find_all('tr')[1:]
    # Lazily split each row into its <td> cells.
    cells_per_row = (tr.find_all('td') for tr in data_rows)
    return [
        {
            # The first two cells are joined with a single space.
            'time': ' '.join((cells[0].text, cells[1].text)),
            'flight_id': cells[3].a.text,
            'from_city': cells[5].a.text,
            'terminal': cells[6].a.text,
            'status': cells[7].text,
        }
        for cells in cells_per_row
    ]
def parse_vnukovo():
    """Scrape the Vnukovo online timetable page and return its flights.

    The response is decoded as UTF-8, and rows are taken from the first
    ``<tbody>`` in the document.  The last ``<tr>`` of that body is dropped
    (presumably a non-data footer row -- TODO confirm against the live
    markup).

    Returns:
        list[dict]: one dict per row with the keys ``'time'``,
        ``'flight_id'``, ``'from_city'``, ``'terminal'`` and ``'status'``.
        Only ``'time'`` is whitespace-stripped; the other values are the raw
        cell text.
    """
    response = requests.get(VNUKOVO_URL)
    # Override the encoding before .text is read so the body decodes as UTF-8.
    response.encoding = 'utf-8'
    document = BeautifulSoup(response.text, 'lxml')
    body = document.find('tbody')
    data_rows = body.find_all('tr')[:-1]
    # Lazily split each row into its <td> cells.
    cells_per_row = (tr.find_all('td') for tr in data_rows)
    return [
        {
            'time': cells[0].text.strip(),
            'flight_id': cells[1].a.text,
            'from_city': cells[3].text,
            'terminal': cells[4].text,
            'status': cells[5].text,
        }
        for cells in cells_per_row
    ]