"""Scrapers for the live flight boards of three airport websites.

Each ``parse_*`` function downloads a timetable page, reads its HTML table
and returns a list of dicts, one per table row.
"""

import requests
from bs4 import BeautifulSoup

DOMODEDOVO_URL = 'http://www.domodedovo.ru/passengers/flight/live-board/'
SHEREMETYEVO_URL = 'http://www.svo.aero/ru/timetable/today/#arrival'
VNUKOVO_URL = 'http://www.vnukovo.ru/flights/online-timetable/'

# Seconds to wait for a server before giving up. Without a timeout
# ``requests.get`` can block forever on a stalled connection.
REQUEST_TIMEOUT = 30


def _fetch_soup(url, params=None, encoding=None):
    """Download *url* and return its body parsed with the lxml parser.

    Args:
        url: Page address to request.
        params: Optional query-string parameters passed to ``requests.get``.
        encoding: If given, overrides the encoding requests would otherwise
            use when decoding the response body.

    Returns:
        A ``BeautifulSoup`` document built from the response text.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the server does not answer within ``REQUEST_TIMEOUT`` seconds.
    """
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    if encoding is not None:
        response.encoding = encoding
    return BeautifulSoup(response.text, 'lxml')


def parse_domodedovo():
    """Collect flight rows from every page of the Domodedovo live board.

    Pages are requested with ``?page=0``, ``?page=1``, ... until a page has
    no table or no data rows.

    Returns:
        A list of dicts with keys ``time``, ``flight_id``, ``from_city`` and
        ``status``. ``status`` is the comma-joined text of the status list
        items, or ``None`` when the row has none.
    """
    flights = []
    page = 0
    while True:
        soup = _fetch_soup(DOMODEDOVO_URL, params={'page': page})
        table = soup.find('table', id='table')
        if table is None:
            # No table on this page: treat it as the end of the listing
            # instead of crashing on ``None.find_all``.
            break
        # The first two <tr> elements are skipped (presumably header rows).
        rows = table.find_all('tr')[2:]
        if not rows:
            break
        for row in rows:
            cols = row.find_all('td')
            statuses = cols[5].find_all('li')
            # find_all() returns a (possibly empty) list, never None, so the
            # "no status" case has to be detected by emptiness.
            if statuses:
                status = ', '.join(s.text.strip() for s in statuses)
            else:
                status = None
            flights.append({
                'time': cols[1].text.strip(),
                'flight_id': cols[3].div.a.text.strip(),
                'from_city': cols[4].div.text.strip(),
                'status': status,
            })
        page += 1
    return flights


def parse_sheremetyevo():
    """Collect flight rows from the Sheremetyevo timetable page.

    Returns:
        A list of dicts with keys ``time``, ``flight_id``, ``from_city``,
        ``terminal`` and ``status``. ``time`` is the text of the first two
        cells joined by a space. Cell text is returned as found in the page
        (not stripped). The list is empty when the page has no table.
    """
    flights = []
    soup = _fetch_soup(SHEREMETYEVO_URL)
    table = soup.find('div', class_='table')
    if table is None:
        return flights
    # The first <tr> is skipped (presumably the header row).
    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        flights.append({
            'time': ' '.join((cols[0].text, cols[1].text)),
            'flight_id': cols[3].a.text,
            'from_city': cols[5].a.text,
            'terminal': cols[6].a.text,
            'status': cols[7].text,
        })
    return flights


def parse_vnukovo():
    """Collect flight rows from the Vnukovo online timetable page.

    The response is decoded as UTF-8 regardless of what the server declares.

    Returns:
        A list of dicts with keys ``time``, ``flight_id``, ``from_city``,
        ``terminal`` and ``status``. Only ``time`` is stripped of surrounding
        whitespace. The list is empty when the page has no ``<tbody>``.
    """
    flights = []
    soup = _fetch_soup(VNUKOVO_URL, encoding='utf-8')
    table = soup.find('tbody')
    if table is None:
        return flights
    # The last <tr> of the body is skipped (it is not treated as a flight).
    for row in table.find_all('tr')[:-1]:
        cols = row.find_all('td')
        flights.append({
            'time': cols[0].text.strip(),
            'flight_id': cols[1].a.text,
            'from_city': cols[3].text,
            'terminal': cols[4].text,
            'status': cols[5].text,
        })
    return flights