2019-05-24 19:42:52 +02:00
|
|
|
#!/usr/bin/env python3
|
2019-05-15 20:21:12 +02:00
|
|
|
import requests
|
|
|
|
import json
|
2019-06-22 01:36:35 +02:00
|
|
|
from typing import List, Dict
|
|
|
|
from io import BytesIO
|
2019-05-18 22:54:37 +02:00
|
|
|
# from pprint import pprint, pformat
|
2019-05-15 21:31:11 +02:00
|
|
|
import datetime
|
|
|
|
import pause
|
2019-05-15 21:42:37 +02:00
|
|
|
import sys
|
2019-05-16 14:11:29 +02:00
|
|
|
import mysql.connector
|
2019-05-18 22:54:37 +02:00
|
|
|
import configparser
|
2019-05-19 18:00:07 +02:00
|
|
|
import argparse
|
2019-06-22 01:36:35 +02:00
|
|
|
from cachecontrol import CacheControl
|
|
|
|
from cachecontrol.caches.file_cache import FileCache
|
2019-05-16 14:11:29 +02:00
|
|
|
|
2019-06-22 01:36:35 +02:00
|
|
|
# DDL for the departures table. A row is keyed by line, direction, station and
# scheduled departure time; it is executed once at start-up by main().
VRR_TABLE = """
CREATE TABLE IF NOT EXISTS vrr (
line_code varchar(9) not null,
direction_code varchar(1) not null,
station_id int not null,
orig_datetime datetime not null,
status enum('on_time', 'early', 'late', 'cancelled', 'no_data') not null,
delay_value int,
primary key (line_code, direction_code, station_id, orig_datetime));
"""
|
2019-06-22 01:36:35 +02:00
|
|
|
# DDL for the lookup table that maps a line code to its line number; it is
# executed once at start-up by main().
VRR_LINES_TABLE = """
CREATE TABLE IF NOT EXISTS vrr_lines (
line_code varchar(9) not null primary key,
line_number varchar(6) not null);
"""
|
2019-05-16 14:11:29 +02:00
|
|
|
|
2019-05-15 20:21:12 +02:00
|
|
|
|
|
|
|
class MOT:
    """Numeric codes for the modes of transport.

    make_request_data() validates requested modes against ``ALL_MODES`` and
    sends the codes, comma-separated, in the ``transport`` request field.
    """

    LONG_DISTANCE_TRAIN = 0
    REGIONAL_TRAIN = 1
    COMMUTER_TRAIN = 2
    UNDERGROUND_TRAIN = 3
    TRAM = 4
    BUS = 15
    ELEVATED_TRAIN = 6

    # Every known mode, in the order the codes are declared above.
    ALL_MODES = [
        LONG_DISTANCE_TRAIN,
        REGIONAL_TRAIN,
        COMMUTER_TRAIN,
        UNDERGROUND_TRAIN,
        TRAM,
        BUS,
        ELEVATED_TRAIN,
    ]
|
|
|
|
|
|
|
|
|
2019-06-22 01:36:35 +02:00
|
|
|
class Line:
    """A line, described by its line code and its name.

    :param code: the line code
    :param name: the line name
    """

    # Declared as annotations. The previous ``line_code = str`` bound the
    # ``str`` type object itself as a class attribute, which was never the
    # intended value.
    line_code: str
    line_name: str

    def __init__(self, code, name):
        self.line_code = code
        self.line_name = name
|
2019-05-24 19:41:09 +02:00
|
|
|
|
2019-06-22 01:36:35 +02:00
|
|
|
|
|
|
|
# Most recent HTTP reply; main() writes its body to fault.json when the crawl
# loop fails with a ValueError or TypeError.
last_reply = None

# Command-line interface: the single switch runs the setup routine and exits.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-s",
    "--setup",
    dest="setup",
    action="store_true",
    help="Run the setup routine.",
)
args = parser.parse_args()

if args.setup:
    # Imported here so the setup module is only loaded when it is requested.
    import setup

    setup.setup()
    sys.exit(0)
|
|
|
|
|
2019-05-18 22:54:37 +02:00
|
|
|
# Parse the configuration file:

cfg = configparser.ConfigParser()
try:
    with open('vrr.ini') as f:
        cfg.read_file(f)
except IOError:
    sys.exit("Could not open the configuration file.")
except configparser.Error:
    # Syntax errors in the file (e.g. a missing section header) end up here.
    sys.exit("There is something wrong with the configuration file. Exiting.")

# Maps each boolean option of the [crawl] section to the mode it enables.
_MODE_OPTIONS = (
    ('use_long_distance', MOT.LONG_DISTANCE_TRAIN),
    ('use_regional_trains', MOT.REGIONAL_TRAIN),
    ('use_commuter_trains', MOT.COMMUTER_TRAIN),
    ('use_trams', MOT.TRAM),
    ('use_buses', MOT.BUS),
    ('use_elevated_trains', MOT.ELEVATED_TRAIN),
)

try:
    db_config = {
        'user': cfg['db']['user'],
        'password': cfg['db']['pass'],
        'host': cfg['db']['host'],
        'database': cfg['db']['database'],
    }

    # A missing option yields None from getboolean() and leaves the mode off.
    USE_MODES = [mode for option, mode in _MODE_OPTIONS if cfg['crawl'].getboolean(option)]

    # getint() returns None for a missing option, so the absence can actually
    # be detected (plain item access would raise KeyError instead).
    USE_STATION_ID = cfg['crawl'].getint('station_id')
    if USE_STATION_ID is None:
        sys.exit("Please specify a station_id in the [crawl] section of vrr.ini")

    USE_LINES = cfg['crawl']['use_lines'].split(',')
except (KeyError, ValueError, IndexError, configparser.Error):
    # KeyError: missing section/option via mapping access.
    # ValueError: a value that is not a valid boolean/integer.
    # configparser.Error covers NoOptionError, NoSectionError and
    # interpolation problems.
    sys.exit("There is something wrong with the configuration file. Exiting.")
|
2019-05-18 22:54:37 +02:00
|
|
|
|
2019-05-15 20:21:12 +02:00
|
|
|
# Default value of make_request_data()'s ``lines`` parameter; that function
# compares against it by identity to decide that no line filter is wanted.
ALL_LINES = []

# Delay value that is_cancelled() compares against to detect a cancelled trip.
TRIP_CANCELLED = -9999

# Initialize Requests session
HTTP = CacheControl(
    requests.session(),
    cache=FileCache('.cache'),
)
|
2019-05-19 18:00:07 +02:00
|
|
|
|
2019-05-15 20:21:12 +02:00
|
|
|
|
2019-05-16 14:11:29 +02:00
|
|
|
def make_request_data(station_id: int, result_count: int = 8, modes: List = MOT.ALL_MODES,
                      lines: List[str] = ALL_LINES) -> dict:
    """
    Prepare a request data dictionary to put into get_data()

    :param station_id: an EFA station ID
    :param result_count: how many departures to return
    :param modes: which modes of transport to use
    :param lines: which lines to use (line identifiers look like 'provider:line ID: :direction ID',
                  e.g. 'rbg:70070: :H' for the Rheinbahn U70 to Düsseldorf Hbf). Pass ALL_LINES
                  (the default) or None to request all lines.
    :return: a dictionary with the data necessary to make a request to the Abfahrtsmonitor API.
    :raises ValueError: if ``modes`` contains a value that is not in MOT.ALL_MODES.

    The request data dictionary can have the following items:

    stationId: a numerical EFA station ID
    stationName: (optional) the station's name
    platformVisibility: (optional) ???
    transport: a comma-separated list of the modes of transport to be displayed. See the constants for values.
    useAllLines: display all available lines or filter them using the linesFilter
    linesFilter: a JSON array with the lines to be displayed. See lines_filter for the format
    optimizedForStation: (optional) ???
    rowCount: the amount of results to be returned
    refreshInterval: (optional) (display parameter) refresh rate in seconds for the browser UI
    distance: (optional) (display parameter) distance from the monitor to the stop
    marquee: (optional) (display parameter) make the path text scroll sideways
    sortBy: (optional) ???
    """
    # sanity check: do the modes exist?
    for mode in modes:
        if mode not in MOT.ALL_MODES:
            raise ValueError("Unknown transport mode: {0}".format(mode))

    request_data = {
        'stationId': int(station_id),
        'rowCount': result_count,
        'transport': ','.join(str(mode) for mode in modes),
    }

    # ALL_LINES is matched by identity on purpose: it is the "no filter"
    # default, whereas any other list is sent as an explicit filter.
    if lines is ALL_LINES or lines is None:
        request_data['useAllLines'] = 1
    else:
        # Spaces inside a line identifier are sent as '+'.
        lines_filter = [{'data': line.replace(' ', '+')} for line in lines]
        request_data['linesFilter'] = json.dumps(lines_filter)
        request_data['useAllLines'] = 0

    # finally, add the HTML naming
    return {"table[departure][{0}]".format(key): value for key, value in request_data.items()}
|
|
|
|
|
|
|
|
|
|
|
|
def get_data(request_data: dict, headers: dict = None, cookies: dict = None) -> dict:
    """
    POST the request data to the Abfahrtsmonitor backend and return the decoded JSON reply.

    :param request_data: form data as produced by make_request_data()
    :param headers: optional extra HTTP headers
    :param cookies: optional cookies to send along
    :return: the reply body parsed as JSON
    :raises requests.exceptions.HTTPError: if the server answers with an error status
    :raises ValueError: if the reply body is not valid JSON
    """
    # Without this declaration the assignment below would only create a local
    # variable and the module-level last_reply would stay None forever.
    global last_reply

    url = 'https://abfahrtsmonitor.vrr.de/backend/api/stations/table'
    reply = HTTP.post(url, data=request_data, headers=headers, cookies=cookies)
    # Remember the reply before validating it, so that a failure further down
    # can still be inspected.
    last_reply = reply
    reply.raise_for_status()
    print('Request time elapsed: ' + str(reply.elapsed), file=sys.stderr)
    return reply.json()
|
|
|
|
|
|
|
|
|
|
|
|
def is_cancelled(trip: dict) -> bool:
    """Return True if the trip's delay equals the TRIP_CANCELLED marker.

    A delay of None counts as "not cancelled".
    """
    delay = trip['delay']
    if delay is None:
        return False
    return int(delay) == TRIP_CANCELLED
|
|
|
|
|
|
|
|
|
|
|
|
def is_late(trip: dict) -> bool:
    """Return True if the trip has a delay greater than zero.

    A delay of None counts as "not late".
    """
    delay = trip['delay']
    if delay is None:
        return False
    return int(delay) > 0
|
|
|
|
|
|
|
|
|
|
|
|
def is_early(trip: dict) -> bool:
    """Return True if the trip has a negative delay that is not the cancellation marker.

    A delay of None counts as "not early".
    """
    delay = trip['delay']
    if delay is None:
        return False
    # TRIP_CANCELLED is itself negative, so it has to be excluded explicitly.
    return int(delay) < 0 and int(delay) != TRIP_CANCELLED
|
|
|
|
|
|
|
|
|
2019-05-16 14:11:29 +02:00
|
|
|
def is_on_time(trip: dict) -> bool:
    """Return True if the trip has a delay of exactly zero.

    A delay of None counts as "not on time", matching the None handling of
    is_late(), is_early() and is_cancelled(); previously it raised TypeError.
    """
    delay = trip['delay']
    if delay is None:
        return False
    return int(delay) == 0
|
|
|
|
|
|
|
|
|
|
|
|
def has_realtime(trip: dict) -> bool:
    """Return True if the trip carries a delay value.

    Both representations of "no data" are recognised: the empty string and
    None (the value fixup_data() substitutes for the empty string).
    """
    delay = trip['delay']
    return delay is not None and delay != ''
|
2019-05-15 20:21:12 +02:00
|
|
|
|
|
|
|
|
|
|
|
def fixup_data(d: dict) -> dict:
    """Replace every empty-string delay in ``d['departureData']`` with None.

    The dictionary is modified in place and returned for convenience.
    """
    without_delay = (entry for entry in d['departureData'] if entry['delay'] == '')
    for entry in without_delay:
        entry['delay'] = None
    return d
|
|
|
|
|
|
|
|
|
2019-06-22 01:36:35 +02:00
|
|
|
def make_linecode_table(data: dict) -> Dict[str, str]:
    """Build a mapping from line code to line number out of the departures.

    When a line code shows up again with a different line number, a notice is
    printed and the later line number wins.
    """
    table = {}
    for departure in data['departureData']:
        code = departure['lineCode']
        if code in table and departure['lineNumber'] != table[code]:
            message = 'lineCode {} already present as {}! Replacing it with {}.'
            print(message.format(code, table[code], departure['lineNumber']))
        table[code] = departure['lineNumber']
    return table
|
|
|
|
|
|
|
|
|
2019-05-19 21:33:00 +02:00
|
|
|
def print_trip(trip: dict, full_text: bool = False) -> None:
    """
    Print a one-line summary of a departure to stdout.

    :param trip: a departure entry; the keys 'orgHour', 'orgMinute', 'lineNumber',
                 'direction' and 'delay' are read
    :param full_text: print a full sentence instead of the compact column layout
    """
    if full_text:
        fmt = {
            "cancelled": "is cancelled.",
            "late": "is {} min late.",
            "early": "is {} min early.",
            "on_time": "is on time.",
            "no_rt": "has no real-time data.",
            "trip": "The {}:{} {} service to {} "
        }
    else:
        fmt = {
            "cancelled": "cancelled",
            "late": "+{} min",
            "early": "-{} min",
            "on_time": "on time",
            "no_rt": "n/a",
            "trip": "{}:{} {:<6}-> {:<38}"
        }

    def fmt_trip(fmt_str: str, fmt_args: tuple = ()) -> str:
        # The trip prefix consumes the first four fields; the status suffix
        # consumes whatever is passed in fmt_args.
        return (fmt['trip'] + fmt_str).format(trip['orgHour'], trip['orgMinute'], trip['lineNumber'],
                                              trip['direction'], *fmt_args)

    if not has_realtime(trip):
        print(fmt_trip(fmt['no_rt']))
    elif is_cancelled(trip):
        print(fmt_trip(fmt['cancelled']))
    elif is_late(trip):
        print(fmt_trip(fmt['late'], (trip['delay'],)))
    elif is_early(trip):
        # The delay may be a string, so convert before negating; a bare unary
        # minus on a str raises TypeError.
        print(fmt_trip(fmt['early'], (-int(trip['delay']),)))
    elif is_on_time(trip):
        print(fmt_trip(fmt['on_time']))
|
2019-05-15 20:21:12 +02:00
|
|
|
|
|
|
|
|
2019-05-15 21:31:11 +02:00
|
|
|
def get_next_refresh(data: dict):
    """
    Work out the UNIX timestamp at which the departures should be fetched again.

    The earliest future scheduled or actual departure time that is more than
    30 seconds away decides: if it is more than 5 minutes away the result is
    "now + 5 minutes", otherwise that time itself. Without such a time (or
    with ``data`` being None) the result is "now + 90 seconds".

    :param data: the reply dictionary containing 'departureData', or None
    :return: a UNIX timestamp (int or float)
    """
    now = datetime.datetime.now

    def remaining(stamp):
        # Time left from this very moment until the given UNIX timestamp.
        return datetime.datetime.fromtimestamp(stamp) - now()

    if data is not None:
        candidates = []
        for trip in data['departureData']:
            candidates.extend((trip['orgFullTime'], trip['fullTime']))

        upcoming = sorted(int(stamp) for stamp in candidates if int(stamp) > now().timestamp())
        for stamp in upcoming:
            if remaining(stamp) <= datetime.timedelta(seconds=30):
                continue  # too close to be worth waiting for
            if remaining(stamp) > datetime.timedelta(minutes=5):
                return (now() + datetime.timedelta(minutes=5)).timestamp()
            return stamp

    return (now() + datetime.timedelta(seconds=90)).timestamp()
|
2019-05-15 21:31:11 +02:00
|
|
|
|
|
|
|
|
2019-05-16 14:11:29 +02:00
|
|
|
def _make_delay_value_for_sql(value: str or int or None) -> int or None:
|
|
|
|
if value == '':
|
|
|
|
value = None
|
|
|
|
else:
|
|
|
|
value = int(value)
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
|
|
def _make_status_value_for_sql(trip: dict) -> str:
    """
    Classify a trip into one of the values of the ``status`` enum column.

    :param trip: a departure entry
    :return: 'no_data', 'late', 'cancelled', 'early' or 'on_time'
    :raises ValueError: if real-time data is present but no classifier matches
    """
    if not has_realtime(trip):
        return 'no_data'

    # Checked in this order; the first matching predicate decides.
    classifiers = (
        (is_late, 'late'),
        (is_cancelled, 'cancelled'),
        (is_early, 'early'),
        (is_on_time, 'on_time'),
    )
    for matches, label in classifiers:
        if matches(trip):
            return label
    raise ValueError("unknown delay value")
|
|
|
|
|
|
|
|
|
2019-05-18 22:54:37 +02:00
|
|
|
def update(station_id: int, lines: List[str]) -> dict or None:
    """
    Fetch the next departures for a station, print them and return the reply.

    :param station_id: the station to query
    :param lines: the line identifiers to restrict the query to
    :return: the decoded reply, or None if the request failed
    """
    # NOTE(review): no ``modes`` argument is passed, so make_request_data()
    # falls back to its default of all modes - confirm whether the configured
    # modes were meant to be forwarded here.
    request_data = make_request_data(station_id, 8, lines=lines)

    try:
        reply_data = get_data(request_data)
    except requests.exceptions.RequestException as e:
        # RequestException is the common base of HTTPError and ConnectionError
        # and also covers timeouts and broken transfers, so a transient
        # network problem skips one cycle instead of ending the crawl loop.
        print("Could not get the data: " + str(e), file=sys.stderr)
        return None

    for trip in reply_data['departureData']:
        print_trip(trip)
    return reply_data
|
|
|
|
|
|
|
|
|
2019-06-22 01:36:35 +02:00
|
|
|
def wait(station_id: int):
    """
    Crawl loop: fetch the departures, store them, sleep until the next refresh, repeat.

    The function never returns; it only ends through an exception.

    :param station_id: the station to crawl
    """
    connection = mysql.connector.MySQLConnection(**db_config)
    cursor = connection.cursor()

    departure_sql = ('REPLACE INTO vrr '
                     '(line_code, direction_code, station_id, orig_datetime, status, delay_value)'
                     ' VALUES (%s, %s, %s, '
                     'from_unixtime(%s), %s, %s)')
    line_sql = ('REPLACE INTO vrr_lines'
                '(line_code, line_number)'
                'VALUES (%s, %s)')

    while True:
        data = update(station_id, USE_LINES)

        # update() returns None when the request failed; nothing is stored then.
        if data is not None:
            line_codes = make_linecode_table(data)

            for departure in data['departureData']:
                row = (
                    departure['lineCode'],
                    departure['directionCode'],
                    station_id,
                    departure['orgFullTime'],
                    _make_status_value_for_sql(departure),
                    _make_delay_value_for_sql(departure['delay']),
                )
                cursor.execute(departure_sql, row)

            for code, number in line_codes.items():
                cursor.execute(line_sql, (code, number))

            connection.commit()

        wake_up = get_next_refresh(data)
        print("Sleeping until " + datetime.datetime.fromtimestamp(wake_up).isoformat(), file=sys.stderr)
        pause.until(wake_up)
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """
    Create the database tables if necessary, then run the crawl loop.

    If the loop fails with a ValueError or TypeError, fault.json is written
    (containing the body of ``last_reply`` when one is recorded, otherwise
    left empty) and the exception is raised again.
    """
    connection = mysql.connector.connect(**db_config)
    ddl_cursor = connection.cursor()
    for statement in (VRR_TABLE, VRR_LINES_TABLE):
        ddl_cursor.execute(statement)
    ddl_cursor.close()
    connection.close()

    try:
        wait(USE_STATION_ID)
    except (ValueError, TypeError) as e:
        with open("fault.json", "wb") as dump:
            if last_reply is not None:
                # Response.content is the raw body as bytes, hence the binary mode.
                dump.write(last_reply.content)
        raise e
|
2019-05-15 21:31:11 +02:00
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: the crawler starts as soon as this module is executed.
main()
|