#!/usr/bin/env python3
# pylint: disable=missing-docstring,invalid-name
import random
import re
from collections import OrderedDict
from collections import namedtuple
from datetime import date as DTDate
from datetime import datetime as DTDateTime
from datetime import timedelta as DTTimeDelta
from sys import stderr
from typing import BinaryIO, Dict, List, Optional, Union

import mechanize as m
import PyPDF3
from dateutil.relativedelta import FR, relativedelta

# One parsed table row: currency abbreviation, descriptive name, the two rate
# columns (in the order they appear in the PDF) and the date of the sheet.
Rate = namedtuple('Rate', ['abbr', 'full_name', 'ask', 'bid', 'date'])

# Constants
# Values submitted for the 'creditCardsRadio' form control (see get_fileio).
CARD_MASTERCARD = ['0']
CARD_VISA = ['1']


class CurrencyResult:
    """Container for everything parsed out of one rate sheet."""

    def __init__(self):
        # Mapping of currency abbreviation -> Rate; starts out empty and is
        # replaced by get_results_from_text().
        self.rates: Dict[str, Rate] = {}
        # Card type string as printed in the sheet header.
        self.card_type: str = ''
        # Date printed in the sheet header; None until parsed.
        self.date: Optional[DTDate] = None


def _parse_rate(text: str) -> Optional[float]:
    """Convert one rate column into a float.

    Returns None when the column starts with the marker
    'Keine Kursdaten vorhanden' (German: "no rate data available") or when
    the text cannot be parsed as a number.
    """
    if re.match('Keine Kursdaten vorhanden', text):
        return None
    # Strip padding spaces and turn the decimal comma into a decimal point so
    # float() accepts the value.
    normalized = text.strip(' ').replace(',', '.')
    try:
        return float(normalized)
    except ValueError:
        return None


def _parse_card_type(text: str) -> str:
    """Extract the card type from a 'label: value' header line.

    Used for validating metadata from the PDF against the request data.
    Raises IndexError when the line contains no colon.
    """
    value = text.split(':')[1]
    # Drop surrounding double quotes and spaces.
    return value.strip('" ')


def _parse_date(text: str) -> DTDate:
    """Extract the date from a 'label: DD.MM.YYYY' header line.

    Used for validating metadata from the PDF against the request data.
    Raises IndexError when the line contains no ': ' separator and ValueError
    when the value is not in '%d.%m.%Y' format.
    """
    value = text.split(': ')[1].rstrip()
    return DTDateTime.strptime(value, '%d.%m.%Y').date()


def _array_remove_empty(obj: list) -> List[str]:
    """Remove every empty-string element from *obj* in place and return it."""
    # Slice assignment keeps the caller's list object (in-place contract) while
    # filtering in a single pass instead of repeated list.remove() scans.
    obj[:] = [item for item in obj if item != '']
    return obj


def _parse_line(line: str, ctx: CurrencyResult) -> Optional[Rate]:
    """Parse one table row of the rate sheet into a Rate.

    The row is expected to hold at least three columns: the currency name
    ('ABBR (Full name)') followed by two rate columns. The Rate's date is
    taken from *ctx*.

    Returns None for rows with fewer than three columns (for example blank
    lines) instead of raising IndexError.
    """
    # 3 spaces = minimum separation in PDF
    # NOTE(review): the separator literal was reconstructed as three spaces
    # from the comment above; confirm against the real PDF text.
    columns = _array_remove_empty([col.strip() for col in line.split("   ")])
    if len(columns) < 3:
        return None

    # process currency name: abbreviation, then the description in parentheses
    abbr, _, full_name = columns[0].partition(" ")
    return Rate(
        abbr=abbr,
        full_name=full_name.strip("()"),
        ask=_parse_rate(columns[1]),
        bid=_parse_rate(columns[2]),
        date=ctx.date,
    )


def get_results_from_text(text: str, currency: str = None,
                          quiet: bool = False) -> CurrencyResult:
    """Parse the extracted text of a rate sheet.

    Layout relied upon (0-based line index): lines 0-1 are skipped, line 2
    holds the card type, line 3 the date, lines 4-7 are skipped and the rate
    rows start at line 8.

    Args:
        text: Full extracted text of the sheet.
        currency: Optional filter. It is used as a regular-expression prefix
            ("^" + currency), so only rows starting with it are parsed.
        quiet: Accepted for signature symmetry with the other entry points;
            not used by this function.

    Returns:
        A CurrencyResult whose ``rates`` is an OrderedDict keyed by currency
        abbreviation, in sheet order.

    Raises:
        IndexError: If the text is too short to contain the header lines.
    """
    lines = text.splitlines()
    result = CurrencyResult()

    # Header metadata (after the two intro lines).
    result.card_type = _parse_card_type(lines[2])
    result.date = _parse_date(lines[3])

    # Skip four more lines; now the rates begin.
    rate_lines = lines[8:]
    if currency is not None:
        pattern = re.compile("^" + currency)
        rate_lines = [line for line in rate_lines if pattern.match(line)]

    rates = OrderedDict()
    for line in rate_lines:
        rate = _parse_line(line, result)
        # Rows that do not look like a rate row are skipped.
        if rate is not None:
            rates[rate.abbr] = rate

    result.rates = rates
    return result


def get_results_from_pdf(buf: Union[BinaryIO, str], currency: str = None,
                         quiet: bool = False) -> CurrencyResult:
    """Extract the text of a rate-sheet PDF and parse it.

    Args:
        buf: Whatever PyPDF3.PdfFileReader accepts as its source.
        currency: Optional filter, passed through to get_results_from_text().
        quiet: When False, progress messages are written to stderr.

    Returns:
        The CurrencyResult produced by get_results_from_text().
    """
    if not quiet:
        print('Parsing data... ', end='', file=stderr)

    reader = PyPDF3.PdfFileReader(buf)
    # NOTE(review): the range stops one page short, so the last page is never
    # read (and a single-page PDF yields no text). Preserved from the original
    # because it is unclear whether the last page is skipped on purpose -
    # confirm against a real sheet.
    page_count = reader.getNumPages() - 1
    text = ''.join(reader.getPage(num).extractText() for num in range(page_count))

    if not quiet:
        print('Done.', file=stderr)
    return get_results_from_text(text, currency=currency, quiet=quiet)


def get_fileio(date: DTDate, card_type: List[str] = CARD_VISA,
               quiet: bool = False) -> BinaryIO:  # pylint: disable=dangerous-default-value
    """Download the rate-sheet PDF for *date* and return it opened for reading.

    Args:
        date: Day whose rates are requested.
        card_type: Value for the 'creditCardsRadio' form control, i.e.
            CARD_VISA or CARD_MASTERCARD.
        quiet: When False, progress messages are written to stderr.

    Returns:
        An open binary file object with the PDF as contents. The caller is
        responsible for closing it.
    """
    # pylint: disable=no-member
    # mechanize.Browser has some lazy-loading methods that pylint doesn't see
    if not quiet:
        print('Downloading rates for ' + date.strftime('%Y-%m-%d') + '... ',
              end='', file=stderr)

    browser = m.Browser()
    # Firefox 64 User-Agent
    # ua = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0'
    # browser.set_header('User-Agent', ua)

    # Ignore robots.txt
    browser.set_handle_robots(False)

    # Debugging flags
    # browser.set_debug_http(True)
    # browser.set_debug_redirects(True)
    # browser.set_debug_responses(True)

    # PDF URL
    browser.open('https://online.firstdata.com/CurrencyCalculator/fremdwaehrungskurse/pdf')
    form = browser.forms()[0]

    # This must be done because the options can't be changed otherwise
    form.set_all_readonly(False)

    # Configure form. A copy of card_type is handed over so the shared default
    # list (CARD_VISA) can never be altered through the form object.
    form['creditCardsRadio'] = list(card_type)
    form['selectedDatesString'] = date.strftime('%Y%m%d') + ','

    # Retrieve file using button click; the button is 115x21 pixels in size.
    # The API apparently doesn't like the max values
    request = form.click(
        name='submitButton',
        coord=(random.randint(1, 114), random.randint(1, 20)),
    )
    request.add_header('Accept', '*/*')
    retrieved = browser.retrieve(request)

    if not quiet:
        print(' Done.', file=stderr)

    # First element of the retrieve() result is the local file name.
    return open(retrieved[0], 'rb')


def get_date(today: Optional[DTDate] = None) -> DTDate:
    """Return the most recent day for which rates should be requested.

    Args:
        today: Reference day; defaults to the current local date. It is read
            once so the result is consistent even around midnight.

    Returns:
        The previous Friday when *today* is a Sunday or Monday, otherwise the
        day before *today*.
    """
    if today is None:
        today = DTDate.today()

    # For Sunday and Monday, use Friday's data; Saturday and Sunday are
    # completely null
    if today.weekday() in (6, 0):
        return today + relativedelta(weekday=FR(-1))

    # For all other days, the previous day is fine
    return today - DTTimeDelta(days=1)


def mk_filename(date: DTDate, card_type: List[str]) -> str:
    """Build the local PDF file name for *date* and *card_type*.

    Returns '<ISO date>_MC.pdf' for CARD_MASTERCARD and '<ISO date>_VISA.pdf'
    for CARD_VISA.

    Raises:
        TypeError: If *card_type* is neither of the two known constants.
    """
    # List[str] is used to avoid introducing a class for just this
    if card_type == CARD_MASTERCARD:
        suffix = '_MC.pdf'
    elif card_type == CARD_VISA:
        suffix = '_VISA.pdf'
    else:
        raise TypeError("not a valid card type")
    return date.isoformat() + suffix