#!/usr/bin/env python3
# pylint: disable=missing-docstring,invalid-name
"""Download and parse the First Data foreign-currency exchange rate PDF.

The PDF is fetched from the First Data currency calculator form
(see ``get_fileio``), its text is extracted with PyPDF3 and the rate
table is turned into ``Rate`` tuples (see ``get_results_from_text``).
"""
import random
import re
from collections import namedtuple
from datetime import date as DTDate
from datetime import timedelta as DTTimeDelta
from datetime import datetime as DTDateTime
from typing import BinaryIO, List, Optional, Union

import mechanize as m
import PyPDF3
from dateutil.relativedelta import FR, relativedelta

# One row of the rate table: currency abbreviation, full currency name and
# the two rate columns (each a float, or None when no rate could be parsed).
Rate = namedtuple('Rate', ['abbr', 'full_name', 'ask', 'bid'])

# Constants
# Values assigned to the 'creditCardsRadio' form control in get_fileio().
CARD_MASTERCARD = ['0']
CARD_VISA = ['1']

# Minimum column separation in the text extracted from the PDF.
_COLUMN_SEPARATOR = "   "


class CurrencyResult():
    """Container for the data parsed from one rates document."""

    def __init__(self):
        # NOTE: initialised as an empty list, but get_results_from_text()
        # replaces it with a dict mapping currency abbreviation -> Rate.
        self.rates = list()
        # Card type string as printed in the document header.
        self.card_type = str()
        # datetime.date the rates are valid for; None until parsed.
        self.date = None


def _parse_rate(text: str) -> Optional[float]:
    """Convert one rate column to a float.

    The decimal comma is replaced by a dot before conversion.

    Returns:
        The rate, or None when the column starts with the
        'Keine Kursdaten vorhanden' placeholder or is not a valid number.
    """
    if re.match('Keine Kursdaten vorhanden', text):
        return None
    try:
        return float(text.strip(' ').replace(',', '.'))
    except ValueError:
        # NOTE(review): a value with a '.' thousands separator (e.g.
        # '1.234,56') also ends up here and is reported as None.
        return None


def _parse_card_type(text: str) -> str:
    """Return the text after the first ':' stripped of quotes and spaces.

    Raises:
        IndexError: if ``text`` contains no ':'.
    """
    value = text.split(':')[1]
    return value.strip('" ')


def _parse_date(text: str) -> DTDate:
    """Parse the 'DD.MM.YYYY' date that follows ': ' in ``text``.

    Raises:
        IndexError: if ``text`` contains no ': '.
        ValueError: if the date is not in 'DD.MM.YYYY' format.
    """
    value = text.split(': ')[1].rstrip()
    return DTDateTime.strptime(value, '%d.%m.%Y').date()


def _array_remove_empty(obj: list) -> List[str]:
    """Remove every empty string from ``obj`` in place and return ``obj``."""
    # Slice assignment keeps the in-place contract (callers holding a
    # reference to the list see the change) in a single pass.
    obj[:] = [item for item in obj if item != '']
    return obj


def _parse_line(line: str) -> Optional[Rate]:
    """Parse one line of the rate table.

    A line consists of three columns separated by at least three spaces:
    ``ABBR (Full name)``, then the two rate columns.

    Returns:
        The parsed Rate, or None when the line does not have the expected
        columns (for example a blank line).
    """
    columns = _array_remove_empty(line.split(_COLUMN_SEPARATOR))
    if len(columns) < 3:
        return None

    # process currency name: "ABBR (Full name)"
    names = columns[0].split(" ", 1)
    if len(names) < 2:
        return None

    return Rate(
        abbr=names[0],
        full_name=names[1].strip("()"),
        ask=_parse_rate(columns[1]),
        bid=_parse_rate(columns[2])
    )


def get_results_from_text(text: str, currency: Optional[str] = None) -> CurrencyResult:
    """Parse the text extracted from a rates PDF.

    Expected layout: two intro lines, the card type line, the date line,
    four more lines that are skipped, then one rate per line.

    Args:
        text: Extracted document text.
        currency: Optional filter. It is used as a regular expression
            anchored at the start of the line, so only matching lines are
            parsed. With None every rate line is parsed.

    Returns:
        A CurrencyResult whose ``rates`` is a dict mapping the currency
        abbreviation to its Rate. Lines that cannot be parsed as a rate
        are skipped.

    Raises:
        IndexError: if the header is shorter than expected or the card
            type / date line lacks its separator.
        ValueError: if the date line does not hold a 'DD.MM.YYYY' date.
    """
    result = CurrencyResult()
    lines = text.splitlines()

    # lines[0:2] are intro lines; then card type and date
    result.card_type = _parse_card_type(lines[2])
    result.date = _parse_date(lines[3])

    # lines[4:8] are skipped as well; the rates begin at index 8
    pattern = None if currency is None else re.compile("^" + currency)
    rates = {}
    for line in lines[8:]:
        if pattern is not None and not pattern.match(line):
            continue
        line_result = _parse_line(line)
        if line_result is None:
            continue
        rates[line_result.abbr] = line_result

    result.rates = rates
    return result


def get_results_from_pdf(buf: Union[BinaryIO, str], currency: Optional[str] = None) -> CurrencyResult:
    """Extract the text of a rates PDF and parse it.

    Args:
        buf: Whatever ``PyPDF3.PdfFileReader`` accepts as its source.
        currency: Passed through to get_results_from_text().

    Returns:
        The parsed CurrencyResult.
    """
    print('Parsing data... ', end='')
    reader = PyPDF3.PdfFileReader(buf)

    # NOTE(review): the last page is never read (range stops at
    # getNumPages() - 1). Kept as in the original because it may be
    # deliberate; confirm against a real document.
    pages = [reader.getPage(num) for num in range(0, reader.getNumPages() - 1)]
    text = ''.join(page.extractText() for page in pages)

    print('Done.')
    return get_results_from_text(text, currency=currency)


def get_fileio(date: DTDate, card_type: List[str] = CARD_VISA) -> BinaryIO:  # pylint: disable=dangerous-default-value
    """Download the rates PDF for ``date`` and return it as an open file.

    Args:
        date: Day to request rates for.
        card_type: Value for the 'creditCardsRadio' form control
            (CARD_VISA or CARD_MASTERCARD). The default list is only read,
            never modified.

    Returns:
        The downloaded file opened in binary read mode. The caller is
        responsible for closing it.
    """
    # pylint: disable=no-member
    # mechanize.Browser has some lazy-loading methods that pylint doesn't see
    print('Downloading rates for ' + date.strftime('%Y-%m-%d') + '... ', end='')
    b = m.Browser()

    # Firefox 64 User-Agent
    # ua = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0'
    # b.set_header('User-Agent', ua)
    # Ignore robots.txt
    # b.set_handle_robots(False)
    # Debugging flags
    # b.set_debug_http(True)
    # b.set_debug_redirects(True)
    # b.set_debug_responses(True)

    # PDF URL
    b.open('https://misc.firstdata.eu/CurrencyCalculator/fremdwaehrungskurse/pdf')
    fm = b.forms()
    fm_i = fm[0]
    # Allow assigning to controls the page marks as read-only.
    fm_i.set_all_readonly(False)

    # Configure form
    fm_i['creditCardsRadio'] = card_type  # selected card type
    fm_i['selectedDatesString'] = date.strftime('%Y%m%d') + ','

    # Retrieve file using button click; random click coordinates are sent
    # along with the submit (presumably to resemble a real click).
    rq = fm_i.click(name='submitButton',
                    coord=(random.randint(1, 119), random.randint(1, 20)))
    rq.add_header('Accept', '*/*')
    rp = b.retrieve(rq)
    print(' Done.')
    # The first element of retrieve()'s result is used as the local path.
    return open(rp[0], 'rb')


def get_date() -> DTDate:
    """Return the default date to request rates for.

    On Sunday and Monday this is the most recent Friday; on every other
    day it is yesterday.
    """
    # Read the clock once so all comparisons use the same day.
    today = DTDate.today()
    if today.weekday() in [6, 0]:  # Sunday, Monday
        return today + relativedelta(weekday=FR(-1))
    return today - DTTimeDelta(1)