#!/usr/bin/env python3
# pylint: disable=missing-docstring,invalid-name
"""Download and parse the First Data foreign-currency exchange-rate PDF.

The module offers three layers:

* ``get_fileio``            -- download the PDF for one date / card type,
* ``get_results_from_pdf``  -- extract the text of a PDF and parse it,
* ``get_results_from_text`` -- parse already extracted text (pure stdlib).
"""
#import argparse
from collections import namedtuple
import datetime
import random
import re
from typing import BinaryIO, List, Optional, Union
from datetime import date as DTDate
from datetime import timedelta as DTTimeDelta

# The third-party packages are only needed for downloading / PDF extraction.
# Guarding the imports keeps the text parser usable without them; the
# functions that need them raise a clear ImportError when called.
try:
    import PyPDF3
except ImportError:  # pragma: no cover - depends on the environment
    PyPDF3 = None

try:
    import mechanize as m
except ImportError:  # pragma: no cover - depends on the environment
    m = None

Rate = namedtuple('Rate', ['abbr', 'full_name', 'ask', 'bid'])

# Constants
CARD_MASTERCARD = ['0']
CARD_VISA = ['1']

# Columns of a rate line are separated by at least three spaces in the PDF
# text.  Built with a multiplication so the width survives reformatting.
_FIELD_SEPARATOR = " " * 3


class CurrencyResult():
    """Parsed content of one rate sheet."""

    def __init__(self):
        # Mapping of currency abbreviation -> Rate (filled by
        # get_results_from_text, which always assigns a dict).
        self.rates = {}
        # Card type as printed in the sheet header.
        self.card_type = str()
        # datetime.date the rates are valid for, or None if not parsed yet.
        self.date = None


def _select_date():
    """Return the date whose rates should be requested.

    The original body called ``datetime.date()`` without arguments (a
    TypeError) and never returned a value; it now delegates to ``get_date``,
    which implements the working version of the same decision.
    """
    return get_date()


def _parse_rate(text: str) -> Optional[float]:
    """Convert one rate column to a float.

    Returns None when the column starts with the "no rate data available"
    marker or is not a number.  A decimal comma is accepted.
    """
    if re.match('Keine Kursdaten vorhanden', text):
        return None
    try:
        return float(text.strip(' ').replace(',', '.'))
    except ValueError:
        return None


def _parse_card_type(text: str) -> str:
    """Return the value after the first colon, without quotes and spaces."""
    text = text.split(':')[1]
    return text.strip('" ')


def _parse_date(text: str) -> datetime.date:
    """Parse a ``<label>: DD.MM.YYYY`` header line into a date.

    Raises:
        IndexError: if the line contains no ``': '`` separator.
        ValueError: if the value is not a ``DD.MM.YYYY`` date.
    """
    text = text.split(': ')[1].rstrip()
    return datetime.datetime.strptime(text, '%d.%m.%Y').date()


def _array_remove_empty(obj: list) -> List[str]:
    """Remove every empty string from *obj* in place and return *obj*."""
    obj[:] = [item for item in obj if item != '']
    return obj


def _parse_line(line: str) -> Optional[Rate]:
    """Parse one ``ABBR (Full name)   ask   bid`` line.

    Returns None for lines that do not have that shape (blank lines, page
    headers and the like) instead of raising.
    """
    # Strip each column: a run of spaces that is not a multiple of three
    # leaves stray blanks attached to the neighbouring column.
    fields = [field.strip() for field in line.split(_FIELD_SEPARATOR)]
    fields = _array_remove_empty(fields)
    if len(fields) < 3:
        return None

    # First column: abbreviation, one space, full name in parentheses.
    names = fields[0].split(" ", 1)
    if len(names) < 2:
        return None

    return Rate(
        abbr=names[0],
        full_name=names[1].strip().strip("()"),
        ask=_parse_rate(fields[1]),
        bid=_parse_rate(fields[2]),
    )


def get_results_from_text(text: str, currency: Optional[str] = None) -> CurrencyResult:
    """Parse the extracted text of a rate sheet.

    Expected layout: two intro lines, the card-type line, the date line,
    four further header lines, then one line per currency.

    Args:
        text: Text of the rate sheet.
        currency: Optional regular expression; only lines that match it at
            their start are parsed.  None parses every line.

    Returns:
        A CurrencyResult whose ``rates`` maps abbreviation -> Rate.

    Raises:
        IndexError: if the text is too short to contain the header lines.
        ValueError: if the date line does not hold a ``DD.MM.YYYY`` date.
    """
    lines = text.splitlines()
    result = CurrencyResult()

    # lines[0:2] intro, lines[2] card type, lines[3] date, lines[4:8] header.
    result.card_type = _parse_card_type(lines[2])
    result.date = _parse_date(lines[3])

    pattern = None if currency is None else re.compile("^" + currency)
    rates = {}
    for line in lines[8:]:
        if pattern is not None and not pattern.match(line):
            continue
        rate = _parse_line(line)
        if rate is not None:  # skip anything that is not a rate line
            rates[rate.abbr] = rate

    result.rates = rates
    return result


def get_results_from_pdf(buf: Union[BinaryIO, str], currency: Optional[str] = None) -> CurrencyResult:
    """Extract the text of a rate PDF and parse it.

    Args:
        buf: Open binary file object or path accepted by PdfFileReader.
        currency: Passed through to ``get_results_from_text``.

    Raises:
        ImportError: if PyPDF3 is not installed.
    """
    if PyPDF3 is None:
        raise ImportError('PyPDF3 is required to read rate PDFs')

    reader = PyPDF3.PdfFileReader(buf)
    # Read *all* pages (the previous range stopped one page early) and keep
    # a line break between pages so lines cannot run into each other.
    text = "\n".join(
        reader.getPage(num).extractText()
        for num in range(reader.getNumPages())
    )
    return get_results_from_text(text, currency=currency)


def get_fileio(date: DTDate, card_type: List[str] = CARD_VISA) -> BinaryIO:  # pylint: disable=dangerous-default-value
    """Download the rate PDF for *date* and return it opened for reading.

    Args:
        date: Day to request.
        card_type: Value for the card-type radio button, e.g. ``CARD_VISA``
            or ``CARD_MASTERCARD``.  The default list is never modified.

    Returns:
        The downloaded file opened in binary mode; the caller must close it.

    Raises:
        ImportError: if mechanize is not installed.
    """
    if m is None:
        raise ImportError('mechanize is required to download rate PDFs')

    print('Downloading rates for ' + date.strftime('%Y-%m-%d') + '... ', end='')
    b = m.Browser()

    # Firefox 64 User-Agent
    # ua = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0'
    # b.set_header('User-Agent', ua)
    # Ignore robots.txt
    # b.set_handle_robots(False)
    # Debugging flags
    # b.set_debug_http(True)
    # b.set_debug_redirects(True)
    # b.set_debug_responses(True)

    # PDF URL
    b.open('https://misc.firstdata.eu/CurrencyCalculator/fremdwaehrungskurse/pdf')
    # list() makes the indexing work whether forms() yields a list or an
    # iterator.
    fm_i = list(b.forms())[0]
    fm_i.set_all_readonly(False)

    # Configure form
    fm_i['creditCardsRadio'] = card_type  # selected card type
    fm_i['selectedDatesString'] = str(date.strftime('%Y%m%d') + ',')

    # Retrieve file using button click (random coordinates for the click)
    rq = fm_i.click(name='submitButton',
                    coord=(random.randint(1, 119), random.randint(1, 20)))
    rq.add_header('Accept', '*/*')
    rp = b.retrieve(rq)
    print(' Done.')
    return open(rp[0], 'rb')


def get_date(today: Optional[DTDate] = None) -> DTDate:
    """Return the most recent day to request rates for.

    On Sunday and Monday this is the preceding Friday, on every other day
    it is the previous day.

    Args:
        today: Reference day; defaults to the current local date.
    """
    if today is None:
        today = DTDate.today()

    weekday = today.weekday()  # Monday == 0 ... Sunday == 6
    if weekday == 6:
        days_back = 2  # Sunday -> Friday
    elif weekday == 0:
        days_back = 3  # Monday -> Friday
    else:
        days_back = 1
    return today - DTTimeDelta(days_back)