#!/usr/bin/env python3
# pylint: disable=missing-docstring,invalid-name
"""Parse currency exchange rates out of a rate-sheet PDF (or its extracted text)."""

#import argparse
from collections import namedtuple
import datetime
import re
from typing import BinaryIO, Dict, List, Optional, Union

try:
    import PyPDF3
except ImportError:
    # Only get_results_from_pdf needs the PDF backend; the text parsers
    # stay importable without it.
    PyPDF3 = None

# Cell text found in place of a number when no rate is available.
_NO_DATA_MARKER = 'Keine Kursdaten vorhanden'
# Columns in the extracted text are separated by at least three spaces.
_COLUMN_SEPARATOR = '   '
# Index of the "card type" line (the two lines before it are skipped).
_CARD_TYPE_LINE = 2
# Index of the date line, directly after the card type line.
_DATE_LINE = 3
# Index of the first rate line (four more lines are skipped after the date).
_FIRST_RATE_LINE = 8

# One currency row: abbreviation, full name, and the two parsed rates
# (each rate is a float, or None when it could not be parsed).
Rate = namedtuple('Rate', ['abbr', 'full_name', 'ask', 'bid'])


class CurrencyResult():
    """Container for everything parsed from one rate sheet."""

    def __init__(self):
        # Rates keyed by currency abbreviation, as filled in by
        # get_results_from_text.
        self.rates = {}  # type: Dict[str, Rate]
        # Card type as stated in the sheet header.
        self.card_type = ''
        # Date as stated in the sheet header; None until parsed.
        self.date = None  # type: Optional[datetime.date]


def _select_date(today: Optional[datetime.date] = None) -> datetime.date:
    """Return the date to request rates for.

    On a Saturday or Sunday this is ``today`` itself, otherwise the day
    before ``today``.

    NOTE(review): the original never returned its result and crashed on
    ``datetime.date()``; the weekend rule is kept exactly as it was
    written -- confirm it is the intended business rule.

    Args:
        today: Reference date; defaults to the current local date.
    """
    if today is None:
        today = datetime.date.today()
    # isoweekday(): Monday == 1 ... Saturday == 6, Sunday == 7.
    if today.isoweekday() in (6, 7):
        return today
    return today - datetime.timedelta(days=1)


def _parse_rate(text: str) -> Optional[float]:
    """Convert one rate cell (decimal comma) to a float.

    Returns None when the cell starts with the "no data" marker or is
    not a number.
    """
    if text.startswith(_NO_DATA_MARKER):
        return None
    try:
        return float(text.strip(' ').replace(',', '.'))
    except ValueError:
        return None


def _parse_card_type(text: str) -> str:
    """Return the value after the first colon, without quotes/spaces.

    Raises:
        IndexError: if ``text`` contains no colon.
    """
    value = text.split(':')[1]
    return value.strip('" ')


def _parse_date(text: str) -> datetime.date:
    """Parse the ``DD.MM.YYYY`` date that follows ``': '`` in ``text``.

    Raises:
        IndexError: if ``text`` contains no ``': '``.
        ValueError: if the value is not a ``DD.MM.YYYY`` date.
    """
    value = text.split(': ')[1].rstrip()
    return datetime.datetime.strptime(value, '%d.%m.%Y').date()


def _array_remove_empty(obj: List[str]) -> List[str]:
    """Remove every empty string from ``obj`` in place and return it."""
    # Slice assignment keeps the caller's list object (the original
    # mutated its argument too) while filtering in a single pass.
    obj[:] = [item for item in obj if item != '']
    return obj


def _parse_line(line: str) -> Optional[Rate]:
    """Parse one ``ABBR (Full name)   ask   bid`` row.

    Returns None for lines that do not have that shape (blank lines,
    footers, ...) instead of raising.
    """
    fields = _array_remove_empty(
        [field.strip() for field in line.split(_COLUMN_SEPARATOR)]
    )
    if len(fields) < 3:
        return None

    # The first column holds "ABBR (Full name)".
    names = fields[0].split(' ', 1)
    if len(names) < 2:
        return None

    return Rate(
        abbr=names[0],
        full_name=names[1].strip('()'),
        ask=_parse_rate(fields[1]),
        bid=_parse_rate(fields[2]),
    )


def get_results_from_text(text: str, currency: str = None) -> CurrencyResult:
    """Parse the text extracted from a rate sheet.

    Layout read by this function: two intro lines, the card type line,
    the date line, four more skipped lines, then one rate row per line.

    Args:
        text: Extracted sheet text.
        currency: When given, only rows starting with this literal
            prefix are parsed; otherwise every row is.

    Returns:
        A CurrencyResult whose ``rates`` maps abbreviation -> Rate.

    Raises:
        IndexError: if the header lines are missing or lack a colon.
        ValueError: if the date line does not hold a ``DD.MM.YYYY`` date.
    """
    lines = text.splitlines()

    result = CurrencyResult()
    result.card_type = _parse_card_type(lines[_CARD_TYPE_LINE])
    result.date = _parse_date(lines[_DATE_LINE])

    rates = {}
    for line in lines[_FIRST_RATE_LINE:]:
        # Literal prefix test: the currency is never interpreted as a regex.
        if currency is not None and not line.startswith(currency):
            continue
        rate = _parse_line(line)
        if rate is not None:
            rates[rate.abbr] = rate

    result.rates = rates
    return result


def get_results_from_pdf(buf: Union[BinaryIO, str],
                         currency: str = None) -> CurrencyResult:
    """Extract the text of every page of a PDF and parse it.

    Args:
        buf: Open binary file object or path, handed to
            ``PyPDF3.PdfFileReader``.
        currency: Passed through to get_results_from_text.

    Raises:
        ImportError: if PyPDF3 is not installed.
    """
    if PyPDF3 is None:
        raise ImportError('PyPDF3 is required to read rates from a PDF')

    reader = PyPDF3.PdfFileReader(buf)
    # range(getNumPages()) covers all pages, including the last one.
    text = ''.join(
        reader.getPage(num).extractText()
        for num in range(reader.getNumPages())
    )
    return get_results_from_text(text, currency=currency)