commit 4e4e1205b68150a6d4dc400dac0c597b6e072607 Author: Christopher Teutsch Date: Wed Mar 27 17:00:04 2019 +0100 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..51f90d1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +.vscode +mechanize diff --git a/README.md b/README.md new file mode 100644 index 0000000..13aa192 --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +# CLI for First Data conversion rates + +## Requirements +* python3 with modules `PyPDF3`, `appdirs`, `mechanize` + +### Caveat: +Currently [python-mechanize](https://github.com/python-mechanize/mechanize) is not available via pip3. +Therefore, you need to clone mechanize and symlink its mechanize/ directory here. + +## Usage: +`python3 crawl.py [-t {VISA,MC}] [-g ISO_DATE] [-r] {-i | CURRENCY AMOUNT}` + +### OPTIONS + +#### `AMOUNT` +This must be a number. +#### `CURRENCY` +This must be the three-letter currency abbreviation, case is irrelevant. + +#### `-t`, `--card-type` +Choose the card type (MasterCard or VISA) + +#### `-g`, `--fetch-date` +Date to get values for (default: yesterday, Friday on Sat-Mon)' +Format: ISO date + +#### `-r`, `--direction` +Reverse conversion direction (EUR to specified currency, instead of specified currency to EUR) +#### `-i`, `--interactive` + +Calculate interactively on stdin + + +##### `q` + Quit. +##### `AMOUNT CURRENCY` + Convert AMOUNT euros to CURRENCY. +##### `CURRENCY AMOUNT` + Convert AMOUNT CURRENCY to euros. + + + diff --git a/crawl.py b/crawl.py new file mode 100644 index 0000000..aa08992 --- /dev/null +++ b/crawl.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +# pylint: disable=missing-docstring,invalid-name +# pylint: disable=no-member # mechanize.Browser has some lazy-loading methods that pylint doesn't see +# import logging +import argparse +import random +import sys +import os +import pathlib + +from typing import BinaryIO, List + +from datetime import date as DTDate +from datetime import datetime as DTDateTime +from datetime import timedelta as DTTimeDelta +from dateutil.relativedelta import FR, relativedelta +import appdirs + +import crawl2 +import mechanize as m + +CARD_MASTERCARD = ['0'] +CARD_VISA = ['1'] + +DIRECTION_TO_EUR = 0 +DIRECTION_FROM_EUR = 1 + +## Argument parsing +parser = argparse.ArgumentParser( + description='Currency conversion using First Data cards.') +parser.add_argument( + '-t', '--card-type', + # argument_default='VISA', # UnsupportedError + choices=['VISA', 'MC'], + dest='card_type', + type=str, + help='Card Type' +) +parser.add_argument( + '-g', '--fetch-date', + dest='date', + type=str, + help='Date to get values for (default: yesterday, Friday on Sat-Mon)' +) +parser.add_argument( + '-r', '--direction', + dest='reverse', + action='store_true', + help='Reverse direction (EUR -> currency)' +) +exc_group = parser.add_mutually_exclusive_group() +exc_group.add_argument( + '-i', '--interactive', + dest='interactive', + action='store_true', + help='Calculate interactively on stdin' +) +vals_group = exc_group.add_argument_group() + +vals_group.add_argument( + 'currency', + type=str, + help='Currency abbreviation to convert from/to (e.g. EUR)', + nargs='?' + ) +vals_group.add_argument( + 'amt', + type=float, + help='Amount', + nargs='?' + ) +# args = parser.parse_args('USD 1000'.split()) +args = parser.parse_args() +#logger = logging.getLogger('mechanize') +#logger.addHandler(logging.StreamHandler(sys.stdout)) +#logger.setLevel(logging.DEBUG) + +def _process_stdin(argv: str) -> None: + if argv == 'q': + sys.exit() + argv = argv.split() + try: + if is_float(argv[0]): + print(fmt_and_calc( + cur=argv[1].upper(), + amt=float(argv[0]), + res=results, + direction=DIRECTION_FROM_EUR)) + elif is_float(argv[1]): + print(fmt_and_calc( + cur=argv[0].upper(), + amt=float(argv[1]), + res=results, + direction=DIRECTION_TO_EUR)) + except IndexError: + if len(argv) == 0: + pass + else: + print("Too few arguments: '" + " ".join(argv) + "'") + +def is_float(string: str) -> bool: + try: + float(string) + return True + except ValueError: + return False + +def _retrieve_file(date: DTDate, card_type: List[str] = CARD_VISA) -> BinaryIO: # pylint: disable=dangerous-default-value + + b = m.Browser() + # Firefox 64 User-Agent + # ua = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0' + # b.set_header('User-Agent', ua) + # Ignore robots.txt + # b.set_handle_robots(False) + # Debugging flags + b.set_debug_http(True) + b.set_debug_redirects(True) + b.set_debug_responses(True) + # PDF URL + b.open('https://misc.firstdata.eu/CurrencyCalculator/fremdwaehrungskurse/pdf') + + fm = b.forms() + fm_i = fm[0] + fm_i.set_all_readonly(False) + # Configure form + fm_i['creditCardsRadio'] = card_type # VISA + fm_i['selectedDatesString'] = str(date.strftime('%Y%m%d') + ',') + # Retrieve file using button click + rq = fm_i.click(name='submitButton', coord=(random.randint(1, 119), random.randint(1, 20))) + rq.add_header('Accept', '*/*') + rp = b.retrieve(rq) + return open(rp[0], 'rb') + +def _get_date() -> DTDate: + if DTDate.today().weekday() in [6, 0]: + date = DTDate.today() + relativedelta(weekday=FR(-1)) + else: + date = DTDate.today() - DTTimeDelta(1) + return date +def _parse_date_from_args(date_str: str) -> DTDate: + return DTDateTime.strptime(date_str).date() +def calc_result(amt: float, rate: crawl2.Rate, direction: int, duty: float = 0) -> float: + if direction == DIRECTION_FROM_EUR: + result = amt * rate.ask / 1+duty + elif direction == DIRECTION_TO_EUR: + result = amt / rate.bid * 1+duty + else: + raise ValueError('direction must be DIRECTION_FROM_EUR or DIRECTION_TO_EUR') + return result +def fmt_and_calc(amt: float, cur: str, res: crawl2.CurrencyResult, direction: str) -> str: + cur = cur.upper() + if cur in results.rates: + numeric_result = calc_result(amt, res.rates[cur], direction) + if direction == DIRECTION_FROM_EUR: + fmt_vals = ('EUR', round(amt, 2), cur, round(numeric_result, 2)) + else: + fmt_vals = (cur, round(amt, 2), 'EUR', round(numeric_result, 2)) + return '%s %0.2f = %s %0.2f' % fmt_vals + else: + return 'Currency %s could not be found' % cur + +# determine card type +if args.card_type == 'VISA': + use_card_type = CARD_VISA +elif args.card_type == 'MC': + use_card_type = CARD_MASTERCARD +elif args.card_type is None: + use_card_type = CARD_VISA +else: + sys.exit('Unsupported card type ' + args.card_type) +if args.reverse: + direction = DIRECTION_FROM_EUR +else: + direction = DIRECTION_TO_EUR +if args.date: + retrieve_date = _parse_date_from_args(args.date) +else: + retrieve_date = _get_date() + +filepath = pathlib.Path(appdirs.user_cache_dir('FirstDataCrawler', 'iwonder')) +if not filepath.exists(): + filepath.mkdir(parents=True) +filename = filepath / (retrieve_date.strftime('%Y%m%d') + '.pdf') +if os.path.exists(filename): + with open(filename, 'rb') as f: + results = crawl2.get_results_from_pdf(f) +else: + buf = _retrieve_file(retrieve_date, card_type=use_card_type) + with open(filename, 'wb') as f: + f.write(buf.read()) + buf.seek(0) + results = crawl2.get_results_from_pdf(buf) +# +# processing +# + +if args.interactive: + try: + while True: + _process_stdin(input()) + except KeyboardInterrupt: + sys.exit() +else: + print(fmt_and_calc(args.amt, args.currency, results, direction)) diff --git a/crawl2.py b/crawl2.py new file mode 100644 index 0000000..376ad8b --- /dev/null +++ b/crawl2.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +# pylint: disable=missing-docstring,invalid-name + +#import argparse +from collections import namedtuple +import datetime +import re +from typing import List, BinaryIO +import PyPDF3 + +Rate = namedtuple('Rate', ['abbr', 'full_name', 'ask', 'bid']) + +class CurrencyResult(): + def __init__(self): + self.rates = list() + self.card_type = str() + self.date = None + +def _select_date(): + date = datetime.date.today()-datetime.timedelta(1) + if datetime.date().isoweekday() in [6, 0]: + date = datetime.date.today() + +def _parse_rate(text: str) -> float or None: + if re.match('Keine Kursdaten vorhanden', text): + _r = None + else: + text = text.strip(' ').replace(',', '.') + try: + _r = float(text) + except ValueError: + _r = None + return _r + +def _parse_card_type(text: str) -> str: + text = text.split(':')[1] + text = text.strip('" ') + return text + +def _parse_date(text: str) -> datetime.date: + text = text.split(': ')[1].rstrip() + return datetime.datetime.strptime(text, '%d.%m.%Y').date() + +def _array_remove_empty(obj: list) -> List[str]: + try: + while True: + obj.remove('') + except ValueError: + return obj + return obj + +def _parse_line(line: str) -> Rate or None: + arr = line.split(" ") # 3 spaces = minimum separation in PDF + arr = _array_remove_empty(arr) + # process currency name + names = arr[0].split(" ", 1) + rate = Rate( + abbr=names[0], + full_name=names[1].strip("()"), + ask=_parse_rate(arr[1]), + bid=_parse_rate(arr[2]) + ) + return rate + + +def get_results_from_text(text: str, currency: str = None) -> CurrencyResult: + rates = {} + result = CurrencyResult() + lines = text.splitlines() + # skip intro lines + lines = lines[2:] + # card type + result.card_type = _parse_card_type(lines.pop(0)) + # get date + result.date = _parse_date(lines.pop(0)) + # skip more lines + lines = lines[4:] + # now the rates begin + if currency is None: + for line in lines: + line_result = _parse_line(line) + rates[line_result.abbr] = line_result + else: + pattern = re.compile("^"+currency) + for line in lines: + if pattern.match(line): + line_result = _parse_line(line) + rates[line_result.abbr] = line_result + result.rates = rates + return result + +def get_results_from_pdf(buf: BinaryIO or str, currency: str = None) -> CurrencyResult: + reader = PyPDF3.PdfFileReader(buf) + text = str() + pages = [] + for num in range(0, reader.getNumPages()-1): + pages.append(reader.getPage(num)) + for page in pages: + text += page.extractText() + return get_results_from_text(text, currency=currency) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8a026c4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +PyPDF3 +appdirs \ No newline at end of file