first-data-crawler-pdf/crawl.py

234 lines
6.7 KiB
Python
Raw Normal View History

2019-03-27 17:00:04 +01:00
#!/usr/bin/env python3
# pylint: disable=missing-docstring,invalid-name
# import logging
import argparse
import sys
import os
import pathlib
from datetime import date as DTDate
from datetime import datetime as DTDateTime
import appdirs
2020-07-18 20:22:33 +02:00
import csv
2019-03-27 17:00:04 +01:00
2019-05-03 23:16:00 +02:00
import utils
2019-03-27 17:00:04 +01:00
# Conversion direction flags understood by calc_result() and fmt_and_calc():
# TO_EUR converts a foreign amount into euros, FROM_EUR converts euros into
# the foreign currency.
DIRECTION_TO_EUR, DIRECTION_FROM_EUR = 0, 1
2020-07-18 20:22:33 +02:00
# Argument parsing
#
# The command line has three modes, selected further down in the script:
# interactive (-i), CSV dump (-c), or a one-shot conversion that uses the
# positional CURRENCY and AMOUNT values.
parser = argparse.ArgumentParser(
    description='Currency conversion using First Data cards.')
parser.add_argument(
    '-t', '--card-type',
    # No default is set here; the script falls back to VISA when the option
    # is omitted.
    choices=['VISA', 'MC'],
    dest='card_type',
    type=str,
    help='Card Type'
)
parser.add_argument(
    '-g', '--fetch-date',
    dest='date',
    type=str,
    help='Date to get values for (default: yesterday, Friday on Sat-Mon)'
)
parser.add_argument(
    '-r', '--direction',
    dest='reverse',
    action='store_true',
    help='Reverse direction (EUR -> currency)'
)
parser.add_argument(
    '-c', '--csv',
    dest='csv',
    action='store_true',
    help='Write the results to stdout as CSV'
)
parser.add_argument(
    '-q', '--quiet',
    dest='quiet',
    action='store_true',
    help="Do not output the 'Downloading...' and 'Parsing...' messages."
)
parser.add_argument(
    '--cache-dir',
    dest='cache_dir',
    type=str,
    help='Override the default cache directory with your own path'
)
exc_group = parser.add_mutually_exclusive_group()
exc_group.add_argument(
    '-i', '--interactive',
    dest='interactive',
    action='store_true',
    help='Calculate interactively on stdin'
)
# The positionals are registered directly on the parser. They used to live in
# an argument group nested inside the mutually exclusive group, a construct
# argparse never supported and has deprecated since Python 3.11.
# Both are optional (nargs='?') because interactive and CSV mode do not need
# them.
parser.add_argument(
    'currency',
    type=str,
    help='Currency abbreviation to convert from/to (e.g. EUR)',
    nargs='?'
)
parser.add_argument(
    'amt',
    type=float,
    help='Amount',
    nargs='?'
)
2019-03-27 17:00:04 +01:00
2019-06-21 22:16:16 +02:00
2019-05-03 23:16:00 +02:00
def _process_stdin(argv: str, res: utils.CurrencyResult) -> None:
    """Execute one line typed at the interactive prompt.

    Recognised input:

    * ``h`` / ``man`` / ``help``: print the usage text.
    * ``q`` / ``exit`` / ``quit``: leave the program via ``sys.exit()``.
    * ``d`` / ``date``: print ``res.date``.
    * ``CURRENCY``: convert 1 unit of CURRENCY to euros.
    * ``AMOUNT CURRENCY``: convert AMOUNT euros to CURRENCY.
    * ``CURRENCY AMOUNT``: convert AMOUNT of CURRENCY to euros.

    A blank line is ignored. Anything else produces a short diagnostic on
    stdout; no exception other than ``SystemExit`` is meant to escape.

    Args:
        argv: The raw input line; it is split on whitespace here.
        res: Parsed rates; ``res.rates`` is looked up by upper-case
            currency abbreviation and ``res.date`` is printed on request.
    """
    tokens = argv.split()
    if not tokens:
        # Nothing was typed: silently show the prompt again instead of
        # reporting "Too few arguments".
        return

    try:
        if tokens[0] in ('h', 'man', 'help'):
            print(
                'Usage:\n'
                'q | exit | quit: Quit the program.\n'
                'AMOUNT CURRENCY: Convert AMOUNT euros to CURRENCY.\n'
                'CURRENCY AMOUNT: Convert AMOUNT CURRENCY to euros.\n'
                'd | date: Print the date which the data is from.\n'
            )
        elif tokens[0] in ('q', 'exit', 'quit'):
            sys.exit()
        elif tokens[0] in ('date', 'd'):
            print(res.date)
        elif len(tokens[0]) == 3 or len(tokens[1]) == 3:
            # One of the first two tokens must be a three-letter currency
            # abbreviation. Indexing tokens[1] on a one-token line raises
            # IndexError, which is reported below as "Too few arguments".
            if len(tokens) == 1 and not is_float(tokens[0]):
                # only currency specified
                cur_use = tokens[0].upper()
                amt_use = 1
                dir_use = DIRECTION_TO_EUR
            elif is_float(tokens[0]):
                # amount first -> convert EUR to currency in tokens[1]
                cur_use = tokens[1].upper()
                amt_use = float(tokens[0])
                dir_use = DIRECTION_FROM_EUR
            elif is_float(tokens[1]):
                # currency first -> convert tokens[0] currency to EUR
                cur_use = tokens[0].upper()
                amt_use = float(tokens[1])
                dir_use = DIRECTION_TO_EUR
            else:
                # Neither token is a number.
                raise ValueError

            # check that the currency exists
            if cur_use not in res.rates:
                raise ValueError
            print(fmt_and_calc(
                cur=cur_use,
                amt=amt_use,
                res=res,
                direction=dir_use
            ))
        else:
            print("Not implemented: '" + " ".join(tokens) + "'")
    except IndexError:
        print("Too few arguments: '" + " ".join(tokens) + "'")
    except ValueError:
        print("The currency specified does not exist.")
2019-03-27 17:00:04 +01:00
2019-06-21 22:16:16 +02:00
2019-03-27 17:00:04 +01:00
def is_float(string: str) -> bool:
    """Report whether ``string`` is accepted by the ``float`` constructor."""
    try:
        float(string)
    except ValueError:
        return False
    else:
        return True
2019-06-21 22:16:16 +02:00
2019-03-27 17:00:04 +01:00
def _parse_date_from_args(date_str: str) -> DTDate:
    """Turn the date string given on the command line into a ``date``.

    The string is parsed with ``dateutil.parser.isoparse`` and any time
    component of the result is discarded. ``dateutil`` is imported inside
    the function, so it is only loaded when this function is called.
    """
    from dateutil.parser import isoparse

    parsed = isoparse(date_str)
    return parsed.date()
2019-06-21 22:16:16 +02:00
2019-05-03 23:16:00 +02:00
def calc_result(amt: float, rate: utils.Rate, direction: int, duty: float = 0) -> float:
    """Convert ``amt`` between EUR and the currency described by ``rate``.

    Args:
        amt: Amount to convert.
        rate: Rate object; ``rate.ask`` is used when converting from EUR and
            ``rate.bid`` when converting to EUR.
        direction: ``DIRECTION_FROM_EUR`` or ``DIRECTION_TO_EUR``.
        duty: Extra factor applied as ``1 + duty``; presumably a fractional
            surcharge such as ``0.02`` for 2 % -- TODO confirm. The default
            of 0 leaves the plain conversion untouched.

    Returns:
        The converted amount.

    Raises:
        ValueError: If ``direction`` is neither of the two constants.
    """
    # The factor must be parenthesised: without the parentheses Python
    # evaluates "x / 1+duty" as "(x / 1) + duty" and simply adds ``duty``
    # to the converted amount.
    if direction == DIRECTION_FROM_EUR:
        return amt * rate.ask / (1 + duty)
    if direction == DIRECTION_TO_EUR:
        return amt / rate.bid * (1 + duty)
    raise ValueError(
        'direction must be DIRECTION_FROM_EUR or DIRECTION_TO_EUR')
2019-06-21 22:16:16 +02:00
2019-05-03 23:16:00 +02:00
def fmt_and_calc(amt: float, cur: str, res: utils.CurrencyResult, direction: int) -> str:
    """Convert ``amt`` and render the conversion as a one-line string.

    Args:
        amt: Amount to convert.
        cur: Currency abbreviation; it is upper-cased before the lookup.
        res: Parsed rates; ``res.rates`` is indexed by abbreviation.
        direction: ``DIRECTION_FROM_EUR`` or ``DIRECTION_TO_EUR`` (integer
            constants, passed straight on to ``calc_result``).

    Returns:
        ``'<from> <amount> = <to> <result>'`` with two decimals each, or a
        "could not be found" message when ``cur`` is not in ``res.rates``.
    """
    cur = cur.upper()
    if cur not in res.rates:
        return 'Currency %s could not be found' % cur

    numeric_result = calc_result(amt, res.rates[cur], direction)
    if direction == DIRECTION_FROM_EUR:
        # EUR is the source, ``cur`` the target.
        fmt_vals = ('EUR', round(amt, 2), cur, round(numeric_result, 2))
    else:
        # ``cur`` is the source, EUR the target.
        fmt_vals = (cur, round(amt, 2), 'EUR', round(numeric_result, 2))
    return '%s %0.2f = %s %0.2f' % fmt_vals
2020-07-18 20:22:33 +02:00
2019-05-03 23:16:00 +02:00
# Script body: parse the command line, load the rates PDF (from the cache
# directory if it is already there) and run the selected mode.
#
# Debugging aids, kept for reference:
# args = parser.parse_args('USD 1000'.split())
# logger = logging.getLogger('mechanize')
# logger.addHandler(logging.StreamHandler(sys.stdout))
# logger.setLevel(logging.DEBUG)
args = parser.parse_args()

# A one-shot conversion needs both positionals. Reject the call before any
# file or network work is done rather than crashing later on a None value.
if not (args.interactive or args.csv) and (
        args.currency is None or args.amt is None):
    parser.error(
        'currency and amt are required unless --interactive or --csv is given')

# determine card type (VISA is the fallback when -t is omitted)
if args.card_type in (None, 'VISA'):
    use_card_type = utils.CARD_VISA
elif args.card_type == 'MC':
    use_card_type = utils.CARD_MASTERCARD
else:
    # Not reachable through argparse's ``choices``; kept as a safety net.
    sys.exit('Unsupported card type ' + args.card_type)

direction = DIRECTION_FROM_EUR if args.reverse else DIRECTION_TO_EUR

if args.date:
    retrieve_date = _parse_date_from_args(args.date)
else:
    retrieve_date = utils.get_date()

# Locate (and create if necessary) the directory the PDFs are cached in.
if args.cache_dir is not None:
    filepath = pathlib.Path(args.cache_dir).resolve()
else:
    filepath = pathlib.Path(appdirs.user_cache_dir(
        'FirstDataCrawler', 'iwonder'))
# exist_ok avoids the check-then-create race of a separate exists() test.
filepath.mkdir(parents=True, exist_ok=True)
filename = filepath / utils.mk_filename(retrieve_date, use_card_type)

if filename.exists():
    # Cached copy available: parse it directly.
    with open(filename, 'rb') as f:
        results = utils.get_results_from_pdf(f, quiet=args.quiet)
else:
    # Obtain the PDF through utils.get_fileio, store it in the cache, then
    # rewind the buffer so it can be parsed from the start.
    buf = utils.get_fileio(
        retrieve_date, card_type=use_card_type, quiet=args.quiet)
    with open(filename, 'wb') as f:
        f.write(buf.read())
    buf.seek(0)
    results = utils.get_results_from_pdf(buf, quiet=args.quiet)

#
# processing
#
if args.interactive:
    # Read commands until the user quits, presses Ctrl-C or sends EOF.
    try:
        while True:
            _process_stdin(input('> '), results)
    except (KeyboardInterrupt, EOFError):
        sys.exit()
elif args.csv:
    # One CSV row per known currency.
    w = csv.writer(sys.stdout)
    for rate in results.rates.values():
        w.writerow([rate.abbr, rate.full_name, rate.ask, rate.bid, rate.date])
else:
    print(fmt_and_calc(args.amt, args.currency, results, direction))