diff --git a/crawl.py b/crawl.py index 442c27a..ab9dbe9 100755 --- a/crawl.py +++ b/crawl.py @@ -5,7 +5,6 @@ import argparse import sys import os import pathlib - from datetime import date as DTDate from datetime import datetime as DTDateTime import appdirs @@ -77,7 +76,7 @@ AMOUNT CURRENCY: Convert AMOUNT euros to CURRENCY. CURRENCY AMOUNT: Convert AMOUNT CURRENCY to euros. d | date: Print the date which the data is from. """) - if argv[0] in ['q', 'exit', 'quit']: + elif argv[0] in ['q', 'exit', 'quit']: sys.exit() elif argv[0] in ['date', 'd']: print(res.date) @@ -166,7 +165,7 @@ else: filepath = pathlib.Path(appdirs.user_cache_dir('FirstDataCrawler', 'iwonder')) if not filepath.exists(): filepath.mkdir(parents=True) -filename = filepath / (retrieve_date.strftime('%Y%m%d') + '.pdf') +filename = filepath / utils.mk_filename(retrieve_date, use_card_type) if os.path.exists(filename): with open(filename, 'rb') as f: results = utils.get_results_from_pdf(f) diff --git a/utils.py b/utils.py index ff6594a..61500d4 100644 --- a/utils.py +++ b/utils.py @@ -29,6 +29,7 @@ def _parse_rate(text: str) -> float or None: if re.match('Keine Kursdaten vorhanden', text): _r = None else: + # strip whitespace and format decimal numbers correctly for parsing text = text.strip(' ').replace(',', '.') try: _r = float(text) @@ -37,15 +38,18 @@ def _parse_rate(text: str) -> float or None: return _r def _parse_card_type(text: str) -> str: + # Method for validating metadata from the PDF against the request data text = text.split(':')[1] text = text.strip('" ') return text def _parse_date(text: str) -> DTDate: + # Method for validating metadata from the PDF against the request data text = text.split(': ')[1].rstrip() return DTDateTime.strptime(text, '%d.%m.%Y').date() def _array_remove_empty(obj: list) -> List[str]: + # just a macro for removing empty or empty-string array objects try: while True: obj.remove('') @@ -119,21 +123,35 @@ def get_fileio(date: DTDate, card_type: List[str] = CARD_VISA) -> BinaryIO: # py # b.set_debug_responses(True) # PDF URL b.open('https://misc.firstdata.eu/CurrencyCalculator/fremdwaehrungskurse/pdf') - fm = b.forms() - fm_i = fm[0] - fm_i.set_all_readonly(False) + fm = b.forms()[0] + # This must be done because I can't change the options otherwise + fm.set_all_readonly(False) # Configure form - fm_i['creditCardsRadio'] = card_type # VISA - fm_i['selectedDatesString'] = str(date.strftime('%Y%m%d') + ',') - # Retrieve file using button click - rq = fm_i.click(name='submitButton', coord=(random.randint(1, 119), random.randint(1, 20))) + fm['creditCardsRadio'] = card_type + fm['selectedDatesString'] = str(date.strftime('%Y%m%d') + ',') + # Retrieve file using button click; the button is 115x21 pixels in size. + # The API apparently doesn't like the max values + rq = fm.click(name='submitButton', coord=(random.randint(1, 114), random.randint(1, 20))) rq.add_header('Accept', '*/*') rp = b.retrieve(rq) print(' Done.') + # Returns an open file-like object with the PDF as contents return open(rp[0], 'rb') def get_date() -> DTDate: + # For Sunday and Monday, use Friday's data; Saturday and Sunday are completely null if DTDate.today().weekday() in [6, 0]: date = DTDate.today() + relativedelta(weekday=FR(-1)) else: + # For all other days, the previous day is fine date = DTDate.today() - DTTimeDelta(1) return date + +def mk_filename(date: DTDate, card_type: List[str]) -> str: + # List[str] is used because I don't want to make a class for just this + if card_type == CARD_MASTERCARD: + fn = date.isoformat() + '_MC.pdf' + elif card_type == CARD_VISA: + fn = date.isoformat() + '_VISA.pdf' + else: + raise TypeError("not a valid card type") + return fn