more docs, if/elif fixed, more functions in utils

2019-05-04 22:55:18 +02:00
parent 25c7767b0d
commit 81c0e5cf8f
2 changed files with 27 additions and 10 deletions
--- a/crawl.py
+++ b/crawl.py
@ -5,7 +5,6 @@ import argparse
 import sys
 import os
 import pathlib
 from datetime import date as DTDate
 from datetime import datetime as DTDateTime
 import appdirs
@ -77,7 +76,7 @@ AMOUNT CURRENCY: Convert AMOUNT euros to CURRENCY.
 CURRENCY AMOUNT: Convert AMOUNT CURRENCY to euros.
 d | date: Print the date which the data is from.
            """)
-        if argv[0] in ['q', 'exit', 'quit']:
+        elif argv[0] in ['q', 'exit', 'quit']:
            sys.exit()
        elif argv[0] in ['date', 'd']:
            print(res.date)
@ -166,7 +165,7 @@ else:
    filepath = pathlib.Path(appdirs.user_cache_dir('FirstDataCrawler', 'iwonder'))
 if not filepath.exists():
    filepath.mkdir(parents=True)
-filename = filepath / (retrieve_date.strftime('%Y%m%d') + '.pdf')
+filename = filepath / utils.mk_filename(retrieve_date, use_card_type)
 if os.path.exists(filename):
    with open(filename, 'rb') as f:
        results = utils.get_results_from_pdf(f)
--- a/utils.py
+++ b/utils.py
@ -29,6 +29,7 @@ def _parse_rate(text: str) -> float or None:
    if re.match('Keine Kursdaten vorhanden', text):
        _r = None
    else:
        # strip whitespace and format decimal numbers correctly for parsing
        text = text.strip(' ').replace(',', '.')
        try:
            _r = float(text)
@ -37,15 +38,18 @@ def _parse_rate(text: str) -> float or None:
    return _r
 def _parse_card_type(text: str) -> str:
    """Extract the card-type value from a colon-separated metadata line.

    Used for validating metadata from the PDF against the request data.
    The segment between the first and second colon is taken, and any
    leading/trailing spaces and double quotes are stripped from it.

    Raises:
        IndexError: if ``text`` contains no colon.
    """
    segments = text.split(':')
    return segments[1].strip('" ')
 def _parse_date(text: str) -> DTDate:
    """Parse the date out of a ``label: DD.MM.YYYY`` style metadata line.

    Used for validating metadata from the PDF against the request data.
    The part after the first ``': '`` separator is taken, trailing
    whitespace is removed, and the rest is read as day.month.year.

    Raises:
        IndexError: if ``text`` contains no ``': '`` separator.
        ValueError: if the value does not match ``%d.%m.%Y``.
    """
    raw_value = text.split(': ')[1]
    stamp = DTDateTime.strptime(raw_value.rstrip(), '%d.%m.%Y')
    return stamp.date()
 def _array_remove_empty(obj: list) -> List[str]:
    # just a macro for removing empty or empty-string array objects
    try:
        while True:
            obj.remove('')
@ -119,21 +123,35 @@ def get_fileio(date: DTDate, card_type: List[str] = CARD_VISA) -> BinaryIO: # py
    # b.set_debug_responses(True)
    # PDF URL
    b.open('https://misc.firstdata.eu/CurrencyCalculator/fremdwaehrungskurse/pdf')
-    fm = b.forms()
+    fm = b.forms()[0]
-    fm_i = fm[0]
+    # This must be done because I can't change the options otherwise
-    fm_i.set_all_readonly(False)
+    fm.set_all_readonly(False)
    # Configure form
-    fm_i['creditCardsRadio'] = card_type # VISA
+    fm['creditCardsRadio'] = card_type
-    fm_i['selectedDatesString'] = str(date.strftime('%Y%m%d') + ',')
+    fm['selectedDatesString'] = str(date.strftime('%Y%m%d') + ',')
-    # Retrieve file using button click
+    # Retrieve file using button click; the button is 115x21 pixels in size.
-    rq = fm_i.click(name='submitButton', coord=(random.randint(1, 119), random.randint(1, 20)))
+    # The API apparently doesn't like the max values
    rq = fm.click(name='submitButton', coord=(random.randint(1, 114), random.randint(1, 20)))
    rq.add_header('Accept', '*/*')
    rp = b.retrieve(rq)
    print(' Done.')
    # Returns an open file-like object with the PDF as contents
    return open(rp[0], 'rb')
 def get_date() -> DTDate:
    """Return the date whose data should be requested, based on today.

    On Sunday and Monday the preceding Friday is returned; on every other
    day the previous calendar day is returned.
    """
    # Read the clock exactly once so the weekday test and the date
    # arithmetic always refer to the same day, even if this call happens
    # to straddle midnight.
    today = DTDate.today()
    # For Sunday and Monday, use Friday's data; Saturday and Sunday are completely null
    if today.weekday() in (6, 0):
        return today + relativedelta(weekday=FR(-1))
    # For all other days, the previous day is fine
    return today - DTTimeDelta(1)
 def mk_filename(date: DTDate, card_type: List[str]) -> str:
    """Build the PDF file name for a given date and card type.

    The name is the ISO-formatted date followed by ``_MC.pdf`` for
    CARD_MASTERCARD or ``_VISA.pdf`` for CARD_VISA.

    Raises:
        TypeError: if ``card_type`` equals neither card constant.
    """
    # List[str] is used because I don't want to make a class for just this
    is_mastercard = card_type == CARD_MASTERCARD
    if not is_mastercard and not card_type == CARD_VISA:
        raise TypeError("not a valid card type")
    suffix = '_MC.pdf' if is_mastercard else '_VISA.pdf'
    return date.isoformat() + suffix