more docs, if/elif fixed, more functions in utils
This commit is contained in:
parent
25c7767b0d
commit
81c0e5cf8f
5
crawl.py
5
crawl.py
@ -5,7 +5,6 @@ import argparse
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
from datetime import date as DTDate
|
from datetime import date as DTDate
|
||||||
from datetime import datetime as DTDateTime
|
from datetime import datetime as DTDateTime
|
||||||
import appdirs
|
import appdirs
|
||||||
@ -77,7 +76,7 @@ AMOUNT CURRENCY: Convert AMOUNT euros to CURRENCY.
|
|||||||
CURRENCY AMOUNT: Convert AMOUNT CURRENCY to euros.
|
CURRENCY AMOUNT: Convert AMOUNT CURRENCY to euros.
|
||||||
d | date: Print the date which the data is from.
|
d | date: Print the date which the data is from.
|
||||||
""")
|
""")
|
||||||
if argv[0] in ['q', 'exit', 'quit']:
|
elif argv[0] in ['q', 'exit', 'quit']:
|
||||||
sys.exit()
|
sys.exit()
|
||||||
elif argv[0] in ['date', 'd']:
|
elif argv[0] in ['date', 'd']:
|
||||||
print(res.date)
|
print(res.date)
|
||||||
@ -166,7 +165,7 @@ else:
|
|||||||
filepath = pathlib.Path(appdirs.user_cache_dir('FirstDataCrawler', 'iwonder'))
|
filepath = pathlib.Path(appdirs.user_cache_dir('FirstDataCrawler', 'iwonder'))
|
||||||
if not filepath.exists():
|
if not filepath.exists():
|
||||||
filepath.mkdir(parents=True)
|
filepath.mkdir(parents=True)
|
||||||
filename = filepath / (retrieve_date.strftime('%Y%m%d') + '.pdf')
|
filename = filepath / utils.mk_filename(retrieve_date, use_card_type)
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
with open(filename, 'rb') as f:
|
with open(filename, 'rb') as f:
|
||||||
results = utils.get_results_from_pdf(f)
|
results = utils.get_results_from_pdf(f)
|
||||||
|
32
utils.py
32
utils.py
@ -29,6 +29,7 @@ def _parse_rate(text: str) -> float or None:
|
|||||||
if re.match('Keine Kursdaten vorhanden', text):
|
if re.match('Keine Kursdaten vorhanden', text):
|
||||||
_r = None
|
_r = None
|
||||||
else:
|
else:
|
||||||
|
# strip whitespace and format decimal numbers correctly for parsing
|
||||||
text = text.strip(' ').replace(',', '.')
|
text = text.strip(' ').replace(',', '.')
|
||||||
try:
|
try:
|
||||||
_r = float(text)
|
_r = float(text)
|
||||||
@ -37,15 +38,18 @@ def _parse_rate(text: str) -> float or None:
|
|||||||
return _r
|
return _r
|
||||||
|
|
||||||
def _parse_card_type(text: str) -> str:
|
def _parse_card_type(text: str) -> str:
|
||||||
|
# Method for validating metadata from the PDF against the request data
|
||||||
text = text.split(':')[1]
|
text = text.split(':')[1]
|
||||||
text = text.strip('" ')
|
text = text.strip('" ')
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def _parse_date(text: str) -> DTDate:
|
def _parse_date(text: str) -> DTDate:
|
||||||
|
# Method for validating metadata from the PDF against the request data
|
||||||
text = text.split(': ')[1].rstrip()
|
text = text.split(': ')[1].rstrip()
|
||||||
return DTDateTime.strptime(text, '%d.%m.%Y').date()
|
return DTDateTime.strptime(text, '%d.%m.%Y').date()
|
||||||
|
|
||||||
def _array_remove_empty(obj: list) -> List[str]:
|
def _array_remove_empty(obj: list) -> List[str]:
|
||||||
|
# just a macro for removing empty or empty-string array objects
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
obj.remove('')
|
obj.remove('')
|
||||||
@ -119,21 +123,35 @@ def get_fileio(date: DTDate, card_type: List[str] = CARD_VISA) -> BinaryIO: # py
|
|||||||
# b.set_debug_responses(True)
|
# b.set_debug_responses(True)
|
||||||
# PDF URL
|
# PDF URL
|
||||||
b.open('https://misc.firstdata.eu/CurrencyCalculator/fremdwaehrungskurse/pdf')
|
b.open('https://misc.firstdata.eu/CurrencyCalculator/fremdwaehrungskurse/pdf')
|
||||||
fm = b.forms()
|
fm = b.forms()[0]
|
||||||
fm_i = fm[0]
|
# This must be done because I can't change the options otherwise
|
||||||
fm_i.set_all_readonly(False)
|
fm.set_all_readonly(False)
|
||||||
# Configure form
|
# Configure form
|
||||||
fm_i['creditCardsRadio'] = card_type # VISA
|
fm['creditCardsRadio'] = card_type
|
||||||
fm_i['selectedDatesString'] = str(date.strftime('%Y%m%d') + ',')
|
fm['selectedDatesString'] = str(date.strftime('%Y%m%d') + ',')
|
||||||
# Retrieve file using button click
|
# Retrieve file using button click; the button is 115x21 pixels in size.
|
||||||
rq = fm_i.click(name='submitButton', coord=(random.randint(1, 119), random.randint(1, 20)))
|
# The API apparently doesn't like the max values
|
||||||
|
rq = fm.click(name='submitButton', coord=(random.randint(1, 114), random.randint(1, 20)))
|
||||||
rq.add_header('Accept', '*/*')
|
rq.add_header('Accept', '*/*')
|
||||||
rp = b.retrieve(rq)
|
rp = b.retrieve(rq)
|
||||||
print(' Done.')
|
print(' Done.')
|
||||||
|
# Returns an open file-like object with the PDF as contents
|
||||||
return open(rp[0], 'rb')
|
return open(rp[0], 'rb')
|
||||||
def get_date() -> DTDate:
|
def get_date() -> DTDate:
|
||||||
|
# For Sunday and Monday, use Friday's data; Saturday and Sunday are completely null
|
||||||
if DTDate.today().weekday() in [6, 0]:
|
if DTDate.today().weekday() in [6, 0]:
|
||||||
date = DTDate.today() + relativedelta(weekday=FR(-1))
|
date = DTDate.today() + relativedelta(weekday=FR(-1))
|
||||||
else:
|
else:
|
||||||
|
# For all other days, the previous day is fine
|
||||||
date = DTDate.today() - DTTimeDelta(1)
|
date = DTDate.today() - DTTimeDelta(1)
|
||||||
return date
|
return date
|
||||||
|
|
||||||
|
def mk_filename(date: DTDate, card_type: List[str]) -> str:
|
||||||
|
# List[str] is used because I don't want to make a class for just this
|
||||||
|
if card_type == CARD_MASTERCARD:
|
||||||
|
fn = date.isoformat() + '_MC.pdf'
|
||||||
|
elif card_type == CARD_VISA:
|
||||||
|
fn = date.isoformat() + '_VISA.pdf'
|
||||||
|
else:
|
||||||
|
raise TypeError("not a valid card type")
|
||||||
|
return fn
|
||||||
|
Loading…
Reference in New Issue
Block a user