i don't even know, i'm drunk
This commit is contained in:
parent
faab574f82
commit
25c7767b0d
1
crawl.py
1
crawl.py
@ -1,6 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# pylint: disable=missing-docstring,invalid-name
|
# pylint: disable=missing-docstring,invalid-name
|
||||||
# pylint: disable=no-member # mechanize.Browser has some lazy-loading methods that pylint doesn't see
|
|
||||||
# import logging
|
# import logging
|
||||||
import argparse
|
import argparse
|
||||||
import sys
|
import sys
|
||||||
|
25
utils.py
25
utils.py
@ -1,17 +1,17 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# pylint: disable=missing-docstring,invalid-name
|
# pylint: disable=missing-docstring,invalid-name
|
||||||
|
|
||||||
#import argparse
|
import random
|
||||||
from collections import namedtuple
|
|
||||||
import datetime
|
|
||||||
import re
|
import re
|
||||||
from typing import List, BinaryIO
|
from collections import namedtuple
|
||||||
from datetime import date as DTDate
|
from datetime import date as DTDate
|
||||||
from datetime import timedelta as DTTimeDelta
|
from datetime import timedelta as DTTimeDelta
|
||||||
|
from datetime import datetime as DTDateTime
|
||||||
|
from typing import BinaryIO, List
|
||||||
|
|
||||||
import PyPDF3
|
|
||||||
import mechanize as m
|
import mechanize as m
|
||||||
import random
|
import PyPDF3
|
||||||
|
from dateutil.relativedelta import FR, relativedelta
|
||||||
|
|
||||||
Rate = namedtuple('Rate', ['abbr', 'full_name', 'ask', 'bid'])
|
Rate = namedtuple('Rate', ['abbr', 'full_name', 'ask', 'bid'])
|
||||||
|
|
||||||
@ -25,11 +25,6 @@ class CurrencyResult():
|
|||||||
self.card_type = str()
|
self.card_type = str()
|
||||||
self.date = None
|
self.date = None
|
||||||
|
|
||||||
def _select_date():
|
|
||||||
date = datetime.date.today()-datetime.timedelta(1)
|
|
||||||
if datetime.date().isoweekday() in [6, 0]:
|
|
||||||
date = datetime.date.today()
|
|
||||||
|
|
||||||
def _parse_rate(text: str) -> float or None:
|
def _parse_rate(text: str) -> float or None:
|
||||||
if re.match('Keine Kursdaten vorhanden', text):
|
if re.match('Keine Kursdaten vorhanden', text):
|
||||||
_r = None
|
_r = None
|
||||||
@ -46,9 +41,9 @@ def _parse_card_type(text: str) -> str:
|
|||||||
text = text.strip('" ')
|
text = text.strip('" ')
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def _parse_date(text: str) -> datetime.date:
|
def _parse_date(text: str) -> DTDate:
|
||||||
text = text.split(': ')[1].rstrip()
|
text = text.split(': ')[1].rstrip()
|
||||||
return datetime.datetime.strptime(text, '%d.%m.%Y').date()
|
return DTDateTime.strptime(text, '%d.%m.%Y').date()
|
||||||
|
|
||||||
def _array_remove_empty(obj: list) -> List[str]:
|
def _array_remove_empty(obj: list) -> List[str]:
|
||||||
try:
|
try:
|
||||||
@ -99,6 +94,7 @@ def get_results_from_text(text: str, currency: str = None) -> CurrencyResult:
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
def get_results_from_pdf(buf: BinaryIO or str, currency: str = None) -> CurrencyResult:
|
def get_results_from_pdf(buf: BinaryIO or str, currency: str = None) -> CurrencyResult:
|
||||||
|
print('Parsing data... ', end='')
|
||||||
reader = PyPDF3.PdfFileReader(buf)
|
reader = PyPDF3.PdfFileReader(buf)
|
||||||
text = str()
|
text = str()
|
||||||
pages = []
|
pages = []
|
||||||
@ -106,8 +102,10 @@ def get_results_from_pdf(buf: BinaryIO or str, currency: str = None) -> Currency
|
|||||||
pages.append(reader.getPage(num))
|
pages.append(reader.getPage(num))
|
||||||
for page in pages:
|
for page in pages:
|
||||||
text += page.extractText()
|
text += page.extractText()
|
||||||
|
print('Done.')
|
||||||
return get_results_from_text(text, currency=currency)
|
return get_results_from_text(text, currency=currency)
|
||||||
def get_fileio(date: DTDate, card_type: List[str] = CARD_VISA) -> BinaryIO: # pylint: disable=dangerous-default-value
|
def get_fileio(date: DTDate, card_type: List[str] = CARD_VISA) -> BinaryIO: # pylint: disable=dangerous-default-value
|
||||||
|
# pylint: disable=no-member # mechanize.Browser has some lazy-loading methods that pylint doesn't see
|
||||||
print('Downloading rates for ' + date.strftime('%Y-%m-%d') + '... ', end='')
|
print('Downloading rates for ' + date.strftime('%Y-%m-%d') + '... ', end='')
|
||||||
b = m.Browser()
|
b = m.Browser()
|
||||||
# Firefox 64 User-Agent
|
# Firefox 64 User-Agent
|
||||||
@ -139,4 +137,3 @@ def get_date() -> DTDate:
|
|||||||
else:
|
else:
|
||||||
date = DTDate.today() - DTTimeDelta(1)
|
date = DTDate.today() - DTTimeDelta(1)
|
||||||
return date
|
return date
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user