Add more docs, fix the if/elif chain, and add more functions to utils
This commit is contained in:
		
							
								
								
									
										5
									
								
								crawl.py
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								crawl.py
									
									
									
									
									
								
							| @@ -5,7 +5,6 @@ import argparse | |||||||
| import sys | import sys | ||||||
| import os | import os | ||||||
| import pathlib | import pathlib | ||||||
|  |  | ||||||
| from datetime import date as DTDate | from datetime import date as DTDate | ||||||
| from datetime import datetime as DTDateTime | from datetime import datetime as DTDateTime | ||||||
| import appdirs | import appdirs | ||||||
| @@ -77,7 +76,7 @@ AMOUNT CURRENCY: Convert AMOUNT euros to CURRENCY. | |||||||
| CURRENCY AMOUNT: Convert AMOUNT CURRENCY to euros. | CURRENCY AMOUNT: Convert AMOUNT CURRENCY to euros. | ||||||
| d | date: Print the date which the data is from. | d | date: Print the date which the data is from. | ||||||
|             """) |             """) | ||||||
|         if argv[0] in ['q', 'exit', 'quit']: |         elif argv[0] in ['q', 'exit', 'quit']: | ||||||
|             sys.exit() |             sys.exit() | ||||||
|         elif argv[0] in ['date', 'd']: |         elif argv[0] in ['date', 'd']: | ||||||
|             print(res.date) |             print(res.date) | ||||||
| @@ -166,7 +165,7 @@ else: | |||||||
|     filepath = pathlib.Path(appdirs.user_cache_dir('FirstDataCrawler', 'iwonder')) |     filepath = pathlib.Path(appdirs.user_cache_dir('FirstDataCrawler', 'iwonder')) | ||||||
| if not filepath.exists(): | if not filepath.exists(): | ||||||
|     filepath.mkdir(parents=True) |     filepath.mkdir(parents=True) | ||||||
| filename = filepath / (retrieve_date.strftime('%Y%m%d') + '.pdf') | filename = filepath / utils.mk_filename(retrieve_date, use_card_type) | ||||||
| if os.path.exists(filename): | if os.path.exists(filename): | ||||||
|     with open(filename, 'rb') as f: |     with open(filename, 'rb') as f: | ||||||
|         results = utils.get_results_from_pdf(f) |         results = utils.get_results_from_pdf(f) | ||||||
|   | |||||||
							
								
								
									
										32
									
								
								utils.py
									
									
									
									
									
								
							
							
						
						
									
										32
									
								
								utils.py
									
									
									
									
									
								
							| @@ -29,6 +29,7 @@ def _parse_rate(text: str) -> float or None: | |||||||
|     if re.match('Keine Kursdaten vorhanden', text): |     if re.match('Keine Kursdaten vorhanden', text): | ||||||
|         _r = None |         _r = None | ||||||
|     else: |     else: | ||||||
|  |         # strip whitespace and format decimal numbers correctly for parsing | ||||||
|         text = text.strip(' ').replace(',', '.') |         text = text.strip(' ').replace(',', '.') | ||||||
|         try: |         try: | ||||||
|             _r = float(text) |             _r = float(text) | ||||||
| @@ -37,15 +38,18 @@ def _parse_rate(text: str) -> float or None: | |||||||
|     return _r |     return _r | ||||||
|  |  | ||||||
| def _parse_card_type(text: str) -> str: | def _parse_card_type(text: str) -> str: | ||||||
|  |     # Method for validating metadata from the PDF against the request data | ||||||
|     text = text.split(':')[1] |     text = text.split(':')[1] | ||||||
|     text = text.strip('" ') |     text = text.strip('" ') | ||||||
|     return text |     return text | ||||||
|  |  | ||||||
| def _parse_date(text: str) -> DTDate: | def _parse_date(text: str) -> DTDate: | ||||||
|  |     # Method for validating metadata from the PDF against the request data | ||||||
|     text = text.split(': ')[1].rstrip() |     text = text.split(': ')[1].rstrip() | ||||||
|     return DTDateTime.strptime(text, '%d.%m.%Y').date() |     return DTDateTime.strptime(text, '%d.%m.%Y').date() | ||||||
|  |  | ||||||
| def _array_remove_empty(obj: list) -> List[str]: | def _array_remove_empty(obj: list) -> List[str]: | ||||||
|  |     # just a macro for removing empty or empty-string array objects | ||||||
|     try: |     try: | ||||||
|         while True: |         while True: | ||||||
|             obj.remove('') |             obj.remove('') | ||||||
| @@ -119,21 +123,35 @@ def get_fileio(date: DTDate, card_type: List[str] = CARD_VISA) -> BinaryIO: # py | |||||||
|     # b.set_debug_responses(True) |     # b.set_debug_responses(True) | ||||||
|     # PDF URL |     # PDF URL | ||||||
|     b.open('https://misc.firstdata.eu/CurrencyCalculator/fremdwaehrungskurse/pdf') |     b.open('https://misc.firstdata.eu/CurrencyCalculator/fremdwaehrungskurse/pdf') | ||||||
|     fm = b.forms() |     fm = b.forms()[0] | ||||||
|     fm_i = fm[0] |     # This must be done because I can't change the options otherwise | ||||||
|     fm_i.set_all_readonly(False) |     fm.set_all_readonly(False) | ||||||
|     # Configure form |     # Configure form | ||||||
|     fm_i['creditCardsRadio'] = card_type # VISA |     fm['creditCardsRadio'] = card_type | ||||||
|     fm_i['selectedDatesString'] = str(date.strftime('%Y%m%d') + ',') |     fm['selectedDatesString'] = str(date.strftime('%Y%m%d') + ',') | ||||||
|     # Retrieve file using button click |     # Retrieve file using button click; the button is 115x21 pixels in size. | ||||||
|     rq = fm_i.click(name='submitButton', coord=(random.randint(1, 119), random.randint(1, 20))) |     # The API apparently doesn't like the max values | ||||||
|  |     rq = fm.click(name='submitButton', coord=(random.randint(1, 114), random.randint(1, 20))) | ||||||
|     rq.add_header('Accept', '*/*') |     rq.add_header('Accept', '*/*') | ||||||
|     rp = b.retrieve(rq) |     rp = b.retrieve(rq) | ||||||
|     print(' Done.') |     print(' Done.') | ||||||
|  |     # Returns an open file-like object with the PDF as contents | ||||||
|     return open(rp[0], 'rb') |     return open(rp[0], 'rb') | ||||||
| def get_date() -> DTDate: | def get_date() -> DTDate: | ||||||
|  |     # On Sunday and Monday, use Friday's data; Saturday and Sunday have no data at all | ||||||
|     if DTDate.today().weekday() in [6, 0]: |     if DTDate.today().weekday() in [6, 0]: | ||||||
|         date = DTDate.today() + relativedelta(weekday=FR(-1)) |         date = DTDate.today() + relativedelta(weekday=FR(-1)) | ||||||
|     else: |     else: | ||||||
|  |     # For all other days, the previous day is fine | ||||||
|         date = DTDate.today() - DTTimeDelta(1) |         date = DTDate.today() - DTTimeDelta(1) | ||||||
|     return date |     return date | ||||||
|  |  | ||||||
|  | def mk_filename(date: DTDate, card_type: List[str]) -> str: | ||||||
|  |     # List[str] is used because I don't want to make a class for just this | ||||||
|  |     if card_type == CARD_MASTERCARD: | ||||||
|  |         fn = date.isoformat() + '_MC.pdf' | ||||||
|  |     elif card_type == CARD_VISA: | ||||||
|  |         fn = date.isoformat() + '_VISA.pdf' | ||||||
|  |     else: | ||||||
|  |         raise TypeError("not a valid card type") | ||||||
|  |     return fn | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user