Adds a CLI argument (-C / --continue-after-timeout) that lets the remaining downloads continue after a page-load timeout (see --timeout) instead of aborting

This commit is contained in:
Matthew Nielsen 2023-06-26 17:29:11 -06:00
parent eb86257da0
commit b5b168a140
3 changed files with 18 additions and 4 deletions

View File

@ -219,6 +219,9 @@ On top of this, the script can take these optional arguments:
--timeout TIMEOUT Time in seconds to wait for the loading of lazy-loaded
dynamic elements (default 5). If content from the page
seems to be missing, try increasing this value
-C, --continue-after-timeout
Continue to download remaining pages or elements after
a --timeout error occurs
--clean Delete all previously cached files for the site before
generating it
--clean-css Delete previously cached .css files for the site

View File

@ -50,6 +50,11 @@ def get_args():
help="Time in seconds to wait for the loading of lazy-loaded dynamic elements (default 5)."
" If content from the page seems to be missing, try increasing this value",
)
argparser.add_argument(
"-C", "--continue-after-timeout",
action="store_true",
help="Continue to download remaining pages or elements after a --timeout error occurs",
)
argparser.add_argument(
"--clean",
action="store_true",

View File

@ -265,7 +265,13 @@ class Parser:
"Timeout waiting for page content to load, or no content found."
" Are you sure the page is set to public?"
)
raise ex
if self.args.get("continue_after_timeout", True):
log.critical(
"--continue-after-timeout is set, continuing to download remaining elements."
)
else:
raise ex
# open the toggle blocks in the page
self.open_toggle_blocks(self.args["timeout"])
@ -360,7 +366,7 @@ class Parser:
if len(new_toggle_blocks) > len(toggle_blocks):
# if so, run the function again
self.open_toggle_blocks(timeout, opened_toggles)
def _get_title_toggle_blocks(self):
"""Find toggle title blocks via their button element.
"""
@ -375,7 +381,7 @@ class Parser:
if len(toggle_buttons) > 0:
title_toggle_blocks.append(block)
return title_toggle_blocks
def clean_up(self, soup):
# remove scripts and other tags we don't want / need
for unwanted in soup.findAll("script"):
@ -552,7 +558,7 @@ class Parser:
for block in title_blocks:
if block.select_one("div[role=button]") is not None:
title_toggle_blocks.append(block)
return title_toggle_blocks
return title_toggle_blocks
def process_table_views(self, soup):
# if there are any table views in the page, add links to the title rows