Added condition to wait on database items to load correctly

This is done by waiting until every `notion-scroller` element on the page has at least one child `div`.
This commit is contained in:
Leonardo Cavaletti 2020-05-24 19:34:45 +01:00
parent 1bf31f2d32
commit ef7e99fbb3
3 changed files with 28 additions and 38 deletions

View File

@ -6,26 +6,31 @@ log = logging.getLogger(f"loconotion.{__name__}")
class notion_page_loaded(object):
    """An expectation for checking that a notion page has loaded.

    Instances are callables meant to be polled with a driver (for example
    through ``WebDriverWait(driver, timeout).until(notion_page_loaded())``).
    The page counts as loaded when all of the following hold:

    * at least one ``notion-presence-container`` element exists;
    * no ``loading-spinner`` element exists;
    * no ``notion-unknown-block`` element exists;
    * every ``notion-scroller`` element contains at least one ``div``.

    Args:
        url: Optional; stored on the instance as ``self.url``. It is not
            consulted by the check itself, and defaults to ``None`` so the
            condition can be constructed without arguments.
    """

    def __init__(self, url=None):
        self.url = url

    def __call__(self, driver):
        """Return True once the page content has finished loading.

        Args:
            driver: Object exposing ``find_elements_by_class_name``; the
                elements it returns must expose ``find_elements_by_tag_name``.

        Returns:
            bool: True when every loading criterion listed on the class is
            met, False otherwise.
        """
        notion_presence = len(
            driver.find_elements_by_class_name("notion-presence-container")
        )
        # Nothing else is worth querying until the main container exists.
        if not notion_presence:
            return False

        unknown_blocks = len(
            driver.find_elements_by_class_name("notion-unknown-block")
        )
        loading_spinners = len(
            driver.find_elements_by_class_name("loading-spinner")
        )
        scrollers = driver.find_elements_by_class_name("notion-scroller")
        # A scroller without any div inside has not received its content yet.
        scrollers_with_children = [
            scroller
            for scroller in scrollers
            if scroller.find_elements_by_tag_name("div")
        ]
        log.debug(
            f"Waiting for page content to load"
            f" (presence container: {notion_presence}, loaders: {loading_spinners} )"
            f" (pending blocks: {unknown_blocks},"
            f" loaded scrollers: {len(scrollers_with_children)} / {len(scrollers)})"
        )
        all_scrollers_loaded = len(scrollers) == len(scrollers_with_children)
        return all_scrollers_loaded and not unknown_blocks and not loading_spinners
class toggle_block_has_opened(object):
@ -40,13 +45,15 @@ class toggle_block_has_opened(object):
toggle_content = self.toggle_block.find_element_by_css_selector("div:not([style]")
if toggle_content:
content_children = len(toggle_content.find_elements_by_tag_name("div"))
unknown_children = len(toggle_content.find_elements_by_class_name("notion-unknown-block"))
unknown_children = len(
toggle_content.find_elements_by_class_name("notion-unknown-block")
)
is_loading = len(
self.toggle_block.find_elements_by_class_name("loading-spinner")
)
log.debug(
f"Waiting for toggle block to load"
f" ({unknown_children} pending children blocks / {is_loading} loaders)"
f" (pending blocks: {unknown_children}, loaders: {is_loading})"
)
if not unknown_children and not is_loading:
return True

View File

@ -31,7 +31,7 @@ except ModuleNotFoundError as error:
log.critical(f"ModuleNotFoundError: {error}. have your installed the requirements?")
sys.exit()
from conditions import toggle_block_has_opened
from conditions import toggle_block_has_opened, notion_page_loaded
class Parser:
@ -233,20 +233,8 @@ class Parser:
log.debug(f"Using page config: {self.get_page_config(url)}")
self.driver.get(url)
# if "This content does not exist" in self.driver.page_source:
# log.error(
# f"No content found in {url}."
# " Are you sure the page is set to public?"
# )
# return
try:
# WebDriverWait(self.driver, 10).until(notion_page_loaded())
WebDriverWait(self.driver, 10).until(
EC.presence_of_element_located(
(By.CLASS_NAME, "notion-presence-container")
)
)
WebDriverWait(self.driver, 10).until(notion_page_loaded())
except TimeoutException as ex:
log.critical(
"Timeout waiting for page content to load, or no content found."
@ -254,10 +242,6 @@ class Parser:
)
return
# cooldown to allow eventual database items to load
# TODO: figure out a way to detect they loaded
time.sleep(2)
# function to expand all the toggle block in the page to make their content visible
# so we can hook up our custom toggle logic afterwards
def open_toggle_blocks(exclude=[]):
@ -288,10 +272,9 @@ class Parser:
" Likely it's already open, but doesn't hurt to check."
)
except Exception as exception:
log.error(
f"Error trying to open a toggle block: {exception}"
)
log.error(f"Error trying to open a toggle block: {exception}")
opened_toggles.append(toggle_block)
# after all toggles have been opened, check the page again to see if
# any toggle block had nested toggle blocks inside them
new_toggle_blocks = self.driver.find_elements_by_class_name(