Added condition to wait on database items to load correctly

The page-loaded condition now waits until every notion-scroller element has at least one child div.
This commit is contained in:
Leonardo Cavaletti 2020-05-24 19:34:45 +01:00
parent 1bf31f2d32
commit ef7e99fbb3
3 changed files with 28 additions and 38 deletions

View File

@ -6,26 +6,31 @@ log = logging.getLogger(f"loconotion.{__name__}")
class notion_page_loaded(object): class notion_page_loaded(object):
"""An expectation for checking that a notion page has loaded.""" """An expectation for checking that a notion page has loaded."""
def __init__(self, url):
self.url = url
def __call__(self, driver): def __call__(self, driver):
notion_presence = len( notion_presence = len(
driver.find_elements_by_class_name("notion-presence-container") driver.find_elements_by_class_name("notion-presence-container")
) )
collection_view_block = len( if (notion_presence):
driver.find_elements_by_class_name("notion-collection_view_page-block") unknown_blocks = len(driver.find_elements_by_class_name("notion-unknown-block"))
) loading_spinners = len(driver.find_elements_by_class_name("loading-spinner"))
collection_search = len(driver.find_elements_by_class_name("collectionSearch")) scrollers = driver.find_elements_by_class_name("notion-scroller")
# embed_ghosts = len(driver.find_elements_by_css_selector("div[embed-ghost]")); scrollers_with_children = [];
for scroller in scrollers:
children = len(scroller.find_elements_by_tag_name("div"))
if children > 0:
scrollers_with_children.append(scroller)
log.debug( log.debug(
f"Waiting for page content to load" f"Waiting for page content to load"
f" (presence container: {notion_presence}, loaders: {loading_spinners} )" f" (pending blocks: {unknown_blocks},"
f" loaded scrollers: {len(scrollers_with_children)} / {len(scrollers)})"
) )
if notion_presence and not loading_spinners: all_scrollers_loaded = len(scrollers) == len(scrollers_with_children)
if (all_scrollers_loaded and not unknown_blocks and not loading_spinners):
return True return True
else: else:
return False return False
else:
return False
class toggle_block_has_opened(object): class toggle_block_has_opened(object):
@ -40,13 +45,15 @@ class toggle_block_has_opened(object):
toggle_content = self.toggle_block.find_element_by_css_selector("div:not([style]") toggle_content = self.toggle_block.find_element_by_css_selector("div:not([style]")
if toggle_content: if toggle_content:
content_children = len(toggle_content.find_elements_by_tag_name("div")) content_children = len(toggle_content.find_elements_by_tag_name("div"))
unknown_children = len(toggle_content.find_elements_by_class_name("notion-unknown-block")) unknown_children = len(
toggle_content.find_elements_by_class_name("notion-unknown-block")
)
is_loading = len( is_loading = len(
self.toggle_block.find_elements_by_class_name("loading-spinner") self.toggle_block.find_elements_by_class_name("loading-spinner")
) )
log.debug( log.debug(
f"Waiting for toggle block to load" f"Waiting for toggle block to load"
f" ({unknown_children} pending children blocks / {is_loading} loaders)" f" (pending blocks: {unknown_children}, loaders: {is_loading})"
) )
if not unknown_children and not is_loading: if not unknown_children and not is_loading:
return True return True

View File

@ -31,7 +31,7 @@ except ModuleNotFoundError as error:
log.critical(f"ModuleNotFoundError: {error}. have your installed the requirements?") log.critical(f"ModuleNotFoundError: {error}. have your installed the requirements?")
sys.exit() sys.exit()
from conditions import toggle_block_has_opened from conditions import toggle_block_has_opened, notion_page_loaded
class Parser: class Parser:
@ -233,20 +233,8 @@ class Parser:
log.debug(f"Using page config: {self.get_page_config(url)}") log.debug(f"Using page config: {self.get_page_config(url)}")
self.driver.get(url) self.driver.get(url)
# if "This content does not exist" in self.driver.page_source:
# log.error(
# f"No content found in {url}."
# " Are you sure the page is set to public?"
# )
# return
try: try:
# WebDriverWait(self.driver, 10).until(notion_page_loaded()) WebDriverWait(self.driver, 10).until(notion_page_loaded())
WebDriverWait(self.driver, 10).until(
EC.presence_of_element_located(
(By.CLASS_NAME, "notion-presence-container")
)
)
except TimeoutException as ex: except TimeoutException as ex:
log.critical( log.critical(
"Timeout waiting for page content to load, or no content found." "Timeout waiting for page content to load, or no content found."
@ -254,10 +242,6 @@ class Parser:
) )
return return
# cooldown to allow eventual database items to load
# TODO: figure out a way to detect they loaded
time.sleep(2)
# function to expand all the toggle block in the page to make their content visible # function to expand all the toggle block in the page to make their content visible
# so we can hook up our custom toggle logic afterwards # so we can hook up our custom toggle logic afterwards
def open_toggle_blocks(exclude=[]): def open_toggle_blocks(exclude=[]):
@ -288,10 +272,9 @@ class Parser:
" Likely it's already open, but doesn't hurt to check." " Likely it's already open, but doesn't hurt to check."
) )
except Exception as exception: except Exception as exception:
log.error( log.error(f"Error trying to open a toggle block: {exception}")
f"Error trying to open a toggle block: {exception}"
)
opened_toggles.append(toggle_block) opened_toggles.append(toggle_block)
# after all toggles have been opened, check the page again to see if # after all toggles have been opened, check the page again to see if
# any toggle block had nested toggle blocks inside them # any toggle block had nested toggle blocks inside them
new_toggle_blocks = self.driver.find_elements_by_class_name( new_toggle_blocks = self.driver.find_elements_by_class_name(