From f415e71586b34bad676ca0cdf51e57f88fadce36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martynas=20Mickevi=C4=8Dius?= Date: Tue, 29 Mar 2022 20:12:46 +0300 Subject: [PATCH] More reliable content loading Increases the height of the headless browser to 20000 pixels so no scrolling is needed for all content to load. This fixes a problem where some content in the middle of the screen would not be loaded. Also adds an additional signal for the page load check that looks for any changes in the page source. This fixes a situation where `notion-scroller` already has some children but not all content has been loaded yet. --- loconotion/modules/conditions.py | 17 +++++++++++------ loconotion/modules/notionparser.py | 26 +------------------------- 2 files changed, 12 insertions(+), 31 deletions(-) diff --git a/loconotion/modules/conditions.py b/loconotion/modules/conditions.py index 21dc4a3..f336da1 100644 --- a/loconotion/modules/conditions.py +++ b/loconotion/modules/conditions.py @@ -6,6 +6,9 @@ log = logging.getLogger(f"loconotion.{__name__}") class notion_page_loaded(object): """An expectation for checking that a notion page has loaded.""" + def __init__(self): + self.previous_page_source = "" + def __call__(self, driver): notion_presence = len( driver.find_elements_by_class_name("notion-presence-container") @@ -19,19 +22,21 @@ class notion_page_loaded(object): children = len(scroller.find_elements_by_tag_name("div")) if children > 0: scrollers_with_children.append(scroller) + source_changed = self.previous_page_source != driver.page_source + log.debug( f"Waiting for page content to load" f" (pending blocks: {unknown_blocks}," f" loading spinners: {loading_spinners}," - f" loaded scrollers: {len(scrollers_with_children)} / {len(scrollers)})" + f" loaded scrollers: {len(scrollers_with_children)} / {len(scrollers)}," + f" source changed: {source_changed})" ) all_scrollers_loaded = len(scrollers) == len(scrollers_with_children) - if (all_scrollers_loaded and not unknown_blocks 
and not loading_spinners): + if (all_scrollers_loaded and not unknown_blocks and not loading_spinners and not source_changed): return True - else: - return False - else: - return False + + self.previous_page_source = driver.page_source + return False class toggle_block_has_opened(object): diff --git a/loconotion/modules/notionparser.py b/loconotion/modules/notionparser.py index 2069d13..ee359e0 100644 --- a/loconotion/modules/notionparser.py +++ b/loconotion/modules/notionparser.py @@ -231,7 +231,7 @@ class Parser: chrome_options = Options() if not self.args.get("non_headless", False): chrome_options.add_argument("--headless") - chrome_options.add_argument("window-size=1920,1080") + chrome_options.add_argument("window-size=1920,20000") chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--disable-dev-shm-usage") chrome_options.add_argument("--log-level=3") @@ -266,8 +266,6 @@ class Parser: ) return - self.scroll_to_the_bottom() - # open the toggle blocks in the page self.open_toggle_blocks(self.args["timeout"]) @@ -314,28 +312,6 @@ class Parser: "__console.environment.ThemeStore.setState({ mode: 'dark' });" ) - def scroll_to_the_bottom(self): - # scroll at the bottom of the notion-scroller element to load all elements - # continue once there are no changes in height after a timeout - # don't do this if the page has a calendar databse on it or it will load forever - calendar = self.driver.find_elements_by_class_name("notion-calendar-view") - if not calendar: - scroller = self.driver.find_element_by_css_selector( - ".notion-frame > .notion-scroller" - ) - last_height = scroller.get_attribute("scrollHeight") - log.debug(f"Scrolling to bottom of notion-scroller (height: {last_height})") - while True: - self.driver.execute_script( - "arguments[0].scrollTo(0, arguments[0].scrollHeight)", scroller - ) - time.sleep(self.args["timeout"]) - new_height = scroller.get_attribute("scrollHeight") - log.debug(f"New notion-scroller height after timeout is: 
{new_height}") - if new_height == last_height: - break - last_height = new_height - def open_toggle_blocks(self, timeout: int, exclude=[]): """Expand all the toggle block in the page to make their content visible