mirror of
https://github.com/leoncvlt/loconotion.git
synced 2024-08-30 18:12:12 +00:00
More reliable content loading
Increases the height of the headless browser window to 20000 pixels so that no scrolling is needed for all content to load. This fixes a problem where some content in the middle of the screen would not be loaded. It also adds an additional signal to the page load check, which looks for any changes in the page source between checks. This fixes a situation where `notion-scroller` already has some children but not all content has been loaded yet.
This commit is contained in:
parent
46c77076ab
commit
f415e71586
@ -6,6 +6,9 @@ log = logging.getLogger(f"loconotion.{__name__}")
|
|||||||
class notion_page_loaded(object):
|
class notion_page_loaded(object):
|
||||||
"""An expectation for checking that a notion page has loaded."""
|
"""An expectation for checking that a notion page has loaded."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.previous_page_source = ""
|
||||||
|
|
||||||
def __call__(self, driver):
|
def __call__(self, driver):
|
||||||
notion_presence = len(
|
notion_presence = len(
|
||||||
driver.find_elements_by_class_name("notion-presence-container")
|
driver.find_elements_by_class_name("notion-presence-container")
|
||||||
@ -19,18 +22,20 @@ class notion_page_loaded(object):
|
|||||||
children = len(scroller.find_elements_by_tag_name("div"))
|
children = len(scroller.find_elements_by_tag_name("div"))
|
||||||
if children > 0:
|
if children > 0:
|
||||||
scrollers_with_children.append(scroller)
|
scrollers_with_children.append(scroller)
|
||||||
|
source_changed = self.previous_page_source != driver.page_source
|
||||||
|
|
||||||
log.debug(
|
log.debug(
|
||||||
f"Waiting for page content to load"
|
f"Waiting for page content to load"
|
||||||
f" (pending blocks: {unknown_blocks},"
|
f" (pending blocks: {unknown_blocks},"
|
||||||
f" loading spinners: {loading_spinners},"
|
f" loading spinners: {loading_spinners},"
|
||||||
f" loaded scrollers: {len(scrollers_with_children)} / {len(scrollers)})"
|
f" loaded scrollers: {len(scrollers_with_children)} / {len(scrollers)},"
|
||||||
|
f" source changed: {source_changed})"
|
||||||
)
|
)
|
||||||
all_scrollers_loaded = len(scrollers) == len(scrollers_with_children)
|
all_scrollers_loaded = len(scrollers) == len(scrollers_with_children)
|
||||||
if (all_scrollers_loaded and not unknown_blocks and not loading_spinners):
|
if (all_scrollers_loaded and not unknown_blocks and not loading_spinners and not source_changed):
|
||||||
return True
|
return True
|
||||||
else:
|
|
||||||
return False
|
self.previous_page_source = driver.page_source
|
||||||
else:
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
@ -231,7 +231,7 @@ class Parser:
|
|||||||
chrome_options = Options()
|
chrome_options = Options()
|
||||||
if not self.args.get("non_headless", False):
|
if not self.args.get("non_headless", False):
|
||||||
chrome_options.add_argument("--headless")
|
chrome_options.add_argument("--headless")
|
||||||
chrome_options.add_argument("window-size=1920,1080")
|
chrome_options.add_argument("window-size=1920,20000")
|
||||||
chrome_options.add_argument("--no-sandbox")
|
chrome_options.add_argument("--no-sandbox")
|
||||||
chrome_options.add_argument("--disable-dev-shm-usage")
|
chrome_options.add_argument("--disable-dev-shm-usage")
|
||||||
chrome_options.add_argument("--log-level=3")
|
chrome_options.add_argument("--log-level=3")
|
||||||
@ -266,8 +266,6 @@ class Parser:
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
self.scroll_to_the_bottom()
|
|
||||||
|
|
||||||
# open the toggle blocks in the page
|
# open the toggle blocks in the page
|
||||||
self.open_toggle_blocks(self.args["timeout"])
|
self.open_toggle_blocks(self.args["timeout"])
|
||||||
|
|
||||||
@ -314,28 +312,6 @@ class Parser:
|
|||||||
"__console.environment.ThemeStore.setState({ mode: 'dark' });"
|
"__console.environment.ThemeStore.setState({ mode: 'dark' });"
|
||||||
)
|
)
|
||||||
|
|
||||||
def scroll_to_the_bottom(self):
|
|
||||||
# scroll at the bottom of the notion-scroller element to load all elements
|
|
||||||
# continue once there are no changes in height after a timeout
|
|
||||||
# don't do this if the page has a calendar databse on it or it will load forever
|
|
||||||
calendar = self.driver.find_elements_by_class_name("notion-calendar-view")
|
|
||||||
if not calendar:
|
|
||||||
scroller = self.driver.find_element_by_css_selector(
|
|
||||||
".notion-frame > .notion-scroller"
|
|
||||||
)
|
|
||||||
last_height = scroller.get_attribute("scrollHeight")
|
|
||||||
log.debug(f"Scrolling to bottom of notion-scroller (height: {last_height})")
|
|
||||||
while True:
|
|
||||||
self.driver.execute_script(
|
|
||||||
"arguments[0].scrollTo(0, arguments[0].scrollHeight)", scroller
|
|
||||||
)
|
|
||||||
time.sleep(self.args["timeout"])
|
|
||||||
new_height = scroller.get_attribute("scrollHeight")
|
|
||||||
log.debug(f"New notion-scroller height after timeout is: {new_height}")
|
|
||||||
if new_height == last_height:
|
|
||||||
break
|
|
||||||
last_height = new_height
|
|
||||||
|
|
||||||
def open_toggle_blocks(self, timeout: int, exclude=[]):
|
def open_toggle_blocks(self, timeout: int, exclude=[]):
|
||||||
"""Expand all the toggle block in the page to make their content visible
|
"""Expand all the toggle block in the page to make their content visible
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user