mirror of
https://github.com/leoncvlt/loconotion.git
synced 2024-08-30 18:12:12 +00:00
Merge pull request #99 from 2m/fix/content-load-2m
More reliable content loading
This commit is contained in:
commit
093df46b2e
@ -6,6 +6,9 @@ log = logging.getLogger(f"loconotion.{__name__}")
|
||||
class notion_page_loaded(object):
|
||||
"""An expectation for checking that a notion page has loaded."""
|
||||
|
||||
def __init__(self):
|
||||
self.previous_page_source = ""
|
||||
|
||||
def __call__(self, driver):
|
||||
notion_presence = len(
|
||||
driver.find_elements_by_class_name("notion-presence-container")
|
||||
@ -19,18 +22,20 @@ class notion_page_loaded(object):
|
||||
children = len(scroller.find_elements_by_tag_name("div"))
|
||||
if children > 0:
|
||||
scrollers_with_children.append(scroller)
|
||||
source_changed = self.previous_page_source != driver.page_source
|
||||
|
||||
log.debug(
|
||||
f"Waiting for page content to load"
|
||||
f" (pending blocks: {unknown_blocks},"
|
||||
f" loading spinners: {loading_spinners},"
|
||||
f" loaded scrollers: {len(scrollers_with_children)} / {len(scrollers)})"
|
||||
f" loaded scrollers: {len(scrollers_with_children)} / {len(scrollers)},"
|
||||
f" source changed: {source_changed})"
|
||||
)
|
||||
all_scrollers_loaded = len(scrollers) == len(scrollers_with_children)
|
||||
if (all_scrollers_loaded and not unknown_blocks and not loading_spinners):
|
||||
if (all_scrollers_loaded and not unknown_blocks and not loading_spinners and not source_changed):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
else:
|
||||
|
||||
self.previous_page_source = driver.page_source
|
||||
return False
|
||||
|
||||
|
||||
|
@ -232,7 +232,7 @@ class Parser:
|
||||
chrome_options = Options()
|
||||
if not self.args.get("non_headless", False):
|
||||
chrome_options.add_argument("--headless")
|
||||
chrome_options.add_argument("window-size=1920,1080")
|
||||
chrome_options.add_argument("window-size=1920,20000")
|
||||
chrome_options.add_argument("--no-sandbox")
|
||||
chrome_options.add_argument("--disable-dev-shm-usage")
|
||||
chrome_options.add_argument("--log-level=3")
|
||||
@ -267,8 +267,6 @@ class Parser:
|
||||
)
|
||||
raise ex
|
||||
|
||||
self.scroll_to_the_bottom()
|
||||
|
||||
# open the toggle blocks in the page
|
||||
self.open_toggle_blocks(self.args["timeout"])
|
||||
|
||||
@ -315,28 +313,6 @@ class Parser:
|
||||
"__console.environment.ThemeStore.setState({ mode: 'dark' });"
|
||||
)
|
||||
|
||||
def scroll_to_the_bottom(self):
    """Scroll the main notion-scroller down until its height stops growing.

    The scroller is repeatedly sent to its bottom; after each scroll the
    method sleeps for ``self.args["timeout"]`` seconds and re-reads the
    element's ``scrollHeight``. It stops once two consecutive readings
    are equal.

    Nothing is done when the page contains a calendar database view
    (``notion-calendar-view``), because such pages would keep loading
    forever.
    """
    # Calendar views never settle, so skip scrolling entirely for them.
    if self.driver.find_elements_by_class_name("notion-calendar-view"):
        return

    frame_scroller = self.driver.find_element_by_css_selector(
        ".notion-frame > .notion-scroller"
    )
    previous_height = frame_scroller.get_attribute("scrollHeight")
    log.debug(f"Scrolling to bottom of notion-scroller (height: {previous_height})")

    height_settled = False
    while not height_settled:
        # Jump to the current bottom, then give the page time to load more.
        self.driver.execute_script(
            "arguments[0].scrollTo(0, arguments[0].scrollHeight)", frame_scroller
        )
        time.sleep(self.args["timeout"])
        current_height = frame_scroller.get_attribute("scrollHeight")
        log.debug(f"New notion-scroller height after timeout is: {current_height}")
        # An unchanged height means no further content was appended.
        height_settled = current_height == previous_height
        previous_height = current_height
|
||||
|
||||
def open_toggle_blocks(self, timeout: int, exclude=[]):
|
||||
"""Expand all the toggle block in the page to make their content visible
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user