mirror of
https://github.com/leoncvlt/loconotion.git
synced 2024-08-30 18:12:12 +00:00
Added support for lazy loaded page elements, and --timeout argument
This commit is contained in:
@ -84,7 +84,7 @@ Here's what a full .toml configuration would look like, alongside with explanati
|
||||
name = "Notion Test Site"
|
||||
|
||||
# the notion.so page to begin parsing from. This page will become the index.html
|
||||
# of the generated site, and loconotation will parse all sub-pages present on the page
|
||||
# of the generated site, and loconotion will parse all sub-pages present on the page
|
||||
page = "https://www.notion.so/Loconotion-Example-Page-03c403f4fdc94cc1b315b9469a8950ef"
|
||||
|
||||
## Global Site Settings ##
|
||||
@ -185,6 +185,9 @@ On top of this, the script can take these optional arguments:
|
||||
Use a specific chromedriver executable instead of the
|
||||
auto-installing one
|
||||
--single-page Only parse the first page, then stop
|
||||
--timeout TIMEOUT Time in seconds to wait for the loading of lazy-loaded
|
||||
dynamic elements (default 5). If content from the page
|
||||
seems to be missing, try increasing this value
|
||||
--clean Delete all previously cached files for the site before
|
||||
generating it
|
||||
--clean-css Delete previously cached .css files for the site
|
||||
@ -210,6 +213,6 @@ On top of this, the script can take these optional arguments:
|
||||
|
||||
If you used Loconotion to build a cool site and want it added to the list above, shoot me a mail or submit a pull request!
|
||||
|
||||
## Support 
|
||||
## Support [](https://www.buymeacoffee.com/leoncvlt)
|
||||
|
||||
If you found this useful, consider [buying me a coffee](https://www.buymeacoffee.com/leoncvlt) so I get a nice dose of methylxanthine, and you get a nice dose of karma.
|
||||
|
@ -34,6 +34,12 @@ def main():
|
||||
argparser.add_argument(
|
||||
"--single-page", action="store_true", help="Only parse the first page, then stop"
|
||||
)
|
||||
argparser.add_argument(
|
||||
"--timeout",
|
||||
default=5,
|
||||
help="Time in seconds to wait for the loading of lazy-loaded dynamic elements (default 5)."
|
||||
" If content from the page seems to be missing, try increasing this value",
|
||||
)
|
||||
argparser.add_argument(
|
||||
"--clean",
|
||||
action="store_true",
|
||||
|
@ -257,9 +257,30 @@ class Parser:
|
||||
)
|
||||
return
|
||||
|
||||
# scroll to the bottom of the notion-scroller element to load all elements
|
||||
# continue once there are no changes in height after a timeout
|
||||
# don't do this if the page has a calendar database on it or it will load forever
|
||||
calendar = self.driver.find_elements_by_class_name("notion-calendar-view")
|
||||
if not calendar:
|
||||
scroller = self.driver.find_element_by_css_selector(
|
||||
".notion-frame > .notion-scroller"
|
||||
)
|
||||
last_height = scroller.get_attribute("scrollHeight")
|
||||
log.debug(f"Scrolling to bottom of notion-scroller (height: {last_height})")
|
||||
while True:
|
||||
self.driver.execute_script(
|
||||
"arguments[0].scrollTo(0, arguments[0].scrollHeight)", scroller
|
||||
)
|
||||
time.sleep(self.args["timeout"])
|
||||
new_height = scroller.get_attribute("scrollHeight")
|
||||
log.debug(f"New notion-scroller height after timeout is: {new_height}")
|
||||
if new_height == last_height:
|
||||
break
|
||||
last_height = new_height
|
||||
|
||||
# function to expand all the toggle blocks in the page to make their content visible
|
||||
# so we can hook up our custom toggle logic afterwards
|
||||
def open_toggle_blocks(exclude=[]):
|
||||
def open_toggle_blocks(timeout, exclude=[]):
|
||||
opened_toggles = exclude
|
||||
toggle_blocks = self.driver.find_elements_by_class_name("notion-toggle-block")
|
||||
log.debug(f"Opening {len(toggle_blocks)} new toggle blocks in the page")
|
||||
@ -278,7 +299,7 @@ class Parser:
|
||||
# click on it, then wait until all elements are displayed
|
||||
toggle_button.click()
|
||||
try:
|
||||
WebDriverWait(self.driver, 10).until(
|
||||
WebDriverWait(self.driver, timeout).until(
|
||||
toggle_block_has_opened(toggle_block)
|
||||
)
|
||||
except TimeoutException as ex:
|
||||
@ -297,10 +318,10 @@ class Parser:
|
||||
)
|
||||
if len(new_toggle_blocks) > len(toggle_blocks):
|
||||
# if so, run the function again
|
||||
open_toggle_blocks(opened_toggles)
|
||||
open_toggle_blocks(timeout, opened_toggles)
|
||||
|
||||
# open the toggle blocks in the page
|
||||
open_toggle_blocks()
|
||||
open_toggle_blocks(self.args["timeout"])
|
||||
|
||||
# creates soup from the page to start parsing
|
||||
soup = BeautifulSoup(self.driver.page_source, "html.parser")
|
||||
|
Reference in New Issue
Block a user