mirror of
https://github.com/leoncvlt/loconotion.git
synced 2024-08-30 18:12:12 +00:00
Move open_toggle_blocks
out of parse_page
Makes `parse_page` more readable
This commit is contained in:
parent
e4bb04dbf6
commit
a298edb060
@ -299,50 +299,8 @@ class Parser:
|
|||||||
break
|
break
|
||||||
last_height = new_height
|
last_height = new_height
|
||||||
|
|
||||||
# function to expand all the toggle block in the page to make their content visible
|
|
||||||
# so we can hook up our custom toggle logic afterwards
|
|
||||||
def open_toggle_blocks(timeout, exclude=[]):
|
|
||||||
opened_toggles = exclude
|
|
||||||
toggle_blocks = self.driver.find_elements_by_class_name("notion-toggle-block")
|
|
||||||
log.debug(f"Opening {len(toggle_blocks)} new toggle blocks in the page")
|
|
||||||
for toggle_block in toggle_blocks:
|
|
||||||
if not toggle_block in opened_toggles:
|
|
||||||
toggle_button = toggle_block.find_element_by_css_selector(
|
|
||||||
"div[role=button]"
|
|
||||||
)
|
|
||||||
# check if the toggle is already open by the direction of its arrow
|
|
||||||
is_toggled = "(180deg)" in (
|
|
||||||
toggle_button.find_element_by_tag_name("svg").get_attribute(
|
|
||||||
"style"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
if not is_toggled:
|
|
||||||
# click on it, then wait until all elements are displayed
|
|
||||||
self.driver.execute_script("arguments[0].click();", toggle_button)
|
|
||||||
try:
|
|
||||||
WebDriverWait(self.driver, timeout).until(
|
|
||||||
toggle_block_has_opened(toggle_block)
|
|
||||||
)
|
|
||||||
except TimeoutException as ex:
|
|
||||||
log.warning(
|
|
||||||
"Timeout waiting for toggle block to open."
|
|
||||||
" Likely it's already open, but doesn't hurt to check."
|
|
||||||
)
|
|
||||||
except Exception as exception:
|
|
||||||
log.error(f"Error trying to open a toggle block: {exception}")
|
|
||||||
opened_toggles.append(toggle_block)
|
|
||||||
|
|
||||||
# after all toggles have been opened, check the page again to see if
|
|
||||||
# any toggle block had nested toggle blocks inside them
|
|
||||||
new_toggle_blocks = self.driver.find_elements_by_class_name(
|
|
||||||
"notion-toggle-block"
|
|
||||||
)
|
|
||||||
if len(new_toggle_blocks) > len(toggle_blocks):
|
|
||||||
# if so, run the function again
|
|
||||||
open_toggle_blocks(timeout, opened_toggles)
|
|
||||||
|
|
||||||
# open the toggle blocks in the page
|
# open the toggle blocks in the page
|
||||||
open_toggle_blocks(self.args["timeout"])
|
self.open_toggle_blocks(self.args["timeout"])
|
||||||
|
|
||||||
# creates soup from the page to start parsing
|
# creates soup from the page to start parsing
|
||||||
soup = BeautifulSoup(self.driver.page_source, "html.parser")
|
soup = BeautifulSoup(self.driver.page_source, "html.parser")
|
||||||
@ -656,6 +614,52 @@ class Parser:
|
|||||||
if not sub_page in self.processed_pages.keys():
|
if not sub_page in self.processed_pages.keys():
|
||||||
self.parse_page(sub_page, index=index)
|
self.parse_page(sub_page, index=index)
|
||||||
|
|
||||||
|
def open_toggle_blocks(self, timeout: int, exclude=[]):
|
||||||
|
"""Expand all the toggle block in the page to make their content visible
|
||||||
|
|
||||||
|
Args:
|
||||||
|
timeout (int): timeout in seconds
|
||||||
|
exclude (list[Webelement], optional): toggles to exclude. Defaults to [].
|
||||||
|
|
||||||
|
Opening toggles is needed for hooking up our custom toggle logic afterwards.
|
||||||
|
"""
|
||||||
|
opened_toggles = exclude
|
||||||
|
toggle_blocks = self.driver.find_elements_by_class_name("notion-toggle-block")
|
||||||
|
log.debug(f"Opening {len(toggle_blocks)} new toggle blocks in the page")
|
||||||
|
for toggle_block in toggle_blocks:
|
||||||
|
if toggle_block not in opened_toggles:
|
||||||
|
toggle_button = toggle_block.find_element_by_css_selector(
|
||||||
|
"div[role=button]"
|
||||||
|
)
|
||||||
|
# check if the toggle is already open by the direction of its arrow
|
||||||
|
is_toggled = "(180deg)" in (
|
||||||
|
toggle_button.find_element_by_tag_name("svg").get_attribute("style")
|
||||||
|
)
|
||||||
|
if not is_toggled:
|
||||||
|
# click on it, then wait until all elements are displayed
|
||||||
|
self.driver.execute_script("arguments[0].click();", toggle_button)
|
||||||
|
try:
|
||||||
|
WebDriverWait(self.driver, timeout).until(
|
||||||
|
toggle_block_has_opened(toggle_block)
|
||||||
|
)
|
||||||
|
except TimeoutException as ex:
|
||||||
|
log.warning(
|
||||||
|
"Timeout waiting for toggle block to open."
|
||||||
|
" Likely it's already open, but doesn't hurt to check."
|
||||||
|
)
|
||||||
|
except Exception as exception:
|
||||||
|
log.error(f"Error trying to open a toggle block: {exception}")
|
||||||
|
opened_toggles.append(toggle_block)
|
||||||
|
|
||||||
|
# after all toggles have been opened, check the page again to see if
|
||||||
|
# any toggle block had nested toggle blocks inside them
|
||||||
|
new_toggle_blocks = self.driver.find_elements_by_class_name(
|
||||||
|
"notion-toggle-block"
|
||||||
|
)
|
||||||
|
if len(new_toggle_blocks) > len(toggle_blocks):
|
||||||
|
# if so, run the function again
|
||||||
|
self.open_toggle_blocks(timeout, opened_toggles)
|
||||||
|
|
||||||
def load(self, url):
|
def load(self, url):
|
||||||
self.driver.get(url)
|
self.driver.get(url)
|
||||||
WebDriverWait(self.driver, 60).until(notion_page_loaded())
|
WebDriverWait(self.driver, 60).until(notion_page_loaded())
|
||||||
|
Loading…
Reference in New Issue
Block a user