Added support for dark-themed notion pages (--dark-theme option)

This commit is contained in:
Nijat Ismayilzada 2020-11-20 22:56:57 +00:00
parent 58e776b423
commit dd2a4028f5
4 changed files with 29 additions and 13 deletions

5
.gitignore vendored
View File

@ -111,4 +111,7 @@ dmypy.json
.env
.vscode
temp
logs
logs
.idea/
venv/

View File

@ -198,6 +198,7 @@ On top of this, the script can take these optional arguments:
Use a specific chromedriver executable instead of the
auto-installing one
--single-page Only parse the first page, then stop
--dark-theme Use dark themed version of the target Notion.so page
--timeout TIMEOUT Time in seconds to wait for the loading of lazy-loaded
dynamic elements (default 5). If content from the page
seems to be missing, try increasing this value

View File

@ -34,6 +34,11 @@ def main():
argparser.add_argument(
"--single-page", action="store_true", help="Only parse the first page, then stop"
)
argparser.add_argument(
"--dark-theme",
action="store_true",
help="Use dark themed version of the target Notion.so page",
)
argparser.add_argument(
"--timeout",
type=int,

View File

@ -116,7 +116,7 @@ class Parser:
return {**site_config, **matching_page_config}
else:
log.error(
f"Matching page configuration for {url} was not a dict:"
f"Matching page configuration for {token} was not a dict:"
f" {matching_page_config} - something went wrong"
)
return site_config
@ -244,16 +244,19 @@ class Parser:
)
def parse_page(self, url, processed_pages={}, index=None):
# if this is the first page being parsed, set it as the index.html
if not index:
index = url
log.info(f"Parsing page '{url}'")
log.debug(f"Using page config: {self.get_page_config(url)}")
self.driver.get(url)
try:
WebDriverWait(self.driver, 60).until(notion_page_loaded())
self.load(url)
if not index:
# if this is the first page being parsed, set it as the index.html
index = url
# if dark theme is enabled, set local storage item and re-load the page
if self.args.get("dark_theme", True):
log.debug(f"Dark theme is enabled")
self.driver.execute_script("window.localStorage.setItem('theme','{\"mode\":\"dark\"}');")
self.load(url)
except TimeoutException as ex:
log.critical(
"Timeout waiting for page content to load, or no content found."
@ -402,8 +405,8 @@ class Parser:
style = cssutils.parseStyle(img["style"])
spritesheet = style["background"]
spritesheet_url = spritesheet[
spritesheet.find("(") + 1 : spritesheet.find(")")
]
spritesheet.find("(") + 1: spritesheet.find(")")
]
cached_spritesheet_url = self.cache_file(
"https://www.notion.so" + spritesheet_url
)
@ -458,7 +461,7 @@ class Parser:
# the link to the row item is equal to its data-block-id without dashes
for table_view in soup.findAll("div", {"class": "notion-table-view"}):
for table_row in table_view.findAll(
"div", {"class": "notion-collection-item"}
"div", {"class": "notion-collection-item"}
):
table_row_block_id = table_row["data-block-id"]
table_row_href = "/" + table_row_block_id.replace("-", "")
@ -564,8 +567,8 @@ class Parser:
a["href"] = "#" + sub_page_href_tokens[-1]
a["class"] = a.get("class", []) + ["loconotion-anchor-link"]
if (
sub_page_href in processed_pages.keys()
or sub_page_href in sub_pages
sub_page_href in processed_pages.keys()
or sub_page_href in sub_pages
):
log.debug(
f"Original page for anchor link {sub_page_href}"
@ -608,6 +611,10 @@ class Parser:
# we're all done!
return processed_pages
def load(self, url, timeout=60):
    """Navigate the driver to *url* and block until the Notion page has loaded.

    Args:
        url: Address of the page to open in the driver.
        timeout: Maximum number of seconds to wait for the
            ``notion_page_loaded()`` condition. Defaults to 60, the value
            that was previously hard-coded here.

    Raises:
        TimeoutException: If the condition is not met within *timeout*
            seconds (raised by ``WebDriverWait.until``; callers such as
            ``parse_page`` catch it).
    """
    self.driver.get(url)
    # Block until the page-loaded condition holds, so callers can rely on
    # the content being present when this method returns.
    WebDriverWait(self.driver, timeout).until(notion_page_loaded())
def run(self, url):
start_time = time.time()
tot_processed_pages = self.parse_page(url)