Added support for dark-themed Notion pages (--dark-theme option)

This commit is contained in:
Nijat Ismayilzada 2020-11-20 22:56:57 +00:00
parent 58e776b423
commit dd2a4028f5
4 changed files with 29 additions and 13 deletions

3
.gitignore vendored
View File

@ -112,3 +112,6 @@ dmypy.json
.vscode .vscode
temp temp
logs logs
.idea/
venv/

View File

@ -198,6 +198,7 @@ On top of this, the script can take these optional arguments:
Use a specific chromedriver executable instead of the Use a specific chromedriver executable instead of the
auto-installing one auto-installing one
--single-page Only parse the first page, then stop --single-page Only parse the first page, then stop
--dark-theme Use dark themed version of the target Notion.so page
--timeout TIMEOUT Time in seconds to wait for the loading of lazy-loaded --timeout TIMEOUT Time in seconds to wait for the loading of lazy-loaded
dynamic elements (default 5). If content from the page dynamic elements (default 5). If content from the page
seems to be missing, try increasing this value seems to be missing, try increasing this value

View File

@ -34,6 +34,11 @@ def main():
argparser.add_argument( argparser.add_argument(
"--single-page", action="store_true", help="Only parse the first page, then stop" "--single-page", action="store_true", help="Only parse the first page, then stop"
) )
argparser.add_argument(
"--dark-theme",
action="store_true",
help="Use dark themed version of the target Notion.so page",
)
argparser.add_argument( argparser.add_argument(
"--timeout", "--timeout",
type=int, type=int,

View File

@ -116,7 +116,7 @@ class Parser:
return {**site_config, **matching_page_config} return {**site_config, **matching_page_config}
else: else:
log.error( log.error(
f"Matching page configuration for {url} was not a dict:" f"Matching page configuration for {token} was not a dict:"
f" {matching_page_config} - something went wrong" f" {matching_page_config} - something went wrong"
) )
return site_config return site_config
@ -244,16 +244,19 @@ class Parser:
) )
def parse_page(self, url, processed_pages={}, index=None): def parse_page(self, url, processed_pages={}, index=None):
# if this is the first page being parse, set it as the index.html
if not index:
index = url
log.info(f"Parsing page '{url}'") log.info(f"Parsing page '{url}'")
log.debug(f"Using page config: {self.get_page_config(url)}") log.debug(f"Using page config: {self.get_page_config(url)}")
self.driver.get(url)
try: try:
WebDriverWait(self.driver, 60).until(notion_page_loaded()) self.load(url)
if not index:
# if this is the first page being parsed, set it as the index.html
index = url
# if dark theme is enabled, set local storage item and re-load the page
if self.args.get("dark_theme", True):
log.debug(f"Dark theme is enabled")
self.driver.execute_script("window.localStorage.setItem('theme','{\"mode\":\"dark\"}');")
self.load(url)
except TimeoutException as ex: except TimeoutException as ex:
log.critical( log.critical(
"Timeout waiting for page content to load, or no content found." "Timeout waiting for page content to load, or no content found."
@ -402,7 +405,7 @@ class Parser:
style = cssutils.parseStyle(img["style"]) style = cssutils.parseStyle(img["style"])
spritesheet = style["background"] spritesheet = style["background"]
spritesheet_url = spritesheet[ spritesheet_url = spritesheet[
spritesheet.find("(") + 1 : spritesheet.find(")") spritesheet.find("(") + 1: spritesheet.find(")")
] ]
cached_spritesheet_url = self.cache_file( cached_spritesheet_url = self.cache_file(
"https://www.notion.so" + spritesheet_url "https://www.notion.so" + spritesheet_url
@ -608,6 +611,10 @@ class Parser:
# we're all done! # we're all done!
return processed_pages return processed_pages
def load(self, url, timeout=60):
    """Open ``url`` in the driver and block until the Notion page has loaded.

    Args:
        url: Address of the page to open with ``self.driver``.
        timeout: Maximum number of seconds to wait for the
            ``notion_page_loaded()`` condition. Defaults to 60, the value
            that was previously hard-coded here.

    Raises:
        TimeoutException: Propagated from ``WebDriverWait.until`` when the
            condition is not satisfied within ``timeout`` seconds; callers
            such as ``parse_page`` catch it.
    """
    self.driver.get(url)
    # Notion renders its content lazily, so wait for the explicit
    # "page loaded" condition instead of relying on driver.get() returning.
    WebDriverWait(self.driver, timeout).until(notion_page_loaded())
def run(self, url): def run(self, url):
start_time = time.time() start_time = time.time()
tot_processed_pages = self.parse_page(url) tot_processed_pages = self.parse_page(url)