Mirror of https://github.com/leoncvlt/loconotion.git, synced 2024-08-30 18:12:12 +00:00

Commit dd2a4028f5 (parent 58e776b423)
Added support for dark-themed notion pages (--dark-theme option)
.gitignore (vendored) · 5 changed lines
@@ -111,4 +111,7 @@ dmypy.json
 .env
 .vscode
 temp
 logs
+
+.idea/
+venv/
@@ -198,6 +198,7 @@ On top of this, the script can take these optional arguments:
                         Use a specific chromedriver executable instead of the
                         auto-installing one
   --single-page         Only parse the first page, then stop
+  --dark-theme          Use dark themed version of the target Notion.so page
   --timeout TIMEOUT     Time in seconds to wait for the loading of lazy-loaded
                         dynamic elements (default 5). If content from the page
                         seems to be missing, try increasing this value
@@ -34,6 +34,11 @@ def main():
     argparser.add_argument(
         "--single-page", action="store_true", help="Only parse the first page, then stop"
     )
+    argparser.add_argument(
+        "--dark-theme",
+        action="store_true",
+        help="Use dark themed version of the target Notion.so page",
+    )
     argparser.add_argument(
         "--timeout",
         type=int,
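A standalone sketch of how this flag behaves once parsed. The positional url argument, the parser description, and the vars() conversion are assumptions made for illustration; the later hunk only shows that the Parser reads the value via self.args.get("dark_theme").

import argparse

# Minimal, illustrative reproduction of the flag wiring above.
argparser = argparse.ArgumentParser(description="Example parser mirroring the options above")
argparser.add_argument("url", help="URL of the Notion.so page to parse")  # illustrative positional
argparser.add_argument(
    "--dark-theme",
    action="store_true",
    help="Use dark themed version of the target Notion.so page",
)

args = argparser.parse_args(["https://www.notion.so/example", "--dark-theme"])
config = vars(args)  # plain dict, so config.get("dark_theme") works like self.args.get(...) in the Parser
print(config.get("dark_theme"))  # True when --dark-theme is passed, False otherwise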
@@ -116,7 +116,7 @@ class Parser:
                 return {**site_config, **matching_page_config}
             else:
                 log.error(
-                    f"Matching page configuration for {url} was not a dict:"
+                    f"Matching page configuration for {token} was not a dict:"
                     f" {matching_page_config} - something went wrong"
                 )
         return site_config
@@ -244,16 +244,19 @@ class Parser:
         )

     def parse_page(self, url, processed_pages={}, index=None):
-        # if this is the first page being parse, set it as the index.html
-        if not index:
-            index = url
-
         log.info(f"Parsing page '{url}'")
         log.debug(f"Using page config: {self.get_page_config(url)}")
-        self.driver.get(url)

         try:
-            WebDriverWait(self.driver, 60).until(notion_page_loaded())
+            self.load(url)
+            if not index:
+                # if this is the first page being parse, set it as the index.html
+                index = url
+                # if dark theme is enabled, set local storage item and re-load the page
+                if self.args.get("dark_theme", True):
+                    log.debug(f"Dark theme is enabled")
+                    self.driver.execute_script("window.localStorage.setItem('theme','{\"mode\":\"dark\"}');")
+                    self.load(url)
         except TimeoutException as ex:
             log.critical(
                 "Timeout waiting for page content to load, or no content found."
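The added branch above is the heart of the feature: as the diff's own comment says, the dark theme is enabled by setting a localStorage item and re-loading the page. A condensed standalone sketch of the same sequence with Selenium; the Chrome driver setup and the target URL are placeholders, while the localStorage key and value are the ones used in the diff.

from selenium import webdriver

# Placeholder target; any public notion.so page would do.
url = "https://www.notion.so/Example-Page-0000000000000000000000000000000000"

driver = webdriver.Chrome()  # assumes a compatible chromedriver is available on PATH
driver.get(url)  # load once so the notion.so origin's localStorage is in scope

# Store the dark theme preference the way the Notion web app expects it,
# then load the page again so it is rendered with the dark palette.
driver.execute_script("window.localStorage.setItem('theme','{\"mode\":\"dark\"}');")
driver.get(url)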
@@ -402,8 +405,8 @@ class Parser:
                 style = cssutils.parseStyle(img["style"])
                 spritesheet = style["background"]
                 spritesheet_url = spritesheet[
-                    spritesheet.find("(") + 1 : spritesheet.find(")")
+                    spritesheet.find("(") + 1: spritesheet.find(")")
                 ]
                 cached_spritesheet_url = self.cache_file(
                     "https://www.notion.so" + spritesheet_url
                 )
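The reformatted slice above pulls the sprite URL out of the inline background shorthand. A standalone example of the same extraction; the inline style string and sprite path are invented for illustration.

import cssutils

# Example inline style as it might appear on a Notion icon <img>; the path is made up.
style = cssutils.parseStyle(
    "width: 16px; height: 16px; background: url(/images/example-sprite.svg) 0px -60px"
)
spritesheet = style["background"]
# Everything between the first "(" and the first ")" is the raw sprite URL.
spritesheet_url = spritesheet[spritesheet.find("(") + 1: spritesheet.find(")")]
print(spritesheet_url)  # e.g. /images/example-sprite.svg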
@@ -458,7 +461,7 @@ class Parser:
         # the link to the row item is equal to its data-block-id without dashes
         for table_view in soup.findAll("div", {"class": "notion-table-view"}):
             for table_row in table_view.findAll(
                 "div", {"class": "notion-collection-item"}
             ):
                 table_row_block_id = table_row["data-block-id"]
                 table_row_href = "/" + table_row_block_id.replace("-", "")
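For context, the comment in this hunk describes the row-link convention: a collection row's path is its data-block-id with the dashes stripped. A standalone BeautifulSoup sketch of that lookup; the HTML snippet and block id are invented for illustration.

from bs4 import BeautifulSoup

# Minimal stand-in for a rendered Notion table; the block id is made up.
html = (
    '<div class="notion-table-view">'
    '<div class="notion-collection-item" '
    'data-block-id="6b1f1d3e-58a1-4a51-9f2c-0123456789ab">Row</div>'
    '</div>'
)
soup = BeautifulSoup(html, "html.parser")

for table_view in soup.findAll("div", {"class": "notion-table-view"}):
    for table_row in table_view.findAll("div", {"class": "notion-collection-item"}):
        table_row_block_id = table_row["data-block-id"]
        # the link to the row item is equal to its data-block-id without dashes
        table_row_href = "/" + table_row_block_id.replace("-", "")
        print(table_row_href)  # /6b1f1d3e58a14a519f2c0123456789ab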
@@ -564,8 +567,8 @@ class Parser:
                     a["href"] = "#" + sub_page_href_tokens[-1]
                     a["class"] = a.get("class", []) + ["loconotion-anchor-link"]
                     if (
                         sub_page_href in processed_pages.keys()
                         or sub_page_href in sub_pages
                     ):
                         log.debug(
                             f"Original page for anchor link {sub_page_href}"
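The context in this hunk handles links that point to a page already queued or processed: the href is rewritten to an in-page anchor and the tag gets a loconotion-anchor-link class. A rough standalone sketch, under the assumption that sub_page_href_tokens comes from splitting the link on "#" (that split happens outside the lines shown); the example URL is made up.

from bs4 import BeautifulSoup

# Invented example link containing a block fragment after "#".
html = '<a href="https://www.notion.so/Page-6b1f1d3e58a14a519f2c0123456789ab#abcdef0123456789abcdef0123456789">open</a>'
soup = BeautifulSoup(html, "html.parser")
a = soup.a

sub_page_href = a["href"]
sub_page_href_tokens = sub_page_href.split("#")  # assumption: tokens come from splitting on the fragment marker
a["href"] = "#" + sub_page_href_tokens[-1]
a["class"] = a.get("class", []) + ["loconotion-anchor-link"]
print(a)  # the href now points at the in-page anchor instead of the duplicate page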
@@ -608,6 +611,10 @@ class Parser:
             # we're all done!
         return processed_pages

+    def load(self, url):
+        self.driver.get(url)
+        WebDriverWait(self.driver, 60).until(notion_page_loaded())
+
     def run(self, url):
         start_time = time.time()
         tot_processed_pages = self.parse_page(url)
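The new load() helper bundles navigation with an explicit wait on notion_page_loaded(), a custom wait condition defined elsewhere in the module. A hedged sketch of that general pattern; the readiness selector below is a placeholder, not necessarily the one loconotion checks.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait


class notion_page_loaded:
    """Illustrative custom wait condition: truthy once the page looks rendered."""

    def __call__(self, driver):
        # Placeholder probe: any element that only appears after Notion
        # finishes rendering would work here.
        return driver.find_elements(By.CLASS_NAME, "notion-presence-container")


driver = webdriver.Chrome()  # assumes a compatible chromedriver is on PATH
driver.get("https://www.notion.so/Example-Page-0000000000000000000000000000000000")
WebDriverWait(driver, 60).until(notion_page_loaded())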