mirror of
https://github.com/leoncvlt/loconotion.git
synced 2024-08-30 18:12:12 +00:00
Added support for dark-themed notion pages (--dark-theme option)
This commit is contained in:
parent
58e776b423
commit
dd2a4028f5
5
.gitignore
vendored
5
.gitignore
vendored
@ -111,4 +111,7 @@ dmypy.json
|
||||
.env
|
||||
.vscode
|
||||
temp
|
||||
logs
|
||||
logs
|
||||
|
||||
.idea/
|
||||
venv/
|
@ -198,6 +198,7 @@ On top of this, the script can take these optional arguments:
|
||||
Use a specific chromedriver executable instead of the
|
||||
auto-installing one
|
||||
--single-page Only parse the first page, then stop
|
||||
--dark-theme Use dark themed version of the target Notion.so page
|
||||
--timeout TIMEOUT Time in seconds to wait for the loading of lazy-loaded
|
||||
dynamic elements (default 5). If content from the page
|
||||
seems to be missing, try increasing this value
|
||||
|
@ -34,6 +34,11 @@ def main():
|
||||
argparser.add_argument(
|
||||
"--single-page", action="store_true", help="Only parse the first page, then stop"
|
||||
)
|
||||
argparser.add_argument(
|
||||
"--dark-theme",
|
||||
action="store_true",
|
||||
help="Use dark themed version of the target Notion.so page",
|
||||
)
|
||||
argparser.add_argument(
|
||||
"--timeout",
|
||||
type=int,
|
||||
|
@ -116,7 +116,7 @@ class Parser:
|
||||
return {**site_config, **matching_page_config}
|
||||
else:
|
||||
log.error(
|
||||
f"Matching page configuration for {url} was not a dict:"
|
||||
f"Matching page configuration for {token} was not a dict:"
|
||||
f" {matching_page_config} - something went wrong"
|
||||
)
|
||||
return site_config
|
||||
@ -244,16 +244,19 @@ class Parser:
|
||||
)
|
||||
|
||||
def parse_page(self, url, processed_pages={}, index=None):
|
||||
# if this is the first page being parsed, set it as the index.html
|
||||
if not index:
|
||||
index = url
|
||||
|
||||
log.info(f"Parsing page '{url}'")
|
||||
log.debug(f"Using page config: {self.get_page_config(url)}")
|
||||
self.driver.get(url)
|
||||
|
||||
try:
|
||||
WebDriverWait(self.driver, 60).until(notion_page_loaded())
|
||||
self.load(url)
|
||||
if not index:
|
||||
# if this is the first page being parsed, set it as the index.html
|
||||
index = url
|
||||
# if dark theme is enabled, set local storage item and re-load the page
|
||||
if self.args.get("dark_theme", True):
|
||||
log.debug(f"Dark theme is enabled")
|
||||
self.driver.execute_script("window.localStorage.setItem('theme','{\"mode\":\"dark\"}');")
|
||||
self.load(url)
|
||||
except TimeoutException as ex:
|
||||
log.critical(
|
||||
"Timeout waiting for page content to load, or no content found."
|
||||
@ -402,8 +405,8 @@ class Parser:
|
||||
style = cssutils.parseStyle(img["style"])
|
||||
spritesheet = style["background"]
|
||||
spritesheet_url = spritesheet[
|
||||
spritesheet.find("(") + 1 : spritesheet.find(")")
|
||||
]
|
||||
spritesheet.find("(") + 1: spritesheet.find(")")
|
||||
]
|
||||
cached_spritesheet_url = self.cache_file(
|
||||
"https://www.notion.so" + spritesheet_url
|
||||
)
|
||||
@ -458,7 +461,7 @@ class Parser:
|
||||
# the link to the row item is equal to its data-block-id without dashes
|
||||
for table_view in soup.findAll("div", {"class": "notion-table-view"}):
|
||||
for table_row in table_view.findAll(
|
||||
"div", {"class": "notion-collection-item"}
|
||||
"div", {"class": "notion-collection-item"}
|
||||
):
|
||||
table_row_block_id = table_row["data-block-id"]
|
||||
table_row_href = "/" + table_row_block_id.replace("-", "")
|
||||
@ -564,8 +567,8 @@ class Parser:
|
||||
a["href"] = "#" + sub_page_href_tokens[-1]
|
||||
a["class"] = a.get("class", []) + ["loconotion-anchor-link"]
|
||||
if (
|
||||
sub_page_href in processed_pages.keys()
|
||||
or sub_page_href in sub_pages
|
||||
sub_page_href in processed_pages.keys()
|
||||
or sub_page_href in sub_pages
|
||||
):
|
||||
log.debug(
|
||||
f"Original page for anchor link {sub_page_href}"
|
||||
@ -608,6 +611,10 @@ class Parser:
|
||||
# we're all done!
|
||||
return processed_pages
|
||||
|
||||
def load(self, url, timeout=60):
    """Navigate the driver to ``url`` and block until the page has loaded.

    Args:
        url: Address of the page to open with ``self.driver``.
        timeout: Maximum number of seconds to wait for the
            ``notion_page_loaded()`` condition. Defaults to 60, the value
            that was previously hard-coded here.

    Raises:
        TimeoutException: Propagated from ``WebDriverWait.until`` when the
            condition is not satisfied within ``timeout`` seconds.
    """
    self.driver.get(url)
    # Block until the custom wait condition reports that the page is ready.
    WebDriverWait(self.driver, timeout).until(notion_page_loaded())
|
||||
|
||||
def run(self, url):
|
||||
start_time = time.time()
|
||||
tot_processed_pages = self.parse_page(url)
|
||||
|
Loading…
Reference in New Issue
Block a user