mirror of
https://github.com/leoncvlt/loconotion.git
synced 2024-08-30 18:12:12 +00:00
Apply black formatter
This commit is contained in:
parent
c8a9dcbcd7
commit
ffc96882ed
@ -16,8 +16,7 @@ try:
|
|||||||
import toml
|
import toml
|
||||||
|
|
||||||
except ModuleNotFoundError as error:
|
except ModuleNotFoundError as error:
|
||||||
log.critical(
|
log.critical(f"ModuleNotFoundError: {error}. Have you installed the requirements?")
|
||||||
f"ModuleNotFoundError: {error}. Have you installed the requirements?")
|
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
|
|
||||||
@ -43,7 +42,9 @@ def get_args():
|
|||||||
help="Use a specific chromedriver executable instead of the auto-installing one",
|
help="Use a specific chromedriver executable instead of the auto-installing one",
|
||||||
)
|
)
|
||||||
argparser.add_argument(
|
argparser.add_argument(
|
||||||
"--single-page", action="store_true", help="Only parse the first page, then stop"
|
"--single-page",
|
||||||
|
action="store_true",
|
||||||
|
help="Only parse the first page, then stop",
|
||||||
)
|
)
|
||||||
argparser.add_argument(
|
argparser.add_argument(
|
||||||
"--dark-theme",
|
"--dark-theme",
|
||||||
@ -133,7 +134,7 @@ def init_parser(args, log):
|
|||||||
try:
|
try:
|
||||||
requests.get(args.target)
|
requests.get(args.target)
|
||||||
except requests.ConnectionError as exception:
|
except requests.ConnectionError as exception:
|
||||||
log.critical('Connection error')
|
log.critical("Connection error")
|
||||||
|
|
||||||
if "notion.so" in args.target or "notion.site" in args.target:
|
if "notion.so" in args.target or "notion.site" in args.target:
|
||||||
log.info("Initialising parser with simple page url")
|
log.info("Initialising parser with simple page url")
|
||||||
@ -145,12 +146,12 @@ def init_parser(args, log):
|
|||||||
elif Path(args.target).is_file():
|
elif Path(args.target).is_file():
|
||||||
with open(args.target, encoding="utf-8") as f:
|
with open(args.target, encoding="utf-8") as f:
|
||||||
parsed_config = toml.loads(f.read())
|
parsed_config = toml.loads(f.read())
|
||||||
log.info('Initialising parser with configuration file')
|
log.info("Initialising parser with configuration file")
|
||||||
log.debug(parsed_config)
|
log.debug(parsed_config)
|
||||||
parser = Parser(config=parsed_config, args=vars(args))
|
parser = Parser(config=parsed_config, args=vars(args))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
log.critical(f"Config file {args.target} does not exists")
|
log.critical(f"Config file {args.target} does not exist")
|
||||||
|
|
||||||
except FileNotFoundError as e:
|
except FileNotFoundError as e:
|
||||||
log.critical(f"FileNotFoundError: {e}")
|
log.critical(f"FileNotFoundError: {e}")
|
||||||
|
@ -181,8 +181,10 @@ class Parser:
|
|||||||
content_type = response.headers.get("content-type")
|
content_type = response.headers.get("content-type")
|
||||||
if content_type:
|
if content_type:
|
||||||
file_extension = mimetypes.guess_extension(content_type)
|
file_extension = mimetypes.guess_extension(content_type)
|
||||||
elif '%3f' in file_extension.lower():
|
elif "%3f" in file_extension.lower():
|
||||||
file_extension = re.split("%3f", file_extension, flags=re.IGNORECASE)[0]
|
file_extension = re.split(
|
||||||
|
"%3f", file_extension, flags=re.IGNORECASE
|
||||||
|
)[0]
|
||||||
destination = destination.with_suffix(file_extension)
|
destination = destination.with_suffix(file_extension)
|
||||||
|
|
||||||
Path(destination).parent.mkdir(parents=True, exist_ok=True)
|
Path(destination).parent.mkdir(parents=True, exist_ok=True)
|
||||||
@ -228,8 +230,8 @@ class Parser:
|
|||||||
if not self.args.get("non_headless", False):
|
if not self.args.get("non_headless", False):
|
||||||
chrome_options.add_argument("--headless")
|
chrome_options.add_argument("--headless")
|
||||||
chrome_options.add_argument("window-size=1920,1080")
|
chrome_options.add_argument("window-size=1920,1080")
|
||||||
chrome_options.add_argument('--no-sandbox')
|
chrome_options.add_argument("--no-sandbox")
|
||||||
chrome_options.add_argument('--disable-dev-shm-usage')
|
chrome_options.add_argument("--disable-dev-shm-usage")
|
||||||
chrome_options.add_argument("--log-level=3")
|
chrome_options.add_argument("--log-level=3")
|
||||||
chrome_options.add_argument("--silent")
|
chrome_options.add_argument("--silent")
|
||||||
chrome_options.add_argument("--disable-logging")
|
chrome_options.add_argument("--disable-logging")
|
||||||
@ -302,14 +304,18 @@ class Parser:
|
|||||||
# if dark theme is enabled, set local storage item and re-load the page
|
# if dark theme is enabled, set local storage item and re-load the page
|
||||||
if self.args.get("dark_theme", True):
|
if self.args.get("dark_theme", True):
|
||||||
log.debug("Dark theme is enabled")
|
log.debug("Dark theme is enabled")
|
||||||
self.driver.execute_script("window.localStorage.setItem('theme','{\"mode\":\"dark\"}');")
|
self.driver.execute_script(
|
||||||
|
"window.localStorage.setItem('theme','{\"mode\":\"dark\"}');"
|
||||||
|
)
|
||||||
self.load(url)
|
self.load(url)
|
||||||
|
|
||||||
# light theme is on by default
|
# light theme is on by default
|
||||||
# enable dark mode based on https://fruitionsite.com/ dark mode hack
|
# enable dark mode based on https://fruitionsite.com/ dark mode hack
|
||||||
if self.config.get('theme') == 'dark':
|
if self.config.get("theme") == "dark":
|
||||||
self.driver.execute_script("__console.environment.ThemeStore.setState({ mode: 'dark' });")
|
self.driver.execute_script(
|
||||||
\
|
"__console.environment.ThemeStore.setState({ mode: 'dark' });"
|
||||||
|
)
|
||||||
|
|
||||||
def scroll_to_the_bottom(self):
|
def scroll_to_the_bottom(self):
|
||||||
# scroll at the bottom of the notion-scroller element to load all elements
|
# scroll at the bottom of the notion-scroller element to load all elements
|
||||||
# continue once there are no changes in height after a timeout
|
# continue once there are no changes in height after a timeout
|
||||||
@ -392,7 +398,9 @@ class Parser:
|
|||||||
vendors_css.decompose()
|
vendors_css.decompose()
|
||||||
|
|
||||||
# collection selectors (List, Gallery, etc.) don't work, so remove them
|
# collection selectors (List, Gallery, etc.) don't work, so remove them
|
||||||
for collection_selector in soup.findAll("div", {"class": "notion-collection-view-select"}):
|
for collection_selector in soup.findAll(
|
||||||
|
"div", {"class": "notion-collection-view-select"}
|
||||||
|
):
|
||||||
collection_selector.decompose()
|
collection_selector.decompose()
|
||||||
|
|
||||||
# clean up the default notion meta tags
|
# clean up the default notion meta tags
|
||||||
@ -459,7 +467,7 @@ class Parser:
|
|||||||
spritesheet.find("(") + 1 : spritesheet.find(")")
|
spritesheet.find("(") + 1 : spritesheet.find(")")
|
||||||
]
|
]
|
||||||
cached_spritesheet_url = self.cache_file(
|
cached_spritesheet_url = self.cache_file(
|
||||||
f'https://www.notion.so{spritesheet_url}'
|
f"https://www.notion.so{spritesheet_url}"
|
||||||
)
|
)
|
||||||
|
|
||||||
style["background"] = spritesheet.replace(
|
style["background"] = spritesheet.replace(
|
||||||
@ -474,10 +482,14 @@ class Parser:
|
|||||||
# we don't need the vendors stylesheet
|
# we don't need the vendors stylesheet
|
||||||
if "vendors~" in link["href"]:
|
if "vendors~" in link["href"]:
|
||||||
continue
|
continue
|
||||||
cached_css_file = self.cache_file(f'https://www.notion.so{link["href"]}')
|
cached_css_file = self.cache_file(
|
||||||
|
f'https://www.notion.so{link["href"]}'
|
||||||
|
)
|
||||||
# files in the css file might be referenced with a relative path,
|
# files in the css file might be referenced with a relative path,
|
||||||
# so store the path of the current css file
|
# so store the path of the current css file
|
||||||
parent_css_path = os.path.split(urllib.parse.urlparse(link["href"]).path)[0]
|
parent_css_path = os.path.split(
|
||||||
|
urllib.parse.urlparse(link["href"]).path
|
||||||
|
)[0]
|
||||||
# open the locally saved file
|
# open the locally saved file
|
||||||
with open(self.dist_folder / cached_css_file, "rb+") as f:
|
with open(self.dist_folder / cached_css_file, "rb+") as f:
|
||||||
stylesheet = cssutils.parseString(f.read())
|
stylesheet = cssutils.parseString(f.read())
|
||||||
@ -490,9 +502,19 @@ class Parser:
|
|||||||
rule.style["src"].split("url(")[-1].split(")")[0]
|
rule.style["src"].split("url(")[-1].split(")")[0]
|
||||||
)
|
)
|
||||||
# assemble the url given the current css path
|
# assemble the url given the current css path
|
||||||
font_url = "/".join(p.strip("/") for p in ["https://www.notion.so", parent_css_path, font_file] if p.strip("/"))
|
font_url = "/".join(
|
||||||
|
p.strip("/")
|
||||||
|
for p in [
|
||||||
|
"https://www.notion.so",
|
||||||
|
parent_css_path,
|
||||||
|
font_file,
|
||||||
|
]
|
||||||
|
if p.strip("/")
|
||||||
|
)
|
||||||
# don't hash the font files filenames, rather get filename only
|
# don't hash the font files filenames, rather get filename only
|
||||||
cached_font_file = self.cache_file(font_url, Path(font_file).name)
|
cached_font_file = self.cache_file(
|
||||||
|
font_url, Path(font_file).name
|
||||||
|
)
|
||||||
rule.style["src"] = f"url({cached_font_file})"
|
rule.style["src"] = f"url({cached_font_file})"
|
||||||
# commit stylesheet edits to file
|
# commit stylesheet edits to file
|
||||||
f.seek(0)
|
f.seek(0)
|
||||||
@ -531,9 +553,15 @@ class Parser:
|
|||||||
table_row_block_id = table_row["data-block-id"]
|
table_row_block_id = table_row["data-block-id"]
|
||||||
table_row_href = "/" + table_row_block_id.replace("-", "")
|
table_row_href = "/" + table_row_block_id.replace("-", "")
|
||||||
row_target_span = table_row.find("span")
|
row_target_span = table_row.find("span")
|
||||||
row_target_span["style"] = row_target_span["style"].replace("pointer-events: none;","")
|
row_target_span["style"] = row_target_span["style"].replace(
|
||||||
|
"pointer-events: none;", ""
|
||||||
|
)
|
||||||
row_link_wrapper = soup.new_tag(
|
row_link_wrapper = soup.new_tag(
|
||||||
"a", attrs={"href": table_row_href, "style": "cursor: pointer; color: inherit; text-decoration: none; fill: inherit;"}
|
"a",
|
||||||
|
attrs={
|
||||||
|
"href": table_row_href,
|
||||||
|
"style": "cursor: pointer; color: inherit; text-decoration: none; fill: inherit;",
|
||||||
|
},
|
||||||
)
|
)
|
||||||
row_target_span.wrap(row_link_wrapper)
|
row_target_span.wrap(row_link_wrapper)
|
||||||
|
|
||||||
@ -609,7 +637,7 @@ class Parser:
|
|||||||
# destination = (self.dist_folder / source.name)
|
# destination = (self.dist_folder / source.name)
|
||||||
# shutil.copyfile(source, destination)
|
# shutil.copyfile(source, destination)
|
||||||
injected_tag[attr] = str(cached_custom_file) # source.name
|
injected_tag[attr] = str(cached_custom_file) # source.name
|
||||||
log.debug(f'Injecting <{section}> tag: {injected_tag}')
|
log.debug(f"Injecting <{section}> tag: {injected_tag}")
|
||||||
soup.find(section).append(injected_tag)
|
soup.find(section).append(injected_tag)
|
||||||
|
|
||||||
def inject_loconotion_script_and_css(self, soup):
|
def inject_loconotion_script_and_css(self, soup):
|
||||||
@ -629,19 +657,23 @@ class Parser:
|
|||||||
# find sub-pages and clean slugs / links
|
# find sub-pages and clean slugs / links
|
||||||
subpages = []
|
subpages = []
|
||||||
parse_links = not self.get_page_config(url).get("no-links", False)
|
parse_links = not self.get_page_config(url).get("no-links", False)
|
||||||
for a in soup.find_all('a', href=True):
|
for a in soup.find_all("a", href=True):
|
||||||
sub_page_href = a["href"]
|
sub_page_href = a["href"]
|
||||||
if sub_page_href.startswith("/"):
|
if sub_page_href.startswith("/"):
|
||||||
sub_page_href = f'{hrefDomain}/{a["href"].split("/")[len(a["href"].split("/"))-1]}'
|
sub_page_href = (
|
||||||
|
f'{hrefDomain}/{a["href"].split("/")[len(a["href"].split("/"))-1]}'
|
||||||
|
)
|
||||||
log.info(f"Got this as href {sub_page_href}")
|
log.info(f"Got this as href {sub_page_href}")
|
||||||
if sub_page_href.startswith(hrefDomain):
|
if sub_page_href.startswith(hrefDomain):
|
||||||
if parse_links or not len(a.find_parents("div", class_="notion-scroller")):
|
if parse_links or not len(
|
||||||
|
a.find_parents("div", class_="notion-scroller")
|
||||||
|
):
|
||||||
# if the link is an anchor link,
|
# if the link is an anchor link,
|
||||||
# check if the page hasn't already been parsed
|
# check if the page hasn't already been parsed
|
||||||
if "#" in sub_page_href:
|
if "#" in sub_page_href:
|
||||||
sub_page_href_tokens = sub_page_href.split("#")
|
sub_page_href_tokens = sub_page_href.split("#")
|
||||||
sub_page_href = sub_page_href_tokens[0]
|
sub_page_href = sub_page_href_tokens[0]
|
||||||
a["href"] = f'#{sub_page_href_tokens[-1]}'
|
a["href"] = f"#{sub_page_href_tokens[-1]}"
|
||||||
a["class"] = a.get("class", []) + ["loconotion-anchor-link"]
|
a["class"] = a.get("class", []) + ["loconotion-anchor-link"]
|
||||||
if (
|
if (
|
||||||
sub_page_href in self.processed_pages.keys()
|
sub_page_href in self.processed_pages.keys()
|
||||||
@ -668,11 +700,11 @@ class Parser:
|
|||||||
del a["href"]
|
del a["href"]
|
||||||
a.name = "span"
|
a.name = "span"
|
||||||
# remove pointer cursor styling on the link and all children
|
# remove pointer cursor styling on the link and all children
|
||||||
for child in ([a] + a.find_all()):
|
for child in [a] + a.find_all():
|
||||||
if (child.has_attr("style")):
|
if child.has_attr("style"):
|
||||||
style = cssutils.parseStyle(child['style'])
|
style = cssutils.parseStyle(child["style"])
|
||||||
style['cursor'] = "default"
|
style["cursor"] = "default"
|
||||||
child['style'] = style.cssText
|
child["style"] = style.cssText
|
||||||
return subpages
|
return subpages
|
||||||
|
|
||||||
def export_parsed_page(self, url, index, soup):
|
def export_parsed_page(self, url, index, soup):
|
||||||
|
Loading…
Reference in New Issue
Block a user