Mirror of https://github.com/leoncvlt/loconotion.git (last synced 2024-08-30 18:12:12 +00:00)
Fixed requirements.txt & image url unescaping for default Notion images
This commit is contained in:
Parent commit: c124806bdb
This commit: 3c111236ba
@ -80,7 +80,7 @@ class Parser():
|
||||
|
||||
# set the output folder based on the site name
|
||||
self.dist_folder = Path(config.get("output", Path("dist") / site_name))
|
||||
log.info(f"Setting output path to {self.dist_folder}")
|
||||
log.info(f"Setting output path to '{self.dist_folder}'")
|
||||
|
||||
# check if the argument to clean the dist folder was passed
|
||||
if (self.args.get("clean", False)):
|
||||
@ -130,7 +130,7 @@ class Parser():
|
||||
# first check if the url has a custom slug configured in the config file
|
||||
custom_slug = self.get_page_config(url).get("slug", None)
|
||||
if custom_slug:
|
||||
log.debug(f"Custom slug found for url {url}: {custom_slug}")
|
||||
log.debug(f"Custom slug found for url '{url}': '{custom_slug}'")
|
||||
return custom_slug.strip("/") + (".html" if extension else "")
|
||||
else:
|
||||
# if not, clean up the existing slug
|
||||
@ -220,8 +220,8 @@ class Parser():
|
||||
if (not index):
|
||||
index = url;
|
||||
|
||||
log.info(f'Parsing page {url}')
|
||||
log.debug(f'Using page config: {self.get_page_config(url)}')
|
||||
log.info(f"Parsing page '{url}'")
|
||||
log.debug(f"Using page config: {self.get_page_config(url)}")
|
||||
self.driver.get(url)
|
||||
|
||||
# if ("This content does not exist" in self.driver.page_source):
|
||||
@ -311,9 +311,10 @@ class Parser():
|
||||
|
||||
# if the path starts with /, it's one of notion's predefined images
|
||||
if (img['src'].startswith('/')):
|
||||
# notion's images urls are in a weird format, need to sanitize them
|
||||
# notion's own default images urls are in a weird format, need to sanitize them
|
||||
img_src = 'https://www.notion.so' + img['src'].split("notion.so")[-1].replace("notion.so", "").split("?")[0]
|
||||
# img_src = urllib.parse.unquote(img_src)
|
||||
if (not '.amazonaws' in img_src):
|
||||
img_src = urllib.parse.unquote(img_src)
|
||||
|
||||
cached_image = self.cache_file(img_src)
|
||||
img['src'] = cached_image
|
||||
@ -433,7 +434,7 @@ class Parser():
|
||||
if (html_file in processed_pages.values()):
|
||||
log.error(f"Found duplicate pages with slug '{html_file}' - previous one will be overwritten." +
|
||||
"make sure that your notion pages names or custom slugs in the configuration files are unique")
|
||||
log.info(f"Exporting page {url} as {html_file}")
|
||||
log.info(f"Exporting page '{url}' as '{html_file}'")
|
||||
with open(self.dist_folder / html_file, "wb") as f:
|
||||
f.write(html_str.encode('utf-8').strip())
|
||||
processed_pages[url] = html_file
|
||||
@ -453,7 +454,7 @@ class Parser():
|
||||
total_processed_pages = self.parse_page(url)
|
||||
elapsed_time = time.time() - start_time
|
||||
formatted_time = '{:02d}:{:02d}:{:02d}'.format(int(elapsed_time // 3600), int(elapsed_time % 3600 // 60), int(elapsed_time % 60))
|
||||
log.info(f'Finished!\nヽ( ・‿・)ノ Processed {len(total_processed_pages)} pages in {formatted_time}')
|
||||
log.info(f'Finished!\n\n\tヽ( ・‿・)ノ Processed {len(total_processed_pages)} pages in {formatted_time}')
|
||||
|
||||
if __name__ == '__main__':
|
||||
# set up argument parser
|
||||
|
@ -1,20 +1,11 @@
|
||||
ansicon==1.89.0
|
||||
beautifulsoup4==4.9.0
|
||||
blessed==1.17.5
|
||||
beautifulsoup4==4.9.1
|
||||
certifi==2020.4.5.1
|
||||
chardet==3.0.4
|
||||
colorama==0.4.3
|
||||
commonmark==0.9.1
|
||||
cssutils==1.0.2
|
||||
idna==2.9
|
||||
jinxed==1.0.0
|
||||
pprintpp==0.4.0
|
||||
Pygments==2.6.1
|
||||
requests==2.23.0
|
||||
selenium==3.141.0
|
||||
six==1.14.0
|
||||
soupsieve==2.0
|
||||
soupsieve==2.0.1
|
||||
toml==0.10.1
|
||||
typing-extensions==3.7.4.2
|
||||
urllib3==1.25.9
|
||||
wcwidth==0.1.9
|
||||
|
Loading…
Reference in New Issue
Block a user