Added config file, custom fonts injection

This commit is contained in:
Leonardo Cavaletti 2020-05-17 10:40:18 +01:00
parent b0a0baf9fb
commit a70c8b4708
4 changed files with 170 additions and 65 deletions

View File

@ -1 +1,22 @@
loconotion # loconotion
Notion.so is a popular application where you can create your own workspace. It's very easy to use. Notion also offers the possibility of making a page (and its sub-pages) public on the web, and because of this several people choose to use Notion to manage their personal blog, portfolio, or some kind of simple website. Notion, however, does not support custom domains when doing so: your pages are stuck on the `notion.so` domain, with computer-generated URLs and slugs.
Some services like Super, HostingPotion, HostNotion and Fruition tried to work around this issue by relying on a clever hack using CloudFlare workers. This solution, however, has some disadvantages:
- Not free (Super, HostingPotion and HostNotion all charge a monthly fee; Fruition is open-source, but any domain with a decent number of daily visits will soon run up against CloudFlare's free tier limitations, forcing you to upgrade to the $5-or-more plan.)
- As the page is still hosted on Notion, it comes bundled with all their analytics, editing / collaboration JavaScript, vendor CSS, and other bloat, which causes the page to load at speeds that are not exactly appropriate for a simple blog / website. Running [this](https://www.notion.so/The-perfect-It-s-Always-Sunny-in-Philadelphia-episode-d08aaec2b24946408e8be0e9f2ae857e) example page through Google's [PageSpeed Insights](https://developers.google.com/speed/pagespeed/insights/) scores a measly 24 / 66 on mobile / desktop.
Enter Loconotion!
Loconotion is a tool that approaches this a bit differently. It lets Notion render the page, then parses it and saves a static version of the page to disk. While doing so, it strips out all the unnecessary bloat, and adds some extra CSS and JS to keep the nice features like mobile responsiveness working. It also saves all related images / assets, and parses any subpages as well while keeping links intact and cleaning up the URLs. The result? A faster, self-contained version of the page that keeps all of Notion's nice layouts and eye candy, ready to be deployed on your CDN of choice. For comparison, the same example page parsed with Loconotion and deployed on Netlify's free tier achieves a PageSpeed Insights score of 96 / 100!
This approach also offers the advantage of being able to inject anything into the pages, from custom fonts to additional meta tags for SEO, or custom analytics.
However, bear in mind that as we are effectively parsing a static version of the page, the following features will not work:
- All pages will open in their own page and not modals (this could be a pro, depending on how you look at it)
- Databases will be presented in their initial view - no switching views from table to gallery, for example.
- All editing features will be disabled - no ticking checkboxes or dragging kanban board cards around. This is usually not an issue, as public pages usually have changes locked.
- Dynamic elements won't update automatically - for example, the calendar will not highlight the current date.
Everything else, like embedding and dropdowns, should work as expected.

View File

@ -10,6 +10,8 @@ from rich.logging import RichHandler
from rich.progress import Progress from rich.progress import Progress
import urllib.parse import urllib.parse
import hashlib import hashlib
import toml
import argparse
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
@ -51,8 +53,7 @@ def download_file(url, destination):
# https://stackoverflow.com/questions/28521535/requests-how-to-disable-bypass-proxy # https://stackoverflow.com/questions/28521535/requests-how-to-disable-bypass-proxy
session = requests.Session() session = requests.Session()
session.trust_env = False session.trust_env = False
parsed_url = urllib.parse.urlparse(url) log.info(f"Downloading {url} to {destination}")
log.info(f"Downloading {parsed_url.scheme + parsed_url.netloc + parsed_url.path} to {destination}")
response = session.get(url) response = session.get(url)
Path(destination).parent.mkdir(parents=True, exist_ok=True) Path(destination).parent.mkdir(parents=True, exist_ok=True)
with open(destination, "wb") as f: with open(destination, "wb") as f:
@ -84,19 +85,18 @@ def download_file(url, destination):
# log.debug(f"File {destination} was already downloaded") # log.debug(f"File {destination} was already downloaded")
# return destination # return destination
class notion_page_loaded(object): # class notion_page_loaded(object):
"""An expectation for checking that a notion page has loaded. # """An expectation for checking that a notion page has loaded.
""" # """
def __call__(self, driver): # def __call__(self, driver):
notion_presence = len(driver.find_elements_by_class_name("notion-presence-container")) # notion_presence = len(driver.find_elements_by_class_name("notion-presence-container"))
loading_spinners = len(driver.find_elements_by_class_name("loading-spinner")); # loading_spinners = len(driver.find_elements_by_class_name("loading-spinner"));
# embed_ghosts = len(driver.find_elements_by_css_selector("div[embed-ghost]")); # # embed_ghosts = len(driver.find_elements_by_css_selector("div[embed-ghost]"));
log.debug(f"Waiting for page content to load (presence container: {notion_presence}, loaders: {loading_spinners} )") # log.debug(f"Waiting for page content to load (presence container: {notion_presence}, loaders: {loading_spinners} )")
if (notion_presence and not loading_spinners): # if (notion_presence and not loading_spinners):
return True # return True
else: # else:
return False # return False
class toggle_block_has_opened(object): class toggle_block_has_opened(object):
"""An expectation for checking that a notion toggle block has been opened. """An expectation for checking that a notion toggle block has been opened.
@ -120,14 +120,25 @@ class toggle_block_has_opened(object):
return False return False
class Parser(): class Parser():
def __init__(self, dist_folder): def __init__(self, config = {}):
self.dist_folder = Path(dist_folder) url = config.get("page", None)
self.driver = self.init_chromedriver() if not url:
log.error("No url specified")
return
# create output path if it doesn't exists self.driver = self.init_chromedriver()
self.config = config
# get the site name from the config, or make it up by cleaning the target page's slug
site_name = self.config.get("name", get_clean_slug(url, extension = False))
# set the output folder based on the site name, and create it if necessary
self.dist_folder = Path(config.get("output", Path("dist") / site_name))
self.dist_folder.mkdir(parents=True, exist_ok=True) self.dist_folder.mkdir(parents=True, exist_ok=True)
log.info(f"Setting output path to {self.dist_folder}") log.info(f"Setting output path to {self.dist_folder}")
self.run(url)
def init_chromedriver(self): def init_chromedriver(self):
log.info("Initialising chrome driver") log.info("Initialising chrome driver")
chrome_options = Options() chrome_options = Options()
@ -150,11 +161,16 @@ class Parser():
log.info(f'Parsing page {url}') log.info(f'Parsing page {url}')
self.driver.get(url) self.driver.get(url)
# if ("This content does not exist" in self.driver.page_source):
# log.error(f"No content found in {url}. Are you sure the page is set to public?")
# return
try: try:
# WebDriverWait(self.driver, 10).until(notion_page_loaded()) # WebDriverWait(self.driver, 10).until(notion_page_loaded())
WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'notion-presence-container'))) WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'notion-presence-container')))
except TimeoutException as ex: except TimeoutException as ex:
log.error("Timeout waiting for page content to load") log.error("Timeout waiting for page content to load, or no content found. Are you sure the page is set to public?")
return return
time.sleep(2) time.sleep(2)
@ -188,7 +204,7 @@ class Parser():
open_toggle_blocks() open_toggle_blocks()
# creates soup from the page to start parsing # creates soup from the page to start parsing
soup = BeautifulSoup(self.driver.page_source, "lxml") soup = BeautifulSoup(self.driver.page_source)
# process eventual embedded iframes # process eventual embedded iframes
for embed in soup.select('div[embed-ghost]'): for embed in soup.select('div[embed-ghost]'):
@ -199,40 +215,39 @@ class Parser():
# process meta tags # process meta tags
def set_meta_tag(prop_name, prop_value, content): def set_meta_tag(prop_name, prop_value, content):
log.debug(f"Setting meta tag {prop_value} to '{content}'")
tag = soup.find("meta", attrs = { prop_name : prop_value}) tag = soup.find("meta", attrs = { prop_name : prop_value})
if (tag): if (tag):
log.debug(f"Setting meta tag {prop_value} to {content}")
if (content): tag["content"] = content if (content): tag["content"] = content
else: tag.decompose(); else: tag.decompose();
else: else:
log.warn(f"Meta tag with {prop_name}: {prop_value} was not found") tag = soup.new_tag('meta')
tag.attrs[prop_name] = prop_value
tag.attrs['content'] = content
soup.head.append(tag)
set_meta_tag("name", "description", None) # clean up the default notion meta tags
set_meta_tag("name", "twitter:card", None) for tag in ["description", "twitter:card", "twitter:site", "twitter:title", "twitter:description", "twitter:image", "twitter:url", "apple-itunes-app"]:
set_meta_tag("name", "twitter:site", None) set_meta_tag("name", tag, None)
set_meta_tag("name", "twitter:title", None) for tag in ["og:site_name", "og:type", "og:url", "og:title", "og:description", "og:image"]:
set_meta_tag("name", "twitter:description", None) set_meta_tag("property", tag, None)
set_meta_tag("name", "twitter:image", None)
set_meta_tag("name", "twitter:url", None) # set custom meta tags
set_meta_tag("property", "og:site_name", None) for name, content in self.config.get("meta", {}).items():
set_meta_tag("property", "og:type", None) set_meta_tag("name", name, content)
set_meta_tag("property", "og:url", None)
set_meta_tag("property", "og:title", None)
set_meta_tag("property", "og:description", None)
set_meta_tag("property", "og:image", None)
set_meta_tag("name", "apple-itunes-app", None)
# process images # process images
cache_images = True cache_images = True
for img in soup.findAll('img'): for img in soup.findAll('img'):
if img.has_attr('src'): if img.has_attr('src'):
if (cache_images): if (cache_images and not 'data:image' in img['src']):
img_src = img['src'] img_src = img['src']
# if the path starts with /, it's one of notion's predefined images # if the path starts with /, it's one of notion's predefined images
if (img['src'].startswith('/')): if (img['src'].startswith('/')):
# notion's images urls are in a weird format, need to sanitize them # notion's images urls are in a weird format, need to sanitize them
img_src = 'https://www.notion.so' + img['src'].split("notion.so")[-1].replace("notion.so", "").split("?")[0] img_src = 'https://www.notion.so' + img['src'].split("notion.so")[-1].replace("notion.so", "").split("?")[0]
img_src = urllib.parse.unquote(img_src) #TODO
# generate an hashed id for the image filename based the url, # generate an hashed id for the image filename based the url,
# so we avoid re-downloading images we have already downloaded, # so we avoid re-downloading images we have already downloaded,
@ -286,27 +301,45 @@ class Parser():
toggle_content.attrs['loconotion-toggle-id'] = toggle_button.attrs['loconotion-toggle-id'] = toggle_id toggle_content.attrs['loconotion-toggle-id'] = toggle_button.attrs['loconotion-toggle-id'] = toggle_id
# embed custom google font # embed custom google font
custom_font = None fonts_selectors = {
if (custom_font): "site" : "div:not(.notion-code-block)",
custom_font_stylesheet_stylesheet = soup.new_tag("link") "navbar": ".notion-topbar div",
custom_font_stylesheet.attrs["rel"] = "stylesheet" "title" : ".notion-page-block, .notion-collection_view_page-block",
custom_font_stylesheet.attrs["href"] = f"https://fonts.googleapis.com/css2?family={custom_font}:wght@500;600;700&display=swap" "h1" : ".notion-header-block div",
soup.head.insert(-1, custom_font_stylesheet) "h2" : ".notion-sub_header-block div",
for app in soup.findAll('div',{'class':'notion-app-inner'}): "h3" : ".notion-sub_sub_header-block div",
style = cssutils.parseStyle(app['style']); "body" : ".notion-app-inner",
style['font-family'] = f"'{custom_font}', {style['font-family']}" "code" : ".notion-code-block *"
app['style'] = style.cssText }
# append custom stylesheet custom_fonts = self.config.get("fonts", {})
custom_css = soup.new_tag("link") if (custom_fonts):
custom_css.attrs["rel"] = "stylesheet" # append a stylesheet importing the google font for each unique font
custom_css.attrs["href"] = "loconotion.css" unique_custom_fonts = set(custom_fonts.values())
for font in unique_custom_fonts:
custom_font_stylesheet = soup.new_tag("link", rel="stylesheet",
href=f"https://fonts.googleapis.com/css2?family={font}:wght@500;600;700&display=swap")
soup.head.append(custom_font_stylesheet);
# go through each custom font, and add a css rule overriding the font-family
# to the font override stylesheet targetting the appropriate selector
font_override_stylesheet = soup.new_tag('style', type='text/css')
for target, custom_font in custom_fonts.items():
if custom_font and not target == "site":
log.debug(f"Setting {target} font-family to {custom_font}")
font_override_stylesheet.append(fonts_selectors[target] + " {font-family:" + custom_font + " !important}")
site_font = custom_fonts.get("site", None)
# process global site font last to more granular settings can override it
if (site_font):
log.debug(f"Setting global site font-family to {site_font}")
font_override_stylesheet.append(fonts_selectors["site"] + " {font-family:" + site_font + "}")
soup.head.append(font_override_stylesheet)
# append custom stylesheet and script
custom_css = soup.new_tag("link", rel="stylesheet", href="loconotion.css")
soup.head.insert(-1, custom_css) soup.head.insert(-1, custom_css)
custom_script = soup.new_tag("script", type="text/javascript", src="loconotion.js")
# append custom script
custom_script = soup.new_tag("script")
custom_script.attrs["type"] = "text/javascript"
custom_script.attrs["src"] = "loconotion.js"
soup.body.insert(-1, custom_script) soup.body.insert(-1, custom_script)
# find sub-pages and clean slugs / links # find sub-pages and clean slugs / links
@ -339,15 +372,30 @@ class Parser():
shutil.copyfile("loconotion.css", self.dist_folder / "loconotion.css"); shutil.copyfile("loconotion.css", self.dist_folder / "loconotion.css");
shutil.copyfile("loconotion.js", self.dist_folder / "loconotion.js"); shutil.copyfile("loconotion.js", self.dist_folder / "loconotion.js");
parser = argparse.ArgumentParser(description='Generate static websites from Notion.so pages')
parser.add_argument('target', help='The config file containing the site properties, or the url of the Notion.so page to generate the site from')
args = parser.parse_args()
if __name__ == '__main__': if __name__ == '__main__':
try: try:
url = "https://www.notion.so/leoncvlt-f276385bf5ce42969497f0b03aef907e" if urllib.parse.urlparse(args.target).scheme:
output_folder = Path("dist") / get_clean_slug(url, extension = False) try:
parser = Parser(output_folder) response = requests.get(args.target)
parser.run(url) if ("notion.so" in args.target):
# parser.run("https://www.notion.so/A-Notion-Page-03c403f4fdc94cc1b315b9469a8950ef") log.info("Initialising parser with simple page url")
# parser.run("https://www.notion.so/Media-be1a5c3e1c9640a0ab9ba0ba9b67e6a5") Parser({ "page" : args.target })
# parser.run('https://www.notion.so/leoncvlt-f276385bf5ce42969497f0b03aef907e') else:
log.error(f"{args.target} is not a notion.so page")
except requests.ConnectionError as exception:
log.error(f"{args.target} does not seem to be an existing web page")
else:
if Path(args.target).is_file():
with open(args.target) as f:
parsed_config = toml.loads(f.read())
log.info("Initialising parser with configuration file")
Parser(parsed_config)
else:
log.error(f"Config file {args.target} does not exists")
except KeyboardInterrupt: except KeyboardInterrupt:
log.error('Interrupted by user') log.error('Interrupted by user')
try: try:

16
requirements.txt Normal file
View File

@ -0,0 +1,16 @@
beautifulsoup4==4.9.0
certifi==2020.4.5.1
chardet==3.0.4
colorama==0.4.3
commonmark==0.9.1
cssutils==1.0.2
idna==2.9
pprintpp==0.4.0
Pygments==2.6.1
requests==2.23.0
rich==1.1.5
selenium==3.141.0
soupsieve==2.0
toml==0.10.1
typing-extensions==3.7.4.2
urllib3==1.25.9

20
test_site.toml Normal file
View File

@ -0,0 +1,20 @@
name = "Notion Test Site"
page = "https://www.notion.so/A-Notion-Page-03c403f4fdc94cc1b315b9469a8950ef"
[meta]
title = "Loconotion Test Site"
description = "A static site generated from a Notion.so page using Loconotion"
[fonts]
site = 'Roboto'
navbar = ''
title = ''
h1 = 'Nunito'
h2 = 'Nunito'
h3 = 'Nunito'
body = ''
code = ''
[custom]
js = []
css = []