From 3ff70de1b20f15740362717025b299a3ac5eec32 Mon Sep 17 00:00:00 2001 From: Leonardo Cavaletti Date: Sun, 17 May 2020 18:11:17 +0100 Subject: [PATCH] Implemented custom elements injection --- example/custom-script.js | 15 +++++ example/favicon-16x16.png | Bin 0 -> 1183 bytes example/favicon-32x32.png | Bin 0 -> 1731 bytes loconotion.css | 2 +- loconotion.py | 117 ++++++++++++++++++++------------------ test_site.toml | 60 ++++++++++++++++--- 6 files changed, 130 insertions(+), 64 deletions(-) create mode 100644 example/custom-script.js create mode 100644 example/favicon-16x16.png create mode 100644 example/favicon-32x32.png diff --git a/example/custom-script.js b/example/custom-script.js new file mode 100644 index 0000000..34f9c79 --- /dev/null +++ b/example/custom-script.js @@ -0,0 +1,15 @@ +console.log(` +Hello! I am running from a script injected on the page by Loconotion 🎉 +This could be an analytics script, real-time chat support script, or anything you want really. +`); +fetch("https://api.quotable.io/random") + .then((data) => { + return data.json(); + }) + .then((response) => { + console.log("Here's a quote for your time:"); + console.log(response.content + " --" + response.author); + }) + .catch((error) => { + console.log(error); + }); diff --git a/example/favicon-16x16.png b/example/favicon-16x16.png new file mode 100644 index 0000000000000000000000000000000000000000..8af7b3687c16938e0412cfb234a10c4e7b2b1c71 GIT binary patch literal 1183 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstU$g(vPY0F z14ES>14Ba#1H&(%P{RubhEf9thF1v;3|2E37{m+a>+9Hk`tw?!PiuV`s{aF}1`q+A1Vm^KfCvIfpgaHX=Y`9{#UTu!ED(KK z>kIS6Ra zZ&}8v-0`Yy?(JK%?Y@>|djTE6nB?v5($;g&Iu^*`EbxddW?;}$24TjErS@e&LG}_) zUsv`gtb*LS{C8WPC<2B4d%8G=NL-ffer|s_LB#F->sw2CS4Qb>b?T1l_060dr8s3# zS#EB}-tE8t-Jdis|M{PJ>D~DvwU;qTO9Z6|1tdUN>}pb%~32UVPu% z_pI*P3(wRh-Lhsncuuc#g*6Kwzo%`_&HMN4tU0dOANX|0=vjBO^PvwMJ5Jc2D#)ox zG7_4hwPD)jeHjxHeF7gFM_5>^J7?4_TycZz)+3=;r?ojXB4fQ@?U1{F!^J1}WA2Tc 
zcW>K!$hTd5^kT;R1DQ?Q=^YFm&&6#fZ)G;uTkm6BT6;ME{Un9^_BKUVE2_*w*6lgL zu|EIhcm8?GpEYmCI~f1|x;E2(fBmkd|F@*uR$JXP?ga*gYKdz^NlIc#s#S7PDv)9@ zGB7gGH89mRG!HQ}v@$WZGBnjTFtRc*$lAUCCyIvL{FKbJO57TxpRt3oa(GopL`hI$ zxk5ovep+TuszOO+L8?M#K}j+LL&coOpLjS5!!$Hb`JX=H`80@uS(#fenOj&{*n6@F zv#^3ogUR6(X64Nx3a4*eIdSC75t$?GryD#Lc0Q4 literal 0 HcmV?d00001 diff --git a/example/favicon-32x32.png b/example/favicon-32x32.png new file mode 100644 index 0000000000000000000000000000000000000000..2b78e38693d82306007c1814bbbeaf0f8c99966f GIT binary patch literal 1731 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE3?yBabR7dyEa{HEjtmSN`?>!lvVtU&J%W50 z7^>757#dm_7=8hT8eT9klo~KFyh>nTu$sZZAYL$MSD+0815;3dPlzi}!H2bePTi+~ z02%-P|7RBUYy+mM!zDp}!9bctBYVxw zPk;X2Se2>9Y&Pxw@4J(Xm?S%1K5r7^WHdSWZ;!rSvHkUnF~`^byfmwzaL$!qKUVKQ z5+xCLqWz0#-#612MXXb_#5Ip9sxn4L+N``h@nN;}wYhWto=+C|Q1WY!FUJPwugh4t zo_C1^96GrD;{s!*`XAfe*jApNa3+TL?riVe>>NYU@6oKAJ6<%uFNhpBtIIEGl9 z&Yko8|FP)!t$=)>A!AfJ3LhXYE2PH4g zFfwZA**a6OLFe~|6-yo}Z(>YX!rWq?RpMvjcN9fEFI}(;(A66 zN4$ZYj(t|ttAJn%w-#P>?>&&5B$e1&3WUAz=5>0+f%b=_T(rhSRY~Z)xIAXnCUjD z-01BiLDmJCyT4aRHvHSGa@48F>L&q8Uh?LX63>MCN4g{>csKKGJY(pn$@!^ zn7+(cS#T{!d7f46vm70TRi!L*f;YUX`OCU2;IFI8`=u3iM<(?D%J1m5GUgTguI79R&%eJg>WQ9I!?lYNL4QrW ze%$-ZF1zPSq?`DmhIXdTEw)vEo@9onZ})#wT@}c1uq*1V{i{l~yfx2ucIn5t3B3(p z@9m=Yr6uyddH(Ly{qOEpzl>b4eDCU&itp-v{8)K?@`nu)pMIXco`1jokIvt(zt6`X z{>OgF*WX-pN9Z$PDpD9nO2EggY+|YP;nn#6%tVrlvu7%P?VpRnUkteQdy9ykXcZY%)n4F z=kX^Vj>0evjZ^-o&v-r!VqjM0)=TCVRu=Z2EW#|T;L>1nIE7hxbBMy}8&^&oIdeqj h2>a;>hnG@t+g literal 0 HcmV?d00001 diff --git a/loconotion.css b/loconotion.css index 437752f..bcc226f 100644 --- a/loconotion.css +++ b/loconotion.css @@ -17,7 +17,7 @@ div[role="button"]:not(.notion-record-icon):hover { /* normalizes banner width */ .notion-scroller > div > div:not([class]) { width: 100% !important; - max-width: 900px !important; + max-width: 960px !important; padding-left: 0 !important; padding-right: 0 !important; } diff --git a/loconotion.py b/loconotion.py index f1fa1d9..c3e411f 100644 --- 
a/loconotion.py +++ b/loconotion.py @@ -34,45 +34,6 @@ def setup_logger(name): log = setup_logger("loconotion-logger") -def download_file(url, destination): - if not Path(destination).is_file(): - # Disabling proxy speeds up requests time - # https://stackoverflow.com/questions/45783655/first-https-request-takes-much-more-time-than-the-rest - # https://stackoverflow.com/questions/28521535/requests-how-to-disable-bypass-proxy - session = requests.Session() - session.trust_env = False - log.info(f"Downloading {url} to {destination}") - response = session.get(url) - Path(destination).parent.mkdir(parents=True, exist_ok=True) - with open(destination, "wb") as f: - f.write(response.content) - else: - log.debug(f"File {destination} was already downloaded") - return destination - -# def rich_download_file(url, destination): -# if not Path(destination).is_file(): -# progress = Progress(auto_refresh = True) -# # Disabling proxy speeds up requests time -# session = requests.Session() -# session.trust_env = False -# Path(destination).parent.mkdir(parents=True, exist_ok=True) -# with open(destination, 'wb') as f: -# response = session.get(url, stream=True) -# total = response.headers.get('content-length') -# task_id = progress.add_task(url) -# if total is None: -# f.write(response.content) -# else: -# progress.update(task_id, total=int(total)) -# for data in response.iter_content(chunk_size=4096): -# f.write(data) -# progress.update(task_id, advance=len(data)) -# progress.update(task_id, completed =int(total)) -# else: -# log.debug(f"File {destination} was already downloaded") -# return destination - # class notion_page_loaded(object): # """An expectation for checking that a notion page has loaded. 
# """ @@ -135,7 +96,7 @@ class Parser(): if (site_config.get("slug", None)): log.error("'slug' parameter has no effect in the [site] table, and should only present in page tables.") del site_config['slug'] - + # find a table in the configuration file whose key contains the passed token string matching_page_config = [value for key, value in self.config.items() if key.lower() in token] if (matching_page_config): @@ -155,10 +116,10 @@ class Parser(): custom_slug = self.get_page_config(url).get("slug", None) if custom_slug: log.debug(f"Custom slug found for url {url}: {custom_slug}") - return custom_slug.replace('/', '') + (".html" if extension else "") + return custom_slug.strip("/") + (".html" if extension else "") else: # if not, clean up the existing slug - path = urllib.parse.urlparse(url).path.replace('/', '') + path = urllib.parse.urlparse(url).path.strip("/") if ("-" in path and len(path.split("-")) > 1): # a standard notion page looks like the-page-title-[uiid] # strip the uuid and keep the page title only @@ -169,6 +130,24 @@ class Parser(): path = path.split("?")[0].lower() return path + (".html" if extension else "") + def cache_file(self, url, filename = None): + if (not filename): filename = url + destination = self.dist_folder / filename + if not Path(destination).is_file(): + # Disabling proxy speeds up requests time + # https://stackoverflow.com/questions/45783655/first-https-request-takes-much-more-time-than-the-rest + # https://stackoverflow.com/questions/28521535/requests-how-to-disable-bypass-proxy + session = requests.Session() + session.trust_env = False + log.info(f"Downloading '{url}' to '{destination}'") + response = session.get(url) + Path(destination).parent.mkdir(parents=True, exist_ok=True) + with open(destination, "wb") as f: + f.write(response.content) + else: + log.debug(f"File '{destination}' was already downloaded") + return destination + def init_chromedriver(self): log.info("Initialising chrome driver") chrome_options = Options() @@ 
-272,16 +251,17 @@ class Parser(): if (img['src'].startswith('/')): # notion's images urls are in a weird format, need to sanitize them img_src = 'https://www.notion.so' + img['src'].split("notion.so")[-1].replace("notion.so", "").split("?")[0] - img_src = urllib.parse.unquote(img_src) #TODO + img_src = urllib.parse.unquote(img_src) # generate an hashed id for the image filename based the url, # so we avoid re-downloading images we have already downloaded, # and figure out the filename from the url (I know, just this once) img_extension = Path(urllib.parse.urlparse(img_src).path).suffix + #TODO: unsplash images don't have an extension in the url (they work though) img_name = hashlib.sha1(str.encode(img_src)).hexdigest(); img_file = img_name + img_extension - download_file(img_src, self.dist_folder / img_file) + self.cache_file(img_src, img_file) img['src'] = img_file else: if (img['src'].startswith('/')): @@ -293,8 +273,8 @@ class Parser(): # we don't need the vendors stylesheet if ("vendors~" in link['href']): continue - css_file = link['href'].replace('/', '') - saved_css_file = download_file('https://www.notion.so' + link['href'], self.dist_folder / css_file) + css_file = link['href'].strip("/") + saved_css_file = self.cache_file('https://www.notion.so' + link['href'], css_file) with open(saved_css_file, 'rb') as f: stylesheet = cssutils.parseString(f.read()) # open the stylesheet and check for any font-face rule, @@ -302,7 +282,7 @@ class Parser(): if rule.type == cssutils.css.CSSRule.FONT_FACE_RULE: # if any are found, download the font file font_file = rule.style['src'].split("url(/")[-1].split(") format")[0] - download_file(f'https://www.notion.so/{font_file}', self.dist_folder / font_file) + self.cache_file(f'https://www.notion.so/{font_file}', font_file) link['href'] = css_file # remove scripts and other tags we don't want / need @@ -312,6 +292,8 @@ class Parser(): intercom_div.decompose(); for overlay_div in 
soup.findAll('div',{'class':'notion-overlay-container'}): overlay_div.decompose(); + for vendors_css in soup.find_all("link", href=lambda x: x and 'vendors~' in x): + vendors_css.decompose(); # add our custom logic to all toggle blocks for toggle_block in soup.findAll('div',{'class':'notion-toggle-block'}): @@ -336,15 +318,16 @@ class Parser(): "body" : ".notion-app-inner", "code" : ".notion-code-block *" } - custom_fonts = self.get_page_config(url).get("fonts", {}) if (custom_fonts): # append a stylesheet importing the google font for each unique font unique_custom_fonts = set(custom_fonts.values()) - for font in unique_custom_fonts: - custom_font_stylesheet = soup.new_tag("link", rel="stylesheet", - href=f"https://fonts.googleapis.com/css2?family={font}:wght@500;600;700&display=swap") - soup.head.append(custom_font_stylesheet); + for font in unique_custom_fonts: + if (font): + google_fonts_embed_name = font.replace(" ", "+") + font_href = f"https://fonts.googleapis.com/css2?family={google_fonts_embed_name}:wght@500;600;700&display=swap" + custom_font_stylesheet = soup.new_tag("link", rel="stylesheet", href=font_href) + soup.head.append(custom_font_stylesheet); # go through each custom font, and add a css rule overriding the font-family # to the font override stylesheet targetting the appropriate selector @@ -356,12 +339,33 @@ class Parser(): site_font = custom_fonts.get("site", None) # process global site font last to more granular settings can override it if (site_font): - log.debug(f"Setting global site font-family to {site_font}") + log.debug(f"Setting global site font-family to {site_font}"), font_override_stylesheet.append(fonts_selectors["site"] + " {font-family:" + site_font + "}") - + # finally append the font overrides stylesheets to the page soup.head.append(font_override_stylesheet) - # append custom stylesheet and script + # inject any custom elements to the page + custom_injects = self.get_page_config(url).get("inject", {}) + def 
injects_custom_tags(section): + section_custom_injects = custom_injects.get(section, {}) + for tag, elements in section_custom_injects.items(): + for element in elements: + injected_tag = soup.new_tag(tag) + for attr, value in element.items(): + injected_tag[attr] = value + # if the value refers to a file, copy it to the dist folder + if (attr.lower() == "href" or attr.lower() == "src"): + log.debug(f"Copying injected file '{value}'") + source = (Path.cwd() / value.strip("/")) + destination = (self.dist_folder / source.name) + shutil.copyfile(source, destination) + injected_tag[attr] = source.name + log.debug(f"Injecting <{section}> tag: {str(injected_tag)}") + soup.find(section).append(injected_tag) + injects_custom_tags("head") + injects_custom_tags("body") + + # inject loconotion's custom stylesheet and script custom_css = soup.new_tag("link", rel="stylesheet", href="loconotion.css") soup.head.insert(-1, custom_css) custom_script = soup.new_tag("script", type="text/javascript", src="loconotion.js") @@ -421,6 +425,9 @@ if __name__ == '__main__': Parser(parsed_config) else: log.critical(f"Config file {args.target} does not exists") + except FileNotFoundError as e: + log.critical(f'FileNotFoundError: {e}') + sys.exit(0) except KeyboardInterrupt: log.critical('Interrupted by user') try: diff --git a/test_site.toml b/test_site.toml index c2a6b64..8b5d677 100644 --- a/test_site.toml +++ b/test_site.toml @@ -1,28 +1,72 @@ name = "Notion Test Site" +# the notion.so page to being parsing from. This page will become the index.html +# of the generated site, and loconotation will parse all sub-pages present on the page. 
page = "https://www.notion.so/A-Notion-Page-03c403f4fdc94cc1b315b9469a8950ef" +# this site table defines override settings for the whole site +# later on we will see how to define settings for a single page [site] + ## custom meta tags ## [[site.meta]] name = "title" content = "Loconotion Test Site" - [[site.meta]] name = "description" content = "A static site generated from a Notion.so page using Loconotion" + ## custom site fonts ## + # you can specify the name of a google font to use on the site, use the font embed name + # (if in doubt select a style on fonts.google.com and navigate to the "embed" tab to check the name under CSS rules) + # keys control the font of the following elements: + # site: changes the font for the whole page (apart from code blocks) but the following settings override it + # navbar: site breadcrumbs on the top-left of the page + # title: page title (under the icon) + # h1 / h2 / h3: heading blocks + # body: non-heading text on the page + # code: text inside code blocks [site.fonts] site = 'Roboto' navbar = '' - title = '' - h1 = 'Nunito' - h2 = 'Nunito' - h3 = 'Nunito' + title = 'Montserrat' + h1 = 'Montserrat' + h2 = 'Montserrat' + h3 = '' body = '' code = '' -[d2fa06f244e64f66880bb0491f58223d] #list page + ## custom element injection ## + # 'head' or 'body' to set where the element will be injected + # the next dotted key represents the tag to inject, with the table values being the tag attributes + # e.g. the following injects a 'link' tag in the 'head' + [[site.inject.head.link]] + rel="icon" + sizes="16x16" + type="image/png" + href="/example/favicon-16x16.png" + + # the following injects a 'script' tag in the 'body' + [[site.inject.body.script]] + type="text/javascript" + src="/example/custom-script.js" + +## individual page settings ## +# while the [site] table applies the settings to all parsed pages, +# it's possible to override a single page's setting by defining +# a table named after the page url or part of it.
+# +# e.g. the following settings will only apply to this parsed page: +# https://www.notion.so/d2fa06f244e64f66880bb0491f58223d +[d2fa06f244e64f66880bb0491f58223d] + ## custom slugs ## + # inside page settings, you can change the url that page will map to with the 'slug' key + # e.g. page "/d2fa06f244e64f66880bb0491f58223d" will now map to "/list" slug = "list" - [[d2fa06f244e64f66880bb0491f58223d.meta]] + [[d2fa06f244e64f66880bb0491f58223d.meta]] + # change the description meta tag for this page only name = "description" - content = "A fullscreen list database page" \ No newline at end of file + content = "A fullscreen list database page, now with a pretty slug" + + [d2fa06f244e64f66880bb0491f58223d.fonts] + # change the title font for this page only + title = 'Nunito' \ No newline at end of file