Merge pull request #75 from meSunnySrivastava/new-url-format

Use the new custom Notion URL format instead of the hardcoded default one
This commit is contained in:
Leonardo Cavaletti 2021-09-07 08:15:21 +01:00 committed by GitHub
commit b9fd73f777
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -574,14 +574,18 @@ class Parser:
)
soup.body.insert(-1, custom_script)
hrefDomain = url.split('notion.site')[0] + 'notion.site'
log.info(f"Got the domain as {hrefDomain}")
# find sub-pages and clean slugs / links
sub_pages = []
parse_links = not self.get_page_config(url).get("no-links", False)
for a in soup.find_all('a', href=True):
sub_page_href = a["href"]
if sub_page_href.startswith("/"):
sub_page_href = "https://www.notion.so" + a["href"]
if sub_page_href.startswith("https://www.notion.so/"):
sub_page_href = hrefDomain + '/'+ a["href"].split('/')[len(a["href"].split('/'))-1]
log.info(f"Got this as href {sub_page_href}")
if sub_page_href.startswith(hrefDomain):
if parse_links or not len(a.find_parents("div", class_="notion-scroller")):
# if the link is an anchor link,
# check if the page hasn't already been parsed