diff --git a/loconotion/notionparser.py b/loconotion/notionparser.py
index 71bc120..2807aea 100644
--- a/loconotion/notionparser.py
+++ b/loconotion/notionparser.py
@@ -574,14 +574,18 @@ class Parser:
             )
             soup.body.insert(-1, custom_script)
 
+        hrefDomain = url.split('notion.site')[0] + 'notion.site'
+        log.info(f"Got the domain as {hrefDomain}")
+
         # find sub-pages and clean slugs / links
         sub_pages = []
         parse_links = not self.get_page_config(url).get("no-links", False)
         for a in soup.find_all('a', href=True):
             sub_page_href = a["href"]
             if sub_page_href.startswith("/"):
-                sub_page_href = "https://www.notion.so" + a["href"]
-            if sub_page_href.startswith("https://www.notion.so/"):
+                sub_page_href = hrefDomain + '/'+ a["href"].split('/')[len(a["href"].split('/'))-1]
+                log.info(f"Got this as href {sub_page_href}")
+            if sub_page_href.startswith(hrefDomain):
                 if parse_links or not len(a.find_parents("div", class_="notion-scroller")):
                     # if the link is an anchor link,
                     # check if the page hasn't already been parsed
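
For reviewers, the rewriting the new hunk performs can be sketched standalone as below; this is a minimal illustration under the assumption that the parsed page lives on a *.notion.site domain, and the url/href values are hypothetical examples, not taken from the change.

# Minimal sketch of the link rewriting introduced above (illustrative only;
# the url and href values are made-up, not from the PR).
url = "https://my-space.notion.site/Home-0123456789abcdef0123456789abcdef"

# Everything up to and including "notion.site" is kept as the base domain.
hrefDomain = url.split('notion.site')[0] + 'notion.site'
assert hrefDomain == "https://my-space.notion.site"

href = "/Sub-Page-fedcba9876543210fedcba9876543210"
if href.startswith("/"):
    # Keep only the last path segment (the page slug) and re-anchor it on the
    # detected domain; split('/')[len(...) - 1] in the diff is equivalent to [-1].
    sub_page_href = hrefDomain + '/' + href.split('/')[-1]

print(sub_page_href)
# https://my-space.notion.site/Sub-Page-fedcba9876543210fedcba9876543210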