From 11c066d6f1169ccc63e1e919e80a1a377cf17072 Mon Sep 17 00:00:00 2001
From: Alexey Leshchenko
Date: Fri, 18 Feb 2022 17:11:01 +0300
Subject: [PATCH] Fix handling the index URL

Store the index URL once as `self.index_url` in `Parser.__init__`
instead of threading it through `parse_page`, `find_subpages`,
`export_parsed_page`, and `parse_subpages` as an `index` parameter.

---
 loconotion/modules/notionparser.py | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/loconotion/modules/notionparser.py b/loconotion/modules/notionparser.py
index 7c11592..2069d13 100644
--- a/loconotion/modules/notionparser.py
+++ b/loconotion/modules/notionparser.py
@@ -35,17 +35,19 @@ class Parser:
     def __init__(self, config={}, args={}):
         self.config = config
         self.args = args
-        url = self.config.get("page", None)
-        if not url:
+        index_url = self.config.get("page", None)
+        if not index_url:
             log.critical(
                 "No initial page url specified. If passing a configuration file,"
-                " make sure it contains a 'page' key with the url of the notion.so"
+                " make sure it contains a 'page' key with the url of the notion.site"
                 " page to parse"
             )
             return
 
         # get the site name from the config, or make it up by cleaning the target page's slug
-        site_name = self.config.get("name", self.get_page_slug(url, extension=False))
+        site_name = self.config.get("name", self.get_page_slug(index_url, extension=False))
+
+        self.index_url = index_url
 
         # set the output folder based on the site name
         self.dist_folder = Path(config.get("output", Path("dist") / site_name))
@@ -80,7 +82,7 @@ class Parser:
         # initialize chromedriver
         self.driver = self.init_chromedriver()
 
-        self.starting_url = url
+        self.starting_url = index_url
 
     def get_page_config(self, token):
         # starts by grabbing the gobal site configuration table, if exists
@@ -243,12 +245,11 @@ class Parser:
             options=chrome_options,
         )
 
-    def parse_page(self, url: str, index: str = None):
+    def parse_page(self, url: str):
         """Parse page at url and write it to file, then recursively parse all subpages.
 
         Args:
             url (str): URL of the page to parse.
-            index (str, optional): URL of the index page. Defaults to None.
 
         After the page at `url` has been parsed, calls itself recursively for every subpage
         it has discovered.
@@ -256,9 +257,6 @@ class Parser:
         log.info(f"Parsing page '{url}'")
         log.debug(f"Using page config: {self.get_page_config(url)}")
 
-        if not index:  # if this is the first page being parsed
-            index = url  # set it as the index.html
-
         try:
             self.load_correct_theme(url)
         except TimeoutException:
@@ -294,9 +292,9 @@ class Parser:
             hrefDomain = f'{url.split("notion.site")[0]}notion.site'
             log.info(f"Got the domain as {hrefDomain}")
 
-        subpages = self.find_subpages(url, index, soup, hrefDomain)
-        self.export_parsed_page(url, index, soup)
-        self.parse_subpages(index, subpages)
+        subpages = self.find_subpages(url, soup, hrefDomain)
+        self.export_parsed_page(url, soup)
+        self.parse_subpages(subpages)
 
     def load_correct_theme(self, url):
         self.load(url)
@@ -653,7 +651,7 @@ class Parser:
         )
         soup.body.insert(-1, custom_script)
 
-    def find_subpages(self, url, index, soup, hrefDomain):
+    def find_subpages(self, url, soup, hrefDomain):
         # find sub-pages and clean slugs / links
         subpages = []
         parse_links = not self.get_page_config(url).get("no-links", False)
@@ -687,7 +685,7 @@ class Parser:
                 else:
                     a["href"] = (
                         self.get_page_slug(sub_page_href)
-                        if sub_page_href != index
+                        if sub_page_href != self.index_url
                         else "index.html"
                     )
                     subpages.append(sub_page_href)
@@ -707,10 +705,10 @@ class Parser:
                     child["style"] = style.cssText
         return subpages
 
-    def export_parsed_page(self, url, index, soup):
+    def export_parsed_page(self, url, soup):
         # exports the parsed page
         html_str = str(soup)
-        html_file = self.get_page_slug(url) if url != index else "index.html"
+        html_file = self.get_page_slug(url) if url != self.index_url else "index.html"
         if html_file in self.processed_pages.values():
             log.error(
                 f"Found duplicate pages with slug '{html_file}' - previous one will be"
@@ -722,14 +720,14 @@ class Parser:
             f.write(html_str.encode("utf-8").strip())
         self.processed_pages[url] = html_file
 
-    def parse_subpages(self, index, subpages):
+    def parse_subpages(self, subpages):
         # parse sub-pages
         if subpages and not self.args.get("single_page", False):
             if self.processed_pages:
                 log.debug(f"Pages processed so far: {len(self.processed_pages)}")
             for sub_page in subpages:
                 if sub_page not in self.processed_pages.keys():
-                    self.parse_page(sub_page, index=index)
+                    self.parse_page(sub_page)
 
     def load(self, url):
         self.driver.get(url)
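
Not part of the patch: a minimal sketch of the call pattern after this change,
for review context. It assumes loconotion's module layout as shown in the diff;
the notion.site URL is a made-up placeholder, and constructing Parser outside
the loconotion CLI also launches chromedriver as a side effect.

    # Sketch only: the index URL now lives on the Parser instance.
    from loconotion.modules.notionparser import Parser

    # "page" is the site's index; __init__ stores it as self.index_url.
    # (The URL below is a placeholder, not taken from the patch.)
    parser = Parser(
        config={"page": "https://example.notion.site/Example-0123456789abcdef"},
        args={},
    )

    # Before this patch: parser.parse_page(url, index=index_url) threaded the
    # index through every recursive call. After it, recursion needs only the
    # page URL; find_subpages/export_parsed_page compare each page against
    # parser.index_url to decide which slug becomes index.html.
    parser.parse_page(parser.starting_url)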