def export_sitemap(self, domain: str, processed_pages: list):
    """Write ``sitemap.xml`` into the dist folder, one ``<url>`` entry per page.

    Args:
        domain: Host the generated site is served from, e.g.
            ``"example.com"``. A value that already starts with ``http://``
            or ``https://`` is used as given; otherwise ``https://`` is
            prepended. A trailing ``/`` is ignored.
        processed_pages: Page paths relative to the site root. Each one is
            appended to the domain to form the URL listed in the sitemap.
    """
    # Function-scope import so the method does not depend on a module-level
    # import being present.
    from xml.sax.saxutils import escape

    base_url = domain.strip().rstrip("/")
    if not base_url.startswith(("http://", "https://")):
        base_url = f"https://{base_url}"

    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ]
    for page in processed_pages:
        # '&', '<' and '>' are not allowed raw inside <loc>; entity-escape
        # them so the document stays well-formed.
        location = escape(f"{base_url}/{page}")
        lines.append(f"<url><loc>{location}</loc></url>")
    lines.append("</urlset>")

    # UTF-8 matches the XML declaration; newline="\n" keeps the output
    # byte-identical across platforms (the previous bare "\r" terminators
    # produced a single unreadable line on most tools).
    sitemap_path = self.dist_folder / "sitemap.xml"
    with open(sitemap_path, "w", encoding="utf-8", newline="\n") as f:
        f.write("\n".join(lines) + "\n")
"""Integration test: building a site with ``domain`` set emits a sitemap."""
import sys
from pathlib import Path

# Make the directory that contains ``modules`` importable no matter where the
# repository is checked out (this file lives in ``<package root>/tests``).
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from modules.notionparser import Parser


def test_parse_sample_page():
    """Parse the public example page and check that ``sitemap.xml`` is written.

    NOTE(review): ``Parser.run`` loads the page through a web driver, so this
    test needs network access and a working browser driver — confirm how it
    should be handled in CI.
    """
    config = {
        "page": "https://www.notion.so/Loconotion-Example-Page-03c403f4fdc94cc1b315b9469a8950ef",
        "domain": "example.com",
    }
    args = {"timeout": 10, "single_page": True}
    parser = Parser(config, args)
    parser.run()
    # ``run`` exports the sitemap into the dist folder when ``domain`` is set;
    # without this check the test could never fail on a missing sitemap.
    assert (parser.dist_folder / "sitemap.xml").is_file()


if __name__ == "__main__":
    test_parse_sample_page()