generate sitemap with custom domain in toml

This commit is contained in:
kevindaffaarr 2023-01-06 19:33:50 +07:00
parent 58a118c615
commit 6da501e154
3 changed files with 30 additions and 0 deletions

View File

@ -4,6 +4,7 @@
# name of the folder that the site will be generated in # name of the folder that the site will be generated in
name = "Notion Test Site" name = "Notion Test Site"
domain = "example.com"
# the notion.so page to begin parsing from. This page will become the index.html # the notion.so page to begin parsing from. This page will become the index.html
# of the generated site, and loconotion will parse all sub-pages present on the page # of the generated site, and loconotion will parse all sub-pages present on the page

View File

@ -754,6 +754,19 @@ class Parser:
if sub_page not in self.processed_pages.keys(): if sub_page not in self.processed_pages.keys():
self.parse_page(sub_page) self.parse_page(sub_page)
def export_sitemap(self, domain:str, processed_pages:list):
    """Write ``sitemap.xml`` into ``self.dist_folder`` with one entry per page.

    Each entry's location is ``https://{domain}/{page}``, entity-escaped so
    that characters such as ``&`` or ``<`` keep the document well-formed.

    Args:
        domain: Host name without a scheme (``https://`` is prepended here).
        processed_pages: Values appended after ``https://{domain}/``, one
            ``<url>`` entry per item, in the given order.
    """
    # Local import keeps this method self-contained with respect to the
    # module's import header.
    from xml.sax.saxutils import escape

    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ]
    for page in processed_pages:
        # The sitemap protocol requires entity-escaped URLs inside <loc>.
        loc = escape(f"https://{domain}/{page}")
        lines.append(f"<url><loc>{loc}</loc></url>")
    lines.append("</urlset>")

    # encoding="utf-8" makes the bytes agree with the XML declaration even
    # when the platform default differs; newline="\n" keeps LF line endings
    # on every OS (the previous bare "\r" produced CR-only files).
    with open(
        self.dist_folder / "sitemap.xml", "w", encoding="utf-8", newline="\n"
    ) as f:
        f.write("\n".join(lines))
def load(self, url): def load(self, url):
self.driver.get(url) self.driver.get(url)
WebDriverWait(self.driver, 60).until(notion_page_loaded()) WebDriverWait(self.driver, 60).until(notion_page_loaded())
@ -762,6 +775,8 @@ class Parser:
start_time = time.time() start_time = time.time()
self.processed_pages = {} self.processed_pages = {}
self.parse_page(self.starting_url) self.parse_page(self.starting_url)
if self.config.get("domain",None):
self.export_sitemap(self.config.get("domain"),list(self.processed_pages.values()))
elapsed_time = time.time() - start_time elapsed_time = time.time() - start_time
formatted_time = "{:02d}:{:02d}:{:02d}".format( formatted_time = "{:02d}:{:02d}:{:02d}".format(
int(elapsed_time // 3600), int(elapsed_time // 3600),

View File

@ -0,0 +1,14 @@
import sys
sys.path.insert(0, "D:\\Other Projects\\loconotion\\loconotion")
from modules.notionparser import Parser
def test_parse_sample_page():
    """Run the parser once against the public Loconotion example page.

    The config carries a "domain" entry alongside the page URL; the args
    request a single-page run with a timeout value of 10.
    """
    site_config = {
        "page": "https://www.notion.so/Loconotion-Example-Page-03c403f4fdc94cc1b315b9469a8950ef",
        "domain": "example.com",
    }
    run_args = {"timeout": 10, "single_page": True}
    Parser(site_config, run_args).run()


# Allow running this check directly as a script, without a test runner.
if __name__ == "__main__":
    test_parse_sample_page()