mirror of
https://github.com/leoncvlt/loconotion.git
synced 2024-08-30 18:12:12 +00:00
generate sitemap with custom domain in toml
This commit is contained in:
parent
58a118c615
commit
6da501e154
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
# name of the folder that the site will be generated in
|
# name of the folder that the site will be generated in
|
||||||
name = "Notion Test Site"
|
name = "Notion Test Site"
|
||||||
|
domain = "example.com"
|
||||||
|
|
||||||
# the notion.so page to begin parsing from. This page will become the index.html
|
# the notion.so page to begin parsing from. This page will become the index.html
|
||||||
# of the generated site, and loconotion will parse all sub-pages present on the page
|
# of the generated site, and loconotion will parse all sub-pages present on the page
|
||||||
|
@ -754,6 +754,19 @@ class Parser:
|
|||||||
if sub_page not in self.processed_pages.keys():
|
if sub_page not in self.processed_pages.keys():
|
||||||
self.parse_page(sub_page)
|
self.parse_page(sub_page)
|
||||||
|
|
||||||
|
def export_sitemap(self, domain: str, processed_pages: list):
    """Write a ``sitemap.xml`` file into ``self.dist_folder``.

    Each entry of ``processed_pages`` becomes one
    ``<url><loc>https://{domain}/{page}</loc></url>`` element inside a
    ``<urlset>`` root.

    Args:
        domain: Host name (without scheme) used as the prefix of every URL.
        processed_pages: Page paths appended to ``https://{domain}/``.
    """
    # Function-scope import so the method does not depend on the file's
    # top-level import block.
    from xml.sax.saxutils import escape

    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ]
    for page in processed_pages:
        # The sitemap protocol requires URLs to be entity-escaped
        # (e.g. "&" -> "&amp;"), otherwise the document is not valid XML.
        location = escape(f"https://{domain}/{page}")
        lines.append(f"<url><loc>{location}</loc></url>")
    lines.append("</urlset>")

    # The XML header declares UTF-8, so write with that encoding explicitly
    # instead of the platform default. Lines are separated by "\n" (the
    # previous bare "\r" is not a line terminator on current platforms);
    # newline="\n" keeps the output identical on every OS.
    with open(
        self.dist_folder / "sitemap.xml", "w", encoding="utf-8", newline="\n"
    ) as f:
        f.write("\n".join(lines))
|
||||||
|
|
||||||
def load(self, url):
|
def load(self, url):
|
||||||
self.driver.get(url)
|
self.driver.get(url)
|
||||||
WebDriverWait(self.driver, 60).until(notion_page_loaded())
|
WebDriverWait(self.driver, 60).until(notion_page_loaded())
|
||||||
@ -762,6 +775,8 @@ class Parser:
|
|||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
self.processed_pages = {}
|
self.processed_pages = {}
|
||||||
self.parse_page(self.starting_url)
|
self.parse_page(self.starting_url)
|
||||||
|
if self.config.get("domain",None):
|
||||||
|
self.export_sitemap(self.config.get("domain"),list(self.processed_pages.values()))
|
||||||
elapsed_time = time.time() - start_time
|
elapsed_time = time.time() - start_time
|
||||||
formatted_time = "{:02d}:{:02d}:{:02d}".format(
|
formatted_time = "{:02d}:{:02d}:{:02d}".format(
|
||||||
int(elapsed_time // 3600),
|
int(elapsed_time // 3600),
|
||||||
|
14
loconotion/tests/test_sitemap.py
Normal file
14
loconotion/tests/test_sitemap.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
import sys
from pathlib import Path

# Make the package directory (the parent of this tests/ folder) importable
# so that `modules.notionparser` resolves. The location is derived from this
# file rather than a machine-specific absolute path, so the test can be run
# from any checkout.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from modules.notionparser import Parser
|
||||||
|
|
||||||
|
def test_parse_sample_page():
    """Run the parser on the example page with a custom domain configured.

    The config sets "domain", which makes ``Parser.run`` call
    ``export_sitemap``; that method writes ``sitemap.xml`` under
    ``parser.dist_folder``. The test checks that the file was produced and
    is a ``<urlset>`` document.
    """
    config = {
        "page": "https://www.notion.so/Loconotion-Example-Page-03c403f4fdc94cc1b315b9469a8950ef",
        "domain": "example.com",
    }
    args = {"timeout": 10, "single_page": True}
    parser = Parser(config, args)
    parser.run()

    # Opened the same way export_sitemap addresses the file.
    # NOTE(review): assumes dist_folder is still populated after run()
    # returns - confirm against the Parser implementation.
    with open(parser.dist_folder / "sitemap.xml", encoding="utf-8") as f:
        sitemap = f.read()
    assert "<urlset" in sitemap
    assert sitemap.rstrip().endswith("</urlset>")


if __name__ == "__main__":
    test_parse_sample_page()
|
Loading…
Reference in New Issue
Block a user