diff --git a/loconotion/fileorganizer.py b/loconotion/fileorganizer.py
new file mode 100644
index 0000000..ac69d7e
--- /dev/null
+++ b/loconotion/fileorganizer.py
@@ -0,0 +1,108 @@
+"""Move the assets of an exported site into an ``assets`` folder tree.
+
+Images, fonts, stylesheets and scripts lying in the current working directory
+are moved to ``assets/<kind>`` and the links to them in the exported HTML and
+CSS files are rewritten accordingly.
+"""
+import logging
+import os
+import platform
+
+log = logging.getLogger(f"loconotion.{__name__}")
+all_files = []
+old_to_new = {}
+sep = os.path.sep
+
+
+def process_folder():
+    """Move the asset files of the current directory below ``assets``.
+
+    Each move is recorded in the module-level ``old_to_new`` mapping
+    (original name -> new relative path) so links can be rewritten later.
+    """
+    log.info("Processing folder %s", os.getcwd())
+    structure = {'assets'+sep+'images': ['png', 'jpg', 'jpeg','bmp','gif','ico'],
+                 'assets'+sep+'fonts': ['woff','ttf'],
+                 'assets'+sep+'css': ['css'],
+                 'assets'+sep+'js': ['js']}
+    # !! changing this structure, may break other stuff.
+
+    mapping = {}
+    for folder, extensions in structure.items():
+        for ext in extensions:
+            mapping[ext] = folder
+
+    entries = os.listdir()
+    log.info("Found following files in %s", os.getcwd())
+    log.info(entries)
+    for file in entries:
+        # only regular files are assets; a sub-folder is never moved
+        if not os.path.isfile(file):
+            continue
+        # splitext() gives '' for a name without a dot, so a file that is
+        # merely called "js" is left alone; lower() also catches "LOGO.PNG"
+        ext = os.path.splitext(file)[1][1:].lower()
+        new_parent_dir = mapping.get(ext)
+
+        if new_parent_dir:
+            new_file = os.path.join(new_parent_dir, file)
+            os.makedirs(new_parent_dir, exist_ok=True)
+            os.rename(file, new_file)
+            old_to_new[file] = new_file
+            log.info('%s moved to %s', file, new_file)
+
+
+def update_code(file_name, old_to_new):
+    """Rewrite the links to moved assets inside one HTML or CSS file.
+
+    file_name:  path of the file to patch.
+    old_to_new: mapping of original asset name -> new relative path.
+    """
+    log.info('Updating assets link in %s', file_name)
+    with open(file_name, 'r', encoding="utf8") as file:
+        content = file.read()
+
+    in_css = file_name.endswith('.css')
+    for old, new in old_to_new.items():
+        # links are URLs, so they use '/' even where os.path.sep is '\\'
+        new = new.replace(sep, '/')
+        if in_css:
+            # css files sit in assets/css: reach siblings through '..' and
+            # swap only the leading folder, not an "assets" inside a name
+            new = new.replace('assets', '..', 1)
+        content = content.replace(old, new)
+
+    # write with the encoding used for reading, not the platform default
+    with open(file_name, 'w', encoding="utf8") as file:
+        file.write(content)
+
+
+def main():
+    """Organise the current directory, then patch its HTML and CSS files."""
+    process_folder()
+    for file in os.listdir():
+        if file.endswith('.html'):
+            update_code(file, old_to_new)
+    css_dir = 'assets'+sep+'css'
+    # the folder exists only if at least one stylesheet was moved
+    if os.path.isdir(css_dir):
+        for file in os.listdir(css_dir):
+            if file.endswith('.css'):
+                update_code(css_dir+sep+file, old_to_new)
+
+
+def organize(dist_folder):
+    """Organise the exported site stored in ``dist_folder``.
+
+    Asks for confirmation on stdin first, since files are moved in place.
+    The previous working directory is restored when done.
+    """
+    previous_dir = os.getcwd()
+    os.chdir(dist_folder)
+    try:
+        log.info('Organizing files in assets folder')
+        input(
+            'Organizer will run in ['+ os.getcwd() +'] Are you sure you are in a correct directory ? \n Press [ENTER] to confirm or Ctrl + C to quit')
+        main()
+    finally:
+        os.chdir(previous_dir)
diff --git a/loconotion/notionparser.py b/loconotion/notionparser.py
index cb145ef..8cf598c 100644
--- a/loconotion/notionparser.py
+++ b/loconotion/notionparser.py
@@ -10,6 +10,7 @@ import mimetypes
 import urllib.parse
 import hashlib
 from pathlib import Path
+from fileorganizer import organize
 
 log = logging.getLogger(f"loconotion.{__name__}")
 
@@ -573,19 +574,15 @@ class Parser:
             "script", type="text/javascript", src=str(loconotion_custom_js)
         )
         soup.body.insert(-1, custom_script)
-
-        ## extract the custom domain from links of type https://xxxx.notion.site
-        hrefDomain = url.split('notion.site')[0] + 'notion.site'
+        
         # find sub-pages and clean slugs / links
         sub_pages = []
         parse_links = not self.get_page_config(url).get("no-links", False)
         for a in soup.find_all('a', href=True):
             sub_page_href = a["href"]
             if sub_page_href.startswith("/"):
-                # spliting the href to avoid creating subfolders in case of links like https://xxxx.notion.site/xxxx
-                sub_page_href = hrefDomain + '/'+ a["href"].split('/')[len(a["href"].split('/'))-1]
-            if sub_page_href.startswith(hrefDomain):
+                sub_page_href = "https://www.notion.so" + a["href"]
+            if sub_page_href.startswith("https://www.notion.so/"):
                 if parse_links or not len(a.find_parents("div", class_="notion-scroller")):
                     # if the link is an anchor link,
                     # check if the page hasn't already been parsed
@@ -667,6 +664,7 @@ class Parser:
             int(elapsed_time % 60),
             tot_processed_pages,
         )
+        organize(self.dist_folder)
         log.info(
             f"Finished!\n\nProcessed {len(tot_processed_pages)} pages in {formatted_time}"
         )