add file organizer to organize files into a separate assets folder

This commit is contained in:
Sunny Srivastava 2021-08-26 21:19:35 +05:30
parent e91bac9954
commit b61e4fe01a
2 changed files with 77 additions and 7 deletions

View File

@ -0,0 +1,72 @@
import logging
import os
import platform
# Child logger under the main "loconotion" logger.
log = logging.getLogger(f"loconotion.{__name__}")
# NOTE(review): shadowed by a same-named local in process_folder and
# otherwise unused at module level — likely dead state.
all_files = []
# Mapping of original file name -> new relative path under assets/;
# filled by process_folder and consumed by update_code (via main).
old_to_new = {}
# Platform-specific path separator used to build the assets sub-paths.
sep = os.path.sep
def process_folder():
    """Move asset files in the current working directory into assets/ subfolders.

    Files are routed by extension into assets/images, assets/fonts,
    assets/css and assets/js. Each move is recorded in the module-level
    ``old_to_new`` mapping (old name -> new relative path) so that
    references inside HTML/CSS files can be rewritten afterwards.
    """
    log.info("Processing folder %s", os.getcwd())
    # NOTE: other code (update_code's css '..' rewrite) relies on this
    # exact layout — changing this structure may break other stuff.
    structure = {
        'assets' + sep + 'images': ['png', 'jpg', 'jpeg', 'bmp', 'gif', 'ico'],
        'assets' + sep + 'fonts': ['woff', 'ttf'],
        'assets' + sep + 'css': ['css'],
        'assets' + sep + 'js': ['js'],
    }
    # Invert the structure: extension -> destination folder.
    mapping = {}
    for folder, extensions in structure.items():
        for ext in extensions:
            mapping[ext] = folder
    files = os.listdir()
    log.info("Found following files in %s", os.getcwd())
    log.info(files)
    for file in files:
        # Skip directories (os.listdir also returns folders, e.g. an
        # 'assets' tree left over from a previous run).
        if not os.path.isfile(file):
            continue
        # splitext is safe for names without a dot; lower-case the
        # extension so '.PNG' and '.png' are treated alike.
        ext = os.path.splitext(file)[1].lstrip('.').lower()
        new_parent_dir = mapping.get(ext)
        if new_parent_dir:
            new_file = os.path.join(new_parent_dir, file)
            # exist_ok avoids a race/re-run failure when the folder exists.
            os.makedirs(new_parent_dir, exist_ok=True)
            os.rename(file, new_file)
            old_to_new[file] = new_file
            log.info('%s moved to %s', file, new_file)
def update_code(file_name, old_to_new):
    """Rewrite links to moved assets inside *file_name*.

    Parameters
    ----------
    file_name : str
        Path of the HTML or CSS file to patch in place.
    old_to_new : dict
        Mapping of original file name -> new relative path under 'assets'.
    """
    log.info('Updating assets link in %s', file_name)
    # The css check depends only on file_name — hoist it out of the loop.
    is_css = file_name.endswith('.css')
    with open(file_name, 'r', encoding="utf8") as file:
        content = file.read()
    for old, new in old_to_new.items():
        if is_css:
            # CSS files live in assets/css, so sibling asset folders are
            # reachable via '..' rather than via the 'assets' prefix.
            new = new.replace('assets', '..')
        content = content.replace(old, new)
    # Bug fix: write back with the same encoding we read with; the platform
    # default encoding can raise UnicodeEncodeError (e.g. cp1252 on Windows).
    with open(file_name, 'w', encoding="utf8") as file:
        file.write(content)
def main():
    """Organize the current directory, then patch links in HTML/CSS files."""
    process_folder()
    # Rewrite asset links in every top-level HTML page.
    for file in os.listdir():
        if file.endswith('.html'):
            update_code(file, old_to_new)
    # Rewrite links inside relocated stylesheets. Guard against the folder
    # being absent: a site with no CSS assets never creates assets/css, and
    # os.listdir would raise FileNotFoundError.
    css_dir = 'assets' + sep + 'css'
    if os.path.isdir(css_dir):
        for file in os.listdir(css_dir):
            if file.endswith('.css'):
                update_code(os.path.join(css_dir, file), old_to_new)
def organize(dist_folder):
    """Entry point: reorganize *dist_folder* into an assets/ folder tree.

    NOTE(review): this chdir()s into *dist_folder* and never restores the
    previous working directory — any caller code running afterwards sees
    the changed cwd. It also blocks on stdin for confirmation, so it is
    not suitable for unattended/CI runs as-is.
    """
    os.chdir(dist_folder)
    log.info('Organizing files in assets folder')
    # Interactive guard: moving files is destructive, so require a human OK
    # (Ctrl+C raises KeyboardInterrupt and aborts before any file is moved).
    input(
        'Organizer will run in ['+ os.getcwd() +'] Are you sure you are in a correct directory ? \n Press [ENTER] to confirm or Ctrl + C to quit')
    main()

View File

@ -10,6 +10,7 @@ import mimetypes
import urllib.parse
import hashlib
from pathlib import Path
from fileorganizer import *
log = logging.getLogger(f"loconotion.{__name__}")
@ -573,19 +574,15 @@ class Parser:
"script", type="text/javascript", src=str(loconotion_custom_js)
)
soup.body.insert(-1, custom_script)
## extract the custom domain from links of type https://xxxx.notion.site
hrefDomain = url.split('notion.site')[0] + 'notion.site'
# find sub-pages and clean slugs / links
sub_pages = []
parse_links = not self.get_page_config(url).get("no-links", False)
for a in soup.find_all('a', href=True):
sub_page_href = a["href"]
if sub_page_href.startswith("/"):
# spliting the href to avoid creating subfolders in case of links like https://xxxx.notion.site/xxxx
sub_page_href = hrefDomain + '/'+ a["href"].split('/')[len(a["href"].split('/'))-1]
if sub_page_href.startswith(hrefDomain):
sub_page_href = "https://www.notion.so" + a["href"]
if sub_page_href.startswith("https://www.notion.so/"):
if parse_links or not len(a.find_parents("div", class_="notion-scroller")):
# if the link is an anchor link,
# check if the page hasn't already been parsed
@ -667,6 +664,7 @@ class Parser:
int(elapsed_time % 60),
tot_processed_pages,
)
organize(self.dist_folder)
log.info(
f"Finished!\n\nProcessed {len(tot_processed_pages)} pages in {formatted_time}"
)