mirror of
https://github.com/leoncvlt/loconotion.git
synced 2024-08-30 18:12:12 +00:00
Removed rich and enlighten packages, set up own custom logger
This commit is contained in:
parent
f293dd63a3
commit
c124806bdb
@ -1,6 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import requests
|
|
||||||
import shutil
|
import shutil
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
@ -8,13 +7,10 @@ import logging
|
|||||||
import re
|
import re
|
||||||
import glob
|
import glob
|
||||||
import mimetypes
|
import mimetypes
|
||||||
from rich.logging import RichHandler
|
|
||||||
from rich.progress import Progress
|
|
||||||
import enlighten
|
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import hashlib
|
import hashlib
|
||||||
import toml
|
|
||||||
import argparse
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
@ -22,20 +18,14 @@ from selenium.common.exceptions import TimeoutException, NoSuchElementException
|
|||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
from selenium.webdriver.support.ui import WebDriverWait
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from pathlib import Path
|
|
||||||
|
import requests
|
||||||
|
import toml
|
||||||
import cssutils
|
import cssutils
|
||||||
cssutils.log.setLevel(logging.CRITICAL) # removes warning logs from cssutils
|
cssutils.log.setLevel(logging.CRITICAL) # removes warning logs from cssutils
|
||||||
|
|
||||||
def setup_logger(name):
|
log = logging.getLogger(__name__)
|
||||||
rich_handler = RichHandler()
|
|
||||||
logger = logging.getLogger(name)
|
|
||||||
logger.addHandler(rich_handler)
|
|
||||||
logger.setLevel(logging.INFO)
|
|
||||||
return logger
|
|
||||||
|
|
||||||
log = setup_logger("loconotion-logger")
|
|
||||||
|
|
||||||
class notion_page_loaded(object):
|
class notion_page_loaded(object):
|
||||||
"""An expectation for checking that a notion page has loaded.
|
"""An expectation for checking that a notion page has loaded.
|
||||||
@ -76,8 +66,9 @@ class toggle_block_has_opened(object):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
class Parser():
|
class Parser():
|
||||||
def __init__(self, config = {}):
|
def __init__(self, config = {}, args = {}):
|
||||||
self.config = config
|
self.config = config
|
||||||
|
self.args = args
|
||||||
url = self.config.get("page", None)
|
url = self.config.get("page", None)
|
||||||
if not url:
|
if not url:
|
||||||
log.critical("No initial page url specified. If passing a configuration file," +
|
log.critical("No initial page url specified. If passing a configuration file," +
|
||||||
@ -92,7 +83,7 @@ class Parser():
|
|||||||
log.info(f"Setting output path to {self.dist_folder}")
|
log.info(f"Setting output path to {self.dist_folder}")
|
||||||
|
|
||||||
# check if the argument to clean the dist folder was passed
|
# check if the argument to clean the dist folder was passed
|
||||||
if (self.config.get("--clean", False)):
|
if (self.args.get("clean", False)):
|
||||||
try:
|
try:
|
||||||
shutil.rmtree(self.dist_folder)
|
shutil.rmtree(self.dist_folder)
|
||||||
log.info(f"Removing previously cached files in '{self.dist_folder}'")
|
log.info(f"Removing previously cached files in '{self.dist_folder}'")
|
||||||
@ -155,7 +146,6 @@ class Parser():
|
|||||||
return path + (".html" if extension else "")
|
return path + (".html" if extension else "")
|
||||||
|
|
||||||
def cache_file(self, url, filename = None):
|
def cache_file(self, url, filename = None):
|
||||||
show_progress_bars = False
|
|
||||||
# stringify the url in case it's a Path object
|
# stringify the url in case it's a Path object
|
||||||
url = str(url)
|
url = str(url)
|
||||||
|
|
||||||
@ -178,7 +168,7 @@ class Parser():
|
|||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
session.trust_env = False
|
session.trust_env = False
|
||||||
log.info(f"Downloading '{url}'")
|
log.info(f"Downloading '{url}'")
|
||||||
response = session.get(url, stream=True)
|
response = session.get(url)
|
||||||
|
|
||||||
# if the filename does not have an extension at this point,
|
# if the filename does not have an extension at this point,
|
||||||
# try to infer it from the url, and if not possible,
|
# try to infer it from the url, and if not possible,
|
||||||
@ -192,16 +182,7 @@ class Parser():
|
|||||||
|
|
||||||
Path(destination).parent.mkdir(parents=True, exist_ok=True)
|
Path(destination).parent.mkdir(parents=True, exist_ok=True)
|
||||||
with open(destination, "wb") as f:
|
with open(destination, "wb") as f:
|
||||||
total = response.headers.get('content-length')
|
|
||||||
if total is None or not show_progress_bars:
|
|
||||||
f.write(response.content)
|
f.write(response.content)
|
||||||
else:
|
|
||||||
progress_manager = enlighten.get_manager()
|
|
||||||
download_progress = progress_manager.counter(total=int(total)//1024, desc='Downloading', unit='mb')
|
|
||||||
for data in response.iter_content(chunk_size=1024):
|
|
||||||
f.write(data)
|
|
||||||
download_progress.update()
|
|
||||||
progress_manager.stop()
|
|
||||||
|
|
||||||
return destination.relative_to(self.dist_folder)
|
return destination.relative_to(self.dist_folder)
|
||||||
# if not, check if it's a local file, and copy it to the dist folder
|
# if not, check if it's a local file, and copy it to the dist folder
|
||||||
@ -469,37 +450,63 @@ class Parser():
|
|||||||
|
|
||||||
def run(self, url):
|
def run(self, url):
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
total_processed_pages = self.parse_page(url)
|
total_processed_pages = self.parse_page(url)
|
||||||
|
|
||||||
elapsed_time = time.time() - start_time
|
elapsed_time = time.time() - start_time
|
||||||
formatted_time = '{:02d}:{:02d}:{:02d}'.format(int(elapsed_time // 3600), int(elapsed_time % 3600 // 60), int(elapsed_time % 60))
|
formatted_time = '{:02d}:{:02d}:{:02d}'.format(int(elapsed_time // 3600), int(elapsed_time % 3600 // 60), int(elapsed_time % 60))
|
||||||
log.info(f'Finished!\nヽ( ・‿・)ノ Processed {len(total_processed_pages)} pages in {formatted_time}')
|
log.info(f'Finished!\nヽ( ・‿・)ノ Processed {len(total_processed_pages)} pages in {formatted_time}')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
# set up argument parser
|
||||||
parser = argparse.ArgumentParser(description='Generate static websites from Notion.so pages')
|
parser = argparse.ArgumentParser(description='Generate static websites from Notion.so pages')
|
||||||
parser.add_argument('target', help='The config file containing the site properties, or the url of the Notion.so page to generate the site from')
|
parser.add_argument('target', help='The config file containing the site properties, or the url of the Notion.so page to generate the site from')
|
||||||
parser.add_argument('--clean', action='store_true', default=False, help='Delete all previously cached files for the site before generating it')
|
parser.add_argument('--clean', action='store_true', default=False, help='Delete all previously cached files for the site before generating it')
|
||||||
parser.add_argument("-v", "--verbose", action="store_true", help="Shows way more exciting facts in the output")
|
parser.add_argument("-v", "--verbose", action="store_true", help="Shows way more exciting facts in the output")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# set up some pretty logs
|
||||||
|
import colorama
|
||||||
|
import copy
|
||||||
|
|
||||||
|
LOG_COLORS = {
|
||||||
|
logging.DEBUG: colorama.Fore.GREEN,
|
||||||
|
logging.INFO: colorama.Fore.BLUE,
|
||||||
|
logging.WARNING: colorama.Fore.YELLOW,
|
||||||
|
logging.ERROR: colorama.Fore.RED,
|
||||||
|
logging.CRITICAL: colorama.Back.RED
|
||||||
|
}
|
||||||
|
|
||||||
if __name__ == '__main__':
|
class ColorFormatter(logging.Formatter):
|
||||||
if args.verbose:
|
def format(self, record, *args, **kwargs):
|
||||||
log.setLevel(logging.DEBUG)
|
# if the corresponding logger has children, they may receive modified
|
||||||
|
# record, so we want to keep it intact
|
||||||
|
new_record = copy.copy(record)
|
||||||
|
if new_record.levelno in LOG_COLORS:
|
||||||
|
new_record.levelname = "{color_begin}{level}{color_end}".format(
|
||||||
|
level=new_record.levelname,
|
||||||
|
color_begin=LOG_COLORS[new_record.levelno],
|
||||||
|
color_end=colorama.Style.RESET_ALL,
|
||||||
|
)
|
||||||
|
return super(ColorFormatter, self).format(new_record, *args, **kwargs)
|
||||||
|
|
||||||
def extend_configuration_from_args(config):
|
log_screen_handler = logging.StreamHandler(stream=sys.stdout)
|
||||||
if (args.clean): config['--clean'] = True
|
log_screen_handler.setFormatter(ColorFormatter(fmt='%(asctime)s %(levelname)-8s %(message)s',
|
||||||
return config
|
datefmt="{color_begin}[%H:%M:%S]{color_end}".format(
|
||||||
|
color_begin=colorama.Style.DIM,
|
||||||
|
color_end=colorama.Style.RESET_ALL
|
||||||
|
)))
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
log.setLevel(logging.INFO if not args.verbose else logging.DEBUG)
|
||||||
|
log.addHandler(log_screen_handler)
|
||||||
|
|
||||||
|
# parse the provided arguments
|
||||||
try:
|
try:
|
||||||
if urllib.parse.urlparse(args.target).scheme:
|
if urllib.parse.urlparse(args.target).scheme:
|
||||||
try:
|
try:
|
||||||
response = requests.get(args.target)
|
response = requests.get(args.target)
|
||||||
if ("notion.so" in args.target):
|
if ("notion.so" in args.target):
|
||||||
log.info("Initialising parser with simple page url")
|
log.info("Initialising parser with simple page url")
|
||||||
config = extend_configuration_from_args({ "page" : args.target })
|
config = { "page" : args.target }
|
||||||
Parser(config)
|
Parser(config = config, args = vars(args))
|
||||||
else:
|
else:
|
||||||
log.critical(f"{args.target} is not a notion.so page")
|
log.critical(f"{args.target} is not a notion.so page")
|
||||||
except requests.ConnectionError as exception:
|
except requests.ConnectionError as exception:
|
||||||
@ -508,10 +515,9 @@ if __name__ == '__main__':
|
|||||||
if Path(args.target).is_file():
|
if Path(args.target).is_file():
|
||||||
with open(args.target) as f:
|
with open(args.target) as f:
|
||||||
parsed_config = toml.loads(f.read())
|
parsed_config = toml.loads(f.read())
|
||||||
parsed_config = extend_configuration_from_args(parsed_config)
|
|
||||||
log.info(f"Initialising parser with configuration file")
|
log.info(f"Initialising parser with configuration file")
|
||||||
log.debug(parsed_config)
|
log.debug(parsed_config)
|
||||||
Parser(parsed_config)
|
Parser(config = parsed_config, args = vars(args))
|
||||||
else:
|
else:
|
||||||
log.critical(f"Config file {args.target} does not exists")
|
log.critical(f"Config file {args.target} does not exists")
|
||||||
except FileNotFoundError as e:
|
except FileNotFoundError as e:
|
||||||
|
@ -1,16 +1,20 @@
|
|||||||
|
ansicon==1.89.0
|
||||||
beautifulsoup4==4.9.0
|
beautifulsoup4==4.9.0
|
||||||
|
blessed==1.17.5
|
||||||
certifi==2020.4.5.1
|
certifi==2020.4.5.1
|
||||||
chardet==3.0.4
|
chardet==3.0.4
|
||||||
colorama==0.4.3
|
colorama==0.4.3
|
||||||
commonmark==0.9.1
|
commonmark==0.9.1
|
||||||
cssutils==1.0.2
|
cssutils==1.0.2
|
||||||
idna==2.9
|
idna==2.9
|
||||||
|
jinxed==1.0.0
|
||||||
pprintpp==0.4.0
|
pprintpp==0.4.0
|
||||||
Pygments==2.6.1
|
Pygments==2.6.1
|
||||||
requests==2.23.0
|
requests==2.23.0
|
||||||
rich==1.1.5
|
|
||||||
selenium==3.141.0
|
selenium==3.141.0
|
||||||
|
six==1.14.0
|
||||||
soupsieve==2.0
|
soupsieve==2.0
|
||||||
toml==0.10.1
|
toml==0.10.1
|
||||||
typing-extensions==3.7.4.2
|
typing-extensions==3.7.4.2
|
||||||
urllib3==1.25.9
|
urllib3==1.25.9
|
||||||
|
wcwidth==0.1.9
|
||||||
|
Loading…
Reference in New Issue
Block a user