Do not run Parser on init and split main into subfunctions

Running `Parser` from `__init__` makes unit testing impossible: constructing a `Parser` immediately starts a full parse.

Splitting `main` into subfunctions makes it more readable.
This commit is contained in:
Alexey Leshchenko 2022-02-18 12:34:37 +03:00
parent be1981b813
commit 45632b8265
2 changed files with 42 additions and 23 deletions

View File

@ -18,7 +18,14 @@ except ModuleNotFoundError as error:
def main():
# set up argument parser
args = get_args()
log = setup_logging(args)
parser = init_parser(args, log)
parser.run()
def get_args():
# set up argument parser and return parsed args
argparser = argparse.ArgumentParser(
description="Generate static websites from Notion.so pages"
)
@ -69,8 +76,10 @@ def main():
argparser.add_argument(
"-v", "--verbose", action="store_true", help="Increase output log verbosity"
)
args = argparser.parse_args()
return argparser.parse_args()
def setup_logging(args):
# set up some pretty logs
log = logging.getLogger("loconotion")
log.setLevel(logging.INFO if not args.verbose else logging.DEBUG)
@ -113,32 +122,41 @@ def main():
except ModuleNotFoundError as identifier:
pass
# initialise and run the website parser
return log
def init_parser(args, log):
# initialise the website parser
try:
if urllib.parse.urlparse(args.target).scheme:
try:
response = requests.get(args.target)
if "notion.so" in args.target or "notion.site" in args.target:
log.info("Initialising parser with simple page url")
config = {"page": args.target}
Parser(config=config, args=vars(args))
else:
log.critical(f"{args.target} is not a notion.so page")
requests.get(args.target)
except requests.ConnectionError as exception:
log.critical(f"Connection error")
else:
if Path(args.target).is_file():
with open(args.target, encoding="utf-8") as f:
parsed_config = toml.loads(f.read())
log.info(f"Initialising parser with configuration file")
log.debug(parsed_config)
Parser(config=parsed_config, args=vars(args))
log.critical('Connection error')
if "notion.so" in args.target or "notion.site" in args.target:
log.info("Initialising parser with simple page url")
config = {"page": args.target}
parser = Parser(config=config, args=vars(args))
else:
log.critical(f"Config file {args.target} does not exists")
log.critical(f"{args.target} is not a notion.so page")
elif Path(args.target).is_file():
with open(args.target, encoding="utf-8") as f:
parsed_config = toml.loads(f.read())
log.info('Initialising parser with configuration file')
log.debug(parsed_config)
parser = Parser(config=parsed_config, args=vars(args))
else:
log.critical(f"Config file {args.target} does not exists")
except FileNotFoundError as e:
log.critical(f"FileNotFoundError: {e}")
sys.exit(0)
return parser
if __name__ == "__main__":
try:

View File

@ -77,9 +77,10 @@ class Parser:
# create the output folder if necessary
self.dist_folder.mkdir(parents=True, exist_ok=True)
# initialize chromedriver and start parsing
# initialize chromedriver
self.driver = self.init_chromedriver()
self.run(url)
self.starting_url = url
def get_page_config(self, token):
# starts by grabbing the global site configuration table, if it exists
@ -702,10 +703,10 @@ class Parser:
self.driver.get(url)
WebDriverWait(self.driver, 60).until(notion_page_loaded())
def run(self, url):
def run(self):
start_time = time.time()
self.processed_pages = {}
self.parse_page(url)
self.parse_page(self.starting_url)
elapsed_time = time.time() - start_time
formatted_time = "{:02d}:{:02d}:{:02d}".format(
int(elapsed_time // 3600),