Do not run Parser on init and split main into subfunctions

Running the parser from `Parser.__init__` makes unit testing impossible, because constructing a `Parser` immediately starts parsing.

Splitting `main` into subfunctions makes it more readable.
This commit is contained in:
Alexey Leshchenko 2022-02-18 12:34:37 +03:00
parent be1981b813
commit 45632b8265
2 changed files with 42 additions and 23 deletions

View File

@ -18,7 +18,14 @@ except ModuleNotFoundError as error:
def main(): def main():
# set up argument parser args = get_args()
log = setup_logging(args)
parser = init_parser(args, log)
parser.run()
def get_args():
# set up argument parser and return parsed args
argparser = argparse.ArgumentParser( argparser = argparse.ArgumentParser(
description="Generate static websites from Notion.so pages" description="Generate static websites from Notion.so pages"
) )
@ -69,8 +76,10 @@ def main():
argparser.add_argument( argparser.add_argument(
"-v", "--verbose", action="store_true", help="Increase output log verbosity" "-v", "--verbose", action="store_true", help="Increase output log verbosity"
) )
args = argparser.parse_args() return argparser.parse_args()
def setup_logging(args):
# set up some pretty logs # set up some pretty logs
log = logging.getLogger("loconotion") log = logging.getLogger("loconotion")
log.setLevel(logging.INFO if not args.verbose else logging.DEBUG) log.setLevel(logging.INFO if not args.verbose else logging.DEBUG)
@ -113,32 +122,41 @@ def main():
except ModuleNotFoundError as identifier: except ModuleNotFoundError as identifier:
pass pass
# initialise and run the website parser return log
def init_parser(args, log):
# initialise the website parser
try: try:
if urllib.parse.urlparse(args.target).scheme: if urllib.parse.urlparse(args.target).scheme:
try: try:
response = requests.get(args.target) requests.get(args.target)
except requests.ConnectionError as exception:
log.critical('Connection error')
if "notion.so" in args.target or "notion.site" in args.target: if "notion.so" in args.target or "notion.site" in args.target:
log.info("Initialising parser with simple page url") log.info("Initialising parser with simple page url")
config = {"page": args.target} config = {"page": args.target}
Parser(config=config, args=vars(args)) parser = Parser(config=config, args=vars(args))
else: else:
log.critical(f"{args.target} is not a notion.so page") log.critical(f"{args.target} is not a notion.so page")
except requests.ConnectionError as exception:
log.critical(f"Connection error") elif Path(args.target).is_file():
else:
if Path(args.target).is_file():
with open(args.target, encoding="utf-8") as f: with open(args.target, encoding="utf-8") as f:
parsed_config = toml.loads(f.read()) parsed_config = toml.loads(f.read())
log.info(f"Initialising parser with configuration file") log.info('Initialising parser with configuration file')
log.debug(parsed_config) log.debug(parsed_config)
Parser(config=parsed_config, args=vars(args)) parser = Parser(config=parsed_config, args=vars(args))
else: else:
log.critical(f"Config file {args.target} does not exists") log.critical(f"Config file {args.target} does not exists")
except FileNotFoundError as e: except FileNotFoundError as e:
log.critical(f"FileNotFoundError: {e}") log.critical(f"FileNotFoundError: {e}")
sys.exit(0) sys.exit(0)
return parser
if __name__ == "__main__": if __name__ == "__main__":
try: try:

View File

@ -77,9 +77,10 @@ class Parser:
# create the output folder if necessary # create the output folder if necessary
self.dist_folder.mkdir(parents=True, exist_ok=True) self.dist_folder.mkdir(parents=True, exist_ok=True)
# initialize chromedriver and start parsing # initialize chromedriver
self.driver = self.init_chromedriver() self.driver = self.init_chromedriver()
self.run(url)
self.starting_url = url
def get_page_config(self, token): def get_page_config(self, token):
# starts by grabbing the gobal site configuration table, if exists # starts by grabbing the gobal site configuration table, if exists
@ -702,10 +703,10 @@ class Parser:
self.driver.get(url) self.driver.get(url)
WebDriverWait(self.driver, 60).until(notion_page_loaded()) WebDriverWait(self.driver, 60).until(notion_page_loaded())
def run(self, url): def run(self):
start_time = time.time() start_time = time.time()
self.processed_pages = {} self.processed_pages = {}
self.parse_page(url) self.parse_page(self.starting_url)
elapsed_time = time.time() - start_time elapsed_time = time.time() - start_time
formatted_time = "{:02d}:{:02d}:{:02d}".format( formatted_time = "{:02d}:{:02d}:{:02d}".format(
int(elapsed_time // 3600), int(elapsed_time // 3600),