mirror of
https://github.com/leoncvlt/loconotion.git
synced 2024-08-30 18:12:12 +00:00
Do not run Parser on init and split main into subfunctions
Running `Parser` from its `__init__` makes unit testing impossible, because merely constructing the object starts a full parse. Splitting `main` into subfunctions makes it more readable.
This commit is contained in:
parent
be1981b813
commit
45632b8265
@ -18,7 +18,14 @@ except ModuleNotFoundError as error:
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# set up argument parser
|
args = get_args()
|
||||||
|
log = setup_logging(args)
|
||||||
|
parser = init_parser(args, log)
|
||||||
|
parser.run()
|
||||||
|
|
||||||
|
|
||||||
|
def get_args():
|
||||||
|
# set up argument parser and return parsed args
|
||||||
argparser = argparse.ArgumentParser(
|
argparser = argparse.ArgumentParser(
|
||||||
description="Generate static websites from Notion.so pages"
|
description="Generate static websites from Notion.so pages"
|
||||||
)
|
)
|
||||||
@ -69,8 +76,10 @@ def main():
|
|||||||
argparser.add_argument(
|
argparser.add_argument(
|
||||||
"-v", "--verbose", action="store_true", help="Increase output log verbosity"
|
"-v", "--verbose", action="store_true", help="Increase output log verbosity"
|
||||||
)
|
)
|
||||||
args = argparser.parse_args()
|
return argparser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def setup_logging(args):
|
||||||
# set up some pretty logs
|
# set up some pretty logs
|
||||||
log = logging.getLogger("loconotion")
|
log = logging.getLogger("loconotion")
|
||||||
log.setLevel(logging.INFO if not args.verbose else logging.DEBUG)
|
log.setLevel(logging.INFO if not args.verbose else logging.DEBUG)
|
||||||
@ -113,32 +122,41 @@ def main():
|
|||||||
except ModuleNotFoundError as identifier:
|
except ModuleNotFoundError as identifier:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# initialise and run the website parser
|
return log
|
||||||
|
|
||||||
|
|
||||||
|
def init_parser(args, log):
|
||||||
|
# initialise the website parser
|
||||||
try:
|
try:
|
||||||
if urllib.parse.urlparse(args.target).scheme:
|
if urllib.parse.urlparse(args.target).scheme:
|
||||||
try:
|
try:
|
||||||
response = requests.get(args.target)
|
requests.get(args.target)
|
||||||
|
except requests.ConnectionError as exception:
|
||||||
|
log.critical('Connection error')
|
||||||
|
|
||||||
if "notion.so" in args.target or "notion.site" in args.target:
|
if "notion.so" in args.target or "notion.site" in args.target:
|
||||||
log.info("Initialising parser with simple page url")
|
log.info("Initialising parser with simple page url")
|
||||||
config = {"page": args.target}
|
config = {"page": args.target}
|
||||||
Parser(config=config, args=vars(args))
|
parser = Parser(config=config, args=vars(args))
|
||||||
else:
|
else:
|
||||||
log.critical(f"{args.target} is not a notion.so page")
|
log.critical(f"{args.target} is not a notion.so page")
|
||||||
except requests.ConnectionError as exception:
|
|
||||||
log.critical(f"Connection error")
|
elif Path(args.target).is_file():
|
||||||
else:
|
|
||||||
if Path(args.target).is_file():
|
|
||||||
with open(args.target, encoding="utf-8") as f:
|
with open(args.target, encoding="utf-8") as f:
|
||||||
parsed_config = toml.loads(f.read())
|
parsed_config = toml.loads(f.read())
|
||||||
log.info(f"Initialising parser with configuration file")
|
log.info('Initialising parser with configuration file')
|
||||||
log.debug(parsed_config)
|
log.debug(parsed_config)
|
||||||
Parser(config=parsed_config, args=vars(args))
|
parser = Parser(config=parsed_config, args=vars(args))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
log.critical(f"Config file {args.target} does not exists")
|
log.critical(f"Config file {args.target} does not exists")
|
||||||
|
|
||||||
except FileNotFoundError as e:
|
except FileNotFoundError as e:
|
||||||
log.critical(f"FileNotFoundError: {e}")
|
log.critical(f"FileNotFoundError: {e}")
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
return parser
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
try:
|
try:
|
||||||
|
@ -77,9 +77,10 @@ class Parser:
|
|||||||
# create the output folder if necessary
|
# create the output folder if necessary
|
||||||
self.dist_folder.mkdir(parents=True, exist_ok=True)
|
self.dist_folder.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# initialize chromedriver and start parsing
|
# initialize chromedriver
|
||||||
self.driver = self.init_chromedriver()
|
self.driver = self.init_chromedriver()
|
||||||
self.run(url)
|
|
||||||
|
self.starting_url = url
|
||||||
|
|
||||||
def get_page_config(self, token):
|
def get_page_config(self, token):
|
||||||
# starts by grabbing the gobal site configuration table, if exists
|
# starts by grabbing the gobal site configuration table, if exists
|
||||||
@ -702,10 +703,10 @@ class Parser:
|
|||||||
self.driver.get(url)
|
self.driver.get(url)
|
||||||
WebDriverWait(self.driver, 60).until(notion_page_loaded())
|
WebDriverWait(self.driver, 60).until(notion_page_loaded())
|
||||||
|
|
||||||
def run(self, url):
|
def run(self):
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
self.processed_pages = {}
|
self.processed_pages = {}
|
||||||
self.parse_page(url)
|
self.parse_page(self.starting_url)
|
||||||
elapsed_time = time.time() - start_time
|
elapsed_time = time.time() - start_time
|
||||||
formatted_time = "{:02d}:{:02d}:{:02d}".format(
|
formatted_time = "{:02d}:{:02d}:{:02d}".format(
|
||||||
int(elapsed_time // 3600),
|
int(elapsed_time // 3600),
|
||||||
|
Loading…
Reference in New Issue
Block a user