Set up poetry and black formatter

This commit is contained in:
Leonardo Cavaletti 2020-05-24 17:13:48 +01:00
parent bd76bc3089
commit 7e5a11cb6a
8 changed files with 1098 additions and 573 deletions

9
.gitignore vendored
View File

@ -108,10 +108,7 @@ dmypy.json
.pyre/
# End of https://www.gitignore.io/api/python
.env
.vscode
env
dist/*
test/*
logs/*
*.bat
temp
logs

View File

@ -40,11 +40,13 @@ It does, but I wasn't really happy with the styling - the pages looked a bit ugl
## Installation & Requirements
`pip install -r requirements.txt`
Make sure you're in your virtual environment of choice, then run
- `poetry install --no-dev` if you have [Poetry](https://python-poetry.org/) installed
- `pip install -r requirements.txt` otherwise
This script uses [ChromeDriver](chromedriver.chromium.org) to automate the Google Chrome browser - therefore Google Chrome needs to be installed in order to work.
The script comes bundled with the default windows chromedriver executable. On Mac / Linux, download the right distribution for you from https://chromedriver.chromium.org/downloads and place the executable in this folder. Alternatively, use the `--chromedriver` argument to specify its path at runtime.
The script will automatically try to download and use the appropriate chromedriver distribution for your OS and Chrome version. If this doesn't work, download the right version for you from https://chromedriver.chromium.org/downloads and use the `--chromedriver` argument to specify its path at runtime.
## Simple Usage

View File

@ -16,15 +16,37 @@ except ModuleNotFoundError as error:
from notionparser import Parser
def main():
# set up argument parser
argparser = argparse.ArgumentParser(description='Generate static websites from Notion.so pages')
argparser.add_argument('target', help='The config file containing the site properties, or the url of the Notion.so page to generate the site from')
argparser.add_argument('--chromedriver', help='Use a specific chromedriver executable instead of the auto-installing one')
argparser.add_argument("--single-page", action="store_true", help="Only parse the first page, then stop")
argparser.add_argument('--clean', action='store_true', help='Delete all previously cached files for the site before generating it')
argparser.add_argument('--non-headless', action='store_true', help='Run chromedriver in non-headless mode')
argparser.add_argument("-v", "--verbose", action="store_true", help="Increasite output log verbosity")
argparser = argparse.ArgumentParser(
description="Generate static websites from Notion.so pages"
)
argparser.add_argument(
"target",
help="The config file containing the site properties, or the url"
" of the Notion.so page to generate the site from",
)
argparser.add_argument(
"--chromedriver",
help="Use a specific chromedriver executable instead of the auto-installing one",
)
argparser.add_argument(
"--single-page", action="store_true", help="Only parse the first page, then stop"
)
argparser.add_argument(
"--clean",
action="store_true",
help="Delete all previously cached files for the site before generating it",
)
argparser.add_argument(
"--non-headless",
action="store_true",
help="Run chromedriver in non-headless mode",
)
argparser.add_argument(
"-v", "--verbose", action="store_true", help="Increasite output log verbosity"
)
args = argparser.parse_args()
# set up some pretty logs
@ -41,7 +63,7 @@ def main():
logging.INFO: colorama.Fore.BLUE,
logging.WARNING: colorama.Fore.YELLOW,
logging.ERROR: colorama.Fore.RED,
logging.CRITICAL: colorama.Back.RED
logging.CRITICAL: colorama.Back.RED,
}
class ColorFormatter(logging.Formatter):
@ -57,11 +79,14 @@ def main():
)
return super(ColorFormatter, self).format(new_record, *args, **kwargs)
log_screen_handler.setFormatter(ColorFormatter(fmt='%(asctime)s %(levelname)-8s %(message)s',
log_screen_handler.setFormatter(
ColorFormatter(
fmt="%(asctime)s %(levelname)-8s %(message)s",
datefmt="{color_begin}[%H:%M:%S]{color_end}".format(
color_begin=colorama.Style.DIM,
color_end=colorama.Style.RESET_ALL
)))
color_begin=colorama.Style.DIM, color_end=colorama.Style.RESET_ALL
),
)
)
except ModuleNotFoundError as identifier:
pass
@ -70,7 +95,7 @@ def main():
if urllib.parse.urlparse(args.target).scheme:
try:
response = requests.get(args.target)
if ("notion.so" in args.target):
if "notion.so" in args.target:
log.info("Initialising parser with simple page url")
config = {"page": args.target}
Parser(config=config, args=vars(args))
@ -88,14 +113,15 @@ def main():
else:
log.critical(f"Config file {args.target} does not exists")
except FileNotFoundError as e:
log.critical(f'FileNotFoundError: {e}')
log.critical(f"FileNotFoundError: {e}")
sys.exit(0)
if __name__ == '__main__':
if __name__ == "__main__":
try:
main()
except KeyboardInterrupt:
log.critical('Interrupted by user')
log.critical("Interrupted by user")
try:
sys.exit(0)
except SystemExit:

View File

@ -2,38 +2,52 @@ import logging
log = logging.getLogger(f"loconotion.{__name__}")
class notion_page_loaded(object):
"""An expectation for checking that a notion page has loaded.
"""
"""An expectation for checking that a notion page has loaded."""
def __init__(self, url):
self.url = url
def __call__(self, driver):
notion_presence = len(driver.find_elements_by_class_name("notion-presence-container"))
collection_view_block = len(driver.find_elements_by_class_name("notion-collection_view_page-block"));
collection_search = len(driver.find_elements_by_class_name("collectionSearch"));
notion_presence = len(
driver.find_elements_by_class_name("notion-presence-container")
)
collection_view_block = len(
driver.find_elements_by_class_name("notion-collection_view_page-block")
)
collection_search = len(driver.find_elements_by_class_name("collectionSearch"))
# embed_ghosts = len(driver.find_elements_by_css_selector("div[embed-ghost]"));
log.debug(f"Waiting for page content to load (presence container: {notion_presence}, loaders: {loading_spinners} )")
if (notion_presence and not loading_spinners):
log.debug(
f"Waiting for page content to load"
f" (presence container: {notion_presence}, loaders: {loading_spinners} )"
)
if notion_presence and not loading_spinners:
return True
else:
return False
class toggle_block_has_opened(object):
"""An expectation for checking that a notion toggle block has been opened.
It does so by checking if the div hosting the content has enough children,
and the absence of the loading spinner.
"""
and the absence of the loading spinner."""
def __init__(self, toggle_block):
self.toggle_block = toggle_block
def __call__(self, driver):
toggle_content = self.toggle_block.find_element_by_css_selector("div:not([style]")
if (toggle_content):
if toggle_content:
content_children = len(toggle_content.find_elements_by_tag_name("div"))
is_loading = len(self.toggle_block.find_elements_by_class_name("loading-spinner"));
log.debug(f"Waiting for toggle block to load ({content_children} children so far and {is_loading} loaders)")
if (content_children > 3 and not is_loading):
is_loading = len(
self.toggle_block.find_elements_by_class_name("loading-spinner")
)
log.debug(
f"Waiting for toggle block to load"
f" ({content_children} children so far and {is_loading} loaders)"
)
if content_children > 3 and not is_loading:
return True
else:
return False

View File

@ -25,6 +25,7 @@ try:
from bs4 import BeautifulSoup
import requests
import cssutils
cssutils.log.setLevel(logging.CRITICAL) # removes warning logs from cssutils
except ModuleNotFoundError as error:
log.critical(f"ModuleNotFoundError: {error}. have your installed the requirements?")
@ -32,14 +33,18 @@ except ModuleNotFoundError as error:
from conditions import toggle_block_has_opened
class Parser():
class Parser:
def __init__(self, config={}, args={}):
self.config = config
self.args = args
url = self.config.get("page", None)
if not url:
log.critical("No initial page url specified. If passing a configuration file," +
"make sure it contains a 'page' key with the url of the notion.so page to parse")
log.critical(
"No initial page url specified. If passing a configuration file,"
" make sure it contains a 'page' key with the url of the notion.so"
" page to parse"
)
return
# get the site name from the config, or make it up by cleaning the target page's slug
@ -50,7 +55,7 @@ class Parser():
log.info(f"Setting output path to '{self.dist_folder}'")
# check if the argument to clean the dist folder was passed
if (self.args.get("clean", False)):
if self.args.get("clean", False):
try:
shutil.rmtree(self.dist_folder)
log.info(f"Removing previously cached files in '{self.dist_folder}'")
@ -69,25 +74,36 @@ class Parser():
site_config = self.config.get("site", {})
# check if there's anything wrong with the site config
if (site_config.get("slug", None)):
log.error("'slug' parameter has no effect in the [site] table, and should only present in page tables.")
del site_config['slug']
if site_config.get("slug", None):
log.error(
"'slug' parameter has no effect in the [site] table, "
"and should only present in page tables."
)
del site_config["slug"]
# find a table in the configuration file whose key contains the passed token string
site_pages_config = self.config.get("pages", {})
matching_pages_config = [value for key, value in site_pages_config.items() if key.lower() in token]
if (matching_pages_config):
if (len(matching_pages_config) > 1):
log.error(f"multiple matching page config tokens found for {token} in configuration file. Make sure pages urls / slugs are unique")
matching_pages_config = [
value for key, value in site_pages_config.items() if key.lower() in token
]
if matching_pages_config:
if len(matching_pages_config) > 1:
log.error(
f"multiple matching page config tokens found for {token}"
" in configuration file. Make sure pages urls / slugs are unique"
)
return site_config
else:
# if found, merge it on top of the global site configuration table
# log.debug(f"Config table found for page with token {token}")
matching_page_config = matching_pages_config[0]
if (type(matching_page_config) is dict):
if type(matching_page_config) is dict:
return {**site_config, **matching_page_config}
else:
log.error(f"Matching page configuration for {url} was not a dict: {matching_page_config} - something went wrong")
log.error(
f"Matching page configuration for {url} was not a dict:"
f" {matching_page_config} - something went wrong"
)
return site_config
else:
# log.debug(f"No config table found for page token {token}, using global site config table")
@ -102,11 +118,11 @@ class Parser():
else:
# if not, clean up the existing slug
path = urllib.parse.urlparse(url).path.strip("/")
if ("-" in path and len(path.split("-")) > 1):
if "-" in path and len(path.split("-")) > 1:
# a standard notion page looks like the-page-title-[uuid]
# strip the uuid and keep the page title only
path = "-".join(path.split("-")[:-1]).lower()
elif ("?" in path):
elif "?" in path:
# database pages just have a uuid and a query param
# not much to do here, just get rid of the query param
path = path.split("?")[0].lower()
@ -118,19 +134,19 @@ class Parser():
# if no filename specified, generate a hashed id based on the query-less url,
# so we avoid re-downloading / caching files we already have
if (not filename):
if not filename:
parsed_url = urllib.parse.urlparse(url)
queryless_url = parsed_url.netloc + parsed_url.path
query_params = urllib.parse.parse_qs(parsed_url.query)
# if any of the query params contains a size parameter store it in the hash
# so we can download other higher-resolution versions if needed
if ("width" in query_params.keys()):
if "width" in query_params.keys():
queryless_url = queryless_url + f"?width={query_params['width']}"
filename = hashlib.sha1(str.encode(queryless_url)).hexdigest();
filename = hashlib.sha1(str.encode(queryless_url)).hexdigest()
destination = self.dist_folder / filename
# check if there are any files matching the filename, ignoring extension
matching_file = glob.glob(str(destination.with_suffix('.*')))
matching_file = glob.glob(str(destination.with_suffix(".*")))
if not matching_file:
# if url has a network scheme, download the file
if "http" in urllib.parse.urlparse(url).scheme:
@ -146,11 +162,11 @@ class Parser():
# if the filename does not have an extension at this point,
# try to infer it from the url, and if not possible,
# from the content-type header mimetype
if (not destination.suffix):
if not destination.suffix:
file_extension = Path(urllib.parse.urlparse(url).path).suffix
if (not file_extension):
content_type = response.headers.get('content-type')
if (content_type):
if not file_extension:
content_type = response.headers.get("content-type")
if content_type:
file_extension = mimetypes.guess_extension(content_type)
destination = destination.with_suffix(file_extension)
@ -177,51 +193,65 @@ class Parser():
def init_chromedriver(self):
chromedriver_path = self.args.get("chromedriver")
if (not chromedriver_path):
if not chromedriver_path:
try:
chromedriver_path = chromedriver_autoinstaller.install()
except Exception as exception:
log.critical(f"Failed to install the built-in chromedriver: {exception}\n" +
"download the correct version for your system at https://chromedriver.chromium.org/downloads" +
"and use the --chromedriver argument to point to the chromedriver executable")
log.critical(
f"Failed to install the built-in chromedriver: {exception}\n"
"\nDownload the correct version for your system at"
" https://chromedriver.chromium.org/downloads and use the"
" --chromedriver argument to point to the chromedriver executable"
)
sys.exit()
log.info(f"Initialising chromedriver at {chromedriver_path}")
logs_path = (Path.cwd() / "logs" / "webdrive.log")
logs_path = Path.cwd() / "logs" / "webdrive.log"
logs_path.parent.mkdir(parents=True, exist_ok=True)
chrome_options = Options()
if (not self.args.get("non_headless", False)):
if not self.args.get("non_headless", False):
chrome_options.add_argument("--headless")
chrome_options.add_argument("window-size=1920,1080")
chrome_options.add_argument("--log-level=3");
chrome_options.add_argument("--silent");
chrome_options.add_argument("--log-level=3")
chrome_options.add_argument("--silent")
chrome_options.add_argument("--disable-logging")
# removes the 'DevTools listening' log message
chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])
chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])
return webdriver.Chrome(
executable_path=str(chromedriver_path),
service_log_path=str(logs_path),
options=chrome_options)
options=chrome_options,
)
def parse_page(self, url, processed_pages={}, index=None):
# if this is the first page being parsed, set it as the index.html
if (not index):
index = url;
if not index:
index = url
log.info(f"Parsing page '{url}'")
log.debug(f"Using page config: {self.get_page_config(url)}")
self.driver.get(url)
# if ("This content does not exist" in self.driver.page_source):
# log.error(f"No content found in {url}. Are you sure the page is set to public?")
# if "This content does not exist" in self.driver.page_source:
# log.error(
# f"No content found in {url}."
# " Are you sure the page is set to public?"
# )
# return
try:
# WebDriverWait(self.driver, 10).until(notion_page_loaded())
WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'notion-presence-container')))
WebDriverWait(self.driver, 10).until(
EC.presence_of_element_located(
(By.CLASS_NAME, "notion-presence-container")
)
)
except TimeoutException as ex:
log.critical("Timeout waiting for page content to load, or no content found. Are you sure the page is set to public?")
log.critical(
"Timeout waiting for page content to load, or no content found."
" Are you sure the page is set to public?"
)
return
# cooldown to allow eventual database items to load
@ -231,185 +261,254 @@ class Parser():
# function to expand all the toggle block in the page to make their content visible
# so we can hook up our custom toggle logic afterwards
def open_toggle_blocks(exclude=[]):
opened_toggles = exclude;
opened_toggles = exclude
toggle_blocks = self.driver.find_elements_by_class_name("notion-toggle-block")
log.debug(f"Opening {len(toggle_blocks)} new toggle blocks in the page")
for toggle_block in toggle_blocks:
if (not toggle_block in opened_toggles):
toggle_button = toggle_block.find_element_by_css_selector("div[role=button]")
if not toggle_block in opened_toggles:
toggle_button = toggle_block.find_element_by_css_selector(
"div[role=button]"
)
# check if the toggle is already open by the direction of its arrow
is_toggled = "(180deg)" in (toggle_button.find_element_by_tag_name("svg").get_attribute("style"))
if (not is_toggled):
is_toggled = "(180deg)" in (
toggle_button.find_element_by_tag_name("svg").get_attribute(
"style"
)
)
if not is_toggled:
# click on it, then wait until all elements are displayed
toggle_button.click()
try:
WebDriverWait(self.driver, 10).until(toggle_block_has_opened(toggle_block))
WebDriverWait(self.driver, 10).until(
toggle_block_has_opened(toggle_block)
)
except TimeoutException as ex:
log.warning("Timeout waiting for toggle block to open. Likely it's already open, but doesn't hurt to check.")
log.warning(
"Timeout waiting for toggle block to open."
" Likely it's already open, but doesn't hurt to check."
)
except Exception as ex:
log.error("Something went wrong with selenium while trying to open a toggle block")
log.error(
"Something went wrong while trying to open a toggle block"
)
opened_toggles.append(toggle_block)
# after all toggles have been opened, check the page again to see if
# any toggle block had nested toggle blocks inside them
new_toggle_blocks = self.driver.find_elements_by_class_name("notion-toggle-block")
if (len(new_toggle_blocks) > len(toggle_blocks)):
new_toggle_blocks = self.driver.find_elements_by_class_name(
"notion-toggle-block"
)
if len(new_toggle_blocks) > len(toggle_blocks):
# if so, run the function again
open_toggle_blocks(opened_toggles)
# open the toggle blocks in the page
open_toggle_blocks()
# creates soup from the page to start parsing
soup = BeautifulSoup(self.driver.page_source, "html.parser")
# remove scripts and other tags we don't want / need
for unwanted in soup.findAll('script'):
unwanted.decompose();
for intercom_frame in soup.findAll('div',{'id':'intercom-frame'}):
intercom_frame.decompose();
for intercom_div in soup.findAll('div',{'class':'intercom-lightweight-app'}):
intercom_div.decompose();
for overlay_div in soup.findAll('div',{'class':'notion-overlay-container'}):
overlay_div.decompose();
for vendors_css in soup.find_all("link", href=lambda x: x and 'vendors~' in x):
vendors_css.decompose();
for unwanted in soup.findAll("script"):
unwanted.decompose()
for intercom_frame in soup.findAll("div", {"id": "intercom-frame"}):
intercom_frame.decompose()
for intercom_div in soup.findAll("div", {"class": "intercom-lightweight-app"}):
intercom_div.decompose()
for overlay_div in soup.findAll("div", {"class": "notion-overlay-container"}):
overlay_div.decompose()
for vendors_css in soup.find_all("link", href=lambda x: x and "vendors~" in x):
vendors_css.decompose()
# clean up the default notion meta tags
for tag in ["description", "twitter:card", "twitter:site", "twitter:title", "twitter:description", "twitter:image", "twitter:url", "apple-itunes-app"]:
for tag in [
"description",
"twitter:card",
"twitter:site",
"twitter:title",
"twitter:description",
"twitter:image",
"twitter:url",
"apple-itunes-app",
]:
unwanted_tag = soup.find("meta", attrs={"name": tag})
if (unwanted_tag): unwanted_tag.decompose();
for tag in ["og:site_name", "og:type", "og:url", "og:title", "og:description", "og:image"]:
if unwanted_tag:
unwanted_tag.decompose()
for tag in [
"og:site_name",
"og:type",
"og:url",
"og:title",
"og:description",
"og:image",
]:
unwanted_og_tag = soup.find("meta", attrs={"property": tag})
if (unwanted_og_tag): unwanted_og_tag.decompose();
if unwanted_og_tag:
unwanted_og_tag.decompose()
# set custom meta tags
custom_meta_tags = self.get_page_config(url).get("meta", [])
for custom_meta_tag in custom_meta_tags:
tag = soup.new_tag('meta')
tag = soup.new_tag("meta")
for attr, value in custom_meta_tag.items():
tag.attrs[attr] = value
log.debug(f"Adding meta tag {str(tag)}")
soup.head.append(tag)
# process images
cache_images = True
for img in soup.findAll('img'):
if img.has_attr('src'):
if (cache_images and not 'data:image' in img['src']):
img_src = img['src']
for img in soup.findAll("img"):
if img.has_attr("src"):
if cache_images and not "data:image" in img["src"]:
img_src = img["src"]
# if the path starts with /, it's one of notion's predefined images
if (img['src'].startswith('/')):
img_src = "https://www.notion.so" + img['src']
if img["src"].startswith("/"):
img_src = "https://www.notion.so" + img["src"]
# notion's own default images urls are in a weird format, need to sanitize them
# img_src = 'https://www.notion.so' + img['src'].split("notion.so")[-1].replace("notion.so", "").split("?")[0]
# if (not '.amazonaws' in img_src):
# img_src = urllib.parse.unquote(img_src)
cached_image = self.cache_file(img_src)
img['src'] = cached_image
img["src"] = cached_image
else:
if (img['src'].startswith('/')):
img['src'] = "https://www.notion.so" + img['src']
if img["src"].startswith("/"):
img["src"] = "https://www.notion.so" + img["src"]
# process stylesheets
for link in soup.findAll('link', rel="stylesheet"):
if link.has_attr('href') and link['href'].startswith('/'):
for link in soup.findAll("link", rel="stylesheet"):
if link.has_attr("href") and link["href"].startswith("/"):
# we don't need the vendors stylesheet
if ("vendors~" in link['href']):
if "vendors~" in link["href"]:
continue
# css_file = link['href'].strip("/")
cached_css_file = self.cache_file('https://www.notion.so' + link['href'])
with open(self.dist_folder / cached_css_file, 'rb') as f:
cached_css_file = self.cache_file("https://www.notion.so" + link["href"])
with open(self.dist_folder / cached_css_file, "rb") as f:
stylesheet = cssutils.parseString(f.read())
# open the stylesheet and check for any font-face rule,
for rule in stylesheet.cssRules:
if rule.type == cssutils.css.CSSRule.FONT_FACE_RULE:
# if any are found, download the font file
font_file = rule.style['src'].split("url(/")[-1].split(") format")[0]
cached_font_file = self.cache_file(f'https://www.notion.so/{font_file}')
rule.style['src'] = f"url({str(cached_font_file)})"
link['href'] = str(cached_css_file)
font_file = (
rule.style["src"].split("url(/")[-1].split(") format")[0]
)
cached_font_file = self.cache_file(
f"https://www.notion.so/{font_file}"
)
rule.style["src"] = f"url({str(cached_font_file)})"
link["href"] = str(cached_css_file)
# add our custom logic to all toggle blocks
for toggle_block in soup.findAll('div',{'class':'notion-toggle-block'}):
for toggle_block in soup.findAll("div", {"class": "notion-toggle-block"}):
toggle_id = uuid.uuid4()
toggle_button = toggle_block.select_one('div[role=button]')
toggle_content = toggle_block.find('div', {'class': None, 'style': ''})
if (toggle_button and toggle_content):
# add a custom class to the toggle button and content, plus a custom attribute
# sharing a unique uuid so we can hook them up with some custom js logic later
toggle_button['class'] = toggle_block.get('class', []) + ['loconotion-toggle-button']
toggle_content['class'] = toggle_content.get('class', []) + ['loconotion-toggle-content']
toggle_content.attrs['loconotion-toggle-id'] = toggle_button.attrs['loconotion-toggle-id'] = toggle_id
toggle_button = toggle_block.select_one("div[role=button]")
toggle_content = toggle_block.find("div", {"class": None, "style": ""})
if toggle_button and toggle_content:
# add a custom class to the toggle button and content,
# plus a custom attribute sharing a unique uuid so
# we can hook them up with some custom js logic later
toggle_button["class"] = toggle_block.get("class", []) + [
"loconotion-toggle-button"
]
toggle_content["class"] = toggle_content.get("class", []) + [
"loconotion-toggle-content"
]
toggle_content.attrs["loconotion-toggle-id"] = toggle_button.attrs[
"loconotion-toggle-id"
] = toggle_id
# if there are any table views in the page, add links to the title rows
for table_view in soup.findAll('div', {'class':'notion-table-view'}):
for table_row in table_view.findAll('div', {'class':'notion-collection-item'}):
for table_view in soup.findAll("div", {"class": "notion-table-view"}):
for table_row in table_view.findAll(
"div", {"class": "notion-collection-item"}
):
# for each row, hover the mouse over it to make the open button appear,
# then grab its href and wrap the table row's name into a link
table_row_block_id = table_row['data-block-id']
table_row_hover_target = self.driver.find_element_by_css_selector(f"div[data-block-id='{table_row_block_id}'] > div > div")
# need to scroll the row into view or else the open button won't be visible to selenium
self.driver.execute_script("arguments[0].scrollIntoView();", table_row_hover_target)
ActionChains(self.driver).move_to_element(table_row_hover_target).perform()
table_row_block_id = table_row["data-block-id"]
table_row_hover_target = self.driver.find_element_by_css_selector(
f"div[data-block-id='{table_row_block_id}'] > div > div"
)
# need to scroll the row into view or else
# the open button won't be visible to selenium
self.driver.execute_script(
"arguments[0].scrollIntoView();", table_row_hover_target
)
ActionChains(self.driver).move_to_element(
table_row_hover_target
).perform()
try:
WebDriverWait(self.driver, 5).until(EC.visibility_of_element_located(
(By.CSS_SELECTOR, f"div[data-block-id='{table_row_block_id}'] > div > a")))
WebDriverWait(self.driver, 5).until(
EC.visibility_of_element_located(
(
By.CSS_SELECTOR,
f"div[data-block-id='{table_row_block_id}'] > div > a",
)
)
)
except TimeoutException as ex:
log.error(f"Timeout waiting for the 'open' button for row in table with block id {table_row_block_id}")
table_row_href = self.driver.find_element_by_css_selector(f"div[data-block-id='{table_row_block_id}'] > div > a").get_attribute('href')
log.error(
f"Timeout waiting for the 'open' button to appear for"
f" row in table with block id {table_row_block_id}"
)
table_row_href = self.driver.find_element_by_css_selector(
f"div[data-block-id='{table_row_block_id}'] > div > a"
).get_attribute("href")
table_row_href = table_row_href.split("notion.so")[-1]
row_target_span = table_row.find("span")
row_link_wrapper = soup.new_tag('a', attrs={'href': table_row_href, 'style':"cursor: pointer;"})
row_link_wrapper = soup.new_tag(
"a", attrs={"href": table_row_href, "style": "cursor: pointer;"}
)
row_target_span.wrap(row_link_wrapper)
# embed custom google font(s)
fonts_selectors = {
"site": "div:not(.notion-code-block)",
"navbar": ".notion-topbar div",
"title": ".notion-page-block > div, .notion-collection_view_page-block > div",
"h1" : ".notion-header-block div, notion-page-content > notion-collection_view-block > div:first-child div",
"h1": ".notion-header-block div, notion-page-content >"
" notion-collection_view-block > div:first-child div",
"h2": ".notion-sub_header-block div",
"h3": ".notion-sub_sub_header-block div",
"body": ".notion-app-inner",
"code": ".notion-code-block *",
}
custom_fonts = self.get_page_config(url).get("fonts", {})
if (custom_fonts):
if custom_fonts:
# append a stylesheet importing the google font for each unique font
unique_custom_fonts = set(custom_fonts.values())
for font in unique_custom_fonts:
if (font):
if font:
google_fonts_embed_name = font.replace(" ", "+")
font_href = f"https://fonts.googleapis.com/css2?family={google_fonts_embed_name}:wght@500;600;700&display=swap"
custom_font_stylesheet = soup.new_tag("link", rel="stylesheet", href=font_href)
soup.head.append(custom_font_stylesheet);
custom_font_stylesheet = soup.new_tag(
"link", rel="stylesheet", href=font_href
)
soup.head.append(custom_font_stylesheet)
# go through each custom font, and add a css rule overriding the font-family
# to the font override stylesheet targetting the appropriate selector
font_override_stylesheet = soup.new_tag('style', type='text/css')
font_override_stylesheet = soup.new_tag("style", type="text/css")
for target, custom_font in custom_fonts.items():
if custom_font and not target == "site":
log.debug(f"Setting {target} font-family to {custom_font}")
font_override_stylesheet.append(fonts_selectors[target] + " {font-family:" + custom_font + " !important} ")
font_override_stylesheet.append(
fonts_selectors[target]
+ " {font-family:"
+ custom_font
+ " !important} "
)
site_font = custom_fonts.get("site", None)
# process global site font last so more granular settings can override it
if (site_font):
if site_font:
log.debug(f"Setting global site font-family to {site_font}"),
font_override_stylesheet.append(fonts_selectors["site"] + " {font-family:" + site_font + "} ")
font_override_stylesheet.append(
fonts_selectors["site"] + " {font-family:" + site_font + "} "
)
# finally append the font overrides stylesheets to the page
soup.head.append(font_override_stylesheet)
# inject any custom elements to the page
custom_injects = self.get_page_config(url).get("inject", {})
def injects_custom_tags(section):
section_custom_injects = custom_injects.get(section, {})
for tag, elements in section_custom_injects.items():
@ -418,74 +517,99 @@ class Parser():
for attr, value in element.items():
injected_tag[attr] = value
# if the value refers to a file, copy it to the dist folder
if (attr.lower() == "href" or attr.lower() == "src"):
if attr.lower() == "href" or attr.lower() == "src":
log.debug(f"Copying injected file '{value}'")
cached_custom_file = self.cache_file((Path.cwd() / value.strip("/")))
cached_custom_file = self.cache_file(
(Path.cwd() / value.strip("/"))
)
# destination = (self.dist_folder / source.name)
# shutil.copyfile(source, destination)
injected_tag[attr] = str(cached_custom_file) # source.name
log.debug(f"Injecting <{section}> tag: {str(injected_tag)}")
soup.find(section).append(injected_tag)
injects_custom_tags("head")
injects_custom_tags("body")
# inject loconotion's custom stylesheet and script
loconotion_custom_css = self.cache_file(Path("bundles/loconotion.css"))
custom_css = soup.new_tag("link", rel="stylesheet", href=str(loconotion_custom_css))
custom_css = soup.new_tag(
"link", rel="stylesheet", href=str(loconotion_custom_css)
)
soup.head.insert(-1, custom_css)
loconotion_custom_js = self.cache_file(Path("bundles/loconotion.js"))
custom_script = soup.new_tag("script", type="text/javascript", src=str(loconotion_custom_js))
custom_script = soup.new_tag(
"script", type="text/javascript", src=str(loconotion_custom_js)
)
soup.body.insert(-1, custom_script)
# find sub-pages and clean slugs / links
sub_pages = [];
for a in soup.findAll('a'):
if a['href'].startswith('/'):
sub_page_href = 'https://www.notion.so' + a['href']
# if the link is an anchor link, check if the page hasn't already been parsed
if ("#" in sub_page_href):
sub_pages = []
for a in soup.findAll("a"):
if a["href"].startswith("/"):
sub_page_href = "https://www.notion.so" + a["href"]
# if the link is an anchor link,
# check if the page hasn't already been parsed
if "#" in sub_page_href:
sub_page_href_tokens = sub_page_href.split("#")
sub_page_href = sub_page_href_tokens[0]
a['href'] = "#" + sub_page_href_tokens[-1]
a['class'] = a.get('class', []) + ['loconotion-anchor-link']
if (sub_page_href in processed_pages.keys() or sub_page_href in sub_pages):
log.debug(f"Original page for anchor link {sub_page_href} already parsed / pending parsing, skipping")
a["href"] = "#" + sub_page_href_tokens[-1]
a["class"] = a.get("class", []) + ["loconotion-anchor-link"]
if (
sub_page_href in processed_pages.keys()
or sub_page_href in sub_pages
):
log.debug(
f"Original page for anchor link {sub_page_href}"
" already parsed / pending parsing, skipping"
)
continue
else:
a['href'] = self.get_page_slug(sub_page_href) if sub_page_href != index else "index.html"
a["href"] = (
self.get_page_slug(sub_page_href)
if sub_page_href != index
else "index.html"
)
sub_pages.append(sub_page_href)
log.debug(f"Found link to page {a['href']}")
# exports the parsed page
html_str = str(soup)
html_file = self.get_page_slug(url) if url != index else "index.html"
if (html_file in processed_pages.values()):
log.error(f"Found duplicate pages with slug '{html_file}' - previous one will be overwritten." +
"make sure that your notion pages names or custom slugs in the configuration files are unique")
if html_file in processed_pages.values():
log.error(
f"Found duplicate pages with slug '{html_file}' - previous one will be"
" overwritten. Make sure that your notion pages names or custom slugs"
" in the configuration files are unique"
)
log.info(f"Exporting page '{url}' as '{html_file}'")
with open(self.dist_folder / html_file, "wb") as f:
f.write(html_str.encode('utf-8').strip())
f.write(html_str.encode("utf-8").strip())
processed_pages[url] = html_file
# parse sub-pages
if (sub_pages and not self.args.get("single_page", False)):
if (processed_pages): log.debug(f"Pages processed so far: {len(processed_pages)}")
if sub_pages and not self.args.get("single_page", False):
if processed_pages:
log.debug(f"Pages processed so far: {len(processed_pages)}")
for sub_page in sub_pages:
if not sub_page in processed_pages.keys():
self.parse_page(sub_page, processed_pages = processed_pages, index = index)
self.parse_page(
sub_page, processed_pages=processed_pages, index=index
)
# we're all done!
return processed_pages
def run(self, url):
    """Parse the page at `url` (and all reachable sub-pages), timing the run.

    Entry point for a full site export: delegates the real work to
    `self.parse_page`, then logs how many pages were processed and the
    total elapsed wall-clock time as HH:MM:SS.

    Args:
        url: The root Notion.so page URL to start parsing from.

    Returns:
        The dict of processed pages (url -> exported html filename)
        as produced by `self.parse_page`.
    """
    start_time = time.time()
    tot_processed_pages = self.parse_page(url)
    elapsed_time = time.time() - start_time
    # Break the elapsed seconds down into hours / minutes / seconds.
    formatted_time = "{:02d}:{:02d}:{:02d}".format(
        int(elapsed_time // 3600),
        int(elapsed_time % 3600 // 60),
        int(elapsed_time % 60),
    )
    log.info(
        f"Finished!\n\nProcessed {len(tot_processed_pages)} pages in {formatted_time}"
    )
    # Return the processed-pages map so callers can inspect the result
    # (backward-compatible: the garbled original returned None implicitly).
    return tot_processed_pages

315
poetry.lock generated Normal file
View File

@ -0,0 +1,315 @@
[[package]]
category = "dev"
description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
name = "appdirs"
optional = false
python-versions = "*"
version = "1.4.4"
[[package]]
category = "dev"
description = "Classes Without Boilerplate"
name = "attrs"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
version = "19.3.0"
[package.extras]
azure-pipelines = ["coverage", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "pytest-azurepipelines"]
dev = ["coverage", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "sphinx", "pre-commit"]
docs = ["sphinx", "zope.interface"]
tests = ["coverage", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface"]
[[package]]
category = "main"
description = "Screen-scraping library"
name = "beautifulsoup4"
optional = false
python-versions = "*"
version = "4.9.1"
[package.dependencies]
soupsieve = [">1.2", "<2.0"]
[package.extras]
html5lib = ["html5lib"]
lxml = ["lxml"]
[[package]]
category = "dev"
description = "The uncompromising code formatter."
name = "black"
optional = false
python-versions = ">=3.6"
version = "19.10b0"
[package.dependencies]
appdirs = "*"
attrs = ">=18.1.0"
click = ">=6.5"
pathspec = ">=0.6,<1"
regex = "*"
toml = ">=0.9.4"
typed-ast = ">=1.4.0"
[package.extras]
d = ["aiohttp (>=3.3.2)", "aiohttp-cors"]
[[package]]
category = "main"
description = "Python package for providing Mozilla's CA Bundle."
name = "certifi"
optional = false
python-versions = "*"
version = "2020.4.5.1"
[[package]]
category = "main"
description = "Universal encoding detector for Python 2 and 3"
name = "chardet"
optional = false
python-versions = "*"
version = "3.0.4"
[[package]]
category = "main"
description = "Automatically install chromedriver that supports the currently installed version of chrome."
name = "chromedriver-autoinstaller"
optional = false
python-versions = ">=3"
version = "0.2.0"
[[package]]
category = "dev"
description = "Composable command line interface toolkit"
name = "click"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
version = "7.1.2"
[[package]]
category = "main"
description = "Cross-platform colored terminal text."
name = "colorama"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
version = "0.4.3"
[[package]]
category = "main"
description = "A CSS Cascading Style Sheets library for Python"
name = "cssutils"
optional = false
python-versions = "*"
version = "1.0.2"
[[package]]
category = "main"
description = "Internationalized Domain Names in Applications (IDNA)"
name = "idna"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
version = "2.9"
[[package]]
category = "dev"
description = "Utility library for gitignore style pattern matching of file paths."
name = "pathspec"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
version = "0.8.0"
[[package]]
category = "dev"
description = "Alternative regular expression module, to replace re."
name = "regex"
optional = false
python-versions = "*"
version = "2020.5.14"
[[package]]
category = "main"
description = "Python HTTP for Humans."
name = "requests"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
version = "2.23.0"
[package.dependencies]
certifi = ">=2017.4.17"
chardet = ">=3.0.2,<4"
idna = ">=2.5,<3"
urllib3 = ">=1.21.1,<1.25.0 || >1.25.0,<1.25.1 || >1.25.1,<1.26"
[package.extras]
security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"]
socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7)", "win-inet-pton"]
[[package]]
category = "main"
description = "Python bindings for Selenium"
name = "selenium"
optional = false
python-versions = "*"
version = "3.141.0"
[package.dependencies]
urllib3 = "*"
[[package]]
category = "main"
description = "A modern CSS selector implementation for Beautiful Soup."
name = "soupsieve"
optional = false
python-versions = "*"
version = "1.9.6"
[[package]]
category = "main"
description = "Python Library for Tom's Obvious, Minimal Language"
name = "toml"
optional = false
python-versions = "*"
version = "0.10.1"
[[package]]
category = "dev"
description = "a fork of Python 2 and 3 ast modules with type comment support"
name = "typed-ast"
optional = false
python-versions = "*"
version = "1.4.1"
[[package]]
category = "main"
description = "HTTP library with thread-safe connection pooling, file post, and more."
name = "urllib3"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
version = "1.25.9"
[package.extras]
brotli = ["brotlipy (>=0.6.0)"]
secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "pyOpenSSL (>=0.14)", "ipaddress"]
socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7,<2.0)"]
[metadata]
content-hash = "ef223e0d435f4ab7f38a6499586aecdb96924ccb7bd59cd0982d0496479ad60f"
python-versions = "^3.7"
[metadata.files]
appdirs = [
{file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"},
{file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"},
]
attrs = [
{file = "attrs-19.3.0-py2.py3-none-any.whl", hash = "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c"},
{file = "attrs-19.3.0.tar.gz", hash = "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72"},
]
beautifulsoup4 = [
{file = "beautifulsoup4-4.9.1-py2-none-any.whl", hash = "sha256:e718f2342e2e099b640a34ab782407b7b676f47ee272d6739e60b8ea23829f2c"},
{file = "beautifulsoup4-4.9.1-py3-none-any.whl", hash = "sha256:a6237df3c32ccfaee4fd201c8f5f9d9df619b93121d01353a64a73ce8c6ef9a8"},
{file = "beautifulsoup4-4.9.1.tar.gz", hash = "sha256:73cc4d115b96f79c7d77c1c7f7a0a8d4c57860d1041df407dd1aae7f07a77fd7"},
]
black = [
{file = "black-19.10b0-py36-none-any.whl", hash = "sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b"},
{file = "black-19.10b0.tar.gz", hash = "sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539"},
]
certifi = [
{file = "certifi-2020.4.5.1-py2.py3-none-any.whl", hash = "sha256:1d987a998c75633c40847cc966fcf5904906c920a7f17ef374f5aa4282abd304"},
{file = "certifi-2020.4.5.1.tar.gz", hash = "sha256:51fcb31174be6e6664c5f69e3e1691a2d72a1a12e90f872cbdb1567eb47b6519"},
]
chardet = [
{file = "chardet-3.0.4-py2.py3-none-any.whl", hash = "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"},
{file = "chardet-3.0.4.tar.gz", hash = "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"},
]
chromedriver-autoinstaller = [
{file = "chromedriver-autoinstaller-0.2.0.tar.gz", hash = "sha256:e6aadc277f2c3a1d247541eecb60bfdeabb3250c56ad9998595420840d1c7f71"},
{file = "chromedriver_autoinstaller-0.2.0-py3-none-any.whl", hash = "sha256:290a72a1e60e5d806ac0d7cc14bd6aa0746bf8e007899efca48b25eb239ea851"},
]
click = [
{file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"},
{file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"},
]
colorama = [
{file = "colorama-0.4.3-py2.py3-none-any.whl", hash = "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff"},
{file = "colorama-0.4.3.tar.gz", hash = "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1"},
]
cssutils = [
{file = "cssutils-1.0.2-py3-none-any.whl", hash = "sha256:c74dbe19c92f5052774eadb15136263548dd013250f1ed1027988e7fef125c8d"},
{file = "cssutils-1.0.2.tar.gz", hash = "sha256:a2fcf06467553038e98fea9cfe36af2bf14063eb147a70958cfcaa8f5786acaf"},
]
idna = [
{file = "idna-2.9-py2.py3-none-any.whl", hash = "sha256:a068a21ceac8a4d63dbfd964670474107f541babbd2250d61922f029858365fa"},
{file = "idna-2.9.tar.gz", hash = "sha256:7588d1c14ae4c77d74036e8c22ff447b26d0fde8f007354fd48a7814db15b7cb"},
]
pathspec = [
{file = "pathspec-0.8.0-py2.py3-none-any.whl", hash = "sha256:7d91249d21749788d07a2d0f94147accd8f845507400749ea19c1ec9054a12b0"},
{file = "pathspec-0.8.0.tar.gz", hash = "sha256:da45173eb3a6f2a5a487efba21f050af2b41948be6ab52b6a1e3ff22bb8b7061"},
]
regex = [
{file = "regex-2020.5.14-cp27-cp27m-win32.whl", hash = "sha256:e565569fc28e3ba3e475ec344d87ed3cd8ba2d575335359749298a0899fe122e"},
{file = "regex-2020.5.14-cp27-cp27m-win_amd64.whl", hash = "sha256:d466967ac8e45244b9dfe302bbe5e3337f8dc4dec8d7d10f5e950d83b140d33a"},
{file = "regex-2020.5.14-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:27ff7325b297fb6e5ebb70d10437592433601c423f5acf86e5bc1ee2919b9561"},
{file = "regex-2020.5.14-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:ea55b80eb0d1c3f1d8d784264a6764f931e172480a2f1868f2536444c5f01e01"},
{file = "regex-2020.5.14-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:c9bce6e006fbe771a02bda468ec40ffccbf954803b470a0345ad39c603402577"},
{file = "regex-2020.5.14-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:d881c2e657c51d89f02ae4c21d9adbef76b8325fe4d5cf0e9ad62f850f3a98fd"},
{file = "regex-2020.5.14-cp36-cp36m-win32.whl", hash = "sha256:99568f00f7bf820c620f01721485cad230f3fb28f57d8fbf4a7967ec2e446994"},
{file = "regex-2020.5.14-cp36-cp36m-win_amd64.whl", hash = "sha256:70c14743320a68c5dac7fc5a0f685be63bc2024b062fe2aaccc4acc3d01b14a1"},
{file = "regex-2020.5.14-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:a7c37f048ec3920783abab99f8f4036561a174f1314302ccfa4e9ad31cb00eb4"},
{file = "regex-2020.5.14-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:89d76ce33d3266173f5be80bd4efcbd5196cafc34100fdab814f9b228dee0fa4"},
{file = "regex-2020.5.14-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:51f17abbe973c7673a61863516bdc9c0ef467407a940f39501e786a07406699c"},
{file = "regex-2020.5.14-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:ce5cc53aa9fbbf6712e92c7cf268274eaff30f6bd12a0754e8133d85a8fb0f5f"},
{file = "regex-2020.5.14-cp37-cp37m-win32.whl", hash = "sha256:8044d1c085d49673aadb3d7dc20ef5cb5b030c7a4fa253a593dda2eab3059929"},
{file = "regex-2020.5.14-cp37-cp37m-win_amd64.whl", hash = "sha256:c2062c7d470751b648f1cacc3f54460aebfc261285f14bc6da49c6943bd48bdd"},
{file = "regex-2020.5.14-cp38-cp38-manylinux1_i686.whl", hash = "sha256:329ba35d711e3428db6b45a53b1b13a0a8ba07cbbcf10bbed291a7da45f106c3"},
{file = "regex-2020.5.14-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:579ea215c81d18da550b62ff97ee187b99f1b135fd894a13451e00986a080cad"},
{file = "regex-2020.5.14-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:3a9394197664e35566242686d84dfd264c07b20f93514e2e09d3c2b3ffdf78fe"},
{file = "regex-2020.5.14-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ce367d21f33e23a84fb83a641b3834dd7dd8e9318ad8ff677fbfae5915a239f7"},
{file = "regex-2020.5.14-cp38-cp38-win32.whl", hash = "sha256:1386e75c9d1574f6aa2e4eb5355374c8e55f9aac97e224a8a5a6abded0f9c927"},
{file = "regex-2020.5.14-cp38-cp38-win_amd64.whl", hash = "sha256:7e61be8a2900897803c293247ef87366d5df86bf701083b6c43119c7c6c99108"},
{file = "regex-2020.5.14.tar.gz", hash = "sha256:ce450ffbfec93821ab1fea94779a8440e10cf63819be6e176eb1973a6017aff5"},
]
requests = [
{file = "requests-2.23.0-py2.py3-none-any.whl", hash = "sha256:43999036bfa82904b6af1d99e4882b560e5e2c68e5c4b0aa03b655f3d7d73fee"},
{file = "requests-2.23.0.tar.gz", hash = "sha256:b3f43d496c6daba4493e7c431722aeb7dbc6288f52a6e04e7b6023b0247817e6"},
]
selenium = [
{file = "selenium-3.141.0-py2.py3-none-any.whl", hash = "sha256:2d7131d7bc5a5b99a2d9b04aaf2612c411b03b8ca1b1ee8d3de5845a9be2cb3c"},
{file = "selenium-3.141.0.tar.gz", hash = "sha256:deaf32b60ad91a4611b98d8002757f29e6f2c2d5fcaf202e1c9ad06d6772300d"},
]
soupsieve = [
{file = "soupsieve-1.9.6-py2.py3-none-any.whl", hash = "sha256:feb1e937fa26a69e08436aad4a9037cd7e1d4c7212909502ba30701247ff8abd"},
{file = "soupsieve-1.9.6.tar.gz", hash = "sha256:7985bacc98c34923a439967c1a602dc4f1e15f923b6fcf02344184f86cc7efaa"},
]
toml = [
{file = "toml-0.10.1-py2.py3-none-any.whl", hash = "sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88"},
{file = "toml-0.10.1.tar.gz", hash = "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f"},
]
typed-ast = [
{file = "typed_ast-1.4.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3"},
{file = "typed_ast-1.4.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb"},
{file = "typed_ast-1.4.1-cp35-cp35m-win32.whl", hash = "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919"},
{file = "typed_ast-1.4.1-cp35-cp35m-win_amd64.whl", hash = "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01"},
{file = "typed_ast-1.4.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75"},
{file = "typed_ast-1.4.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652"},
{file = "typed_ast-1.4.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7"},
{file = "typed_ast-1.4.1-cp36-cp36m-win32.whl", hash = "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1"},
{file = "typed_ast-1.4.1-cp36-cp36m-win_amd64.whl", hash = "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa"},
{file = "typed_ast-1.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614"},
{file = "typed_ast-1.4.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41"},
{file = "typed_ast-1.4.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b"},
{file = "typed_ast-1.4.1-cp37-cp37m-win32.whl", hash = "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe"},
{file = "typed_ast-1.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355"},
{file = "typed_ast-1.4.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6"},
{file = "typed_ast-1.4.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907"},
{file = "typed_ast-1.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d"},
{file = "typed_ast-1.4.1-cp38-cp38-win32.whl", hash = "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c"},
{file = "typed_ast-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4"},
{file = "typed_ast-1.4.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34"},
{file = "typed_ast-1.4.1.tar.gz", hash = "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b"},
]
urllib3 = [
{file = "urllib3-1.25.9-py2.py3-none-any.whl", hash = "sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115"},
{file = "urllib3-1.25.9.tar.gz", hash = "sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527"},
]

22
pyproject.toml Normal file
View File

@ -0,0 +1,22 @@
[tool.poetry]
name = "loconotion"
version = "0.1.0"
description = ""
authors = ["Leonardo Cavaletti <impeto.blu@gmail.com>"]
[tool.poetry.dependencies]
python = "^3.7"
beautifulsoup4 = "^4.9.1"
chromedriver-autoinstaller = "^0.2.0"
colorama = "^0.4.3"
cssutils = "^1.0.2"
requests = "^2.23.0"
selenium = "^3.141.0"
toml = "^0.10.1"
[tool.poetry.dev-dependencies]
black = "^19.10b0"
[build-system]
requires = ["poetry>=0.12"]
build-backend = "poetry.masonry.api"

View File

@ -1,12 +1,37 @@
beautifulsoup4==4.9.1
certifi==2020.4.5.1
chardet==3.0.4
chromedriver-autoinstaller==0.2.0
colorama==0.4.3
cssutils==1.0.2
idna==2.9
requests==2.23.0
selenium==3.141.0
soupsieve==2.0.1
toml==0.10.1
urllib3==1.25.9
beautifulsoup4==4.9.1 \
--hash=sha256:e718f2342e2e099b640a34ab782407b7b676f47ee272d6739e60b8ea23829f2c \
--hash=sha256:a6237df3c32ccfaee4fd201c8f5f9d9df619b93121d01353a64a73ce8c6ef9a8 \
--hash=sha256:73cc4d115b96f79c7d77c1c7f7a0a8d4c57860d1041df407dd1aae7f07a77fd7
certifi==2020.4.5.1 \
--hash=sha256:1d987a998c75633c40847cc966fcf5904906c920a7f17ef374f5aa4282abd304 \
--hash=sha256:51fcb31174be6e6664c5f69e3e1691a2d72a1a12e90f872cbdb1567eb47b6519
chardet==3.0.4 \
--hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691 \
--hash=sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae
chromedriver-autoinstaller==0.2.0 \
--hash=sha256:e6aadc277f2c3a1d247541eecb60bfdeabb3250c56ad9998595420840d1c7f71 \
--hash=sha256:290a72a1e60e5d806ac0d7cc14bd6aa0746bf8e007899efca48b25eb239ea851
colorama==0.4.3 \
--hash=sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff \
--hash=sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1
cssutils==1.0.2 \
--hash=sha256:c74dbe19c92f5052774eadb15136263548dd013250f1ed1027988e7fef125c8d \
--hash=sha256:a2fcf06467553038e98fea9cfe36af2bf14063eb147a70958cfcaa8f5786acaf
idna==2.9 \
--hash=sha256:a068a21ceac8a4d63dbfd964670474107f541babbd2250d61922f029858365fa \
--hash=sha256:7588d1c14ae4c77d74036e8c22ff447b26d0fde8f007354fd48a7814db15b7cb
requests==2.23.0 \
--hash=sha256:43999036bfa82904b6af1d99e4882b560e5e2c68e5c4b0aa03b655f3d7d73fee \
--hash=sha256:b3f43d496c6daba4493e7c431722aeb7dbc6288f52a6e04e7b6023b0247817e6
selenium==3.141.0 \
--hash=sha256:2d7131d7bc5a5b99a2d9b04aaf2612c411b03b8ca1b1ee8d3de5845a9be2cb3c \
--hash=sha256:deaf32b60ad91a4611b98d8002757f29e6f2c2d5fcaf202e1c9ad06d6772300d
soupsieve==1.9.6 \
--hash=sha256:feb1e937fa26a69e08436aad4a9037cd7e1d4c7212909502ba30701247ff8abd \
--hash=sha256:7985bacc98c34923a439967c1a602dc4f1e15f923b6fcf02344184f86cc7efaa
toml==0.10.1 \
--hash=sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88 \
--hash=sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f
urllib3==1.25.9 \
--hash=sha256:88206b0eb87e6d677d424843ac5209e3fb9d0190d0ee169599165ec25e9d9115 \
--hash=sha256:3018294ebefce6572a474f0604c2021e33b3fd8006ecd11d62107a5d2a963527