mirror of
https://github.com/ihabunek/twitch-dl
synced 2024-08-30 18:32:25 +00:00
dd1f4e0d26
Distinct VODs took from disk and freshly downloaded
104 lines
3.3 KiB
Python
104 lines
3.3 KiB
Python
import os
|
|
import requests
|
|
|
|
from collections import OrderedDict
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from datetime import datetime
|
|
from functools import partial
|
|
from requests.exceptions import RequestException
|
|
from twitchdl.output import print_out
|
|
from twitchdl.utils import format_size, format_duration
|
|
|
|
|
|
CHUNK_SIZE = 1024
|
|
CONNECT_TIMEOUT = 5
|
|
RETRY_COUNT = 5
|
|
|
|
|
|
class DownloadFailed(Exception):
|
|
pass
|
|
|
|
|
|
def _download(url, path):
|
|
tmp_path = path + ".tmp"
|
|
response = requests.get(url, stream=True, timeout=CONNECT_TIMEOUT)
|
|
size = 0
|
|
with open(tmp_path, 'wb') as target:
|
|
for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
|
|
target.write(chunk)
|
|
size += len(chunk)
|
|
|
|
os.rename(tmp_path, path)
|
|
return size
|
|
|
|
|
|
def download_file(url, path, retries=RETRY_COUNT):
|
|
if os.path.exists(path):
|
|
from_disk = True
|
|
return (os.path.getsize(path), from_disk)
|
|
|
|
from_disk = False
|
|
for _ in range(retries):
|
|
try:
|
|
return (_download(url, path), from_disk)
|
|
except RequestException:
|
|
pass
|
|
|
|
raise DownloadFailed(":(")
|
|
|
|
|
|
def _print_progress(futures):
|
|
downloaded_count = 0
|
|
downloaded_size = 0
|
|
max_msg_size = 0
|
|
start_time = datetime.now()
|
|
total_count = len(futures)
|
|
current_download_size = 0
|
|
current_downloaded_count = 0
|
|
|
|
for future in as_completed(futures):
|
|
size, from_disk = future.result()
|
|
downloaded_count += 1
|
|
downloaded_size += size
|
|
|
|
# If we find something on disk, we don't want to take it in account in
|
|
# the speed calculation
|
|
if not from_disk:
|
|
current_download_size += size
|
|
current_downloaded_count += 1
|
|
|
|
percentage = 100 * downloaded_count // total_count
|
|
est_total_size = int(total_count * downloaded_size / downloaded_count)
|
|
duration = (datetime.now() - start_time).seconds
|
|
speed = current_download_size // duration if duration else 0
|
|
remaining = (total_count - downloaded_count) * duration / current_downloaded_count \
|
|
if current_downloaded_count else 0
|
|
|
|
msg = " ".join([
|
|
"Downloaded VOD {}/{}".format(downloaded_count, total_count),
|
|
"({}%)".format(percentage),
|
|
"<cyan>{}</cyan>".format(format_size(downloaded_size)),
|
|
"of <cyan>~{}</cyan>".format(format_size(est_total_size)),
|
|
"at <cyan>{}/s</cyan>".format(format_size(speed)) if speed > 0 else "",
|
|
"remaining <cyan>~{}</cyan>".format(format_duration(remaining)) if remaining > 0 else "",
|
|
])
|
|
|
|
max_msg_size = max(len(msg), max_msg_size)
|
|
print_out("\r" + msg.ljust(max_msg_size), end="")
|
|
|
|
|
|
def download_files(base_url, target_dir, vod_paths, max_workers):
|
|
"""
|
|
Downloads a list of VODs defined by a common `base_url` and a list of
|
|
`vod_paths`, returning a dict which maps the paths to the downloaded files.
|
|
"""
|
|
urls = [base_url + path for path in vod_paths]
|
|
targets = [os.path.join(target_dir, "{:05d}.ts".format(k)) for k, _ in enumerate(vod_paths)]
|
|
partials = (partial(download_file, url, path) for url, path in zip(urls, targets))
|
|
|
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
futures = [executor.submit(fn) for fn in partials]
|
|
_print_progress(futures)
|
|
|
|
return OrderedDict(zip(vod_paths, targets))
|