twitch-dl/twitchdl/commands.py

import m3u8
import os
import pathlib
import re
import requests
import shutil
import subprocess
import tempfile

from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from functools import partial
from urllib.parse import urlparse

from twitchdl import twitch
from twitchdl.download import download_file
from twitchdl.exceptions import ConsoleError
from twitchdl.output import print_out
from twitchdl.utils import slugify


def read_int(msg, min, max, default):
    """Ask the user for an integer between `min` and `max` (inclusive).

    The prompt shown is `msg` followed by the default value. An empty answer
    returns `default` unchanged (it is not range-checked). Any answer which
    is not an integer, or which falls outside the range, is discarded and the
    prompt is shown again.
    """
    prompt = msg + " [default {}]: ".format(default)

    while True:
        answer = input(prompt)
        if not answer:
            return default

        try:
            number = int(answer)
        except ValueError:
            # Not a number, ask again
            continue

        if min <= number <= max:
            return number


def format_size(bytes_):
    """Format a byte count for display.

    Values below 1024 are returned as-is (stringified), larger values are
    scaled by 1024 and shown with one decimal and a K, M or G suffix. Anything
    of a gigabyte or more is expressed in G.
    """
    if bytes_ < 1024:
        return str(bytes_)

    scaled = bytes_
    for suffix in ("K", "M"):
        scaled = scaled / 1024
        if scaled < 1024:
            return "{:.1f}{}".format(scaled, suffix)

    # Largest supported unit, no upper bound
    return "{:.1f}G".format(scaled / 1024)


def format_duration(total_seconds):
    """Format a duration given in seconds using its two most significant units.

    The value is truncated to a whole number of seconds first. The result is
    "H h M min" when there is at least one hour, "M min S sec" when there is
    at least one minute, and "S sec" otherwise.
    """
    hours, remainder = divmod(int(total_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)

    if hours:
        return "{} h {} min".format(hours, minutes)

    if not minutes:
        return "{} sec".format(seconds)

    return "{} min {} sec".format(minutes, seconds)


def _print_video(video):
    """Print a summary of one video: ID, title, channel, game, date, length, URL.

    `video` is a dict; the keys read are `_id`, `title`, `game`, `url`,
    `published_at`, `length` and `channel.display_name`.
    """
    # Turn "<date>T<time>Z" into "<date> @ <time>"
    when = video['published_at'].replace('T', ' @ ').replace('Z', '')
    duration = format_duration(video['length'])
    channel_name = video['channel']['display_name']

    # The first character of `_id` is dropped before display
    video_id = video['_id'][1:]
    print_out("\n<bold>{}</bold>".format(video_id))

    title = video["title"]
    print_out("<green>{}</green>".format(title))

    game = video['game']
    print_out("<cyan>{}</cyan> playing <cyan>{}</cyan>".format(channel_name, game))

    print_out("Published <cyan>{}</cyan>  Length: <cyan>{}</cyan> ".format(when, duration))

    url = video["url"]
    print_out("<i>{}</i>".format(url))


def videos(channel_name, limit, offset, sort, **kwargs):
    """List videos of the given channel.

    Looks up the user by `channel_name`, fetches one page of their videos
    using `limit`, `offset` and `sort`, and prints a header with the range
    shown followed by a summary of each video. Extra keyword arguments are
    accepted and ignored.

    Raises ConsoleError if the user lookup returns nothing.
    """
    print_out("Looking up user...")
    user = twitch.get_user(channel_name)
    if not user:
        raise ConsoleError("User {} not found.".format(channel_name))

    print_out("Loading videos...")
    page = twitch.get_channel_videos(user["id"], limit, offset, sort)
    shown = len(page['videos'])
    if shown == 0:
        print_out("No videos found")
        return

    # Positions are reported 1-based, relative to the whole listing
    first, last = offset + 1, offset + len(page['videos'])
    total = page["_total"]
    print_out("<yellow>Showing videos {}-{} of {}</yellow>".format(first, last, total))

    for item in page['videos']:
        _print_video(item)


def _select_quality(playlists):
    """List the available variant playlists and let the user pick one.

    Each entry is printed as "<number>) <name> [<resolution>]", numbered from
    1. The name is taken from the first media item of the playlist when there
    is one. The user is then prompted for a number, defaulting to the first
    entry.

    Returns the selected item of `playlists`.
    Raises ConsoleError when `playlists` is empty, since there is nothing to
    choose from.
    """
    if not playlists:
        raise ConsoleError("No playlists found for this video")

    print_out("\nAvailable qualities:")
    for n, p in enumerate(playlists, start=1):
        name = p.media[0].name if p.media else ""
        # NOTE(review): resolution looks to be optional in the m3u8 stream
        # info (e.g. for audio-only variants) - guard so that listing does not
        # fail on iterating None.
        resolution = p.stream_info.resolution
        resolution = "x".join(str(r) for r in resolution) if resolution else ""
        print_out("{}) {} [{}]".format(n, name, resolution))

    # Valid choices are 1..len(playlists); a larger upper bound would allow
    # picking an index past the end of the list.
    no = read_int("Choose quality", min=1, max=len(playlists), default=1)

    return playlists[no - 1]


def _print_progress(futures):
    """Wait for all `futures` to finish, printing a progress line after each.

    The line is rewritten in place (it starts with a carriage return and has
    no trailing newline) and shows the number of finished downloads, the
    percentage, total size, average speed and estimated time remaining.

    The result of each future is added to the running total size, so it is
    expected to be a number of bytes - presumably what `download_file`
    returns; verify against that function. An exception raised by a download
    propagates from `future.result()`.
    """
    total = len(futures)
    total_size = 0
    start_time = datetime.now()

    for counter, future in enumerate(as_completed(futures), start=1):
        total_size += future.result()
        percentage = 100 * counter // total

        # Use total_seconds() rather than the `seconds` attribute: the latter
        # excludes whole days, so it would wrap around to zero on a download
        # running for more than 24 hours. Kept as an integer so that speed
        # below is an integer as well.
        duration = int((datetime.now() - start_time).total_seconds())
        speed = total_size // duration if duration else 0

        # Estimate assuming the remaining files take as long on average as
        # the ones finished so far
        remaining = (total - counter) * duration / counter

        msg = "Downloaded VOD {}/{} ({}%) total <cyan>{}B</cyan> at <cyan>{}B/s</cyan> remaining <cyan>{}</cyan>".format(
            counter, total, percentage, format_size(total_size), format_size(speed), format_duration(remaining))

        print_out("\r" + msg.ljust(80), end='')


def _download_files(base_url, directory, filenames, max_workers):
    """Download all `filenames` from `base_url` into `directory` in parallel.

    Each file is fetched from `base_url + filename` and saved to
    `directory/filename` by `download_file`, using a thread pool of
    `max_workers` threads. Progress is printed while waiting for the
    downloads to finish.

    Returns the list of target paths, in the order of `filenames`.
    """
    sources = [base_url + name for name in filenames]
    paths = ["/".join([directory, name]) for name in filenames]

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [
            pool.submit(download_file, source, path)
            for source, path in zip(sources, paths)
        ]
        # Blocks until every download has completed
        _print_progress(pending)

    return paths


def _join_vods(directory, paths, target):
    """Join the downloaded files into a single `target` file using ffmpeg.

    Writes `files.txt` into `directory`, containing one "file <name>" line
    per item of `paths` (only the base name of each path is written), then
    runs ffmpeg with `-f concat` on that list and `-c copy` to produce
    `target`.

    Raises subprocess.CalledProcessError if ffmpeg exits with a non-zero
    status.
    """
    input_path = "{}/files.txt".format(directory)

    with open(input_path, 'w') as listing:
        listing.writelines(
            'file {}\n'.format(os.path.basename(item)) for item in paths
        )

    command = ["ffmpeg"]
    command += ["-f", "concat"]
    command += ["-i", input_path]
    command += ["-c", "copy"]
    command += [target]
    command += ["-stats"]
    command += ["-loglevel", "warning"]

    # check=True raises on a non-zero exit status
    subprocess.run(command, check=True)


def _video_target_filename(video, format):
    """Build the output file name for a video.

    The name has the form "<yyyymmdd>_<id>_<channel name>_<slugified title>"
    with `format` appended as the file extension. The date comes from the
    leading "YYYY-MM-DDT" part of `published_at`, and the ID is `_id` without
    its first character.
    """
    year, month, day = re.search(
        r"^(\d{4})-(\d{2})-(\d{2})T", video['published_at']).groups()

    parts = (
        year + month + day,
        video['_id'][1:],
        video['channel']['name'],
        slugify(video['title']),
    )

    return "_".join(parts) + "." + format


def _parse_video_id(video_id):
    """Extract the numeric video ID from user input.

    `video_id` is a string holding either the integer ID itself, or a URL of
    the video on Twitch, e.g. `https://www.twitch.tv/videos/123456`. For URLs
    the `www.` prefix, a trailing slash and a query string are all optional.

    Returns the ID as an int.
    Raises ConsoleError if the input matches neither form.
    """
    if re.search(r"^\d+$", video_id):
        return int(video_id)

    # Dots in the host name are escaped so that they match literally; left
    # unescaped they match any character.
    match = re.search(r"^https://(?:www\.)?twitch\.tv/videos/(\d+)/?(\?.*)?$", video_id)
    if match:
        return int(match.group(1))

    raise ConsoleError("Invalid video ID given, expected integer ID or Twitch URL")


def _get_files(playlist, start, end):
    """Yield URIs of the playlist segments overlapping the [start, end] range.

    Segment positions are computed by summing up segment durations from the
    beginning of the playlist. `start` and `end` are in the same units as the
    durations; a falsy value (None or 0) means no limit on that side.
    """
    position = 0
    for segment in playlist.segments:
        segment_start, segment_end = position, position + segment.duration
        position = segment_end

        # Segments which only partially overlap the range are included,
        # because it's better to download a bit more than a bit less.
        if start and segment_end <= start:
            continue

        if end and segment_start >= end:
            continue

        yield segment.uri


def _crete_temp_dir(base_uri):
    """Make sure the download directory for `base_uri` exists and return it.

    The directory is located under `twitch-dl` in the system temp directory,
    followed by the path component of `base_uri`. Missing parent directories
    are created, and an already existing directory is not an error.
    """
    uri_path = urlparse(base_uri).path
    directory = '{}/twitch-dl{}'.format(tempfile.gettempdir(), uri_path)

    location = pathlib.Path(directory)
    location.mkdir(parents=True, exist_ok=True)

    return directory


def download(video_id, max_workers, format='mkv', start=None, end=None, keep=False, **kwargs):
    """Download a video and join it into a single file in the current directory.

    Arguments:
        video_id: integer ID or Twitch URL of the video, as a string.
        max_workers: number of threads used for downloading.
        format: extension of the target file, passed on to ffmpeg through the
            target file name.
        start, end: optional limits of the part of the video to download, in
            the units of playlist segment durations; falsy means no limit.
        keep: when true, the downloaded files are not deleted after joining.

    Extra keyword arguments are accepted and ignored.

    Raises ConsoleError if the video ID is invalid, if `end` is not greater
    than `start`, or if no part of the video falls within the given limits.
    """
    video_id = _parse_video_id(video_id)

    if start and end and end <= start:
        raise ConsoleError("End time must be greater than start time")

    print_out("Looking up video...")
    video = twitch.get_video(video_id)

    print_out("Found: <blue>{}</blue> by <yellow>{}</yellow>".format(
        video['title'], video['channel']['display_name']))

    print_out("Fetching access token...")
    access_token = twitch.get_access_token(video_id)

    # TODO: save playlists for debugging purposes

    # The first playlist lists the available qualities, each of which links
    # to a playlist of the actual video files
    print_out("Fetching playlists...")
    playlists = twitch.get_playlists(video_id, access_token)
    playlists = m3u8.loads(playlists)
    selected = _select_quality(playlists.playlists)

    print_out("\nFetching playlist...")
    response = requests.get(selected.uri)
    response.raise_for_status()
    playlist = m3u8.loads(response.text)

    # Fail early, before creating the temp dir: with nothing to download,
    # ffmpeg would be run on an empty list of files and fail.
    filenames = list(_get_files(playlist, start, end))
    if not filenames:
        raise ConsoleError("Nothing to download, no part of the video is within the given limits")

    # File names in the playlist are resolved against the playlist URI with
    # its last path segment stripped
    base_uri = re.sub("/[^/]+$", "/", selected.uri)
    target_dir = _crete_temp_dir(base_uri)

    print_out("\nDownloading {} VODs using {} workers to {}".format(
        len(filenames), max_workers, target_dir))
    _download_files(base_uri, target_dir, filenames, max_workers)

    print_out("\n\nJoining files...")
    target = _video_target_filename(video, format)
    _join_vods(target_dir, filenames, target)

    if keep:
        print_out("\nTemporary files not deleted: {}".format(target_dir))
    else:
        print_out("\nDeleting temporary files...")
        shutil.rmtree(target_dir)

    print_out("Downloaded: {}".format(target))
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`import m3u8`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`import os`
			`import pathlib`
Initial commit 2018-01-25 10:09:20 +00:00			`import re`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`import requests`
			`import shutil`
Initial commit 2018-01-25 10:09:20 +00:00			`import subprocess`
			`import tempfile`

			`from concurrent.futures import ThreadPoolExecutor, as_completed`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`from datetime import datetime`
Initial commit 2018-01-25 10:09:20 +00:00			`from functools import partial`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`from urllib.parse import urlparse`
Initial commit 2018-01-25 10:09:20 +00:00
			`from twitchdl import twitch`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`from twitchdl.download import download_file`
Allow using video URL as arg to download command 2019-02-09 10:52:15 +00:00			`from twitchdl.exceptions import ConsoleError`
Initial commit 2018-01-25 10:09:20 +00:00			`from twitchdl.output import print_out`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`from twitchdl.utils import slugify`
Initial commit 2018-01-25 10:09:20 +00:00

			`def read_int(msg, min, max, default):`
			`msg = msg + " [default {}]: ".format(default)`

			`while True:`
			`try:`
			`val = input(msg)`
			`if not val:`
			`return default`
			`if min <= int(val) <= max:`
			`return int(val)`
			`except ValueError:`
			`pass`


Extended output when downloading 2018-01-26 20:50:55 +00:00			`def format_size(bytes_):`
			`if bytes_ < 1024:`
			`return str(bytes_)`

			`kilo = bytes_ / 1024`
			`if kilo < 1024:`
			`return "{:.1f}K".format(kilo)`

			`mega = kilo / 1024`
			`if mega < 1024:`
			`return "{:.1f}M".format(mega)`

			`return "{:.1f}G".format(mega / 1024)`


			`def format_duration(total_seconds):`
			`total_seconds = int(total_seconds)`
			`hours = total_seconds // 3600`
			`remainder = total_seconds % 3600`
Initial commit 2018-01-25 10:09:20 +00:00			`minutes = remainder // 60`
Extended output when downloading 2018-01-26 20:50:55 +00:00			`seconds = total_seconds % 60`
Initial commit 2018-01-25 10:09:20 +00:00
			`if hours:`
Extended output when downloading 2018-01-26 20:50:55 +00:00			`return "{} h {} min".format(hours, minutes)`

			`if minutes:`
			`return "{} min {} sec".format(minutes, seconds)`
Initial commit 2018-01-25 10:09:20 +00:00
Extended output when downloading 2018-01-26 20:50:55 +00:00			`return "{} sec".format(seconds)`
Initial commit 2018-01-25 10:09:20 +00:00

			`def _print_video(video):`
			`published_at = video['published_at'].replace('T', ' @ ').replace('Z', '')`
Extended output when downloading 2018-01-26 20:50:55 +00:00			`length = format_duration(video['length'])`
Initial commit 2018-01-25 10:09:20 +00:00			`name = video['channel']['display_name']`

			`print_out("\n<bold>{}</bold>".format(video['_id'][1:]))`
			`print_out("<green>{}</green>".format(video["title"]))`
			`print_out("<cyan>{}</cyan> playing <cyan>{}</cyan>".format(name, video['game']))`
			`print_out("Published <cyan>{}</cyan> Length: <cyan>{}</cyan> ".format(published_at, length))`
Style the url in video list 2019-08-13 10:29:42 +00:00			`print_out("<i>{}</i>".format(video["url"]))`
Initial commit 2018-01-25 10:09:20 +00:00

Add offset and sort options to videos command fixes #7 2019-08-13 10:25:25 +00:00			`def videos(channel_name, limit, offset, sort, **kwargs):`
Fix usage of deprecated v3 API related #8 2019-08-23 07:03:30 +00:00			`print_out("Looking up user...")`
			`user = twitch.get_user(channel_name)`
			`if not user:`
			`raise ConsoleError("User {} not found.".format(channel_name))`
Add offset and sort options to videos command fixes #7 2019-08-13 10:25:25 +00:00
Fix usage of deprecated v3 API related #8 2019-08-23 07:03:30 +00:00			`print_out("Loading videos...")`
			`videos = twitch.get_channel_videos(user["id"], limit, offset, sort)`
Add offset and sort options to videos command fixes #7 2019-08-13 10:25:25 +00:00			`count = len(videos['videos'])`
			`if not count:`
			`print_out("No videos found")`
			`return`

			`first = offset + 1`
			`last = offset + len(videos['videos'])`
			`total = videos["_total"]`
			`print_out("<yellow>Showing videos {}-{} of {}</yellow>".format(first, last, total))`
Initial commit 2018-01-25 10:09:20 +00:00
			`for video in videos['videos']:`
			`_print_video(video)`


Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`def _select_quality(playlists):`
Use print_out for output 2019-04-30 11:34:54 +00:00			`print_out("\nAvailable qualities:")`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`for n, p in enumerate(playlists):`
			`name = p.media[0].name if p.media else ""`
			`resolution = "x".join(str(r) for r in p.stream_info.resolution)`
			`print_out("{}) {} [{}]".format(n + 1, name, resolution))`
Initial commit 2018-01-25 10:09:20 +00:00
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`no = read_int("Choose quality", min=1, max=len(playlists) + 1, default=1)`
Initial commit 2018-01-25 10:09:20 +00:00
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`return playlists[no - 1]`
Initial commit 2018-01-25 10:09:20 +00:00

			`def _print_progress(futures):`
			`counter = 1`
			`total = len(futures)`
Extended output when downloading 2018-01-26 20:50:55 +00:00			`total_size = 0`
			`start_time = datetime.now()`
Initial commit 2018-01-25 10:09:20 +00:00
			`for future in as_completed(futures):`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`size = future.result()`
Extended output when downloading 2018-01-26 20:50:55 +00:00			`percentage = 100 * counter // total`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`total_size += size`
Extended output when downloading 2018-01-26 20:50:55 +00:00			`duration = (datetime.now() - start_time).seconds`
			`speed = total_size // duration if duration else 0`
			`remaining = (total - counter) * duration / counter`

			`msg = "Downloaded VOD {}/{} ({}%) total <cyan>{}B</cyan> at <cyan>{}B/s</cyan> remaining <cyan>{}</cyan>".format(`
			`counter, total, percentage, format_size(total_size), format_size(speed), format_duration(remaining))`

			`print_out("\r" + msg.ljust(80), end='')`
Initial commit 2018-01-25 10:09:20 +00:00			`counter += 1`


			`def _download_files(base_url, directory, filenames, max_workers):`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`urls = [base_url + f for f in filenames]`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`paths = ["/".join([directory, f]) for f in filenames]`
			`partials = (partial(download_file, url, path) for url, path in zip(urls, paths))`
Initial commit 2018-01-25 10:09:20 +00:00
			`with ThreadPoolExecutor(max_workers=max_workers) as executor:`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`futures = [executor.submit(fn) for fn in partials]`
Initial commit 2018-01-25 10:09:20 +00:00			`_print_progress(futures)`

Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`return paths`
Initial commit 2018-01-25 10:09:20 +00:00

Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`def _join_vods(directory, paths, target):`
Initial commit 2018-01-25 10:09:20 +00:00			`input_path = "{}/files.txt".format(directory)`

			`with open(input_path, 'w') as f:`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`for path in paths:`
			`f.write('file {}\n'.format(os.path.basename(path)))`
Initial commit 2018-01-25 10:09:20 +00:00
			`result = subprocess.run([`
			`"ffmpeg",`
			`"-f", "concat",`
			`"-i", input_path,`
			`"-c", "copy",`
			`target,`
			`"-stats",`
			`"-loglevel", "warning",`
			`])`

			`result.check_returncode()`


			`def _video_target_filename(video, format):`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`match = re.search(r"^(\d{4})-(\d{2})-(\d{2})T", video['published_at'])`
			`date = "".join(match.groups())`

			`name = "_".join([`
			`date,`
			`video['_id'][1:],`
			`video['channel']['name'],`
			`slugify(video['title']),`
			`])`

			`return name + "." + format`
Initial commit 2018-01-25 10:09:20 +00:00

Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`def _parse_video_id(video_id):`
Allow using video URL as arg to download command 2019-02-09 10:52:15 +00:00			`"""This can be either a integer ID or an URL to the video on twitch."""`
			`if re.search(r"^\d+$", video_id):`
			`return int(video_id)`

Better parsing of twitch urls, strip query 2019-02-20 15:04:23 +00:00			`match = re.search(r"^https://www.twitch.tv/videos/(\d+)(\?.+)?$", video_id)`
Allow using video URL as arg to download command 2019-02-09 10:52:15 +00:00			`if match:`
			`return int(match.group(1))`

			`raise ConsoleError("Invalid video ID given, expected integer ID or Twitch URL")`


Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`def _get_files(playlist, start, end):`
			`"""Extract files for download from playlist."""`
			`vod_start = 0`
			`for segment in playlist.segments:`
			`vod_end = vod_start + segment.duration`

			# `vod_end > start` is used here becuase it's better to download a bit
			`# more than a bit less, similar for the end condition`
			`start_condition = not start or vod_end > start`
			`end_condition = not end or vod_start < end`

			`if start_condition and end_condition:`
			`yield segment.uri`

			`vod_start = vod_end`


			`def _crete_temp_dir(base_uri):`
			`"""Create a temp dir to store downloads if it doesn't exist."""`
			`path = urlparse(base_uri).path`
			`directory = '{}/twitch-dl{}'.format(tempfile.gettempdir(), path)`
			`pathlib.Path(directory).mkdir(parents=True, exist_ok=True)`
			`return directory`


Add option not to delete downloaded VODs 2019-08-23 08:16:49 +00:00			`def download(video_id, max_workers, format='mkv', start=None, end=None, keep=False, **kwargs):`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`video_id = _parse_video_id(video_id)`
Allow using video URL as arg to download command 2019-02-09 10:52:15 +00:00
Allow limiting download by start and end time 2019-06-06 09:06:33 +00:00			`if start and end and end <= start:`
			`raise ConsoleError("End time must be greater than start time")`

Use print_out for output 2019-04-30 11:34:54 +00:00			`print_out("Looking up video...")`
Initial commit 2018-01-25 10:09:20 +00:00			`video = twitch.get_video(video_id)`

Use print_out for output 2019-04-30 11:34:54 +00:00			`print_out("Found: <blue>{}</blue> by <yellow>{}</yellow>".format(`
			`video['title'], video['channel']['display_name']))`

			`print_out("Fetching access token...")`
Initial commit 2018-01-25 10:09:20 +00:00			`access_token = twitch.get_access_token(video_id)`

Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`# TODO: save playlists for debugging purposes`

Use print_out for output 2019-04-30 11:34:54 +00:00			`print_out("Fetching playlists...")`
Initial commit 2018-01-25 10:09:20 +00:00			`playlists = twitch.get_playlists(video_id, access_token)`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`playlists = m3u8.loads(playlists)`
			`selected = _select_quality(playlists.playlists)`
Initial commit 2018-01-25 10:09:20 +00:00
Use print_out for output 2019-04-30 11:34:54 +00:00			`print_out("\nFetching playlist...")`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`response = requests.get(selected.uri)`
			`response.raise_for_status()`
			`playlist = m3u8.loads(response.text)`
Allow limiting download by start and end time 2019-06-06 09:06:33 +00:00
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`base_uri = re.sub("/[^/]+$", "/", selected.uri)`
			`target_dir = _crete_temp_dir(base_uri)`
			`filenames = list(_get_files(playlist, start, end))`
Initial commit 2018-01-25 10:09:20 +00:00
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`print_out("\nDownloading {} VODs using {} workers to {}".format(`
			`len(filenames), max_workers, target_dir))`
			`_download_files(base_uri, target_dir, filenames, max_workers)`
Initial commit 2018-01-25 10:09:20 +00:00
Use print_out for output 2019-04-30 11:34:54 +00:00			`print_out("\n\nJoining files...")`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`target = _video_target_filename(video, format)`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`_join_vods(target_dir, filenames, target)`
Initial commit 2018-01-25 10:09:20 +00:00
Add option not to delete downloaded VODs 2019-08-23 08:16:49 +00:00			`if keep:`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`print_out("\nTemporary files not deleted: {}".format(target_dir))`
Add option not to delete downloaded VODs 2019-08-23 08:16:49 +00:00			`else:`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`print_out("\nDeleting temporary files...")`
			`shutil.rmtree(target_dir)`
Initial commit 2018-01-25 10:09:20 +00:00
Add option not to delete downloaded VODs 2019-08-23 08:16:49 +00:00			`print_out("Downloaded: {}".format(target))`