twitch-dl/twitchdl/commands.py

import m3u8
import os
import pathlib
import re
import requests
import shutil
import subprocess
import tempfile

from pathlib import Path
from urllib.parse import urlparse

from twitchdl import twitch, utils
from twitchdl.download import download_file, download_files
from twitchdl.exceptions import ConsoleError
from twitchdl.output import print_out, print_video


def _continue():
    """Ask the user whether another page of videos should be shown.

    Prints the paging prompt and then blocks on standard input.

    Returns:
        True once a line has been read (the user pressed Enter), False if
        the wait was interrupted by Ctrl+C.
    """
    prompt = (
        "\nThere are more videos. "
        "Press <green><b>Enter</green> to continue, "
        "<yellow><b>Ctrl+C</yellow> to break."
    )
    print_out(prompt)

    try:
        input()
    except KeyboardInterrupt:
        return False
    else:
        return True


def _get_game_ids(names):
    """Translate game names into integer game ids.

    Each name is looked up through ``twitch.get_game_id``; the ids are
    returned in the same order as the names.

    Args:
        names: iterable of game names; any falsy value (``None``, empty
            list) yields an empty result without any lookup.

    Returns:
        A list of ``int`` game ids.

    Raises:
        ConsoleError: if a lookup returns a falsy id for one of the names.
    """
    resolved = []

    for game_name in names or []:
        print_out("<dim>Looking up game '{}'...</dim>".format(game_name))
        found = twitch.get_game_id(game_name)
        if not found:
            raise ConsoleError("Game '{}' not found".format(game_name))
        resolved.append(int(found))

    return resolved


def videos(args):
    """List a channel's videos page by page.

    Resolves the game names in ``args.game`` to ids, then prints every page
    yielded by ``twitch.channel_videos_generator``. After each page that
    reports more results the user is asked whether to continue.

    Args:
        args: parsed command-line options; this function reads ``game``,
            ``channel_name``, ``limit``, ``sort`` and ``type``.

    Raises:
        ConsoleError: if one of the requested games cannot be found.
    """
    game_ids = _get_game_ids(args.game)

    print_out("<dim>Loading videos...</dim>")
    generator = twitch.channel_videos_generator(
        args.channel_name, args.limit, args.sort, args.type, game_ids=game_ids)

    first = 1
    # Tracks whether the generator produced anything at all, so that the
    # "no videos" message is not printed after pages have been shown.
    any_page = False

    # The loop variable is `page`, not `videos`, to avoid shadowing this
    # function's own name.
    for page, has_more in generator:
        any_page = True

        # A page without an "edges" key is treated as empty, both for the
        # count and for the listing below.
        edges = page["edges"] if "edges" in page else []
        count = len(edges)
        total = page["totalCount"]
        last = first + count - 1

        print_out("-" * 80)
        print_out("<yellow>Showing videos {}-{} of {}</yellow>".format(first, last, total))

        for edge in edges:
            print_video(edge["node"])

        if not has_more or not _continue():
            break

        first += count

    if not any_page:
        print_out("<yellow>No videos found</yellow>")


def _select_quality(playlists):
    """Print the available variant playlists and let the user pick one.

    Args:
        playlists: sequence of variant playlist objects as parsed by the
            m3u8 library; each is read for ``media`` and
            ``stream_info.resolution``.

    Returns:
        The playlist chosen by the user (the first one by default).
    """
    print_out("\nAvailable qualities:")
    for n, p in enumerate(playlists):
        name = p.media[0].name if p.media else ""
        # Variants without a resolution (presumably audio-only streams --
        # TODO confirm) would otherwise fail when joining `None`.
        dimensions = p.stream_info.resolution
        resolution = "x".join(str(r) for r in dimensions) if dimensions else "unknown"
        print_out("{}) {} [{}]".format(n + 1, name, resolution))

    # The choices shown are 1..len(playlists). The upper bound is assumed to
    # be inclusive, as it is used in _download_clip; allowing
    # len(playlists) + 1 would index past the end of the list below.
    no = utils.read_int("Choose quality", min=1, max=len(playlists), default=1)

    return playlists[no - 1]


def _join_vods(directory, file_paths, target):
    """Concatenate downloaded segments into `target` using ffmpeg.

    Writes ``files.txt`` into `directory`, listing the base name of every
    path in `file_paths`, and passes that list to ffmpeg's concat input with
    stream copy.

    Args:
        directory: directory in which the list file is written.
        file_paths: paths of the segment files, in playback order.
        target: path of the output file handed to ffmpeg.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero code.
    """
    listing_path = "{}/files.txt".format(directory)

    with open(listing_path, 'w') as listing:
        listing.writelines(
            'file {}\n'.format(os.path.basename(segment_path))
            for segment_path in file_paths
        )

    command = [
        "ffmpeg",
        "-f", "concat",
        "-i", listing_path,
        "-c", "copy",
        target,
        "-stats",
        "-loglevel", "warning",
    ]

    subprocess.run(command).check_returncode()


def _video_target_filename(video, format):
    """Build the output file name for a downloaded video.

    The name is made of the publication date (``YYYYMMDD``), the video id
    without its first character, the channel name and the slugified title,
    joined with underscores and followed by ``.`` and `format`.

    Args:
        video: mapping read for ``published_at``, ``_id``,
            ``channel.name`` and ``title``.
        format: file extension without the leading dot.

    Returns:
        The file name as a string.
    """
    published = re.search(r"^(\d{4})-(\d{2})-(\d{2})T", video['published_at'])
    year, month, day = published.groups()

    parts = [
        year + month + day,
        video['_id'][1:],
        video['channel']['name'],
        utils.slugify(video['title']),
    ]

    return "_".join(parts) + "." + format


def _get_files(playlist, start, end):
    """Yield the URIs of the playlist segments that fall in [start, end].

    Segment positions are derived by summing ``segment.duration`` over
    ``playlist.segments``. A falsy `start` or `end` disables that bound.
    """
    position = 0
    for segment in playlist.segments:
        segment_start, position = position, position + segment.duration

        # A segment that only partially overlaps the requested range is
        # still included, because it's better to download a bit more than a
        # bit less; only segments entirely outside the range are skipped.
        if start and position <= start:
            continue
        if end and segment_start >= end:
            continue

        yield segment.uri


def _crete_temp_dir(base_uri):
    """Make sure the download directory for `base_uri` exists; return it.

    The directory is the system temp dir followed by ``/twitch-dl`` and the
    path component of `base_uri`. Missing parent directories are created and
    an already existing directory is not an error.
    """
    url_path = urlparse(base_uri).path
    directory = '{}/twitch-dl{}'.format(tempfile.gettempdir(), url_path)

    download_dir = Path(directory)
    download_dir.mkdir(parents=True, exist_ok=True)

    return directory


# Accepted forms of the `video` argument for a VOD: a bare numeric id, or a
# twitch.tv video URL with an optional query string. The id group is
# mandatory, so an empty argument does not match with id=None.
VIDEO_PATTERNS = [
    r"^(?P<id>\d+)$",
    r"^https://www\.twitch\.tv/videos/(?P<id>\d+)(\?.+)?$",
]

# Accepted forms of the `video` argument for a clip: a bare alphabetic slug,
# or one of the two clip URL layouts with an optional query string. Dots in
# host names are escaped so they only match a literal ".".
CLIP_PATTERNS = [
    r"^(?P<slug>[A-Za-z]+)$",
    r"^https://www\.twitch\.tv/\w+/clip/(?P<slug>[A-Za-z]+)(\?.+)?$",
    r"^https://clips\.twitch\.tv/(?P<slug>[A-Za-z]+)(\?.+)?$",
]


def download(args):
    """Download the clip or video identified by ``args.video``.

    The argument is matched against the clip patterns first and the video
    patterns second; the first match decides which downloader runs.

    Args:
        args: parsed command-line options; ``video`` is read here and the
            whole object is forwarded to the chosen downloader.

    Returns:
        Whatever the selected downloader returns.

    Raises:
        ConsoleError: if ``args.video`` matches none of the known patterns.
    """
    # (patterns, name of the capturing group, handler), in priority order.
    dispatch = (
        (CLIP_PATTERNS, 'slug', _download_clip),
        (VIDEO_PATTERNS, 'id', _download_video),
    )

    for patterns, group_name, handler in dispatch:
        for pattern in patterns:
            found = re.match(pattern, args.video)
            if found:
                return handler(found.group(group_name), args)

    raise ConsoleError("Invalid video: {}".format(args.video))


def _download_clip(slug, args):
    """Look up a clip, let the user choose a quality and download it.

    The file is saved in the current directory as
    ``<broadcaster login>_<slugified title><extension>``, where the
    extension is taken from the path of the selected source URL.

    Args:
        slug: clip slug as extracted from the command-line argument.
        args: parsed command-line options (not read by this function).

    Raises:
        ConsoleError: if the lookup returns no clip.
    """
    print_out("<dim>Looking up clip...</dim>")
    clip = twitch.get_clip(slug)

    # Fail with a readable message instead of a TypeError/KeyError on the
    # first subscript below when the lookup yields nothing.
    if not clip:
        raise ConsoleError("Clip '{}' not found".format(slug))

    # NOTE(review): the game is presumably optional on a clip -- confirm
    # against the API; a missing game must not abort the download.
    game = clip["game"]["name"] if clip["game"] else "Unknown"

    print_out("Found: <green>{}</green> by <yellow>{}</yellow>, playing <blue>{}</blue> ({})".format(
        clip["title"],
        clip["broadcaster"]["displayName"],
        game,
        utils.format_duration(clip["durationSeconds"])
    ))

    print_out("\nAvailable qualities:")
    qualities = clip["videoQualities"]
    for n, q in enumerate(qualities):
        print_out("{}) {} [{} fps]".format(n + 1, q["quality"], q["frameRate"]))

    no = utils.read_int("Choose quality", min=1, max=len(qualities), default=1)
    selected_quality = qualities[no - 1]
    url = selected_quality["sourceURL"]

    # Keep whatever extension the source file has.
    url_path = urlparse(url).path
    extension = Path(url_path).suffix
    filename = "{}_{}{}".format(
        clip["broadcaster"]["login"],
        utils.slugify(clip["title"]),
        extension
    )

    # print_out is used for all output, like the rest of this module.
    print_out("Downloading clip...")
    download_file(url, filename)

    print_out("Downloaded: {}".format(filename))


def _download_video(video_id, args):
    """Download a video's segments and join them into a single file.

    Steps: look up the video, fetch an access token and the master
    playlist, let the user choose a quality, fetch that quality's playlist,
    download the segments within ``args.start``/``args.end`` into a temp
    directory, join them with ffmpeg and finally remove the temp directory
    unless ``args.keep`` is set.

    Args:
        video_id: id of the video to download.
        args: parsed command-line options; this function reads ``start``,
            ``end``, ``max_workers``, ``format`` and ``keep``.

    Raises:
        ConsoleError: if both bounds are given and end is not after start.
        requests.HTTPError: if fetching the selected playlist fails.
        subprocess.CalledProcessError: if ffmpeg fails while joining.
    """
    if args.start and args.end and args.end <= args.start:
        raise ConsoleError("End time must be greater than start time")

    print_out("<dim>Looking up video...</dim>")
    video = twitch.get_video(video_id)

    print_out("Found: <blue>{}</blue> by <yellow>{}</yellow>".format(
        video['title'], video['channel']['display_name']))

    print_out("<dim>Fetching access token...</dim>")
    access_token = twitch.get_access_token(video_id)

    print_out("<dim>Fetching playlists...</dim>")
    playlists = twitch.get_playlists(video_id, access_token)
    parsed = m3u8.loads(playlists)
    selected = _select_quality(parsed.playlists)

    print_out("<dim>\nFetching playlist...</dim>")
    response = requests.get(selected.uri)
    response.raise_for_status()
    playlist = m3u8.loads(response.text)

    # Segment URIs are resolved against the playlist URL with its last path
    # component removed.
    base_uri = re.sub("/[^/]+$", "/", selected.uri)
    target_dir = _crete_temp_dir(base_uri)
    filenames = list(_get_files(playlist, args.start, args.end))

    # Save playlists for debugging purposes. os.path.join is used so the
    # files land inside target_dir whether or not it ends with a separator.
    with open(os.path.join(target_dir, "playlists.m3u8"), "w") as f:
        f.write(playlists)
    with open(os.path.join(target_dir, "playlist.m3u8"), "w") as f:
        f.write(response.text)

    print_out("\nDownloading {} VODs using {} workers to {}".format(
        len(filenames), args.max_workers, target_dir))
    file_paths = download_files(base_uri, target_dir, filenames, args.max_workers)

    print_out("\n\nJoining files...")
    target = _video_target_filename(video, args.format)
    _join_vods(target_dir, file_paths, target)

    if args.keep:
        print_out("\nTemporary files not deleted: {}".format(target_dir))
    else:
        print_out("\nDeleting temporary files...")
        shutil.rmtree(target_dir)

    print_out("Downloaded: {}".format(target))
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`import m3u8`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`import os`
			`import pathlib`
Initial commit 2018-01-25 10:09:20 +00:00			`import re`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`import requests`
			`import shutil`
Initial commit 2018-01-25 10:09:20 +00:00			`import subprocess`
			`import tempfile`

Implement downloading clips issue #15 2020-04-11 14:07:17 +00:00			`from pathlib import Path`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`from urllib.parse import urlparse`
Initial commit 2018-01-25 10:09:20 +00:00
Reorganise code 2020-04-11 11:08:42 +00:00			`from twitchdl import twitch, utils`
Implement downloading clips issue #15 2020-04-11 14:07:17 +00:00			`from twitchdl.download import download_file, download_files`
Allow using video URL as arg to download command 2019-02-09 10:52:15 +00:00			`from twitchdl.exceptions import ConsoleError`
Reorganise code 2020-04-11 11:08:42 +00:00			`from twitchdl.output import print_out, print_video`
Initial commit 2018-01-25 10:09:20 +00:00

Add paging to videos command 2020-05-17 11:35:51 +00:00			`def _continue():`
			`print_out(`
			`"\nThere are more videos. "`
			`"Press <green><b>Enter</green> to continue, "`
			`"<yellow><b>Ctrl+C</yellow> to break."`
			`)`

			`try:`
			`input()`
			`except KeyboardInterrupt:`
			`return False`

			`return True`

Add filtering videos by game 2020-05-17 12:35:33 +00:00
			`def _get_game_ids(names):`
			`if not names:`
			`return []`

			`game_ids = []`
			`for name in names:`
			`print_out("<dim>Looking up game '{}'...</dim>".format(name))`
			`game_id = twitch.get_game_id(name)`
			`if not game_id:`
			`raise ConsoleError("Game '{}' not found".format(name))`
			`game_ids.append(int(game_id))`

			`return game_ids`


Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`def videos(args):`
			`game_ids = _get_game_ids(args.game)`
Add filtering videos by game 2020-05-17 12:35:33 +00:00
			`print_out("<dim>Loading videos...</dim>")`
			`generator = twitch.channel_videos_generator(`
Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`args.channel_name, args.limit, args.sort, args.type, game_ids=game_ids)`
Use graphql to fetch channel videos The old helix endpoint returns HTTP 401 fixes #18 2020-05-17 09:56:21 +00:00
			`first = 1`
Initial commit 2018-01-25 10:09:20 +00:00
Add paging to videos command 2020-05-17 11:35:51 +00:00			`for videos, has_more in generator:`
			`count = len(videos["edges"]) if "edges" in videos else 0`
			`total = videos["totalCount"]`
			`last = first + count - 1`

			`print_out("-" * 80)`
			`print_out("<yellow>Showing videos {}-{} of {}</yellow>".format(first, last, total))`

			`for video in videos["edges"]:`
			`print_video(video["node"])`

			`if not has_more or not _continue():`
			`break`

			`first += count`
Add filtering videos by game 2020-05-17 12:35:33 +00:00			`else:`
			`print_out("<yellow>No videos found</yellow>")`
Initial commit 2018-01-25 10:09:20 +00:00

Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`def _select_quality(playlists):`
Use print_out for output 2019-04-30 11:34:54 +00:00			`print_out("\nAvailable qualities:")`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`for n, p in enumerate(playlists):`
			`name = p.media[0].name if p.media else ""`
			`resolution = "x".join(str(r) for r in p.stream_info.resolution)`
			`print_out("{}) {} [{}]".format(n + 1, name, resolution))`
Initial commit 2018-01-25 10:09:20 +00:00
Reorganise code 2020-04-11 11:08:42 +00:00			`no = utils.read_int("Choose quality", min=1, max=len(playlists) + 1, default=1)`
Initial commit 2018-01-25 10:09:20 +00:00
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`return playlists[no - 1]`
Initial commit 2018-01-25 10:09:20 +00:00

Override local file names for downloaded vods Sometimes the playlists contain more than just file names which can break the ffmpeg join, so just name downloaded vods sequentially. fixes #12 2020-04-11 11:20:59 +00:00			`def _join_vods(directory, file_paths, target):`
Initial commit 2018-01-25 10:09:20 +00:00			`input_path = "{}/files.txt".format(directory)`

			`with open(input_path, 'w') as f:`
Override local file names for downloaded vods Sometimes the playlists contain more than just file names which can break the ffmpeg join, so just name downloaded vods sequentially. fixes #12 2020-04-11 11:20:59 +00:00			`for path in file_paths:`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`f.write('file {}\n'.format(os.path.basename(path)))`
Initial commit 2018-01-25 10:09:20 +00:00
			`result = subprocess.run([`
			`"ffmpeg",`
			`"-f", "concat",`
			`"-i", input_path,`
			`"-c", "copy",`
			`target,`
			`"-stats",`
			`"-loglevel", "warning",`
			`])`

			`result.check_returncode()`


			`def _video_target_filename(video, format):`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`match = re.search(r"^(\d{4})-(\d{2})-(\d{2})T", video['published_at'])`
			`date = "".join(match.groups())`

			`name = "_".join([`
			`date,`
			`video['_id'][1:],`
			`video['channel']['name'],`
Reorganise code 2020-04-11 11:08:42 +00:00			`utils.slugify(video['title']),`
Improve download process * allow resuming * retry downloads * add timeouts 2019-01-29 13:46:24 +00:00			`])`

			`return name + "." + format`
Initial commit 2018-01-25 10:09:20 +00:00

Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`def _get_files(playlist, start, end):`
			`"""Extract files for download from playlist."""`
			`vod_start = 0`
			`for segment in playlist.segments:`
			`vod_end = vod_start + segment.duration`

			# `vod_end > start` is used here becuase it's better to download a bit
			`# more than a bit less, similar for the end condition`
			`start_condition = not start or vod_end > start`
			`end_condition = not end or vod_start < end`

			`if start_condition and end_condition:`
			`yield segment.uri`

			`vod_start = vod_end`


			`def _crete_temp_dir(base_uri):`
			`"""Create a temp dir to store downloads if it doesn't exist."""`
			`path = urlparse(base_uri).path`
			`directory = '{}/twitch-dl{}'.format(tempfile.gettempdir(), path)`
			`pathlib.Path(directory).mkdir(parents=True, exist_ok=True)`
			`return directory`


Implement downloading clips issue #15 2020-04-11 14:07:17 +00:00			`VIDEO_PATTERNS = [`
			`r"^(?P<id>\d+)?$",`
			`r"^https://www.twitch.tv/videos/(?P<id>\d+)(\?.+)?$",`
			`]`

			`CLIP_PATTERNS = [`
			`r"^(?P<slug>[A-Za-z]+)$",`
			`r"^https://www.twitch.tv/\w+/clip/(?P<slug>[A-Za-z]+)(\?.+)?$",`
			`r"^https://clips.twitch.tv/(?P<slug>[A-Za-z]+)(\?.+)?$",`
			`]`


Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`def download(args):`
Implement downloading clips issue #15 2020-04-11 14:07:17 +00:00			`for pattern in CLIP_PATTERNS:`
Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`match = re.match(pattern, args.video)`
Implement downloading clips issue #15 2020-04-11 14:07:17 +00:00			`if match:`
			`clip_slug = match.group('slug')`
Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`return _download_clip(clip_slug, args)`
Implement downloading clips issue #15 2020-04-11 14:07:17 +00:00
			`for pattern in VIDEO_PATTERNS:`
Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`match = re.match(pattern, args.video)`
Implement downloading clips issue #15 2020-04-11 14:07:17 +00:00			`if match:`
			`video_id = match.group('id')`
Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`return _download_video(video_id, args)`
Implement downloading clips issue #15 2020-04-11 14:07:17 +00:00
Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`raise ConsoleError("Invalid video: {}".format(args.video))`
Implement downloading clips issue #15 2020-04-11 14:07:17 +00:00

Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`def _download_clip(slug, args):`
Make less important messages dim 2020-05-17 12:32:37 +00:00			`print_out("<dim>Looking up clip...</dim>")`
Implement downloading clips issue #15 2020-04-11 14:07:17 +00:00			`clip = twitch.get_clip(slug)`

			`print_out("Found: <green>{}</green> by <yellow>{}</yellow>, playing <blue>{}</blue> ({})".format(`
			`clip["title"],`
			`clip["broadcaster"]["displayName"],`
			`clip["game"]["name"],`
			`utils.format_duration(clip["durationSeconds"])`
			`))`

			`print_out("\nAvailable qualities:")`
			`qualities = clip["videoQualities"]`
			`for n, q in enumerate(qualities):`
			`print_out("{}) {} [{} fps]".format(n + 1, q["quality"], q["frameRate"]))`

			`no = utils.read_int("Choose quality", min=1, max=len(qualities), default=1)`
			`selected_quality = qualities[no - 1]`
			`url = selected_quality["sourceURL"]`

			`url_path = urlparse(url).path`
			`extension = Path(url_path).suffix`
			`filename = "{}_{}{}".format(`
			`clip["broadcaster"]["login"],`
			`utils.slugify(clip["title"]),`
			`extension`
			`)`

			`print("Downloading clip...")`
			`download_file(url, filename)`

			`print("Downloaded: {}".format(filename))`

Allow using video URL as arg to download command 2019-02-09 10:52:15 +00:00
Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`def _download_video(video_id, args):`
			`if args.start and args.end and args.end <= args.start:`
Allow limiting download by start and end time 2019-06-06 09:06:33 +00:00			`raise ConsoleError("End time must be greater than start time")`

Make less important messages dim 2020-05-17 12:32:37 +00:00			`print_out("<dim>Looking up video...</dim>")`
Initial commit 2018-01-25 10:09:20 +00:00			`video = twitch.get_video(video_id)`

Use print_out for output 2019-04-30 11:34:54 +00:00			`print_out("Found: <blue>{}</blue> by <yellow>{}</yellow>".format(`
			`video['title'], video['channel']['display_name']))`

Make less important messages dim 2020-05-17 12:32:37 +00:00			`print_out("<dim>Fetching access token...</dim>")`
Initial commit 2018-01-25 10:09:20 +00:00			`access_token = twitch.get_access_token(video_id)`

Make less important messages dim 2020-05-17 12:32:37 +00:00			`print_out("<dim>Fetching playlists...</dim>")`
Initial commit 2018-01-25 10:09:20 +00:00			`playlists = twitch.get_playlists(video_id, access_token)`
Save playlists to temp dir for debugging 2019-08-23 11:08:35 +00:00			`parsed = m3u8.loads(playlists)`
			`selected = _select_quality(parsed.playlists)`
Initial commit 2018-01-25 10:09:20 +00:00
Make less important messages dim 2020-05-17 12:32:37 +00:00			`print_out("<dim>\nFetching playlist...</dim>")`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`response = requests.get(selected.uri)`
			`response.raise_for_status()`
			`playlist = m3u8.loads(response.text)`
Allow limiting download by start and end time 2019-06-06 09:06:33 +00:00
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`base_uri = re.sub("/[^/]+$", "/", selected.uri)`
			`target_dir = _crete_temp_dir(base_uri)`
Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`filenames = list(_get_files(playlist, args.start, args.end))`
Initial commit 2018-01-25 10:09:20 +00:00
Save playlists to temp dir for debugging 2019-08-23 11:08:35 +00:00			`# Save playlists for debugging purposes`
			`with open(target_dir + "playlists.m3u8", "w") as f:`
			`f.write(playlists)`
			`with open(target_dir + "playlist.m3u8", "w") as f:`
			`f.write(response.text)`

Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`print_out("\nDownloading {} VODs using {} workers to {}".format(`
Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`len(filenames), args.max_workers, target_dir))`
			`file_paths = download_files(base_uri, target_dir, filenames, args.max_workers)`
Initial commit 2018-01-25 10:09:20 +00:00
Use print_out for output 2019-04-30 11:34:54 +00:00			`print_out("\n\nJoining files...")`
Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`target = _video_target_filename(video, args.format)`
Override local file names for downloaded vods Sometimes the playlists contain more than just file names which can break the ffmpeg join, so just name downloaded vods sequentially. fixes #12 2020-04-11 11:20:59 +00:00			`_join_vods(target_dir, file_paths, target)`
Initial commit 2018-01-25 10:09:20 +00:00
Don't unpack options This makes it more readable as option count increases. 2020-05-30 08:07:00 +00:00			`if args.keep:`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`print_out("\nTemporary files not deleted: {}".format(target_dir))`
Add option not to delete downloaded VODs 2019-08-23 08:16:49 +00:00			`else:`
Use m3u8 lib to parse playlists 2019-08-23 10:36:05 +00:00			`print_out("\nDeleting temporary files...")`
			`shutil.rmtree(target_dir)`
Initial commit 2018-01-25 10:09:20 +00:00
Add option not to delete downloaded VODs 2019-08-23 08:16:49 +00:00			`print_out("Downloaded: {}".format(target))`