twitch-dl/twitchdl/commands/download.py

390 lines
12 KiB
Python
Raw Normal View History

2022-08-14 09:02:29 +00:00
import asyncio
import httpx
2019-08-23 10:36:05 +00:00
import m3u8
2022-08-14 09:02:29 +00:00
import os
2018-01-25 10:09:20 +00:00
import re
2019-08-23 10:36:05 +00:00
import shutil
2018-01-25 10:09:20 +00:00
import subprocess
import tempfile
from os import path
2020-04-11 14:07:17 +00:00
from pathlib import Path
2022-08-20 09:35:07 +00:00
from typing import List, Optional, OrderedDict
from urllib.parse import urlparse, urlencode
2018-01-25 10:09:20 +00:00
2020-04-11 11:08:42 +00:00
from twitchdl import twitch, utils
2022-08-14 09:02:29 +00:00
from twitchdl.download import download_file
2024-03-23 09:50:42 +00:00
from twitchdl.entities import DownloadOptions
from twitchdl.exceptions import ConsoleError
2022-08-14 09:02:29 +00:00
from twitchdl.http import download_all
2021-01-07 08:34:14 +00:00
from twitchdl.output import print_out
2018-01-25 10:09:20 +00:00
2024-03-23 09:50:42 +00:00
def download(ids, args: DownloadOptions):
    """Download every item named in ``ids``, one after another.

    Each identifier is handed to ``download_one`` together with the same
    ``args`` object.
    """
    for identifier in ids:
        download_one(identifier, args)
def download_one(video: str, args: DownloadOptions):
    """Download a single video or clip identified by ``video``.

    The input is first tried as a video identifier and, only if that yields
    nothing, as a clip identifier. The first parser that returns a truthy
    value decides which downloader runs.

    Raises:
        ConsoleError: if ``video`` is recognised as neither a video nor a clip.
    """
    # (parser, downloader) pairs, tried in order; later parsers are only
    # called when the earlier ones return a falsy value.
    handlers = (
        (utils.parse_video_identifier, _download_video),
        (utils.parse_clip_identifier, _download_clip),
    )
    for parse, fetch in handlers:
        identifier = parse(video)
        if identifier:
            return fetch(identifier, args)

    raise ConsoleError(f"Invalid input: {video}")
2024-03-23 09:50:42 +00:00
def _parse_playlists(playlists_m3u8):
    """Yield a ``(name, description, uri)`` tuple for each listed playlist.

    ``playlists_m3u8`` is m3u8 text which is parsed with ``m3u8.loads``.
    Playlists that declare a resolution are yielded first; for those, the
    name is the first media entry's ``name`` and the description is the
    resolution joined with "x". Playlists without a resolution are named
    after the first media entry's ``group_id`` and have no description.
    """
    parsed = m3u8.loads(playlists_m3u8)

    def _lacks_resolution(variant):
        return variant.stream_info.resolution is None

    # sorted() is stable and False sorts before True, so entries with a
    # resolution come first while keeping their original relative order.
    for variant in sorted(parsed.playlists, key=_lacks_resolution):
        resolution = variant.stream_info.resolution
        first_media = variant.media[0]
        if not resolution:
            yield first_media.group_id, None, variant.uri
            continue
        yield first_media.name, "x".join(map(str, resolution)), variant.uri
def _get_playlist_by_name(playlists, quality):
    """Return the URI of the playlist whose name equals ``quality``.

    ``playlists`` is a sequence of ``(name, description, uri)`` tuples. The
    special quality ``"source"`` selects the first entry regardless of its
    name.

    Raises:
        ConsoleError: if no playlist is named ``quality``; the message lists
            the available names.
    """
    if quality != "source":
        for candidate, _description, location in playlists:
            if candidate == quality:
                return location

        available = ", ".join(entry_name for entry_name, _d, _u in playlists)
        msg = f"Quality '{quality}' not found. Available qualities are: {available}"
        raise ConsoleError(msg)

    # "source" is an alias for whatever playlist is listed first
    _name, _description, first_uri = playlists[0]
    return first_uri
def _select_playlist_interactive(playlists):
    """Print the available qualities and ask the user to choose one.

    ``playlists`` is a sequence of ``(name, resolution, uri)`` tuples; they
    are listed numbered from 1, with the resolution shown when present.

    Returns:
        The URI of the chosen playlist.
    """
    print_out("\nAvailable qualities:")
    for n, (name, resolution, _uri) in enumerate(playlists):
        if resolution:
            print_out(f"{n + 1}) <b>{name}</b> <dim>({resolution})</dim>")
        else:
            print_out(f"{n + 1}) <b>{name}</b>")

    # The menu is numbered 1..len(playlists), so that is the largest valid
    # answer. The previous bound of len(playlists) + 1 let the user pick a
    # number one past the end, which then failed with IndexError below.
    # NOTE(review): assumes utils.read_int treats `max` as inclusive, as the
    # other prompts in this module do — confirm.
    no = utils.read_int("Choose quality", min=1, max=len(playlists), default=1)
    _, _, uri = playlists[no - 1]
    return uri
2018-01-25 10:09:20 +00:00
2021-11-20 12:31:17 +00:00
def _join_vods(playlist_path, target, overwrite, video):
    """Join the downloaded segments into ``target`` by running ffmpeg.

    ffmpeg reads ``playlist_path`` as input and copies the streams
    (``-c copy``) into ``target``, tagging the output with artist, title and
    encoded_by metadata taken from ``video``. When ``overwrite`` is truthy,
    ``-y`` is appended to the command. The command line is printed before it
    is run.

    Raises:
        ConsoleError: if ffmpeg exits with a non-zero return code.
    """
    metadata_tags = (
        f"artist={video['creator']['displayName']}",
        f"title={video['title']}",
        "encoded_by=twitch-dl",
    )

    command = ["ffmpeg", "-i", playlist_path, "-c", "copy"]
    for tag in metadata_tags:
        command.extend(["-metadata", tag])
    command.extend(["-stats", "-loglevel", "warning", f"file:{target}"])
    if overwrite:
        command.append("-y")

    print_out(f"<dim>{' '.join(command)}</dim>")

    if subprocess.run(command).returncode != 0:
        raise ConsoleError("Joining files failed")
2018-01-25 10:09:20 +00:00
2024-03-23 09:50:42 +00:00
def _video_target_filename(video, args: DownloadOptions):
    """Build the output filename for a video from the ``args.output`` template.

    The template is filled with ``str.format`` using values taken from
    ``video`` (channel, date/time, game, id, title and their slugs) plus
    ``args.format``. A missing game is reported as "Unknown".

    Raises:
        ConsoleError: if the template refers to a key that is not supported.
    """
    published_at = video["publishedAt"]
    date, time = published_at.split("T")

    if video["game"]:
        game = video["game"]["name"]
    else:
        game = "Unknown"

    creator = video["creator"]
    # Insertion order matters: it is the order in which the supported keys
    # are listed in the error message below.
    subs = dict(
        channel=creator["displayName"],
        channel_login=creator["login"],
        date=date,
        datetime=published_at,
        format=args.format,
        game=game,
        game_slug=utils.slugify(game),
        id=video["id"],
        time=time,
        title=utils.titlify(video["title"]),
        title_slug=utils.slugify(video["title"]),
    )

    try:
        filename = args.output.format(**subs)
    except KeyError as e:
        supported = ", ".join(subs.keys())
        raise ConsoleError(f"Invalid key {e} used in --output. Supported keys are: {supported}")
    return filename
2022-01-23 08:14:40 +00:00
2024-03-23 09:50:42 +00:00
def _clip_target_filename(clip, args: DownloadOptions):
    """Build the output filename for a clip from the ``args.output`` template.

    The template is filled with ``str.format`` using values taken from
    ``clip`` (channel, date/time, game, id, slug, title and their slugs). The
    ``format`` value is the file extension of the first listed quality's
    source URL. A missing game is reported as "Unknown".

    Raises:
        ConsoleError: if the template refers to a key that is not supported.
    """
    created_at = clip["createdAt"]
    date, time = created_at.split("T")

    if clip["game"]:
        game = clip["game"]["name"]
    else:
        game = "Unknown"

    # The extension comes from the first quality's URL, without the dot
    source_url = clip["videoQualities"][0]["sourceURL"]
    ext = path.splitext(source_url)[1].lstrip(".")

    broadcaster = clip["broadcaster"]
    # Insertion order matters: it is the order in which the supported keys
    # are listed in the error message below.
    subs = dict(
        channel=broadcaster["displayName"],
        channel_login=broadcaster["login"],
        date=date,
        datetime=created_at,
        format=ext,
        game=game,
        game_slug=utils.slugify(game),
        id=clip["id"],
        slug=clip["slug"],
        time=time,
        title=utils.titlify(clip["title"]),
        title_slug=utils.slugify(clip["title"]),
    )

    try:
        filename = args.output.format(**subs)
    except KeyError as e:
        supported = ", ".join(subs.keys())
        raise ConsoleError(f"Invalid key {e} used in --output. Supported keys are: {supported}")
    return filename
2020-11-10 08:21:37 +00:00
2022-08-20 09:35:07 +00:00
def _get_vod_paths(playlist, start: Optional[int], end: Optional[int]) -> List[str]:
    """Extract unique VOD paths for download from playlist.

    Walks ``playlist.segments`` in order, accumulating each segment's
    ``duration`` to find the time span it covers, and keeps the ``uri`` of
    every segment that overlaps the requested range.

    Args:
        playlist: object exposing ``segments``, each with ``uri`` and
            ``duration`` attributes.
        start: range start, in the same unit as the segment durations; a
            falsy value (``None`` or 0) means "from the beginning".
        end: range end, in the same unit; a falsy value means "to the end".

    Returns:
        The selected URIs in playlist order, each listed once.
    """
    files = []
    # Set mirror of `files`, so the duplicate check is a constant-time lookup
    # instead of a scan of the whole list for every segment.
    seen = set()

    vod_start = 0
    for segment in playlist.segments:
        vod_end = vod_start + segment.duration

        # `vod_end > start` is used here because it's better to download a bit
        # more than a bit less, similar for the end condition
        start_condition = not start or vod_end > start
        end_condition = not end or vod_start < end

        if start_condition and end_condition and segment.uri not in seen:
            seen.add(segment.uri)
            files.append(segment.uri)

        vod_start = vod_end

    return files
2019-08-23 10:36:05 +00:00
2022-08-20 09:35:07 +00:00
def _crete_temp_dir(base_uri: str) -> str:
    """Ensure the temp download directory for ``base_uri`` exists; return it.

    The directory is ``<system temp dir>/twitch-dl/<path of base_uri>``.
    Missing parent directories are created and an already existing directory
    is not an error.
    """
    # Strip the leading slash so the URL path is joined below the temp
    # directory rather than being treated as an absolute path.
    relative = urlparse(base_uri).path.lstrip("/")
    directory = Path(tempfile.gettempdir()) / "twitch-dl" / relative
    directory.mkdir(parents=True, exist_ok=True)
    return str(directory)
2019-08-23 10:36:05 +00:00
def _get_clip_url(clip, quality):
    """Return the source URL of the clip in the requested quality.

    ``clip["videoQualities"]`` lists the available qualities. When
    ``quality`` is falsy, the options are printed and the user is asked to
    choose. Otherwise ``"source"`` selects the first listed quality, and any
    other value is matched against each entry's ``quality`` after stripping
    trailing "p" characters.

    Raises:
        ConsoleError: if a quality was requested but is not available.
    """
    qualities = clip["videoQualities"]

    if not quality:
        # Ask user to select quality
        print_out("\nAvailable qualities:")
        for n, q in enumerate(qualities):
            print_out(f"{n + 1}) {q['quality']} [{q['frameRate']} fps]")
        print_out()

        no = utils.read_int("Choose quality", min=1, max=len(qualities), default=1)
        return qualities[no - 1]["sourceURL"]

    # Quality given as an argument
    if quality == "source":
        return qualities[0]["sourceURL"]

    wanted = quality.rstrip("p")  # allow 720p as well as 720
    for q in qualities:
        if q["quality"] == wanted:
            return q["sourceURL"]

    available = ", ".join(str(q["quality"]) for q in qualities)
    msg = f"Quality '{quality}' not found. Available qualities are: {available}"
    raise ConsoleError(msg)
def get_clip_authenticated_url(slug, quality):
    """Return the clip's source URL with its access signature and token.

    Fetches the access token for ``slug``, selects the URL for ``quality``
    from that response, and appends the signature and token as the ``sig``
    and ``token`` query parameters.

    Raises:
        ConsoleError: if no access token is returned for ``slug``.
    """
    print_out("<dim>Fetching access token...</dim>")
    access_token = twitch.get_clip_access_token(slug)
    if not access_token:
        raise ConsoleError(f"Access token not found for slug '{slug}'")

    # The access token response also carries the quality list, so it is
    # passed on as the clip when selecting the URL.
    url = _get_clip_url(access_token, quality)

    playback = access_token["playbackAccessToken"]
    params = [
        ("sig", playback["signature"]),
        ("token", playback["value"]),
    ]
    query = urlencode(params)

    return f"{url}?{query}"
2024-03-23 09:50:42 +00:00
def _download_clip(slug: str, args: DownloadOptions) -> None:
    """Look up the clip identified by ``slug`` and download it.

    Prints what was found and the target filename, asks for confirmation if
    the target already exists (unless ``args.overwrite`` is set), resolves
    the authenticated URL for ``args.quality`` and downloads it. When
    ``args.dry_run`` is truthy, everything except the download itself is
    performed.

    Raises:
        ConsoleError: if the clip is not found, or the user declines to
            overwrite an existing file.
    """
    print_out("<dim>Looking up clip...</dim>")
    clip = twitch.get_clip(slug)

    if not clip:
        raise ConsoleError(f"Clip '{slug}' not found")

    title = clip["title"]
    user = clip["broadcaster"]["displayName"]
    game = clip["game"]["name"] if clip["game"] else "Unknown"
    duration = utils.format_duration(clip["durationSeconds"])
    print_out(
        f"Found: <green>{title}</green> by <yellow>{user}</yellow>, "
        f"playing <blue>{game}</blue> ({duration})"
    )

    target = _clip_target_filename(clip, args)
    print_out(f"Target: <blue>{target}</blue>")

    # The confirmation applies to this target only. `args` is shared between
    # all items of a multi-download, so it must not be modified here:
    # setting args.overwrite would silently overwrite every later file.
    if not args.overwrite and path.exists(target):
        answer = input("File exists. Overwrite? [Y/n]: ")
        if answer.lower().strip() not in ["", "y"]:
            raise ConsoleError("Aborted")

    url = get_clip_authenticated_url(slug, args.quality)
    print_out(f"<dim>Selected URL: {url}</dim>")

    # Truthiness check rather than `is False`, so an unset (None) dry_run
    # does not skip the download; and do not claim a download that never
    # happened.
    if args.dry_run:
        print_out("<dim>Dry run, clip not downloaded.</dim>")
        return

    print_out("<dim>Downloading clip...</dim>")
    download_file(url, target)

    print_out(f"Downloaded: <blue>{target}</blue>")
2020-04-11 14:07:17 +00:00
2024-03-28 11:06:50 +00:00
def _download_video(video_id, args: DownloadOptions) -> None:
    """Download the video ``video_id`` and join its segments into one file.

    Steps: look up the video, work out the target filename, confirm
    overwriting if needed, determine the time range, fetch the playlists and
    pick one by ``args.quality`` (or interactively), download the selected
    segments into a temp directory, write a playlist that references the
    downloaded files, and join them with ffmpeg. With ``args.no_join`` the
    function stops after downloading; with ``args.keep`` the temp directory
    is left in place after joining.

    Raises:
        ConsoleError: on an invalid time range, an unknown video, or when
            the user declines to overwrite an existing file.
    """
    if args.start and args.end and args.end <= args.start:
        raise ConsoleError("End time must be greater than start time")

    print_out("<dim>Looking up video...</dim>")
    video = twitch.get_video(video_id)

    if not video:
        raise ConsoleError(f"Video {video_id} not found")

    title = video['title']
    user = video['creator']['displayName']
    print_out(f"Found: <blue>{title}</blue> by <yellow>{user}</yellow>")

    target = _video_target_filename(video, args)
    print_out(f"Output: <blue>{target}</blue>")

    # Keep the overwrite decision local to this video. `args` is shared
    # between all items of a multi-download, so writing the confirmation
    # back to args.overwrite would silently overwrite every later file.
    overwrite = args.overwrite
    if not overwrite and path.exists(target):
        answer = input("File exists. Overwrite? [Y/n]: ")
        if answer.lower().strip() not in ["", "y"]:
            raise ConsoleError("Aborted")
        overwrite = True

    # Chapter select or manual offset
    start, end = _determine_time_range(video_id, args)

    print_out("<dim>Fetching access token...</dim>")
    access_token = twitch.get_access_token(video_id, auth_token=args.auth_token)

    print_out("<dim>Fetching playlists...</dim>")
    playlists_m3u8 = twitch.get_playlists(video_id, access_token)
    playlists = list(_parse_playlists(playlists_m3u8))
    playlist_uri = (_get_playlist_by_name(playlists, args.quality) if args.quality
                    else _select_playlist_interactive(playlists))

    print_out("<dim>Fetching playlist...</dim>")
    response = httpx.get(playlist_uri)
    response.raise_for_status()
    playlist = m3u8.loads(response.text)

    # Segment URIs are resolved against the playlist's own location: drop
    # the last path component and keep the trailing slash.
    base_uri = re.sub("/[^/]+$", "/", playlist_uri)
    target_dir = _crete_temp_dir(base_uri)
    vod_paths = _get_vod_paths(playlist, start, end)

    # Save playlists for debugging purposes
    with open(path.join(target_dir, "playlists.m3u8"), "w") as f:
        f.write(playlists_m3u8)
    with open(path.join(target_dir, "playlist.m3u8"), "w") as f:
        f.write(response.text)

    print_out(f"\nDownloading {len(vod_paths)} VODs using {args.max_workers} workers to {target_dir}")
    sources = [base_uri + vod_path for vod_path in vod_paths]
    targets = [os.path.join(target_dir, f"{k:05d}.ts") for k, _ in enumerate(vod_paths)]
    asyncio.run(download_all(sources, targets, args.max_workers, rate_limit=args.rate_limit))

    # Make a modified playlist which references downloaded VODs
    # Keep only the downloaded segments and skip the rest
    org_segments = playlist.segments.copy()

    # Only used for lookups, so a plain dict is enough
    path_map = dict(zip(vod_paths, targets))
    playlist.segments.clear()
    for segment in org_segments:
        if segment.uri in path_map:
            segment.uri = path_map[segment.uri]
            playlist.segments.append(segment)

    playlist_path = path.join(target_dir, "playlist_downloaded.m3u8")
    playlist.dump(playlist_path)

    if args.no_join:
        print_out("\n\n<dim>Skipping joining files...</dim>")
        print_out(f"VODs downloaded to:\n<blue>{target_dir}</blue>")
        return

    print_out("\n\nJoining files...")
    _join_vods(playlist_path, target, overwrite, video)

    if args.keep:
        print_out(f"\n<dim>Temporary files not deleted: {target_dir}</dim>")
    else:
        print_out("\n<dim>Deleting temporary files...</dim>")
        shutil.rmtree(target_dir)

    print_out(f"\nDownloaded: <green>{target}</green>")
2024-03-28 11:06:50 +00:00
def _determine_time_range(video_id, args: DownloadOptions):
    """Return the ``(start, end)`` range to download, in seconds.

    An explicit ``args.start`` / ``args.end`` takes precedence and is
    returned unchanged. Otherwise, if ``args.chapter`` is set, the range of
    that chapter is used: 0 asks the user to pick a chapter, any other value
    is a 1-based chapter number. With neither given, ``(None, None)`` is
    returned.

    Raises:
        ConsoleError: if a chapter was requested but the video has no
            chapters, or the chapter number is out of range.
    """
    if args.start or args.end:
        return args.start, args.end

    if args.chapter is None:
        return None, None

    print_out("<dim>Fetching chapters...</dim>")
    chapters = twitch.get_video_chapters(video_id)

    if not chapters:
        raise ConsoleError("This video has no chapters")

    if args.chapter == 0:
        chapter = _choose_chapter_interactive(chapters)
    elif 1 <= args.chapter <= len(chapters):
        chapter = chapters[args.chapter - 1]
    else:
        # Checked explicitly instead of catching IndexError: a negative
        # number would otherwise wrap around via negative indexing and
        # silently select a chapter counted from the end.
        raise ConsoleError(f"Chapter {args.chapter} does not exist. This video has {len(chapters)} chapters.")

    print_out(f'Chapter selected: <blue>{chapter["description"]}</blue>\n')

    # Chapter position and duration are given in milliseconds
    start = chapter["positionMilliseconds"] // 1000
    duration = chapter["durationMilliseconds"] // 1000
    return start, start + duration
def _choose_chapter_interactive(chapters):
    """List the chapters, numbered from 1, and return the one the user picks.

    Each line shows the chapter's description and its duration, which is
    converted from milliseconds to whole seconds before formatting.
    """
    print_out("\nChapters:")
    for index, chapter in enumerate(chapters):
        seconds = chapter["durationMilliseconds"] // 1000
        duration = utils.format_time(seconds)
        print_out(f'{index + 1}) <b>{chapter["description"]}</b> <dim>({duration})</dim>')

    # The answer is 1-based, matching the numbering printed above
    selected = utils.read_int("Select a chapter", 1, len(chapters))
    return chapters[selected - 1]