mirror of
https://github.com/ihabunek/twitch-dl
synced 2024-08-30 18:32:25 +00:00
Extract playlist parsing code
This commit is contained in:
parent
d6390bc7a2
commit
a7ad4d8dcc
@ -1,9 +1,10 @@
|
|||||||
import click
|
|
||||||
import logging
|
import logging
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
import click
|
||||||
|
|
||||||
from twitchdl import __version__
|
from twitchdl import __version__
|
||||||
from twitchdl.entities import DownloadOptions
|
from twitchdl.entities import DownloadOptions
|
||||||
from twitchdl.twitch import ClipsPeriod, VideosSort, VideosType
|
from twitchdl.twitch import ClipsPeriod, VideosSort, VideosType
|
||||||
@ -258,7 +259,7 @@ def download(
|
|||||||
overwrite: bool,
|
overwrite: bool,
|
||||||
output: str,
|
output: str,
|
||||||
quality: str | None,
|
quality: str | None,
|
||||||
rate_limit: str | None,
|
rate_limit: int | None,
|
||||||
start: int | None,
|
start: int | None,
|
||||||
max_workers: int,
|
max_workers: int,
|
||||||
):
|
):
|
||||||
|
@ -7,19 +7,24 @@ import subprocess
|
|||||||
import tempfile
|
import tempfile
|
||||||
from os import path
|
from os import path
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional, OrderedDict
|
|
||||||
from urllib.parse import urlencode, urlparse
|
from urllib.parse import urlencode, urlparse
|
||||||
|
|
||||||
import click
|
import click
|
||||||
import httpx
|
import httpx
|
||||||
import m3u8
|
|
||||||
|
|
||||||
from twitchdl import twitch, utils
|
from twitchdl import twitch, utils
|
||||||
from twitchdl.download import download_file
|
from twitchdl.download import download_file
|
||||||
from twitchdl.entities import DownloadOptions
|
from twitchdl.entities import DownloadOptions
|
||||||
from twitchdl.exceptions import ConsoleError
|
from twitchdl.exceptions import ConsoleError
|
||||||
from twitchdl.http import download_all
|
from twitchdl.http import download_all
|
||||||
from twitchdl.output import blue, bold, dim, green, print_log, yellow
|
from twitchdl.output import blue, bold, green, print_log, yellow
|
||||||
|
from twitchdl.playlists import (
|
||||||
|
enumerate_vods,
|
||||||
|
load_m3u8,
|
||||||
|
make_join_playlist,
|
||||||
|
parse_playlists,
|
||||||
|
select_playlist,
|
||||||
|
)
|
||||||
from twitchdl.twitch import Chapter, Clip, ClipAccessToken, Video
|
from twitchdl.twitch import Chapter, Clip, ClipAccessToken, Video
|
||||||
|
|
||||||
|
|
||||||
@ -40,48 +45,7 @@ def download_one(video: str, args: DownloadOptions):
|
|||||||
raise ConsoleError(f"Invalid input: {video}")
|
raise ConsoleError(f"Invalid input: {video}")
|
||||||
|
|
||||||
|
|
||||||
def _parse_playlists(playlists_m3u8):
|
def _join_vods(playlist_path: str, target: str, overwrite: bool, video: Video):
|
||||||
playlists = m3u8.loads(playlists_m3u8)
|
|
||||||
|
|
||||||
for p in sorted(playlists.playlists, key=lambda p: p.stream_info.resolution is None):
|
|
||||||
if p.stream_info.resolution:
|
|
||||||
name = p.media[0].name
|
|
||||||
description = "x".join(str(r) for r in p.stream_info.resolution)
|
|
||||||
else:
|
|
||||||
name = p.media[0].group_id
|
|
||||||
description = None
|
|
||||||
|
|
||||||
yield name, description, p.uri
|
|
||||||
|
|
||||||
|
|
||||||
def _get_playlist_by_name(playlists, quality):
|
|
||||||
if quality == "source":
|
|
||||||
_, _, uri = playlists[0]
|
|
||||||
return uri
|
|
||||||
|
|
||||||
for name, _, uri in playlists:
|
|
||||||
if name == quality:
|
|
||||||
return uri
|
|
||||||
|
|
||||||
available = ", ".join([name for (name, _, _) in playlists])
|
|
||||||
msg = f"Quality '{quality}' not found. Available qualities are: {available}"
|
|
||||||
raise ConsoleError(msg)
|
|
||||||
|
|
||||||
|
|
||||||
def _select_playlist_interactive(playlists):
|
|
||||||
click.echo("\nAvailable qualities:")
|
|
||||||
for n, (name, resolution, uri) in enumerate(playlists):
|
|
||||||
if resolution:
|
|
||||||
click.echo(f"{n + 1}) {bold(name)} {dim(f'({resolution})')}")
|
|
||||||
else:
|
|
||||||
click.echo(f"{n + 1}) {bold(name)}")
|
|
||||||
|
|
||||||
no = utils.read_int("Choose quality", min=1, max=len(playlists) + 1, default=1)
|
|
||||||
_, _, uri = playlists[no - 1]
|
|
||||||
return uri
|
|
||||||
|
|
||||||
|
|
||||||
def _join_vods(playlist_path: str, target: str, overwrite: bool, video):
|
|
||||||
description = video["description"] or ""
|
description = video["description"] or ""
|
||||||
description = description.strip()
|
description = description.strip()
|
||||||
|
|
||||||
@ -183,26 +147,6 @@ def _clip_target_filename(clip: Clip, args: DownloadOptions):
|
|||||||
raise ConsoleError(f"Invalid key {e} used in --output. Supported keys are: {supported}")
|
raise ConsoleError(f"Invalid key {e} used in --output. Supported keys are: {supported}")
|
||||||
|
|
||||||
|
|
||||||
def _get_vod_paths(playlist, start: Optional[int], end: Optional[int]) -> List[str]:
|
|
||||||
"""Extract unique VOD paths for download from playlist."""
|
|
||||||
files = []
|
|
||||||
vod_start = 0
|
|
||||||
for segment in playlist.segments:
|
|
||||||
vod_end = vod_start + segment.duration
|
|
||||||
|
|
||||||
# `vod_end > start` is used here becuase it's better to download a bit
|
|
||||||
# more than a bit less, similar for the end condition
|
|
||||||
start_condition = not start or vod_end > start
|
|
||||||
end_condition = not end or vod_start < end
|
|
||||||
|
|
||||||
if start_condition and end_condition and segment.uri not in files:
|
|
||||||
files.append(segment.uri)
|
|
||||||
|
|
||||||
vod_start = vod_end
|
|
||||||
|
|
||||||
return files
|
|
||||||
|
|
||||||
|
|
||||||
def _crete_temp_dir(base_uri: str) -> str:
|
def _crete_temp_dir(base_uri: str) -> str:
|
||||||
"""Create a temp dir to store downloads if it doesn't exist."""
|
"""Create a temp dir to store downloads if it doesn't exist."""
|
||||||
path = urlparse(base_uri).path.lstrip("/")
|
path = urlparse(base_uri).path.lstrip("/")
|
||||||
@ -291,7 +235,7 @@ def _download_clip(slug: str, args: DownloadOptions) -> None:
|
|||||||
click.echo(f"Downloaded: {blue(target)}")
|
click.echo(f"Downloaded: {blue(target)}")
|
||||||
|
|
||||||
|
|
||||||
def _download_video(video_id, args: DownloadOptions) -> None:
|
def _download_video(video_id: str, args: DownloadOptions) -> None:
|
||||||
if args.start and args.end and args.end <= args.start:
|
if args.start and args.end and args.end <= args.start:
|
||||||
raise ConsoleError("End time must be greater than start time")
|
raise ConsoleError("End time must be greater than start time")
|
||||||
|
|
||||||
@ -301,9 +245,7 @@ def _download_video(video_id, args: DownloadOptions) -> None:
|
|||||||
if not video:
|
if not video:
|
||||||
raise ConsoleError(f"Video {video_id} not found")
|
raise ConsoleError(f"Video {video_id} not found")
|
||||||
|
|
||||||
title = video["title"]
|
click.echo(f"Found: {blue(video['title'])} by {yellow(video['creator']['displayName'])}")
|
||||||
user = video["creator"]["displayName"]
|
|
||||||
click.echo(f"Found: {blue(title)} by {yellow(user)}")
|
|
||||||
|
|
||||||
target = _video_target_filename(video, args)
|
target = _video_target_filename(video, args)
|
||||||
click.echo(f"Output: {blue(target)}")
|
click.echo(f"Output: {blue(target)}")
|
||||||
@ -321,50 +263,33 @@ def _download_video(video_id, args: DownloadOptions) -> None:
|
|||||||
access_token = twitch.get_access_token(video_id, auth_token=args.auth_token)
|
access_token = twitch.get_access_token(video_id, auth_token=args.auth_token)
|
||||||
|
|
||||||
print_log("Fetching playlists...")
|
print_log("Fetching playlists...")
|
||||||
playlists_m3u8 = twitch.get_playlists(video_id, access_token)
|
playlists_text = twitch.get_playlists(video_id, access_token)
|
||||||
playlists = list(_parse_playlists(playlists_m3u8))
|
playlists = parse_playlists(playlists_text)
|
||||||
playlist_uri = (
|
playlist = select_playlist(playlists, args.quality)
|
||||||
_get_playlist_by_name(playlists, args.quality)
|
|
||||||
if args.quality
|
|
||||||
else _select_playlist_interactive(playlists)
|
|
||||||
)
|
|
||||||
|
|
||||||
print_log("Fetching playlist...")
|
print_log("Fetching playlist...")
|
||||||
response = httpx.get(playlist_uri)
|
vods_text = http_get(playlist.url)
|
||||||
response.raise_for_status()
|
vods_m3u8 = load_m3u8(vods_text)
|
||||||
playlist = m3u8.loads(response.text)
|
vods = enumerate_vods(vods_m3u8, start, end)
|
||||||
|
|
||||||
base_uri = re.sub("/[^/]+$", "/", playlist_uri)
|
base_uri = re.sub("/[^/]+$", "/", playlist.url)
|
||||||
target_dir = _crete_temp_dir(base_uri)
|
target_dir = _crete_temp_dir(base_uri)
|
||||||
vod_paths = _get_vod_paths(playlist, start, end)
|
|
||||||
|
|
||||||
# Save playlists for debugging purposes
|
# Save playlists for debugging purposes
|
||||||
with open(path.join(target_dir, "playlists.m3u8"), "w") as f:
|
with open(path.join(target_dir, "playlists.m3u8"), "w") as f:
|
||||||
f.write(playlists_m3u8)
|
f.write(playlists_text)
|
||||||
with open(path.join(target_dir, "playlist.m3u8"), "w") as f:
|
with open(path.join(target_dir, "playlist.m3u8"), "w") as f:
|
||||||
f.write(response.text)
|
f.write(vods_text)
|
||||||
|
|
||||||
click.echo(
|
click.echo(f"\nDownloading {len(vods)} VODs using {args.max_workers} workers to {target_dir}")
|
||||||
f"\nDownloading {len(vod_paths)} VODs using {args.max_workers} workers to {target_dir}"
|
|
||||||
)
|
sources = [base_uri + vod.path for vod in vods]
|
||||||
sources = [base_uri + path for path in vod_paths]
|
targets = [os.path.join(target_dir, f"{vod.index:05d}.ts") for vod in vods]
|
||||||
targets = [os.path.join(target_dir, f"{k:05d}.ts") for k, _ in enumerate(vod_paths)]
|
|
||||||
asyncio.run(download_all(sources, targets, args.max_workers, rate_limit=args.rate_limit))
|
asyncio.run(download_all(sources, targets, args.max_workers, rate_limit=args.rate_limit))
|
||||||
|
|
||||||
# Make a modified playlist which references downloaded VODs
|
join_playlist = make_join_playlist(vods_m3u8, vods, targets)
|
||||||
# Keep only the downloaded segments and skip the rest
|
join_playlist_path = path.join(target_dir, "playlist_downloaded.m3u8")
|
||||||
org_segments = playlist.segments.copy()
|
join_playlist.dump(join_playlist_path) # type: ignore
|
||||||
|
|
||||||
path_map = OrderedDict(zip(vod_paths, targets))
|
|
||||||
playlist.segments.clear()
|
|
||||||
for segment in org_segments:
|
|
||||||
if segment.uri in path_map:
|
|
||||||
segment.uri = path_map[segment.uri]
|
|
||||||
playlist.segments.append(segment)
|
|
||||||
|
|
||||||
playlist_path = path.join(target_dir, "playlist_downloaded.m3u8")
|
|
||||||
playlist.dump(playlist_path)
|
|
||||||
|
|
||||||
click.echo()
|
click.echo()
|
||||||
|
|
||||||
if args.no_join:
|
if args.no_join:
|
||||||
@ -377,7 +302,7 @@ def _download_video(video_id, args: DownloadOptions) -> None:
|
|||||||
_concat_vods(targets, target)
|
_concat_vods(targets, target)
|
||||||
else:
|
else:
|
||||||
print_log("Joining files...")
|
print_log("Joining files...")
|
||||||
_join_vods(playlist_path, target, args.overwrite, video)
|
_join_vods(join_playlist_path, target, args.overwrite, video)
|
||||||
|
|
||||||
click.echo()
|
click.echo()
|
||||||
|
|
||||||
@ -390,6 +315,12 @@ def _download_video(video_id, args: DownloadOptions) -> None:
|
|||||||
click.echo(f"\nDownloaded: {green(target)}")
|
click.echo(f"\nDownloaded: {green(target)}")
|
||||||
|
|
||||||
|
|
||||||
|
def http_get(url: str) -> str:
|
||||||
|
response = httpx.get(url)
|
||||||
|
response.raise_for_status()
|
||||||
|
return response.text
|
||||||
|
|
||||||
|
|
||||||
def _determine_time_range(video_id: str, args: DownloadOptions):
|
def _determine_time_range(video_id: str, args: DownloadOptions):
|
||||||
if args.start or args.end:
|
if args.start or args.end:
|
||||||
return args.start, args.end
|
return args.start, args.end
|
||||||
|
@ -15,7 +15,7 @@ class DownloadOptions:
|
|||||||
overwrite: bool
|
overwrite: bool
|
||||||
output: str
|
output: str
|
||||||
quality: str | None
|
quality: str | None
|
||||||
rate_limit: str | None
|
rate_limit: int | None
|
||||||
start: int | None
|
start: int | None
|
||||||
max_workers: int
|
max_workers: int
|
||||||
|
|
||||||
|
127
twitchdl/playlists.py
Normal file
127
twitchdl/playlists.py
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
"""
|
||||||
|
Parse and manipulate m3u8 playlists.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Generator, OrderedDict
|
||||||
|
|
||||||
|
import click
|
||||||
|
import m3u8
|
||||||
|
|
||||||
|
from twitchdl import utils
|
||||||
|
from twitchdl.output import bold, dim
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Playlist:
|
||||||
|
name: str
|
||||||
|
resolution: str | None
|
||||||
|
url: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Vod:
|
||||||
|
index: int
|
||||||
|
"""Ordinal number of the VOD in the playlist"""
|
||||||
|
path: str
|
||||||
|
"""Path part of the VOD URL"""
|
||||||
|
duration: int
|
||||||
|
"""Segment duration in seconds"""
|
||||||
|
|
||||||
|
|
||||||
|
def parse_playlists(playlists_m3u8: str):
|
||||||
|
def _parse(source: str) -> Generator[Playlist, None, None]:
|
||||||
|
document = load_m3u8(source)
|
||||||
|
|
||||||
|
for p in document.playlists:
|
||||||
|
if p.stream_info.resolution:
|
||||||
|
name = p.media[0].name
|
||||||
|
resolution = "x".join(str(r) for r in p.stream_info.resolution)
|
||||||
|
else:
|
||||||
|
name = p.media[0].group_id
|
||||||
|
resolution = None
|
||||||
|
|
||||||
|
yield Playlist(name, resolution, p.uri)
|
||||||
|
|
||||||
|
# Move audio to bottom, it has no resolution
|
||||||
|
return sorted(_parse(playlists_m3u8), key=lambda p: p.resolution is None)
|
||||||
|
|
||||||
|
|
||||||
|
def load_m3u8(playlist_m3u8: str) -> m3u8.M3U8:
|
||||||
|
return m3u8.loads(playlist_m3u8)
|
||||||
|
|
||||||
|
|
||||||
|
def enumerate_vods(document: m3u8.M3U8, start: int | None, end: int | None) -> list[Vod]:
|
||||||
|
"""Extract VODs for download from document."""
|
||||||
|
vods = []
|
||||||
|
vod_start = 0
|
||||||
|
|
||||||
|
for index, segment in enumerate(document.segments):
|
||||||
|
vod_end = vod_start + segment.duration
|
||||||
|
|
||||||
|
# `vod_end > start` is used here becuase it's better to download a bit
|
||||||
|
# more than a bit less, similar for the end condition
|
||||||
|
start_condition = not start or vod_end > start
|
||||||
|
end_condition = not end or vod_start < end
|
||||||
|
|
||||||
|
if start_condition and end_condition:
|
||||||
|
vods.append(Vod(index, segment.uri, segment.duration))
|
||||||
|
|
||||||
|
vod_start = vod_end
|
||||||
|
|
||||||
|
return vods
|
||||||
|
|
||||||
|
|
||||||
|
def make_join_playlist(
|
||||||
|
playlist: m3u8.M3U8,
|
||||||
|
vods: list[Vod],
|
||||||
|
targets: list[str],
|
||||||
|
) -> m3u8.Playlist:
|
||||||
|
"""
|
||||||
|
Make a modified playlist which references downloaded VODs
|
||||||
|
Keep only the downloaded segments and skip the rest
|
||||||
|
"""
|
||||||
|
org_segments = playlist.segments.copy()
|
||||||
|
|
||||||
|
path_map = OrderedDict(zip([v.path for v in vods], targets))
|
||||||
|
playlist.segments.clear()
|
||||||
|
for segment in org_segments:
|
||||||
|
if segment.uri in path_map:
|
||||||
|
segment.uri = path_map[segment.uri]
|
||||||
|
playlist.segments.append(segment)
|
||||||
|
|
||||||
|
return playlist
|
||||||
|
|
||||||
|
|
||||||
|
def select_playlist(playlists: list[Playlist], quality: str | None) -> Playlist:
|
||||||
|
return (
|
||||||
|
select_playlist_by_name(playlists, quality)
|
||||||
|
if quality is not None
|
||||||
|
else select_playlist_interactive(playlists)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def select_playlist_by_name(playlists: list[Playlist], quality: str) -> Playlist:
|
||||||
|
if quality == "source":
|
||||||
|
return playlists[0]
|
||||||
|
|
||||||
|
for playlist in playlists:
|
||||||
|
if playlist.name == quality:
|
||||||
|
return playlist
|
||||||
|
|
||||||
|
available = ", ".join([p.name for p in playlists])
|
||||||
|
msg = f"Quality '{quality}' not found. Available qualities are: {available}"
|
||||||
|
raise click.ClickException(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def select_playlist_interactive(playlists: list[Playlist]) -> Playlist:
|
||||||
|
click.echo("\nAvailable qualities:")
|
||||||
|
for n, playlist in enumerate(playlists):
|
||||||
|
if playlist.resolution:
|
||||||
|
click.echo(f"{n + 1}) {bold(playlist.name)} {dim(f'({playlist.resolution})')}")
|
||||||
|
else:
|
||||||
|
click.echo(f"{n + 1}) {bold(playlist.name)}")
|
||||||
|
|
||||||
|
no = utils.read_int("Choose quality", min=1, max=len(playlists) + 1, default=1)
|
||||||
|
playlist = playlists[no - 1]
|
||||||
|
return playlist
|
@ -403,7 +403,7 @@ def get_access_token(video_id: str, auth_token: str | None = None) -> AccessToke
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
def get_playlists(video_id: str, access_token: AccessToken):
|
def get_playlists(video_id: str, access_token: AccessToken) -> str:
|
||||||
"""
|
"""
|
||||||
For a given video return a playlist which contains possible video qualities.
|
For a given video return a playlist which contains possible video qualities.
|
||||||
"""
|
"""
|
||||||
|
Loading…
Reference in New Issue
Block a user