From a7ad4d8dcc9e5d56469007c55900a8a5064467ae Mon Sep 17 00:00:00 2001 From: Ivan Habunek Date: Sat, 6 Apr 2024 10:15:26 +0200 Subject: [PATCH] Extract playlist parsing code --- twitchdl/cli.py | 5 +- twitchdl/commands/download.py | 137 +++++++++------------------------- twitchdl/entities.py | 2 +- twitchdl/playlists.py | 127 +++++++++++++++++++++++++++++++ twitchdl/twitch.py | 2 +- 5 files changed, 166 insertions(+), 107 deletions(-) create mode 100644 twitchdl/playlists.py diff --git a/twitchdl/cli.py b/twitchdl/cli.py index 25c0b30..2745cb3 100644 --- a/twitchdl/cli.py +++ b/twitchdl/cli.py @@ -1,9 +1,10 @@ -import click import logging import platform import re import sys +import click + from twitchdl import __version__ from twitchdl.entities import DownloadOptions from twitchdl.twitch import ClipsPeriod, VideosSort, VideosType @@ -258,7 +259,7 @@ def download( overwrite: bool, output: str, quality: str | None, - rate_limit: str | None, + rate_limit: int | None, start: int | None, max_workers: int, ): diff --git a/twitchdl/commands/download.py b/twitchdl/commands/download.py index e70f5f9..18e4ae4 100644 --- a/twitchdl/commands/download.py +++ b/twitchdl/commands/download.py @@ -7,19 +7,24 @@ import subprocess import tempfile from os import path from pathlib import Path -from typing import List, Optional, OrderedDict from urllib.parse import urlencode, urlparse import click import httpx -import m3u8 from twitchdl import twitch, utils from twitchdl.download import download_file from twitchdl.entities import DownloadOptions from twitchdl.exceptions import ConsoleError from twitchdl.http import download_all -from twitchdl.output import blue, bold, dim, green, print_log, yellow +from twitchdl.output import blue, bold, green, print_log, yellow +from twitchdl.playlists import ( + enumerate_vods, + load_m3u8, + make_join_playlist, + parse_playlists, + select_playlist, +) from twitchdl.twitch import Chapter, Clip, ClipAccessToken, Video @@ -40,48 +45,7 @@ def download_one(video: str, args: DownloadOptions): raise ConsoleError(f"Invalid input: {video}") -def _parse_playlists(playlists_m3u8): - playlists = m3u8.loads(playlists_m3u8) - - for p in sorted(playlists.playlists, key=lambda p: p.stream_info.resolution is None): - if p.stream_info.resolution: - name = p.media[0].name - description = "x".join(str(r) for r in p.stream_info.resolution) - else: - name = p.media[0].group_id - description = None - - yield name, description, p.uri - - -def _get_playlist_by_name(playlists, quality): - if quality == "source": - _, _, uri = playlists[0] - return uri - - for name, _, uri in playlists: - if name == quality: - return uri - - available = ", ".join([name for (name, _, _) in playlists]) - msg = f"Quality '{quality}' not found. Available qualities are: {available}" - raise ConsoleError(msg) - - -def _select_playlist_interactive(playlists): - click.echo("\nAvailable qualities:") - for n, (name, resolution, uri) in enumerate(playlists): - if resolution: - click.echo(f"{n + 1}) {bold(name)} {dim(f'({resolution})')}") - else: - click.echo(f"{n + 1}) {bold(name)}") - - no = utils.read_int("Choose quality", min=1, max=len(playlists) + 1, default=1) - _, _, uri = playlists[no - 1] - return uri - - -def _join_vods(playlist_path: str, target: str, overwrite: bool, video): +def _join_vods(playlist_path: str, target: str, overwrite: bool, video: Video): description = video["description"] or "" description = description.strip() @@ -183,26 +147,6 @@ def _clip_target_filename(clip: Clip, args: DownloadOptions): raise ConsoleError(f"Invalid key {e} used in --output. Supported keys are: {supported}") -def _get_vod_paths(playlist, start: Optional[int], end: Optional[int]) -> List[str]: - """Extract unique VOD paths for download from playlist.""" - files = [] - vod_start = 0 - for segment in playlist.segments: - vod_end = vod_start + segment.duration - - # `vod_end > start` is used here becuase it's better to download a bit - # more than a bit less, similar for the end condition - start_condition = not start or vod_end > start - end_condition = not end or vod_start < end - - if start_condition and end_condition and segment.uri not in files: - files.append(segment.uri) - - vod_start = vod_end - - return files - - def _crete_temp_dir(base_uri: str) -> str: """Create a temp dir to store downloads if it doesn't exist.""" path = urlparse(base_uri).path.lstrip("/") @@ -291,7 +235,7 @@ def _download_clip(slug: str, args: DownloadOptions) -> None: click.echo(f"Downloaded: {blue(target)}") -def _download_video(video_id, args: DownloadOptions) -> None: +def _download_video(video_id: str, args: DownloadOptions) -> None: if args.start and args.end and args.end <= args.start: raise ConsoleError("End time must be greater than start time") @@ -301,9 +245,7 @@ def _download_video(video_id, args: DownloadOptions) -> None: if not video: raise ConsoleError(f"Video {video_id} not found") - title = video["title"] - user = video["creator"]["displayName"] - click.echo(f"Found: {blue(title)} by {yellow(user)}") + click.echo(f"Found: {blue(video['title'])} by {yellow(video['creator']['displayName'])}") target = _video_target_filename(video, args) click.echo(f"Output: {blue(target)}") @@ -321,50 +263,33 @@ def _download_video(video_id, args: DownloadOptions) -> None: access_token = twitch.get_access_token(video_id, auth_token=args.auth_token) print_log("Fetching playlists...") - playlists_m3u8 = twitch.get_playlists(video_id, access_token) - playlists = list(_parse_playlists(playlists_m3u8)) - playlist_uri = ( - _get_playlist_by_name(playlists, args.quality) - if args.quality - else _select_playlist_interactive(playlists) - ) + playlists_text = twitch.get_playlists(video_id, access_token) + playlists = parse_playlists(playlists_text) + playlist = select_playlist(playlists, args.quality) print_log("Fetching playlist...") - response = httpx.get(playlist_uri) - response.raise_for_status() - playlist = m3u8.loads(response.text) + vods_text = http_get(playlist.url) + vods_m3u8 = load_m3u8(vods_text) + vods = enumerate_vods(vods_m3u8, start, end) - base_uri = re.sub("/[^/]+$", "/", playlist_uri) + base_uri = re.sub("/[^/]+$", "/", playlist.url) target_dir = _crete_temp_dir(base_uri) - vod_paths = _get_vod_paths(playlist, start, end) # Save playlists for debugging purposes with open(path.join(target_dir, "playlists.m3u8"), "w") as f: - f.write(playlists_m3u8) + f.write(playlists_text) with open(path.join(target_dir, "playlist.m3u8"), "w") as f: - f.write(response.text) + f.write(vods_text) - click.echo( - f"\nDownloading {len(vod_paths)} VODs using {args.max_workers} workers to {target_dir}" - ) - sources = [base_uri + path for path in vod_paths] - targets = [os.path.join(target_dir, f"{k:05d}.ts") for k, _ in enumerate(vod_paths)] + click.echo(f"\nDownloading {len(vods)} VODs using {args.max_workers} workers to {target_dir}") + + sources = [base_uri + vod.path for vod in vods] + targets = [os.path.join(target_dir, f"{vod.index:05d}.ts") for vod in vods] asyncio.run(download_all(sources, targets, args.max_workers, rate_limit=args.rate_limit)) - # Make a modified playlist which references downloaded VODs - # Keep only the downloaded segments and skip the rest - org_segments = playlist.segments.copy() - - path_map = OrderedDict(zip(vod_paths, targets)) - playlist.segments.clear() - for segment in org_segments: - if segment.uri in path_map: - segment.uri = path_map[segment.uri] - playlist.segments.append(segment) - - playlist_path = path.join(target_dir, "playlist_downloaded.m3u8") - playlist.dump(playlist_path) - + join_playlist = make_join_playlist(vods_m3u8, vods, targets) + join_playlist_path = path.join(target_dir, "playlist_downloaded.m3u8") + join_playlist.dump(join_playlist_path) # type: ignore click.echo() if args.no_join: @@ -377,7 +302,7 @@ def _download_video(video_id, args: DownloadOptions) -> None: _concat_vods(targets, target) else: print_log("Joining files...") - _join_vods(playlist_path, target, args.overwrite, video) + _join_vods(join_playlist_path, target, args.overwrite, video) click.echo() @@ -390,6 +315,12 @@ def _download_video(video_id, args: DownloadOptions) -> None: click.echo(f"\nDownloaded: {green(target)}") +def http_get(url: str) -> str: + response = httpx.get(url) + response.raise_for_status() + return response.text + + def _determine_time_range(video_id: str, args: DownloadOptions): if args.start or args.end: return args.start, args.end diff --git a/twitchdl/entities.py b/twitchdl/entities.py index 78c744e..97ac7b8 100644 --- a/twitchdl/entities.py +++ b/twitchdl/entities.py @@ -15,7 +15,7 @@ class DownloadOptions: overwrite: bool output: str quality: str | None - rate_limit: str | None + rate_limit: int | None start: int | None max_workers: int diff --git a/twitchdl/playlists.py b/twitchdl/playlists.py new file mode 100644 index 0000000..76aab78 --- /dev/null +++ b/twitchdl/playlists.py @@ -0,0 +1,127 @@ +""" +Parse and manipulate m3u8 playlists. +""" + +from dataclasses import dataclass +from typing import Generator, OrderedDict + +import click +import m3u8 + +from twitchdl import utils +from twitchdl.output import bold, dim + + +@dataclass +class Playlist: + name: str + resolution: str | None + url: str + + +@dataclass +class Vod: + index: int + """Ordinal number of the VOD in the playlist""" + path: str + """Path part of the VOD URL""" + duration: int + """Segment duration in seconds""" + + +def parse_playlists(playlists_m3u8: str): + def _parse(source: str) -> Generator[Playlist, None, None]: + document = load_m3u8(source) + + for p in document.playlists: + if p.stream_info.resolution: + name = p.media[0].name + resolution = "x".join(str(r) for r in p.stream_info.resolution) + else: + name = p.media[0].group_id + resolution = None + + yield Playlist(name, resolution, p.uri) + + # Move audio to bottom, it has no resolution + return sorted(_parse(playlists_m3u8), key=lambda p: p.resolution is None) + + +def load_m3u8(playlist_m3u8: str) -> m3u8.M3U8: + return m3u8.loads(playlist_m3u8) + + +def enumerate_vods(document: m3u8.M3U8, start: int | None, end: int | None) -> list[Vod]: + """Extract VODs for download from document.""" + vods = [] + vod_start = 0 + + for index, segment in enumerate(document.segments): + vod_end = vod_start + segment.duration + + # `vod_end > start` is used here becuase it's better to download a bit + # more than a bit less, similar for the end condition + start_condition = not start or vod_end > start + end_condition = not end or vod_start < end + + if start_condition and end_condition: + vods.append(Vod(index, segment.uri, segment.duration)) + + vod_start = vod_end + + return vods + + +def make_join_playlist( + playlist: m3u8.M3U8, + vods: list[Vod], + targets: list[str], +) -> m3u8.Playlist: + """ + Make a modified playlist which references downloaded VODs + Keep only the downloaded segments and skip the rest + """ + org_segments = playlist.segments.copy() + + path_map = OrderedDict(zip([v.path for v in vods], targets)) + playlist.segments.clear() + for segment in org_segments: + if segment.uri in path_map: + segment.uri = path_map[segment.uri] + playlist.segments.append(segment) + + return playlist + + +def select_playlist(playlists: list[Playlist], quality: str | None) -> Playlist: + return ( + select_playlist_by_name(playlists, quality) + if quality is not None + else select_playlist_interactive(playlists) + ) + + +def select_playlist_by_name(playlists: list[Playlist], quality: str) -> Playlist: + if quality == "source": + return playlists[0] + + for playlist in playlists: + if playlist.name == quality: + return playlist + + available = ", ".join([p.name for p in playlists]) + msg = f"Quality '{quality}' not found. Available qualities are: {available}" + raise click.ClickException(msg) + + +def select_playlist_interactive(playlists: list[Playlist]) -> Playlist: + click.echo("\nAvailable qualities:") + for n, playlist in enumerate(playlists): + if playlist.resolution: + click.echo(f"{n + 1}) {bold(playlist.name)} {dim(f'({playlist.resolution})')}") + else: + click.echo(f"{n + 1}) {bold(playlist.name)}") + + no = utils.read_int("Choose quality", min=1, max=len(playlists) + 1, default=1) + playlist = playlists[no - 1] + return playlist diff --git a/twitchdl/twitch.py b/twitchdl/twitch.py index fbeef5e..be50c29 100644 --- a/twitchdl/twitch.py +++ b/twitchdl/twitch.py @@ -403,7 +403,7 @@ def get_access_token(video_id: str, auth_token: str | None = None) -> AccessToke raise -def get_playlists(video_id: str, access_token: AccessToken): +def get_playlists(video_id: str, access_token: AccessToken) -> str: """ For a given video return a playlist which contains possible video qualities. """