Use m3u8 lib to parse playlists

2024-08-30 18:32:25 +00:00 · 2019-08-23 12:36:05 +02:00
parent 6813bb51b4
commit 000754af8c
4 changed files with 61 additions and 102 deletions
--- a/setup.py
+++ b/setup.py
@ -24,6 +24,7 @@ setup(
    packages=['twitchdl'],
    python_requires='>=3.5',
    install_requires=[
        "m3u8>=0.3.12,<0.4",
        "requests>=2.13,<3.0",
    ],
    entry_points={
--- a/twitchdl/commands.py
+++ b/twitchdl/commands.py
@ -1,12 +1,16 @@
 import m3u8
 import os
 import pathlib
 import re
 import requests
 import shutil
 import subprocess
 import tempfile
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
 from functools import partial
 from urllib.parse import urlparse
 from twitchdl import twitch
 from twitchdl.download import download_file
@ -96,13 +100,14 @@ def videos(channel_name, limit, offset, sort, **kwargs):
 def _select_quality(playlists):
    print_out("\nAvailable qualities:")
-    for no, v in playlists.items():
+    for n, p in enumerate(playlists):
-        print_out("{}) {}".format(no, v[0]))
+        name = p.media[0].name if p.media else ""
        resolution = "x".join(str(r) for r in p.stream_info.resolution)
        print_out("{}) {} [{}]".format(n + 1, name, resolution))
-    keys = list(playlists.keys())
+    no = read_int("Choose quality", min=1, max=len(playlists) + 1, default=1)
    no = read_int("Choose quality", min=min(keys), max=max(keys), default=keys[0])
-    return playlists[no]
+    return playlists[no - 1]
 def _print_progress(futures):
@ -127,7 +132,7 @@ def _print_progress(futures):
 def _download_files(base_url, directory, filenames, max_workers):
-    urls = [base_url.format(f) for f in filenames]
+    urls = [base_url + f for f in filenames]
    paths = ["/".join([directory, f]) for f in filenames]
    partials = (partial(download_file, url, path) for url, path in zip(urls, paths))
@ -172,7 +177,7 @@ def _video_target_filename(video, format):
    return name + "." + format
-def parse_video_id(video_id):
+def _parse_video_id(video_id):
    """This can be either a integer ID or an URL to the video on twitch."""
    if re.search(r"^\d+$", video_id):
        return int(video_id)
@ -184,8 +189,33 @@ def parse_video_id(video_id):
    raise ConsoleError("Invalid video ID given, expected integer ID or Twitch URL")
 def _get_files(playlist, start, end):
    """Yield the URIs of the playlist segments which overlap the requested range.

    Segment positions are computed by accumulating `segment.duration` over
    `playlist.segments`, beginning at 0. A falsy `start` or `end` leaves that
    side of the range open.
    """
    elapsed = 0
    for item in playlist.segments:
        item_start, elapsed = elapsed, elapsed + item.duration
        # Segments straddling a boundary are kept on purpose: fetching a bit
        # too much is preferable to cutting off part of the requested range.
        after_start = (elapsed > start) if start else True
        before_end = (item_start < end) if end else True
        if after_start and before_end:
            yield item.uri
 def _crete_temp_dir(base_uri):
    """Ensure the download directory for `base_uri` exists and return its path.

    The directory is the system temp dir, followed by `/twitch-dl`, followed
    by the path component of `base_uri`. Missing parent directories are
    created and an already existing directory is left as it is.
    """
    uri_path = urlparse(base_uri).path
    tmp_root = tempfile.gettempdir()
    target = '{}/twitch-dl{}'.format(tmp_root, uri_path)
    pathlib.Path(target).mkdir(parents=True, exist_ok=True)
    return target
 def download(video_id, max_workers, format='mkv', start=None, end=None, keep=False, **kwargs):
-    video_id = parse_video_id(video_id)
+    video_id = _parse_video_id(video_id)
    if start and end and end <= start:
        raise ConsoleError("End time must be greater than start time")
@ -199,33 +229,34 @@ def download(video_id, max_workers, format='mkv', start=None, end=None, keep=Fal
    print_out("Fetching access token...")
    access_token = twitch.get_access_token(video_id)
    # TODO: save playlists for debugging purposes
    print_out("Fetching playlists...")
    playlists = twitch.get_playlists(video_id, access_token)
-    quality, playlist_url = _select_quality(playlists)
+    playlists = m3u8.loads(playlists)
    selected = _select_quality(playlists.playlists)
    print_out("\nFetching playlist...")
-    base_url, filenames = twitch.get_playlist_urls(playlist_url, start, end)
+    response = requests.get(selected.uri)
    response.raise_for_status()
    playlist = m3u8.loads(response.text)
-    if not filenames:
+    base_uri = re.sub("/[^/]+$", "/", selected.uri)
-        raise ConsoleError("No vods matched, check your start and end times")
+    target_dir = _crete_temp_dir(base_uri)
    filenames = list(_get_files(playlist, start, end))
-    # Create a temp dir to store downloads if it doesn't exist
+    print_out("\nDownloading {} VODs using {} workers to {}".format(
-    directory = '{}/twitch-dl/{}/{}'.format(tempfile.gettempdir(), video_id, quality)
+        len(filenames), max_workers, target_dir))
-    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
+    _download_files(base_uri, target_dir, filenames, max_workers)
    print_out("Download dir: {}".format(directory))
    print_out("Downloading {} VODs using {} workers...".format(len(filenames), max_workers))
    paths = _download_files(base_url, directory, filenames, max_workers)
    print_out("\n\nJoining files...")
    target = _video_target_filename(video, format)
-    _join_vods(directory, paths, target)
+    _join_vods(target_dir, filenames, target)
    if keep:
-        print_out("\nTemporary files not deleted: {}".format(directory))
+        print_out("\nTemporary files not deleted: {}".format(target_dir))
    else:
-        print_out("\nDeleting vods...")
+        print_out("\nDeleting temporary files...")
-        for path in paths:
+        shutil.rmtree(target_dir)
            os.unlink(path)
    print_out("Downloaded: {}".format(target))
--- a/twitchdl/parse.py
+++ b/twitchdl/parse.py
@ -1,64 +0,0 @@
 import re
 from collections import OrderedDict
 from datetime import timedelta
 from twitchdl.exceptions import ConsoleError
 def parse_playlists(data):
    """Parse a master playlist into an ordered mapping of available qualities.

    Every `#EXT-X-MEDIA:TYPE=VIDEO` line sets the name which is attached to
    the following lines starting with `http`.

    Returns an OrderedDict mapping a 1-based index to a `(name, url)` tuple,
    in the order the URLs appear in `data`. `name` is None for URLs which are
    not preceded by any matching media line.
    """
    # Quoted attribute values may contain spaces and punctuation, e.g.
    # NAME="1080p60 (source)", so accept anything up to the closing quote.
    media_pattern = re.compile(
        r'^#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="(?P<group>[^"]+)",NAME="(?P<name>[^"]+)"')
    playlists = OrderedDict()
    name = None
    # Split on line breaks only; splitting on all whitespace would cut a media
    # line in two at the first space inside a quoted value, so it would never
    # match and the URL would be labelled with a stale name.
    for line in data.splitlines():
        line = line.strip()
        match = media_pattern.match(line)
        if match:
            name = match.group('name')
        elif line.startswith('http'):
            playlists[len(playlists) + 1] = (name, line)
    return playlists
 def _get_files(playlist, start, end):
    """Yield the names of the `.ts` files which overlap the requested range.

    `playlist` is the text of a media playlist. Every `#EXTINF:<duration>,`
    entry followed by a `<digits>.ts` file name is considered, and positions
    are computed by accumulating the durations, beginning at 0. A falsy
    `start` or `end` leaves that side of the range open.
    """
    matches = re.findall(r"#EXTINF:(\d+)(\.\d+)?,.*?\s+(\d+\.ts)", playlist)
    vod_start = 0
    for seconds, fraction, filename in matches:
        # Keep the fractional part of the duration (an empty string when the
        # playlist gives whole seconds); truncating every duration to an int
        # makes the computed positions fall behind the real ones.
        vod_end = vod_start + float(seconds + fraction)

        # `vod_end > start` is used here because it's better to download a bit
        # more than a bit less, similar for the end condition
        start_condition = not start or vod_end > start
        end_condition = not end or vod_start < end

        if start_condition and end_condition:
            yield filename

        vod_start = vod_end
 def parse_playlist(url, playlist, start, end):
    """Return the base URL template and the files to download from a playlist.

    `url` is the playlist's own URL; its last path component is replaced by
    `{}` to build the template. `playlist` is the playlist text. When the
    playlist has a `#EXT-X-TWITCH-TOTAL-SECS` tag, `start` and `end` are
    checked against the total duration.

    Returns a `(base_url, files)` tuple, `files` being the list produced by
    `_get_files(playlist, start, end)`.

    Raises ConsoleError when `start` or `end` is greater than the total
    duration.
    """
    base_url = re.sub("/[^/]+$", "/{}", url)

    match = re.search(r"#EXT-X-TWITCH-TOTAL-SECS:(\d+)(\.\d+)?", playlist)

    # Without the total duration tag the limits cannot be validated; skip the
    # check instead of failing on the missing match.
    if match:
        total_seconds = int(match.group(1))

        # Now that video duration is known, validate start and end max values
        if start and start > total_seconds:
            raise ConsoleError("Start time {} greater than video duration {}".format(
                timedelta(seconds=start),
                timedelta(seconds=total_seconds)
            ))

        if end and end > total_seconds:
            raise ConsoleError("End time {} greater than video duration {}".format(
                timedelta(seconds=end),
                timedelta(seconds=total_seconds)
            ))

    files = list(_get_files(playlist, start, end))
    return base_url, files
--- a/twitchdl/twitch.py
+++ b/twitchdl/twitch.py
@ -6,7 +6,6 @@ import requests
 from twitchdl import CLIENT_ID
 from twitchdl.exceptions import ConsoleError
 from twitchdl.parse import parse_playlists, parse_playlist
 def authenticated_get(url, params={}, headers={}):
@ -73,6 +72,9 @@ def get_access_token(video_id):
 def get_playlists(video_id, access_token):
    """
    For a given video return a playlist which contains possible video qualities.
    """
    url = "http://usher.twitch.tv/vod/{}".format(video_id)
    response = requests.get(url, params={
@ -82,15 +84,4 @@ def get_playlists(video_id, access_token):
        "player": "twitchweb",
    })
    response.raise_for_status()
-
+    return response.content.decode('utf-8')
    data = response.content.decode('utf-8')
    return parse_playlists(data)
 def get_playlist_urls(url, start, end):
    """Download the playlist at `url` and pass it on to `parse_playlist`.

    The response body is decoded as UTF-8. An HTTP error status raises via
    `raise_for_status()`. Returns whatever `parse_playlist` returns for the
    given `start` and `end`.
    """
    resp = requests.get(url)
    resp.raise_for_status()
    return parse_playlist(url, resp.content.decode('utf-8'), start, end)