Bump version

Save playlists to temp dir for debugging
Use m3u8 lib to parse playlists
2024-08-30 18:32:25 +00:00 · 2019-08-23 13:08:57 +02:00 · 2019-08-23 13:08:35 +02:00 · 2019-08-23 12:36:05 +02:00 · 2019-08-23 10:16:49 +02:00 · 2019-08-23 09:03:33 +02:00
7 changed files with 119 additions and 111 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,13 @@
 Twitch Downloader change log
 ============================

+1.4.0 (2019-08-23)
+------------------
+
+* Fix usage of deprecated v3 API
+* Use m3u8 lib for parsing playlists
+* Add `--keep` option to preserve downloaded VODs
+
 1.3.1 (2019-08-13)
 ------------------

--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ from setuptools import setup

 setup(
    name='twitch-dl',
-    version='1.3.1',
+    version='1.4.0',
    description='Twitch downloader',
    long_description="Quickly download videos from Twitch",
    author='Ivan Habunek',
@@ -24,6 +24,7 @@ setup(
    packages=['twitchdl'],
    python_requires='>=3.5',
    install_requires=[
+        "m3u8>=0.3.12,<0.4",
        "requests>=2.13,<3.0",
    ],
    entry_points={
--- a/twitchdl/__init__.py
+++ b/twitchdl/__init__.py
@@ -1,3 +1,3 @@
-__version__ = "1.3.1"
+__version__ = "1.4.0"

 CLIENT_ID = "miwy5zk23vh2he94san0bzj5ks1r0p"
--- a/twitchdl/commands.py
+++ b/twitchdl/commands.py
@@ -1,12 +1,16 @@
+import m3u8
 import os
 import pathlib
 import re
+import requests
+import shutil
 import subprocess
 import tempfile

-from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
 from functools import partial
+from urllib.parse import urlparse

 from twitchdl import twitch
 from twitchdl.download import download_file
@@ -73,8 +77,13 @@ def _print_video(video):


 def videos(channel_name, limit, offset, sort, **kwargs):
-    videos = twitch.get_channel_videos(channel_name, limit, offset, sort)
+    print_out("Looking up user...")
+    user = twitch.get_user(channel_name)
+    if not user:
+        raise ConsoleError("User {} not found.".format(channel_name))

+    print_out("Loading videos...")
+    videos = twitch.get_channel_videos(user["id"], limit, offset, sort)
    count = len(videos['videos'])
    if not count:
        print_out("No videos found")
@@ -91,13 +100,14 @@ def videos(channel_name, limit, offset, sort, **kwargs):

 def _select_quality(playlists):
    print_out("\nAvailable qualities:")
-    for no, v in playlists.items():
-        print_out("{}) {}".format(no, v[0]))
+    for n, p in enumerate(playlists):
+        name = p.media[0].name if p.media else ""
+        resolution = "x".join(str(r) for r in p.stream_info.resolution)
+        print_out("{}) {} [{}]".format(n + 1, name, resolution))

-    keys = list(playlists.keys())
-    no = read_int("Choose quality", min=min(keys), max=max(keys), default=keys[0])
+    no = read_int("Choose quality", min=1, max=len(playlists) + 1, default=1)

-    return playlists[no]
+    return playlists[no - 1]


 def _print_progress(futures):
@@ -122,7 +132,7 @@ def _print_progress(futures):


 def _download_files(base_url, directory, filenames, max_workers):
-    urls = [base_url.format(f) for f in filenames]
+    urls = [base_url + f for f in filenames]
    paths = ["/".join([directory, f]) for f in filenames]
    partials = (partial(download_file, url, path) for url, path in zip(urls, paths))

@@ -167,7 +177,7 @@ def _video_target_filename(video, format):
    return name + "." + format


-def parse_video_id(video_id):
+def _parse_video_id(video_id):
    """This can be either a integer ID or an URL to the video on twitch."""
    if re.search(r"^\d+$", video_id):
        return int(video_id)
@@ -179,8 +189,33 @@ def parse_video_id(video_id):
    raise ConsoleError("Invalid video ID given, expected integer ID or Twitch URL")


-def download(video_id, max_workers, format='mkv', start=None, end=None, **kwargs):
-    video_id = parse_video_id(video_id)
+def _get_files(playlist, start, end):
+    """Extract files for download from playlist."""
+    vod_start = 0
+    for segment in playlist.segments:
+        vod_end = vod_start + segment.duration
+
+        # `vod_end > start` is used here because it's better to download a bit
+        # more than a bit less; the same reasoning applies to the end condition
+        start_condition = not start or vod_end > start
+        end_condition = not end or vod_start < end
+
+        if start_condition and end_condition:
+            yield segment.uri
+
+        vod_start = vod_end
+
+
+def _crete_temp_dir(base_uri):
+    """Create a temp dir to store downloads if it doesn't exist."""
+    path = urlparse(base_uri).path
+    directory = '{}/twitch-dl{}'.format(tempfile.gettempdir(), path)
+    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
+    return directory
+
+
+def download(video_id, max_workers, format='mkv', start=None, end=None, keep=False, **kwargs):
+    video_id = _parse_video_id(video_id)

    if start and end and end <= start:
        raise ConsoleError("End time must be greater than start time")
@@ -196,28 +231,36 @@ def download(video_id, max_workers, format='mkv', start=None, end=None, **kwargs

    print_out("Fetching playlists...")
    playlists = twitch.get_playlists(video_id, access_token)
-    quality, playlist_url = _select_quality(playlists)
+    parsed = m3u8.loads(playlists)
+    selected = _select_quality(parsed.playlists)

    print_out("\nFetching playlist...")
-    base_url, filenames = twitch.get_playlist_urls(playlist_url, start, end)
+    response = requests.get(selected.uri)
+    response.raise_for_status()
+    playlist = m3u8.loads(response.text)

-    if not filenames:
-        raise ConsoleError("No vods matched, check your start and end times")
+    base_uri = re.sub("/[^/]+$", "/", selected.uri)
+    target_dir = _crete_temp_dir(base_uri)
+    filenames = list(_get_files(playlist, start, end))

-    # Create a temp dir to store downloads if it doesn't exist
-    directory = '{}/twitch-dl/{}/{}'.format(tempfile.gettempdir(), video_id, quality)
-    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
-    print_out("Download dir: {}".format(directory))
+    # Save playlists for debugging purposes
+    with open(target_dir + "playlists.m3u8", "w") as f:
+        f.write(playlists)
+    with open(target_dir + "playlist.m3u8", "w") as f:
+        f.write(response.text)

-    print_out("Downloading {} VODs using {} workers...".format(len(filenames), max_workers))
-    paths = _download_files(base_url, directory, filenames, max_workers)
+    print_out("\nDownloading {} VODs using {} workers to {}".format(
+        len(filenames), max_workers, target_dir))
+    _download_files(base_uri, target_dir, filenames, max_workers)

    print_out("\n\nJoining files...")
    target = _video_target_filename(video, format)
-    _join_vods(directory, paths, target)
+    _join_vods(target_dir, filenames, target)

-    print_out("\nDeleting vods...")
-    for path in paths:
-        os.unlink(path)
+    if keep:
+        print_out("\nTemporary files not deleted: {}".format(target_dir))
+    else:
+        print_out("\nDeleting temporary files...")
+        shutil.rmtree(target_dir)

-    print_out("\nDownloaded: {}".format(target))
+    print_out("Downloaded: {}".format(target))
--- a/twitchdl/console.py
+++ b/twitchdl/console.py
@@ -89,6 +89,11 @@ COMMANDS = [
                "type": str,
                "default": "mkv",
            }),
+            (["-k", "--keep"], {
+                "help": "Don't delete downloaded VODs and playlists after merging.",
+                "action": "store_true",
+                "default": False,
+            }),
        ],
    ),
 ]
--- a/twitchdl/parse.py
+++ b/twitchdl/parse.py
@@ -1,64 +0,0 @@
-import re
-
-from collections import OrderedDict
-from datetime import timedelta
-from twitchdl.exceptions import ConsoleError
-
-
-def parse_playlists(data):
-    media_pattern = re.compile(r'^#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="(?P<group>\w+)",NAME="(?P<name>\w+)"')
-
-    playlists = OrderedDict()
-    n = 1
-    name = None
-    for line in data.split():
-        match = re.match(media_pattern, line)
-        if match:
-            name = match.group('name')
-        elif line.startswith('http'):
-            playlists[n] = (name, line)
-            n += 1
-
-    return playlists
-
-
-def _get_files(playlist, start, end):
-    matches = re.findall(r"#EXTINF:(\d+)(\.\d+)?,.*?\s+(\d+.ts)", playlist)
-    vod_start = 0
-    for m in matches:
-        filename = m[2]
-        vod_duration = int(m[0])
-        vod_end = vod_start + vod_duration
-
-        # `vod_end > start` is used here becuase it's better to download a bit
-        # more than a bit less, similar for the end condition
-        start_condition = not start or vod_end > start
-        end_condition = not end or vod_start < end
-
-        if start_condition and end_condition:
-            yield filename
-
-        vod_start = vod_end
-
-
-def parse_playlist(url, playlist, start, end):
-    base_url = re.sub("/[^/]+$", "/{}", url)
-
-    match = re.search(r"#EXT-X-TWITCH-TOTAL-SECS:(\d+)(.\d+)?", playlist)
-    total_seconds = int(match.group(1))
-
-    # Now that video duration is known, validate start and end max values
-    if start and start > total_seconds:
-        raise ConsoleError("Start time {} greater than video duration {}".format(
-            timedelta(seconds=start),
-            timedelta(seconds=total_seconds)
-        ))
-
-    if end and end > total_seconds:
-        raise ConsoleError("End time {} greater than video duration {}".format(
-            timedelta(seconds=end),
-            timedelta(seconds=total_seconds)
-        ))
-
-    files = list(_get_files(playlist, start, end))
-    return base_url, files
--- a/twitchdl/twitch.py
+++ b/twitchdl/twitch.py
@@ -1,12 +1,15 @@
+"""
+Twitch API access.
+"""
+
 import requests

 from twitchdl import CLIENT_ID
 from twitchdl.exceptions import ConsoleError
-from twitchdl.parse import parse_playlists, parse_playlist


-def authenticated_get(url, params={}):
-    headers = {'Client-ID': CLIENT_ID}
+def authenticated_get(url, params={}, headers={}):
+    headers['Client-ID'] = CLIENT_ID

    response = requests.get(url, params, headers=headers)
    if response.status_code == 400:
@@ -18,22 +21,43 @@ def authenticated_get(url, params={}):
    return response


+def kraken_get(url, params={}, headers={}):
+    """
+    Add accept header required by kraken API v5.
+    see: https://discuss.dev.twitch.tv/t/change-in-access-to-deprecated-kraken-twitch-apis/22241
+    """
+    headers["Accept"] = "application/vnd.twitchtv.v5+json"
+    return authenticated_get(url, params, headers)
+
+
+def get_user(login):
+    """
+    https://dev.twitch.tv/docs/api/reference/#get-users
+    """
+    response = authenticated_get("https://api.twitch.tv/helix/users", {
+        "login": login
+    })
+
+    users = response.json()["data"]
+    return users[0] if users else None
+
+
 def get_video(video_id):
    """
    https://dev.twitch.tv/docs/v5/reference/videos#get-video
    """
    url = "https://api.twitch.tv/kraken/videos/%d" % video_id

-    return authenticated_get(url).json()
+    return kraken_get(url).json()


-def get_channel_videos(channel_name, limit, offset, sort):
+def get_channel_videos(channel_id, limit, offset, sort):
    """
    https://dev.twitch.tv/docs/v5/reference/channels#get-channel-videos
    """
-    url = "https://api.twitch.tv/kraken/channels/%s/videos" % channel_name
+    url = "https://api.twitch.tv/kraken/channels/{}/videos".format(channel_id)

-    return authenticated_get(url, {
+    return kraken_get(url, {
        "broadcast_type": "archive",
        "limit": limit,
        "offset": offset,
@@ -48,6 +72,9 @@ def get_access_token(video_id):


 def get_playlists(video_id, access_token):
+    """
+    For a given video return a playlist which contains possible video qualities.
+    """
    url = "http://usher.twitch.tv/vod/{}".format(video_id)

    response = requests.get(url, params={
@@ -57,15 +84,4 @@ def get_playlists(video_id, access_token):
        "player": "twitchweb",
    })
    response.raise_for_status()
-
-    data = response.content.decode('utf-8')
-
-    return parse_playlists(data)
-
-
-def get_playlist_urls(url, start, end):
-    response = requests.get(url)
-    response.raise_for_status()
-
-    data = response.content.decode('utf-8')
-    return parse_playlist(url, data, start, end)
+    return response.content.decode('utf-8')
Author	SHA1	Message	Date
Ivan Habunek	100aa53b84	Bump version	2019-08-23 13:08:57 +02:00
Ivan Habunek	e384f26444	Save playlists to temp dir for debugging	2019-08-23 13:08:35 +02:00
Ivan Habunek	000754af8c	Use m3u8 lib to parse playlists	2019-08-23 12:36:05 +02:00
Ivan Habunek	6813bb51b4	Add option not to delete downloaded VODs	2019-08-23 10:16:49 +02:00
Ivan Habunek	34b0592cf3	Fix usage of deprecated v3 API related #8	2019-08-23 09:03:33 +02:00