Allow limiting download by start and end time

2024-08-30 18:32:25 +00:00 · 2019-06-06 11:06:33 +02:00
parent 357379a6a1
commit e9bd706194
4 changed files with 79 additions and 10 deletions
--- a/twitchdl/commands.py
+++ b/twitchdl/commands.py
@ -170,9 +170,12 @@ def parse_video_id(video_id):
    raise ConsoleError("Invalid video ID given, expected integer ID or Twitch URL")


-def download(video_id, max_workers, format='mkv', **kwargs):
+def download(video_id, max_workers, format='mkv', start=None, end=None, **kwargs):
    video_id = parse_video_id(video_id)

+    if start and end and end <= start:
+        raise ConsoleError("End time must be greater than start time")
+
    print_out("Looking up video...")
    video = twitch.get_video(video_id)

@ -187,14 +190,17 @@ def download(video_id, max_workers, format='mkv', **kwargs):
    quality, playlist_url = _select_quality(playlists)

    print_out("\nFetching playlist...")
-    base_url, filenames = twitch.get_playlist_urls(playlist_url)
+    base_url, filenames = twitch.get_playlist_urls(playlist_url, start, end)
+
+    if not filenames:
+        raise ConsoleError("No vods matched, check your start and end times")

    # Create a temp dir to store downloads if it doesn't exist
    directory = '{}/twitch-dl/{}/{}'.format(tempfile.gettempdir(), video_id, quality)
    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
    print_out("Download dir: {}".format(directory))

-    print_out("Downloading VODs with {} workers...".format(max_workers))
+    print_out("Downloading {} VODs using {} workers...".format(len(filenames), max_workers))
    paths = _download_files(base_url, directory, filenames, max_workers)

    print_out("\n\nJoining files...")
--- a/twitchdl/console.py
+++ b/twitchdl/console.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-from argparse import ArgumentParser
+from argparse import ArgumentParser, ArgumentTypeError
 from collections import namedtuple

 from twitchdl.exceptions import ConsoleError
@ -12,6 +12,24 @@ Command = namedtuple("Command", ["name", "description", "arguments"])

 CLIENT_WEBSITE = 'https://github.com/ihabunek/twitch-dl'

+
+def time(value):
+    """Parse a time string (hh:mm or hh:mm:ss) to number of seconds."""
+    parts = [int(p) for p in value.split(":")]  # a ValueError here is reported by argparse itself
+
+    if not 2 <= len(parts) <= 3:
+        raise ArgumentTypeError("invalid time {!r}, expected hh:mm or hh:mm:ss".format(value))
+
+    hours = parts[0]
+    minutes = parts[1]
+    seconds = parts[2] if len(parts) > 2 else 0
+
+    if hours < 0 or not (0 <= minutes <= 59) or not (0 <= seconds <= 59):
+        raise ArgumentTypeError("invalid time {!r}, hours must be >= 0 and minutes/seconds 0-59".format(value))
+
+    return hours * 3600 + minutes * 60 + seconds
+
+
 COMMANDS = [
    Command(
        name="videos",
@ -36,6 +54,16 @@ COMMANDS = [
                "type": int,
                "default": 20,
            }),
+            (["-s", "--start"], {
+                "help": "Download video from this time (hh:mm or hh:mm:ss)",
+                "type": time,
+                "default": None,
+            }),
+            (["-e", "--end"], {
+                "help": "Download video up to this time (hh:mm or hh:mm:ss)",
+                "type": time,
+                "default": None,
+            }),
        ],
    ),
 ]
--- a/twitchdl/parse.py
+++ b/twitchdl/parse.py
@ -1,6 +1,8 @@
 import re

 from collections import OrderedDict
+from datetime import timedelta
+from twitchdl.exceptions import ConsoleError


 def parse_playlists(data):
@ -20,9 +22,43 @@ def parse_playlists(data):
    return playlists


-def parse_playlist(url, data):
+def _get_files(playlist, start, end):  # yields segment filenames overlapping start..end (seconds; falsy = no limit)
+    matches = re.findall(r"#EXTINF:(\d+)(\.\d+)?,.*?\s+(\d+\.ts)", playlist)
+    vod_start = 0
+    for m in matches:
+        filename = m[2]
+        vod_duration = float(m[0] + m[1])  # keep the fraction; truncating it shifts every later segment
+        vod_end = vod_start + vod_duration
+
+        # `vod_end > start` is used here because it's better to download a bit
+        # more than a bit less, similar for the end condition
+        start_condition = not start or vod_end > start
+        end_condition = not end or vod_start < end
+
+        if start_condition and end_condition:
+            yield filename
+
+        vod_start = vod_end
+
+
+def parse_playlist(url, playlist, start=None, end=None):  # returns (base_url template, filenames within start..end)
    base_url = re.sub("/[^/]+$", "/{}", url)

-    filenames = [line for line in data.split() if re.match(r"\d+\.ts", line)]
+    match = re.search(r"#EXT-X-TWITCH-TOTAL-SECS:(\d+)(\.\d+)?", playlist)
+    total_seconds = int(match.group(1)) if match else float("inf")  # tag missing: duration unknown, max checks pass

-    return base_url, filenames
+    # When the video duration is known, validate start and end max values
+    if start and start > total_seconds:
+        raise ConsoleError("Start time {} greater than video duration {}".format(
+            timedelta(seconds=start),
+            timedelta(seconds=total_seconds)
+        ))
+
+    if end and end > total_seconds:
+        raise ConsoleError("End time {} greater than video duration {}".format(
+            timedelta(seconds=end),
+            timedelta(seconds=total_seconds)
+        ))
+
+    files = list(_get_files(playlist, start, end))
+    return base_url, files
--- a/twitchdl/twitch.py
+++ b/twitchdl/twitch.py
@ -56,10 +56,9 @@ def get_playlists(video_id, access_token):
    return parse_playlists(data)


-def get_playlist_urls(url):
+def get_playlist_urls(url, start=None, end=None):  # start/end are optional limits passed on to parse_playlist
    response = requests.get(url)
    response.raise_for_status()

    data = response.content.decode('utf-8')
-
-    return parse_playlist(url, data)
+    return parse_playlist(url, data, start, end)