From bf2a4558f4b75eca2407f9a3047360d60f1755e2 Mon Sep 17 00:00:00 2001 From: Ivan Habunek Date: Thu, 3 Sep 2020 10:33:13 +0200 Subject: [PATCH] Improve VOD joining logic Instead of creating a file list, create a modified playlist which references the downloaded files, and give this as input to ffmpeg. Since ffmpeg handles M3U8 playlists, this means options such as `EXT-X-BYTERANGE` are supported. issue #35 --- twitchdl/commands.py | 36 +++++++++++++++++++----------------- twitchdl/download.py | 15 ++++++++++----- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/twitchdl/commands.py b/twitchdl/commands.py index 0cc0d5b..134f3d0 100644 --- a/twitchdl/commands.py +++ b/twitchdl/commands.py @@ -103,17 +103,10 @@ def _select_playlist_interactive(playlists): return uri -def _join_vods(directory, file_paths, target): - input_path = "{}files.txt".format(directory) - - with open(input_path, 'w') as f: - for path in file_paths: - f.write('file {}\n'.format(os.path.basename(path))) - +def _join_vods(playlist_path, target): command = [ "ffmpeg", - "-f", "concat", - "-i", input_path, + "-i", playlist_path, "-c", "copy", target, "-stats", @@ -140,8 +133,9 @@ def _video_target_filename(video, format): return name + "." + format -def _get_files(playlist, start, end): - """Extract files for download from playlist.""" +def _get_vod_paths(playlist, start, end): + """Extract unique VOD paths for download from playlist.""" + files = [] vod_start = 0 for segment in playlist.segments: vod_end = vod_start + segment.duration @@ -151,11 +145,13 @@ def _get_files(playlist, start, end): start_condition = not start or vod_end > start end_condition = not end or vod_start < end - if start_condition and end_condition: - yield segment.uri + if start_condition and end_condition and segment.uri not in files: + files.append(segment.uri) vod_start = vod_end + return files + def _crete_temp_dir(base_uri): """Create a temp dir to store downloads if it doesn't exist.""" @@ -275,7 +271,7 @@ def _download_video(video_id, args): base_uri = re.sub("/[^/]+$", "/", playlist_uri) target_dir = _crete_temp_dir(base_uri) - filenames = list(_get_files(playlist, args.start, args.end)) + vod_paths = _get_vod_paths(playlist, args.start, args.end) # Save playlists for debugging purposes with open(target_dir + "playlists.m3u8", "w") as f: @@ -284,12 +280,18 @@ def _download_video(video_id, args): f.write(response.text) print_out("\nDownloading {} VODs using {} workers to {}".format( - len(filenames), args.max_workers, target_dir)) - file_paths = download_files(base_uri, target_dir, filenames, args.max_workers) + len(vod_paths), args.max_workers, target_dir)) + path_map = download_files(base_uri, target_dir, vod_paths, args.max_workers) + + # Make a modified playlist which references downloaded VODs + for segment in playlist.segments: + segment.uri = path_map[segment.uri] + playlist_path = target_dir + "playlist_downloaded.m3u8" + playlist.dump(playlist_path) print_out("\n\nJoining files...") target = _video_target_filename(video, args.format) - _join_vods(target_dir, file_paths, target) + _join_vods(playlist_path, target) if args.keep: print_out("\nTemporary files not deleted: {}".format(target_dir)) diff --git a/twitchdl/download.py b/twitchdl/download.py index 37b5166..1d54e96 100644 --- a/twitchdl/download.py +++ b/twitchdl/download.py @@ -1,6 +1,7 @@ import os import requests +from collections import OrderedDict from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime from functools import partial @@ -75,13 +76,17 @@ def _print_progress(futures): print_out("\r" + msg.ljust(max_msg_size), end="") -def download_files(base_url, directory, filenames, max_workers): - urls = [base_url + f for f in filenames] - paths = ["{}{:05d}.vod".format(directory, k) for k, _ in enumerate(filenames)] - partials = (partial(download_file, url, path) for url, path in zip(urls, paths)) +def download_files(base_url, target_dir, vod_paths, max_workers): + """ + Downloads a list of VODs defined by a common `base_url` and a list of + `vod_paths`, returning a dict which maps the paths to the downloaded files. + """ + urls = [base_url + path for path in vod_paths] + targets = ["{}{:05d}.ts".format(target_dir, k) for k, _ in enumerate(vod_paths)] + partials = (partial(download_file, url, path) for url, path in zip(urls, targets)) with ThreadPoolExecutor(max_workers=max_workers) as executor: futures = [executor.submit(fn) for fn in partials] _print_progress(futures) - return paths + return OrderedDict(zip(vod_paths, targets))