mirror of
https://github.com/ihabunek/twitch-dl
synced 2024-08-30 18:32:25 +00:00
Use m3u8 lib to parse playlists
This commit is contained in:
1
setup.py
1
setup.py
@ -24,6 +24,7 @@ setup(
|
|||||||
packages=['twitchdl'],
|
packages=['twitchdl'],
|
||||||
python_requires='>=3.5',
|
python_requires='>=3.5',
|
||||||
install_requires=[
|
install_requires=[
|
||||||
|
"m3u8>=0.3.12,<0.4",
|
||||||
"requests>=2.13,<3.0",
|
"requests>=2.13,<3.0",
|
||||||
],
|
],
|
||||||
entry_points={
|
entry_points={
|
||||||
|
@ -1,12 +1,16 @@
|
|||||||
|
import m3u8
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
import re
|
import re
|
||||||
|
import requests
|
||||||
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
from datetime import datetime
|
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from datetime import datetime
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from twitchdl import twitch
|
from twitchdl import twitch
|
||||||
from twitchdl.download import download_file
|
from twitchdl.download import download_file
|
||||||
@ -96,13 +100,14 @@ def videos(channel_name, limit, offset, sort, **kwargs):
|
|||||||
|
|
||||||
def _select_quality(playlists):
|
def _select_quality(playlists):
|
||||||
print_out("\nAvailable qualities:")
|
print_out("\nAvailable qualities:")
|
||||||
for no, v in playlists.items():
|
for n, p in enumerate(playlists):
|
||||||
print_out("{}) {}".format(no, v[0]))
|
name = p.media[0].name if p.media else ""
|
||||||
|
resolution = "x".join(str(r) for r in p.stream_info.resolution)
|
||||||
|
print_out("{}) {} [{}]".format(n + 1, name, resolution))
|
||||||
|
|
||||||
keys = list(playlists.keys())
|
no = read_int("Choose quality", min=1, max=len(playlists) + 1, default=1)
|
||||||
no = read_int("Choose quality", min=min(keys), max=max(keys), default=keys[0])
|
|
||||||
|
|
||||||
return playlists[no]
|
return playlists[no - 1]
|
||||||
|
|
||||||
|
|
||||||
def _print_progress(futures):
|
def _print_progress(futures):
|
||||||
@ -127,7 +132,7 @@ def _print_progress(futures):
|
|||||||
|
|
||||||
|
|
||||||
def _download_files(base_url, directory, filenames, max_workers):
|
def _download_files(base_url, directory, filenames, max_workers):
|
||||||
urls = [base_url.format(f) for f in filenames]
|
urls = [base_url + f for f in filenames]
|
||||||
paths = ["/".join([directory, f]) for f in filenames]
|
paths = ["/".join([directory, f]) for f in filenames]
|
||||||
partials = (partial(download_file, url, path) for url, path in zip(urls, paths))
|
partials = (partial(download_file, url, path) for url, path in zip(urls, paths))
|
||||||
|
|
||||||
@ -172,7 +177,7 @@ def _video_target_filename(video, format):
|
|||||||
return name + "." + format
|
return name + "." + format
|
||||||
|
|
||||||
|
|
||||||
def parse_video_id(video_id):
|
def _parse_video_id(video_id):
|
||||||
"""This can be either a integer ID or an URL to the video on twitch."""
|
"""This can be either a integer ID or an URL to the video on twitch."""
|
||||||
if re.search(r"^\d+$", video_id):
|
if re.search(r"^\d+$", video_id):
|
||||||
return int(video_id)
|
return int(video_id)
|
||||||
@ -184,8 +189,33 @@ def parse_video_id(video_id):
|
|||||||
raise ConsoleError("Invalid video ID given, expected integer ID or Twitch URL")
|
raise ConsoleError("Invalid video ID given, expected integer ID or Twitch URL")
|
||||||
|
|
||||||
|
|
||||||
|
def _get_files(playlist, start, end):
|
||||||
|
"""Extract files for download from playlist."""
|
||||||
|
vod_start = 0
|
||||||
|
for segment in playlist.segments:
|
||||||
|
vod_end = vod_start + segment.duration
|
||||||
|
|
||||||
|
# `vod_end > start` is used here because it's better to download a bit
|
||||||
|
# more than a bit less, similar for the end condition
|
||||||
|
start_condition = not start or vod_end > start
|
||||||
|
end_condition = not end or vod_start < end
|
||||||
|
|
||||||
|
if start_condition and end_condition:
|
||||||
|
yield segment.uri
|
||||||
|
|
||||||
|
vod_start = vod_end
|
||||||
|
|
||||||
|
|
||||||
|
def _crete_temp_dir(base_uri):
|
||||||
|
"""Create a temp dir to store downloads if it doesn't exist."""
|
||||||
|
path = urlparse(base_uri).path
|
||||||
|
directory = '{}/twitch-dl{}'.format(tempfile.gettempdir(), path)
|
||||||
|
pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
|
||||||
|
return directory
|
||||||
|
|
||||||
|
|
||||||
def download(video_id, max_workers, format='mkv', start=None, end=None, keep=False, **kwargs):
|
def download(video_id, max_workers, format='mkv', start=None, end=None, keep=False, **kwargs):
|
||||||
video_id = parse_video_id(video_id)
|
video_id = _parse_video_id(video_id)
|
||||||
|
|
||||||
if start and end and end <= start:
|
if start and end and end <= start:
|
||||||
raise ConsoleError("End time must be greater than start time")
|
raise ConsoleError("End time must be greater than start time")
|
||||||
@ -199,33 +229,34 @@ def download(video_id, max_workers, format='mkv', start=None, end=None, keep=Fal
|
|||||||
print_out("Fetching access token...")
|
print_out("Fetching access token...")
|
||||||
access_token = twitch.get_access_token(video_id)
|
access_token = twitch.get_access_token(video_id)
|
||||||
|
|
||||||
|
# TODO: save playlists for debugging purposes
|
||||||
|
|
||||||
print_out("Fetching playlists...")
|
print_out("Fetching playlists...")
|
||||||
playlists = twitch.get_playlists(video_id, access_token)
|
playlists = twitch.get_playlists(video_id, access_token)
|
||||||
quality, playlist_url = _select_quality(playlists)
|
playlists = m3u8.loads(playlists)
|
||||||
|
selected = _select_quality(playlists.playlists)
|
||||||
|
|
||||||
print_out("\nFetching playlist...")
|
print_out("\nFetching playlist...")
|
||||||
base_url, filenames = twitch.get_playlist_urls(playlist_url, start, end)
|
response = requests.get(selected.uri)
|
||||||
|
response.raise_for_status()
|
||||||
|
playlist = m3u8.loads(response.text)
|
||||||
|
|
||||||
if not filenames:
|
base_uri = re.sub("/[^/]+$", "/", selected.uri)
|
||||||
raise ConsoleError("No vods matched, check your start and end times")
|
target_dir = _crete_temp_dir(base_uri)
|
||||||
|
filenames = list(_get_files(playlist, start, end))
|
||||||
|
|
||||||
# Create a temp dir to store downloads if it doesn't exist
|
print_out("\nDownloading {} VODs using {} workers to {}".format(
|
||||||
directory = '{}/twitch-dl/{}/{}'.format(tempfile.gettempdir(), video_id, quality)
|
len(filenames), max_workers, target_dir))
|
||||||
pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
|
_download_files(base_uri, target_dir, filenames, max_workers)
|
||||||
print_out("Download dir: {}".format(directory))
|
|
||||||
|
|
||||||
print_out("Downloading {} VODs using {} workers...".format(len(filenames), max_workers))
|
|
||||||
paths = _download_files(base_url, directory, filenames, max_workers)
|
|
||||||
|
|
||||||
print_out("\n\nJoining files...")
|
print_out("\n\nJoining files...")
|
||||||
target = _video_target_filename(video, format)
|
target = _video_target_filename(video, format)
|
||||||
_join_vods(directory, paths, target)
|
_join_vods(target_dir, filenames, target)
|
||||||
|
|
||||||
if keep:
|
if keep:
|
||||||
print_out("\nTemporary files not deleted: {}".format(directory))
|
print_out("\nTemporary files not deleted: {}".format(target_dir))
|
||||||
else:
|
else:
|
||||||
print_out("\nDeleting vods...")
|
print_out("\nDeleting temporary files...")
|
||||||
for path in paths:
|
shutil.rmtree(target_dir)
|
||||||
os.unlink(path)
|
|
||||||
|
|
||||||
print_out("Downloaded: {}".format(target))
|
print_out("Downloaded: {}".format(target))
|
||||||
|
@ -1,64 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from collections import OrderedDict
|
|
||||||
from datetime import timedelta
|
|
||||||
from twitchdl.exceptions import ConsoleError
|
|
||||||
|
|
||||||
|
|
||||||
def parse_playlists(data):
|
|
||||||
media_pattern = re.compile(r'^#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="(?P<group>\w+)",NAME="(?P<name>\w+)"')
|
|
||||||
|
|
||||||
playlists = OrderedDict()
|
|
||||||
n = 1
|
|
||||||
name = None
|
|
||||||
for line in data.split():
|
|
||||||
match = re.match(media_pattern, line)
|
|
||||||
if match:
|
|
||||||
name = match.group('name')
|
|
||||||
elif line.startswith('http'):
|
|
||||||
playlists[n] = (name, line)
|
|
||||||
n += 1
|
|
||||||
|
|
||||||
return playlists
|
|
||||||
|
|
||||||
|
|
||||||
def _get_files(playlist, start, end):
|
|
||||||
matches = re.findall(r"#EXTINF:(\d+)(\.\d+)?,.*?\s+(\d+.ts)", playlist)
|
|
||||||
vod_start = 0
|
|
||||||
for m in matches:
|
|
||||||
filename = m[2]
|
|
||||||
vod_duration = int(m[0])
|
|
||||||
vod_end = vod_start + vod_duration
|
|
||||||
|
|
||||||
# `vod_end > start` is used here because it's better to download a bit
|
|
||||||
# more than a bit less, similar for the end condition
|
|
||||||
start_condition = not start or vod_end > start
|
|
||||||
end_condition = not end or vod_start < end
|
|
||||||
|
|
||||||
if start_condition and end_condition:
|
|
||||||
yield filename
|
|
||||||
|
|
||||||
vod_start = vod_end
|
|
||||||
|
|
||||||
|
|
||||||
def parse_playlist(url, playlist, start, end):
|
|
||||||
base_url = re.sub("/[^/]+$", "/{}", url)
|
|
||||||
|
|
||||||
match = re.search(r"#EXT-X-TWITCH-TOTAL-SECS:(\d+)(.\d+)?", playlist)
|
|
||||||
total_seconds = int(match.group(1))
|
|
||||||
|
|
||||||
# Now that video duration is known, validate start and end max values
|
|
||||||
if start and start > total_seconds:
|
|
||||||
raise ConsoleError("Start time {} greater than video duration {}".format(
|
|
||||||
timedelta(seconds=start),
|
|
||||||
timedelta(seconds=total_seconds)
|
|
||||||
))
|
|
||||||
|
|
||||||
if end and end > total_seconds:
|
|
||||||
raise ConsoleError("End time {} greater than video duration {}".format(
|
|
||||||
timedelta(seconds=end),
|
|
||||||
timedelta(seconds=total_seconds)
|
|
||||||
))
|
|
||||||
|
|
||||||
files = list(_get_files(playlist, start, end))
|
|
||||||
return base_url, files
|
|
@ -6,7 +6,6 @@ import requests
|
|||||||
|
|
||||||
from twitchdl import CLIENT_ID
|
from twitchdl import CLIENT_ID
|
||||||
from twitchdl.exceptions import ConsoleError
|
from twitchdl.exceptions import ConsoleError
|
||||||
from twitchdl.parse import parse_playlists, parse_playlist
|
|
||||||
|
|
||||||
|
|
||||||
def authenticated_get(url, params={}, headers={}):
|
def authenticated_get(url, params={}, headers={}):
|
||||||
@ -73,6 +72,9 @@ def get_access_token(video_id):
|
|||||||
|
|
||||||
|
|
||||||
def get_playlists(video_id, access_token):
|
def get_playlists(video_id, access_token):
|
||||||
|
"""
|
||||||
|
For a given video return a playlist which contains possible video qualities.
|
||||||
|
"""
|
||||||
url = "http://usher.twitch.tv/vod/{}".format(video_id)
|
url = "http://usher.twitch.tv/vod/{}".format(video_id)
|
||||||
|
|
||||||
response = requests.get(url, params={
|
response = requests.get(url, params={
|
||||||
@ -82,15 +84,4 @@ def get_playlists(video_id, access_token):
|
|||||||
"player": "twitchweb",
|
"player": "twitchweb",
|
||||||
})
|
})
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
return response.content.decode('utf-8')
|
||||||
data = response.content.decode('utf-8')
|
|
||||||
|
|
||||||
return parse_playlists(data)
|
|
||||||
|
|
||||||
|
|
||||||
def get_playlist_urls(url, start, end):
|
|
||||||
response = requests.get(url)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
data = response.content.decode('utf-8')
|
|
||||||
return parse_playlist(url, data, start, end)
|
|
||||||
|
Reference in New Issue
Block a user