twitch-dl/twitchdl/parse.py
2019-06-06 11:06:33 +02:00

65 lines
1.9 KiB
Python

import re
from collections import OrderedDict
from datetime import timedelta
from twitchdl.exceptions import ConsoleError
def parse_playlists(data):
media_pattern = re.compile(r'^#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="(?P<group>\w+)",NAME="(?P<name>\w+)"')
playlists = OrderedDict()
n = 1
name = None
for line in data.split():
match = re.match(media_pattern, line)
if match:
name = match.group('name')
elif line.startswith('http'):
playlists[n] = (name, line)
n += 1
return playlists
def _get_files(playlist, start, end):
matches = re.findall(r"#EXTINF:(\d+)(\.\d+)?,.*?\s+(\d+.ts)", playlist)
vod_start = 0
for m in matches:
filename = m[2]
vod_duration = int(m[0])
vod_end = vod_start + vod_duration
# `vod_end > start` is used here becuase it's better to download a bit
# more than a bit less, similar for the end condition
start_condition = not start or vod_end > start
end_condition = not end or vod_start < end
if start_condition and end_condition:
yield filename
vod_start = vod_end
def parse_playlist(url, playlist, start, end):
base_url = re.sub("/[^/]+$", "/{}", url)
match = re.search(r"#EXT-X-TWITCH-TOTAL-SECS:(\d+)(.\d+)?", playlist)
total_seconds = int(match.group(1))
# Now that video duration is known, validate start and end max values
if start and start > total_seconds:
raise ConsoleError("Start time {} greater than video duration {}".format(
timedelta(seconds=start),
timedelta(seconds=total_seconds)
))
if end and end > total_seconds:
raise ConsoleError("End time {} greater than video duration {}".format(
timedelta(seconds=end),
timedelta(seconds=total_seconds)
))
files = list(_get_files(playlist, start, end))
return base_url, files