Compare commits


9 Commits
1.3.1 ... 1.5.0

SHA1 Message Date
cb00accd6a Better long description 2020-04-10 16:42:35 +02:00
64157c1ef6 Bump version 2020-04-10 16:34:37 +02:00
6a8da3b01b Don't print error messages when retrying
Only die if all retries fail.
2020-04-10 16:22:15 +02:00
e29d42e9ef Use Twitch's client ID
Fetching access token with own client ID no longer works.

Everybody else in the world seems to be doing it:
https://github.com/search?p=2&q=kimne78kx3ncx6brgo4mv6wki5h1ko&type=Code
2020-04-10 16:21:10 +02:00
100aa53b84 Bump version 2019-08-23 13:08:57 +02:00
e384f26444 Save playlists to temp dir for debugging 2019-08-23 13:08:35 +02:00
000754af8c Use m3u8 lib to parse playlists 2019-08-23 12:36:05 +02:00
6813bb51b4 Add option not to delete downloaded VODs 2019-08-23 10:16:49 +02:00
34b0592cf3 Fix usage of deprecated v3 API
related #8
2019-08-23 09:03:33 +02:00
8 changed files with 137 additions and 116 deletions


@@ -1,6 +1,19 @@
Twitch Downloader change log
============================
1.5.0 (2020-04-10)
------------------
* Fix video downloads after Twitch stopped accepting third-party client IDs for access token requests
* Don't print errors when retrying a download; only fail if all retries fail
1.4.0 (2019-08-23)
------------------
* Fix usage of deprecated v3 API
* Use m3u8 lib for parsing playlists
* Add `--keep` option to preserve downloaded VODs
1.3.1 (2019-08-13)
------------------


@@ -2,12 +2,18 @@
from setuptools import setup
long_description = """
Quickly download videos from twitch.tv.
Works similarly to youtube-dl but downloads multiple VODs in parallel, which
makes it faster.
"""
setup(
name='twitch-dl',
version='1.3.1',
version='1.5.0',
description='Twitch downloader',
long_description="Quickly download videos from Twitch",
long_description=long_description.strip(),
author='Ivan Habunek',
author_email='ivan@habunek.com',
url='https://github.com/ihabunek/twitch-dl/',
@@ -24,6 +30,7 @@ setup(
packages=['twitchdl'],
python_requires='>=3.5',
install_requires=[
"m3u8>=0.3.12,<0.4",
"requests>=2.13,<3.0",
],
entry_points={


@@ -1,3 +1,3 @@
__version__ = "1.3.1"
__version__ = "1.5.0"
CLIENT_ID = "miwy5zk23vh2he94san0bzj5ks1r0p"
CLIENT_ID = "kimne78kx3ncx6brgo4mv6wki5h1ko"


@@ -1,12 +1,16 @@
import m3u8
import os
import pathlib
import re
import requests
import shutil
import subprocess
import tempfile
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from functools import partial
from urllib.parse import urlparse
from twitchdl import twitch
from twitchdl.download import download_file
@@ -73,8 +77,13 @@ def _print_video(video):
def videos(channel_name, limit, offset, sort, **kwargs):
videos = twitch.get_channel_videos(channel_name, limit, offset, sort)
print_out("Looking up user...")
user = twitch.get_user(channel_name)
if not user:
raise ConsoleError("User {} not found.".format(channel_name))
print_out("Loading videos...")
videos = twitch.get_channel_videos(user["id"], limit, offset, sort)
count = len(videos['videos'])
if not count:
print_out("No videos found")
@@ -91,13 +100,14 @@ def videos(channel_name, limit, offset, sort, **kwargs):
def _select_quality(playlists):
print_out("\nAvailable qualities:")
for no, v in playlists.items():
print_out("{}) {}".format(no, v[0]))
for n, p in enumerate(playlists):
name = p.media[0].name if p.media else ""
resolution = "x".join(str(r) for r in p.stream_info.resolution)
print_out("{}) {} [{}]".format(n + 1, name, resolution))
keys = list(playlists.keys())
no = read_int("Choose quality", min=min(keys), max=max(keys), default=keys[0])
no = read_int("Choose quality", min=1, max=len(playlists) + 1, default=1)
return playlists[no]
return playlists[no - 1]
def _print_progress(futures):
@@ -122,7 +132,7 @@ def _print_progress(futures):
def _download_files(base_url, directory, filenames, max_workers):
urls = [base_url.format(f) for f in filenames]
urls = [base_url + f for f in filenames]
paths = ["/".join([directory, f]) for f in filenames]
partials = (partial(download_file, url, path) for url, path in zip(urls, paths))
@@ -167,7 +177,7 @@ def _video_target_filename(video, format):
return name + "." + format
def parse_video_id(video_id):
def _parse_video_id(video_id):
"""This can be either a integer ID or an URL to the video on twitch."""
if re.search(r"^\d+$", video_id):
return int(video_id)
@@ -179,8 +189,33 @@ def parse_video_id(video_id):
raise ConsoleError("Invalid video ID given, expected integer ID or Twitch URL")
def download(video_id, max_workers, format='mkv', start=None, end=None, **kwargs):
video_id = parse_video_id(video_id)
def _get_files(playlist, start, end):
"""Extract files for download from playlist."""
vod_start = 0
for segment in playlist.segments:
vod_end = vod_start + segment.duration
# `vod_end > start` is used here because it's better to download a bit
# more than a bit less; similarly for the end condition
start_condition = not start or vod_end > start
end_condition = not end or vod_start < end
if start_condition and end_condition:
yield segment.uri
vod_start = vod_end
def _create_temp_dir(base_uri):
"""Create a temp dir to store downloads if it doesn't exist."""
path = urlparse(base_uri).path
directory = '{}/twitch-dl{}'.format(tempfile.gettempdir(), path)
pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
return directory
def download(video_id, max_workers, format='mkv', start=None, end=None, keep=False, **kwargs):
video_id = _parse_video_id(video_id)
if start and end and end <= start:
raise ConsoleError("End time must be greater than start time")
@@ -196,28 +231,36 @@ def download(video_id, max_workers, format='mkv', start=None, end=None, **kwargs
print_out("Fetching playlists...")
playlists = twitch.get_playlists(video_id, access_token)
quality, playlist_url = _select_quality(playlists)
parsed = m3u8.loads(playlists)
selected = _select_quality(parsed.playlists)
print_out("\nFetching playlist...")
base_url, filenames = twitch.get_playlist_urls(playlist_url, start, end)
response = requests.get(selected.uri)
response.raise_for_status()
playlist = m3u8.loads(response.text)
if not filenames:
raise ConsoleError("No vods matched, check your start and end times")
base_uri = re.sub("/[^/]+$", "/", selected.uri)
target_dir = _create_temp_dir(base_uri)
filenames = list(_get_files(playlist, start, end))
# Create a temp dir to store downloads if it doesn't exist
directory = '{}/twitch-dl/{}/{}'.format(tempfile.gettempdir(), video_id, quality)
pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
print_out("Download dir: {}".format(directory))
# Save playlists for debugging purposes
with open(target_dir + "playlists.m3u8", "w") as f:
f.write(playlists)
with open(target_dir + "playlist.m3u8", "w") as f:
f.write(response.text)
print_out("Downloading {} VODs using {} workers...".format(len(filenames), max_workers))
paths = _download_files(base_url, directory, filenames, max_workers)
print_out("\nDownloading {} VODs using {} workers to {}".format(
len(filenames), max_workers, target_dir))
_download_files(base_uri, target_dir, filenames, max_workers)
print_out("\n\nJoining files...")
target = _video_target_filename(video, format)
_join_vods(directory, paths, target)
_join_vods(target_dir, filenames, target)
print_out("\nDeleting vods...")
for path in paths:
os.unlink(path)
if keep:
print_out("\nTemporary files not deleted: {}".format(target_dir))
else:
print_out("\nDeleting temporary files...")
shutil.rmtree(target_dir)
print_out("\nDownloaded: {}".format(target))
print_out("Downloaded: {}".format(target))


@@ -89,6 +89,11 @@ COMMANDS = [
"type": str,
"default": "mkv",
}),
(["-k", "--keep"], {
"help": "Don't delete downloaded VODs and playlists after merging.",
"action": "store_true",
"default": False,
}),
],
),
]
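
Each entry in this options list is an (args, kwargs) pair. Assuming the CLI builds its parser by unpacking these pairs into argparse calls (that plumbing is outside this diff), the new `--keep` entry would amount to roughly this sketch:

```python
import argparse

# Hypothetical stand-in for the parser setup that consumes the COMMANDS table.
parser = argparse.ArgumentParser(prog="twitch-dl download")
parser.add_argument(
    "-k", "--keep",
    help="Don't delete downloaded VODs and playlists after merging.",
    action="store_true",
    default=False,
)

args = parser.parse_args(["--keep"])
print(args.keep)  # True; False when the flag is omitted
```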


@@ -6,6 +6,7 @@ from requests.exceptions import RequestException
CHUNK_SIZE = 1024
CONNECT_TIMEOUT = 5
RETRY_COUNT = 5
class DownloadFailed(Exception):
@@ -25,14 +26,14 @@ def _download(url, path):
return size
def download_file(url, path, retries=3):
def download_file(url, path, retries=RETRY_COUNT):
if os.path.exists(path):
return 0
for _ in range(retries):
try:
return _download(url, path)
except RequestException as e:
print("Download failed: {}".format(e))
except RequestException:
pass
raise DownloadFailed(":(")
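
The effect of this change is that individual failed attempts stay silent and the caller only sees a `DownloadFailed` once all retries are exhausted. A minimal caller-side sketch (the URL and path are placeholders):

```python
from twitchdl.download import DownloadFailed, download_file

try:
    # Retries happen inside download_file without printing anything.
    size = download_file("https://example.com/vod/0.ts", "/tmp/0.ts")
    print("Downloaded {} bytes".format(size))
except DownloadFailed:
    print("Giving up, all retries failed")
```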


@@ -1,64 +0,0 @@
import re
from collections import OrderedDict
from datetime import timedelta
from twitchdl.exceptions import ConsoleError
def parse_playlists(data):
media_pattern = re.compile(r'^#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="(?P<group>\w+)",NAME="(?P<name>\w+)"')
playlists = OrderedDict()
n = 1
name = None
for line in data.split():
match = re.match(media_pattern, line)
if match:
name = match.group('name')
elif line.startswith('http'):
playlists[n] = (name, line)
n += 1
return playlists
def _get_files(playlist, start, end):
matches = re.findall(r"#EXTINF:(\d+)(\.\d+)?,.*?\s+(\d+.ts)", playlist)
vod_start = 0
for m in matches:
filename = m[2]
vod_duration = int(m[0])
vod_end = vod_start + vod_duration
# `vod_end > start` is used here because it's better to download a bit
# more than a bit less; similarly for the end condition
start_condition = not start or vod_end > start
end_condition = not end or vod_start < end
if start_condition and end_condition:
yield filename
vod_start = vod_end
def parse_playlist(url, playlist, start, end):
base_url = re.sub("/[^/]+$", "/{}", url)
match = re.search(r"#EXT-X-TWITCH-TOTAL-SECS:(\d+)(.\d+)?", playlist)
total_seconds = int(match.group(1))
# Now that video duration is known, validate start and end max values
if start and start > total_seconds:
raise ConsoleError("Start time {} greater than video duration {}".format(
timedelta(seconds=start),
timedelta(seconds=total_seconds)
))
if end and end > total_seconds:
raise ConsoleError("End time {} greater than video duration {}".format(
timedelta(seconds=end),
timedelta(seconds=total_seconds)
))
files = list(_get_files(playlist, start, end))
return base_url, files


@@ -1,12 +1,15 @@
"""
Twitch API access.
"""
import requests
from twitchdl import CLIENT_ID
from twitchdl.exceptions import ConsoleError
from twitchdl.parse import parse_playlists, parse_playlist
def authenticated_get(url, params={}):
headers = {'Client-ID': CLIENT_ID}
def authenticated_get(url, params={}, headers={}):
headers['Client-ID'] = CLIENT_ID
response = requests.get(url, params, headers=headers)
if response.status_code == 400:
@@ -18,22 +21,43 @@ def authenticated_get(url, params={}):
return response
def kraken_get(url, params={}, headers={}):
"""
Add accept header required by kraken API v5.
see: https://discuss.dev.twitch.tv/t/change-in-access-to-deprecated-kraken-twitch-apis/22241
"""
headers["Accept"] = "application/vnd.twitchtv.v5+json"
return authenticated_get(url, params, headers)
def get_user(login):
"""
https://dev.twitch.tv/docs/api/reference/#get-users
"""
response = authenticated_get("https://api.twitch.tv/helix/users", {
"login": login
})
users = response.json()["data"]
return users[0] if users else None
def get_video(video_id):
"""
https://dev.twitch.tv/docs/v5/reference/videos#get-video
"""
url = "https://api.twitch.tv/kraken/videos/%d" % video_id
return authenticated_get(url).json()
return kraken_get(url).json()
def get_channel_videos(channel_name, limit, offset, sort):
def get_channel_videos(channel_id, limit, offset, sort):
"""
https://dev.twitch.tv/docs/v5/reference/channels#get-channel-videos
"""
url = "https://api.twitch.tv/kraken/channels/%s/videos" % channel_name
url = "https://api.twitch.tv/kraken/channels/{}/videos".format(channel_id)
return authenticated_get(url, {
return kraken_get(url, {
"broadcast_type": "archive",
"limit": limit,
"offset": offset,
@@ -48,6 +72,9 @@ def get_access_token(video_id):
def get_playlists(video_id, access_token):
"""
For a given video return a playlist which contains possible video qualities.
"""
url = "http://usher.twitch.tv/vod/{}".format(video_id)
response = requests.get(url, params={
@@ -57,15 +84,4 @@ def get_playlists(video_id, access_token):
"player": "twitchweb",
})
response.raise_for_status()
data = response.content.decode('utf-8')
return parse_playlists(data)
def get_playlist_urls(url, start, end):
response = requests.get(url)
response.raise_for_status()
data = response.content.decode('utf-8')
return parse_playlist(url, data, start, end)
return response.content.decode('utf-8')
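
Tying the twitch.py changes together: listing a channel's videos now means resolving the login via Helix first, then passing the numeric ID to the Kraken videos endpoint, with `kraken_get` adding the v5 Accept header behind the scenes. A minimal usage sketch; the channel name is a placeholder and the calls hit the live API:

```python
from twitchdl import twitch

user = twitch.get_user("somechannel")  # placeholder login name
if not user:
    raise SystemExit("User not found")

# Kraken wants the numeric channel ID rather than the login name.
videos = twitch.get_channel_videos(user["id"], limit=10, offset=0, sort="time")
for video in videos["videos"]:
    print(video["title"])
```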