From 2171a9e08e23fe6d0483eb40e8a9026d1057a2fa Mon Sep 17 00:00:00 2001 From: Ivan Habunek Date: Wed, 10 Jun 2020 10:54:28 +0200 Subject: [PATCH] Allow Unicode values in slugs Otherwise non-ASCII characters get stripped, which is not good for e.g. titles in Cyrillic script. Also replace runs of whitespace and hyphens with an underscore instead of a hyphen. --- twitchdl/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/twitchdl/utils.py b/twitchdl/utils.py index a7724a8..5e1efa6 100644 --- a/twitchdl/utils.py +++ b/twitchdl/utils.py @@ -58,6 +58,6 @@ def slugify(value): re_pattern = re.compile(r'[^\w\s-]', flags=re.U) re_spaces = re.compile(r'[-\s]+', flags=re.U) value = str(value) - value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii') + value = unicodedata.normalize('NFKC', value) value = re_pattern.sub('', value).strip().lower() - return re_spaces.sub('-', value) + return re_spaces.sub('_', value)