youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 47f53ad95884d92c8e5be6ba5c35e2955b941b0c
parent c73cdd800f0dc7b465ac0b36d338875bb80c23aa
Author: Sergey M <dstftw@gmail.com>
Date:   Thu,  6 Aug 2015 00:04:40 +0600

Merge pull request #6463 from jaimeMF/format_spec_fix_dashes

[YoutubeDL] format spec: correctly handle dashes and other unused operators
Diffstat:
Mtest/test_YoutubeDL.py | 6++++++
Myoutube_dl/YoutubeDL.py | 33++++++++++++++++++++++++++++++++-
2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py @@ -105,6 +105,7 @@ class TestFormatSelection(unittest.TestCase): def test_format_selection(self): formats = [ {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, + {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL}, {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL}, {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL}, {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL}, @@ -136,6 +137,11 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], '35') + ydl = YDL({'format': 'example-with-dashes'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'example-with-dashes') + def test_format_selection_audio(self): formats = [ {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py @@ -933,6 +933,37 @@ class YoutubeDL(object): else: filter_parts.append(string) + def _remove_unused_ops(tokens): + # Remove operators that we don't use and join them with the sourrounding strings + # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' + ALLOWED_OPS = ('/', '+', ',', '(', ')') + last_string, last_start, last_end, last_line = None, None, None, None + for type, string, start, end, line in tokens: + if type == tokenize.OP and string == '[': + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string, start, end, line + # everything inside brackets will be handled by _parse_filter + for type, string, start, end, line in tokens: + yield type, string, start, end, line + if type == tokenize.OP and string == ']': + break + elif type == tokenize.OP and string in ALLOWED_OPS: + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string, start, end, line + elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: + if not last_string: + last_string = string + last_start = start + last_end = end + else: + last_string += string + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): selectors = [] current_selector = None @@ -1111,7 +1142,7 @@ class YoutubeDL(object): stream = io.BytesIO(format_spec.encode('utf-8')) try: - tokens = list(compat_tokenize_tokenize(stream.readline)) + tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline))) except tokenize.TokenError: raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))