commit 9d6ac71c27b1dfb662c795ef598dbfd0286682da
parent 84f085d4bdb66ee025fb337bcd571eab7469da97
Author: Sergey M․ <dstftw@gmail.com>
Date: Fri, 29 Dec 2017 23:14:15 +0700
[extractor/common] Fix extraction of DASH formats with the same representation id (closes #15111)
Diffstat:
2 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
@@ -493,10 +493,21 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
_TEST_CASES = [
(
# https://github.com/rg3/youtube-dl/issues/13919
+ # Also tests duplicate representation ids, see
+ # https://github.com/rg3/youtube-dl/issues/15111
'float_duration',
'http://unknown/manifest.mpd',
[{
'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'm4a',
+ 'format_id': '318597',
+ 'format_note': 'DASH audio',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'none',
+ 'tbr': 61.587,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '318597',
'format_note': 'DASH video',
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
@@ -2007,16 +2007,14 @@ class InfoExtractor(object):
f['url'] = initialization_url
f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
- try:
- existing_format = next(
- fo for fo in formats
- if fo['format_id'] == representation_id)
- except StopIteration:
- full_info = formats_dict.get(representation_id, {}).copy()
- full_info.update(f)
- formats.append(full_info)
- else:
- existing_format.update(f)
+ # According to [1, 5.3.5.2, Table 7, page 35], the @id of a Representation
+ # is not necessarily unique within a Period, so formats with the same
+ # `format_id` are quite possible. There are numerous examples of such
+ # manifests (see https://github.com/rg3/youtube-dl/issues/15111,
+ # https://github.com/rg3/youtube-dl/issues/13919)
+ full_info = formats_dict.get(representation_id, {}).copy()
+ full_info.update(f)
+ formats.append(full_info)
else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats