The url for the video page, it must allow to reproduce the result.
It's automatically set by YoutubeDL if it's missing.
# Check for the presence of mandatory fields
for key in ('id', 'url', 'title', 'ext'):
self.assertTrue(key in info_dict.keys() and info_dict[key])
+ # Check for mandatory fields that are automatically set by YoutubeDL
+ for key in ['webpage_url', 'extractor']:
+ self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
finally:
try_rm_tcs_files()
'_type': 'compat_list',
'entries': ie_result,
}
- if 'extractor' not in ie_result:
- ie_result['extractor'] = ie.IE_NAME
+ self.add_extra_info(ie_result,
+ {
+ 'extractor': ie.IE_NAME,
+ 'webpage_url': url
+ })
return self.process_ie_result(ie_result, download, extra_info)
except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback())
'playlist': playlist,
'playlist_index': i + playliststart,
'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
}
entry_result = self.process_ie_result(entry,
download=download,
elif result_type == 'compat_list':
def _fixup(r):
self.add_extra_info(r,
- {'extractor': ie_result['extractor']})
+ {
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ })
return r
ie_result['entries'] = [
self.process_ie_result(_fixup(r), download, extra_info)
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
+ webpage_url: The url to the video webpage, if given to youtube-dl it
+ should allow to get the same result again. (It will be set
+ by YoutubeDL if it's missing)
Unless mentioned otherwise, the fields should be Unicode strings.
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
- _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
+ _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
_NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo'
_TESTS = [
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('id')
- if not mobj.group('proto'):
- url = 'https://' + url
- elif mobj.group('pro'):
+ if mobj.group('pro') or mobj.group('player'):
url = 'http://player.vimeo.com/video/' + video_id
- elif mobj.group('direct_link'):
+ else:
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
if len(formats) == 0:
raise ExtractorError(u'No known codec found')
- return [{
+ return {
'id': video_id,
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'thumbnail': video_thumbnail,
'description': video_description,
'formats': formats,
- }]
+ 'webpage_url': url,
+ }
class VimeoChannelIE(InfoExtractor):
'subtitles': video_subtitles,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
- 'annotations': video_annotations
+ 'annotations': video_annotations,
+ 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
})
return results