dbtv.py (1955B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 8 9 class DBTVIE(InfoExtractor): 10 _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})' 11 _TESTS = [{ 12 'url': 'https://www.dagbladet.no/video/PynxJnNWChE/', 13 'md5': 'b8f850ba1860adbda668d367f9b77699', 14 'info_dict': { 15 'id': 'PynxJnNWChE', 16 'ext': 'mp4', 17 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', 18 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f', 19 'thumbnail': r're:https?://.*\.jpg', 20 'upload_date': '20160916', 21 'duration': 69, 22 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ', 23 'uploader': 'Dagbladet', 24 }, 25 'add_ie': ['Youtube'] 26 }, { 27 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false', 28 'only_matching': True, 29 }, { 30 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw', 31 'only_matching': True, 32 }] 33 34 @staticmethod 35 def _extract_urls(webpage): 36 return [url for _, url in re.findall( 37 r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1', 38 webpage)] 39 40 def _real_extract(self, url): 41 display_id, video_id = re.match(self._VALID_URL, url).groups() 42 info = { 43 '_type': 'url_transparent', 44 'id': video_id, 45 'display_id': display_id, 46 } 47 if len(video_id) == 11: 48 info.update({ 49 'url': video_id, 50 'ie_key': 'Youtube', 51 }) 52 else: 53 info.update({ 54 'url': 'jwplatform:' + video_id, 55 'ie_key': 'JWPlatform', 56 }) 57 return info