ruv.py (3359B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    unified_timestamp,
)


class RuvIE(InfoExtractor):
    # Matches both the "sarpurinn/<channel>/<slug>[/<date>]" pages and the
    # plain "node/<id>" pages; the captured id doubles as the display id.
    _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
    _TESTS = [{
        # m3u8
        'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
        'md5': '66347652f4e13e71936817102acc1724',
        'info_dict': {
            'id': '1144499',
            'display_id': 'fh-valur/20170516',
            'ext': 'mp4',
            'title': 'FH - Valur',
            'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
            'timestamp': 1494963600,
            'upload_date': '20170516',
        },
    }, {
        # mp3
        'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
        'md5': '395ea250c8a13e5fdb39d4670ef85378',
        'info_dict': {
            'id': '1153630',
            'display_id': 'morgunutvarpid/20170619',
            'ext': 'mp3',
            'title': 'Morgunútvarpið',
            'description': 'md5:a4cf1202c0a1645ca096b06525915418',
            'timestamp': 1497855000,
            'upload_date': '20170619',
        },
    }, {
        'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
        'only_matching': True,
    }, {
        'url': 'http://www.ruv.is/node/1151854',
        'only_matching': True,
    }, {
        'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
        'only_matching': True,
    }, {
        'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract media information from a ruv.is page.

        Downloads the page at ``url``, reads the media URL from the inline
        ``video.src = '...'`` assignment and builds the format list from the
        media URL's extension (HLS playlist, plain mp3, or a single direct
        URL otherwise).

        Returns an info dict with ``id``, ``display_id``, ``title``,
        ``description``, ``thumbnail``, ``timestamp`` and ``formats``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = self._og_search_title(webpage)

        # Template for ``video.<field> = "<value>"`` assignments in the page.
        # Group 1 captures the opening quote so the same quote can be required
        # at the end; the value itself lives in the named group ``url``.
        FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'

        media_url = self._html_search_regex(
            FIELD_RE % 'src', webpage, 'video URL', group='url')

        # Prefer the numeric node id from a <link> tag in the page; fall back
        # to the id taken from the URL when no such link is present.
        video_id = self._search_regex(
            r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
            webpage, 'video id', default=display_id)

        ext = determine_ext(media_url)

        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(
                media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        elif ext == 'mp3':
            # Audio-only stream: mark it as having no video codec.
            formats = [{
                'format_id': 'mp3',
                'url': media_url,
                'vcodec': 'none',
            }]
        else:
            formats = [{
                'url': media_url,
            }]

        description = self._og_search_description(webpage, default=None)
        # Fall back to the player's poster image when there is no og:image.
        # The ``url`` group must be selected explicitly: group 1 of FIELD_RE
        # is the quote character, so omitting ``group`` would yield the quote
        # instead of the poster URL.
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._search_regex(
            FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False,
            group='url')
        timestamp = unified_timestamp(self._html_search_meta(
            'article:published_time', webpage, 'timestamp', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'formats': formats,
        }