tvnet.py (4942B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import ( 8 int_or_none, 9 unescapeHTML, 10 url_or_none, 11 ) 12 13 14 class TVNetIE(InfoExtractor): 15 _VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?:\d+/)?(?P<id>\d+)(?:/|$)' 16 _TESTS = [{ 17 # video 18 'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h', 19 'md5': 'b4d7abe0252c9b47774760b7519c7558', 20 'info_dict': { 21 'id': '109788', 22 'ext': 'mp4', 23 'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang', 24 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', 25 'is_live': False, 26 'view_count': int, 27 }, 28 }, { 29 # audio 30 'url': 'http://vn.tvnet.gov.vn/radio/27017/vov1---ban-tin-chieu-10062018/doi-song-va-xa-hoi', 31 'md5': 'b5875ce9b0a2eecde029216d0e6db2ae', 32 'info_dict': { 33 'id': '27017', 34 'ext': 'm4a', 35 'title': 'VOV1 - Bản tin chiều (10/06/2018)', 36 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', 37 'is_live': False, 38 }, 39 }, { 40 'url': 'http://us.tvnet.gov.vn/video/118023/129999/ngay-0705', 41 'info_dict': { 42 'id': '129999', 43 'ext': 'mp4', 44 'title': 'VTV1 - Quốc hội với cử tri (11/06/2018)', 45 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', 46 'is_live': False, 47 }, 48 'params': { 49 'skip_download': True, 50 }, 51 }, { 52 # live stream 53 'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1', 54 'info_dict': { 55 'id': '1011', 56 'ext': 'mp4', 57 'title': r're:^VTV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 58 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', 59 'is_live': True, 60 }, 61 'params': { 62 'skip_download': True, 63 }, 64 }, { 65 # radio live stream 66 'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014', 67 'info_dict': { 68 'id': '1014', 69 'ext': 'm4a', 70 'title': r're:VOV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 71 'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)', 72 'is_live': True, 73 }, 74 'params': { 75 'skip_download': True, 76 }, 77 }, { 78 'url': 'http://us.tvnet.gov.vn/phim/6136/25510/vtv3---ca-mot-doi-an-oan-tap-1-50/phim-truyen-hinh', 79 'only_matching': True, 80 }] 81 82 def _real_extract(self, url): 83 video_id = self._match_id(url) 84 85 webpage = self._download_webpage(url, video_id) 86 87 title = self._og_search_title( 88 webpage, default=None) or self._html_search_meta( 89 'title', webpage, default=None) or self._search_regex( 90 r'<title>([^<]+)<', webpage, 'title') 91 title = re.sub(r'\s*-\s*TV Net\s*$', '', title) 92 93 if '/video/' in url or '/radio/' in url: 94 is_live = False 95 elif '/kenh-truyen-hinh/' in url: 96 is_live = True 97 else: 98 is_live = None 99 100 data_file = unescapeHTML(self._search_regex( 101 r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage, 102 'data file', group='url')) 103 104 stream_urls = set() 105 formats = [] 106 for stream in self._download_json(data_file, video_id): 107 if not isinstance(stream, dict): 108 continue 109 stream_url = url_or_none(stream.get('url')) 110 if stream_url in stream_urls or not stream_url: 111 continue 112 stream_urls.add(stream_url) 113 formats.extend(self._extract_m3u8_formats( 114 stream_url, video_id, 'mp4', 115 entry_protocol='m3u8' if is_live else 'm3u8_native', 116 m3u8_id='hls', fatal=False)) 117 self._sort_formats(formats) 118 119 # better support for radio streams 120 if title.startswith('VOV'): 121 for f in formats: 122 f.update({ 123 'ext': 'm4a', 124 'vcodec': 'none', 125 }) 126 127 thumbnail = self._og_search_thumbnail( 128 webpage, default=None) or unescapeHTML( 129 self._search_regex( 130 r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage, 131 'thumbnail', default=None, group='url')) 132 133 if is_live: 134 title = self._live_title(title) 135 136 view_count = int_or_none(self._search_regex( 137 r'(?s)<div[^>]+\bclass=["\'].*?view-count[^>]+>.*?(\d+).*?</div>', 138 webpage, 'view count', default=None)) 139 140 return { 141 'id': video_id, 142 'title': title, 143 'thumbnail': thumbnail, 144 'is_live': is_live, 145 'view_count': view_count, 146 'formats': formats, 147 }