cctv.py (6903B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..compat import compat_str 8 from ..utils import ( 9 float_or_none, 10 try_get, 11 unified_timestamp, 12 ) 13 14 15 class CCTVIE(InfoExtractor): 16 IE_DESC = '央视网' 17 _VALID_URL = r'https?://(?:(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)|(?:www\.)?ncpa-classic\.com)/(?:[^/]+/)*?(?P<id>[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)' 18 _TESTS = [{ 19 # fo.addVariable("videoCenterId","id") 20 'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml', 21 'md5': 'd61ec00a493e09da810bf406a078f691', 22 'info_dict': { 23 'id': '5ecdbeab623f4973b40ff25f18b174e8', 24 'ext': 'mp4', 25 'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)', 26 'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95', 27 'duration': 98, 28 'uploader': 'songjunjie', 29 'timestamp': 1455279956, 30 'upload_date': '20160212', 31 }, 32 }, { 33 # var guid = "id" 34 'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml', 35 'info_dict': { 36 'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae', 37 'ext': 'mp4', 38 'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)', 39 'description': '2月4日,蒙特泽莫罗透露了关于“车王”舒马赫恢复情况,但情况是否属实遭到了质疑。', 40 'duration': 37, 41 'uploader': 'shujun', 42 'timestamp': 1454677291, 43 'upload_date': '20160205', 44 }, 45 'params': { 46 'skip_download': True, 47 }, 48 }, { 49 # changePlayer('id') 50 'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml', 51 'info_dict': { 52 'id': '4bb9bb4db7a6471ba85fdeda5af0381e', 53 'ext': 'mp4', 54 'title': 'NHnews008 ANNUAL POLITICAL SEASON', 55 'description': 'Four Comprehensives', 56 'duration': 60, 57 'uploader': 'zhangyunlei', 58 'timestamp': 1425385521, 59 'upload_date': '20150303', 60 }, 61 'params': { 62 'skip_download': True, 63 }, 64 }, { 65 # loadvideo('id') 66 'url': 'http://cctv.cntv.cn/lm/tvseries_russian/yilugesanghua/index.shtml', 67 'info_dict': { 68 'id': 'b15f009ff45c43968b9af583fc2e04b2', 69 'ext': 'mp4', 70 'title': 'Путь,усыпанный космеями Серия 1', 71 'description': 'Путь, усыпанный космеями', 72 'duration': 2645, 73 'uploader': 'renxue', 74 'timestamp': 1477479241, 75 'upload_date': '20161026', 76 }, 77 'params': { 78 'skip_download': True, 79 }, 80 }, { 81 # var initMyAray = 'id' 82 'url': 'http://www.ncpa-classic.com/2013/05/22/VIDE1369219508996867.shtml', 83 'info_dict': { 84 'id': 'a194cfa7f18c426b823d876668325946', 85 'ext': 'mp4', 86 'title': '小泽征尔音乐塾 音乐梦想无国界', 87 'duration': 2173, 88 'timestamp': 1369248264, 89 'upload_date': '20130522', 90 }, 91 'params': { 92 'skip_download': True, 93 }, 94 }, { 95 # var ids = ["id"] 96 'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml', 97 'info_dict': { 98 'id': 'a8606119a4884588a79d81c02abecc16', 99 'ext': 'mp3', 100 'title': '来自维也纳的新年贺礼', 101 'description': 'md5:f13764ae8dd484e84dd4b39d5bcba2a7', 102 'duration': 1578, 103 'uploader': 'djy', 104 'timestamp': 1482942419, 105 'upload_date': '20161228', 106 }, 107 'params': { 108 'skip_download': True, 109 }, 110 'expected_warnings': ['Failed to download m3u8 information'], 111 }, { 112 'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml', 113 'only_matching': True, 114 }, { 115 'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44', 116 'only_matching': True, 117 }, { 118 'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml', 119 'only_matching': True, 120 }, { 121 'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml', 122 'only_matching': True, 123 }, { 124 'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44', 125 'only_matching': True, 126 }] 127 128 def _real_extract(self, url): 129 video_id = self._match_id(url) 130 webpage = self._download_webpage(url, video_id) 131 132 video_id = self._search_regex( 133 [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)', 134 r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)', 135 r'changePlayer\s*\(\s*["\']([\da-fA-F]+)', 136 r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)', 137 r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)', 138 r'var\s+ids\s*=\s*\[["\']([\da-fA-F]+)'], 139 webpage, 'video id') 140 141 data = self._download_json( 142 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do', video_id, 143 query={ 144 'pid': video_id, 145 'url': url, 146 'idl': 32, 147 'idlr': 32, 148 'modifyed': 'false', 149 }) 150 151 title = data['title'] 152 153 formats = [] 154 155 video = data.get('video') 156 if isinstance(video, dict): 157 for quality, chapters_key in enumerate(('lowChapters', 'chapters')): 158 video_url = try_get( 159 video, lambda x: x[chapters_key][0]['url'], compat_str) 160 if video_url: 161 formats.append({ 162 'url': video_url, 163 'format_id': 'http', 164 'quality': quality, 165 'preference': -1, 166 }) 167 168 hls_url = try_get(data, lambda x: x['hls_url'], compat_str) 169 if hls_url: 170 hls_url = re.sub(r'maxbr=\d+&?', '', hls_url) 171 formats.extend(self._extract_m3u8_formats( 172 hls_url, video_id, 'mp4', entry_protocol='m3u8_native', 173 m3u8_id='hls', fatal=False)) 174 175 self._sort_formats(formats) 176 177 uploader = data.get('editer_name') 178 description = self._html_search_meta( 179 'description', webpage, default=None) 180 timestamp = unified_timestamp(data.get('f_pgmtime')) 181 duration = float_or_none(try_get(video, lambda x: x['totalLength'])) 182 183 return { 184 'id': video_id, 185 'title': title, 186 'description': description, 187 'uploader': uploader, 188 'timestamp': timestamp, 189 'duration': duration, 190 'formats': formats, 191 }