mgtv.py (3548B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import base64 5 import time 6 import uuid 7 8 from .common import InfoExtractor 9 from ..compat import ( 10 compat_HTTPError, 11 compat_str, 12 ) 13 from ..utils import ( 14 ExtractorError, 15 int_or_none, 16 ) 17 18 19 class MGTVIE(InfoExtractor): 20 _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html' 21 IE_DESC = '芒果TV' 22 23 _TESTS = [{ 24 'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', 25 'info_dict': { 26 'id': '3116640', 27 'ext': 'mp4', 28 'title': '我是歌手 第四季', 29 'description': '我是歌手第四季双年巅峰会', 30 'duration': 7461, 31 'thumbnail': r're:^https?://.*\.jpg$', 32 }, 33 }, { 34 'url': 'http://www.mgtv.com/b/301817/3826653.html', 35 'only_matching': True, 36 }, { 37 'url': 'https://w.mgtv.com/b/301817/3826653.html', 38 'only_matching': True, 39 }] 40 41 def _real_extract(self, url): 42 video_id = self._match_id(url) 43 tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1] 44 try: 45 api_data = self._download_json( 46 'https://pcweb.api.mgtv.com/player/video', video_id, query={ 47 'tk2': tk2, 48 'video_id': video_id, 49 }, headers=self.geo_verification_headers())['data'] 50 except ExtractorError as e: 51 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: 52 error = self._parse_json(e.cause.read().decode(), None) 53 if error.get('code') == 40005: 54 self.raise_geo_restricted(countries=self._GEO_COUNTRIES) 55 raise ExtractorError(error['msg'], expected=True) 56 raise 57 info = api_data['info'] 58 title = info['title'].strip() 59 stream_data = self._download_json( 60 'https://pcweb.api.mgtv.com/player/getSource', video_id, query={ 61 'pm2': api_data['atc']['pm2'], 62 'tk2': tk2, 63 'video_id': video_id, 64 }, headers=self.geo_verification_headers())['data'] 65 stream_domain = stream_data['stream_domain'][0] 66 67 formats = [] 68 for idx, stream in enumerate(stream_data['stream']): 69 stream_path = stream.get('url') 70 if not stream_path: 71 continue 72 format_data = self._download_json( 73 stream_domain + stream_path, video_id, 74 note='Download video info for format #%d' % idx) 75 format_url = format_data.get('info') 76 if not format_url: 77 continue 78 tbr = int_or_none(stream.get('filebitrate') or self._search_regex( 79 r'_(\d+)_mp4/', format_url, 'tbr', default=None)) 80 formats.append({ 81 'format_id': compat_str(tbr or idx), 82 'url': format_url, 83 'ext': 'mp4', 84 'tbr': tbr, 85 'protocol': 'm3u8_native', 86 'http_headers': { 87 'Referer': url, 88 }, 89 'format_note': stream.get('name'), 90 }) 91 self._sort_formats(formats) 92 93 return { 94 'id': video_id, 95 'title': title, 96 'formats': formats, 97 'description': info.get('desc'), 98 'duration': int_or_none(info.get('duration')), 99 'thumbnail': info.get('thumb'), 100 }