newstube.py (3123B)
# coding: utf-8
from __future__ import unicode_literals

import base64
import hashlib

from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
from ..utils import (
    bytes_to_intlist,
    int_or_none,
    intlist_to_bytes,
    parse_codecs,
    parse_duration,
)


# Extractor for newstube.ru media pages.
#
# The page itself only yields the metadata (title, description, thumbnail,
# duration) and the video GUID; the actual stream list is fetched from the
# embed player API as a base64-encoded, AES-CBC-encrypted JSON document.
class NewstubeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
    _TEST = {
        'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
        'md5': '9d10320ad473444352f72f746ccb8b8c',
        'info_dict': {
            'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
            'ext': 'mp4',
            'title': 'Телеканал CNN переместил город Славянск в Крым',
            'description': 'md5:419a8c9f03442bc0b0a794d689360335',
            'duration': 31.05,
        },
    }

    def _fetch_sources(self, video_guid):
        """Download, decrypt and JSON-decode the source list for *video_guid*.

        The response body is base64 text. Once decoded, the first 16 bytes
        are used as the PBKDF2 salt, the next 16 bytes are handed to
        ``aes_cbc_decrypt`` as its third argument (presumably the IV), and
        the remainder is the ciphertext.
        """
        api_response = self._download_webpage(
            'https://www.newstube.ru/embed/api/player/getsources2',
            video_guid, query={
                'guid': video_guid,
                'ff': 3,
            })
        payload = base64.b64decode(api_response)
        salt, iv, ciphertext = payload[:16], payload[16:32], payload[32:]

        # The password is the GUID without dashes; a single PBKDF2-HMAC-SHA1
        # iteration is run and only the first 16 bytes of the result are
        # kept as the AES key.
        password = video_guid.replace('-', '').encode()
        derived = hashlib.pbkdf2_hmac('sha1', password, salt, 1)
        aes_key = derived[:16]

        plain = aes_cbc_decrypt(
            bytes_to_intlist(ciphertext), bytes_to_intlist(aes_key),
            bytes_to_intlist(iv))

        # The last value states how many trailing entries to drop
        # (looks like PKCS#7-style padding -- confirm against the service).
        pad_len = plain[-1]
        return self._parse_json(intlist_to_bytes(plain[:-pad_len]), video_guid)

    def _real_extract(self, url):
        """Return the info dict for the newstube.ru media page at *url*."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        title = self._html_search_meta(
            ['og:title', 'twitter:title'], webpage, fatal=True)

        # The GUID is taken from the embed URL advertised in the og:video
        # family of <meta> tags.
        video_guid = self._html_search_regex(
            r'<meta\s+property="og:video(?::(?:(?:secure_)?url|iframe))?"\s+content="https?://(?:www\.)?newstube\.ru/embed/(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
            webpage, 'video GUID')

        formats = []
        for entry in self._fetch_sources(video_guid):
            src = entry.get('Src')
            if not src:
                # Entries without a URL cannot be downloaded; skip them.
                continue

            height = int_or_none(entry.get('Height'))
            format_id = 'http'
            if height:
                format_id += '-%dp' % height

            fmt = {
                'format_id': format_id,
                'url': src,
                'width': int_or_none(entry.get('Width')),
                'height': height,
            }

            mime = entry.get('Type')
            if mime:
                # Pull the codecs="..." parameter out of the type string;
                # the lookup is non-fatal, so a missing parameter is passed
                # on to parse_codecs as-is.
                codecs = self._search_regex(
                    r'codecs="([^"]+)"', mime, 'codecs', fatal=False)
                fmt.update(parse_codecs(codecs))

            formats.append(fmt)

        self._check_formats(formats, video_guid)
        self._sort_formats(formats)

        # Remaining metadata is read from the page after the formats are
        # finalised, in the same order as the keys of the returned dict.
        description = self._html_search_meta(
            ['description', 'og:description'], webpage)
        thumbnail = self._html_search_meta(
            ['og:image:secure_url', 'og:image', 'twitter:image'], webpage)
        duration = parse_duration(self._html_search_meta('duration', webpage))

        return {
            'id': video_guid,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }