yandexdisk.py (5166B)
# coding: utf-8
from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    float_or_none,
    int_or_none,
    mimetype2ext,
    try_get,
    urljoin,
)


# Extractor for publicly shared Yandex.Disk resources (yadi.sk short links
# and disk.yandex.<tld> pages).
class YandexDiskIE(InfoExtractor):
    # Verbose-mode pattern; captures the host as ``domain`` and the public
    # resource identifier (path component or ``hash`` query value) as ``id``.
    _VALID_URL = r'''(?x)https?://
        (?P<domain>
            yadi\.sk|
            disk\.yandex\.
                (?:
                    az|
                    by|
                    co(?:m(?:\.(?:am|ge|tr))?|\.il)|
                    ee|
                    fr|
                    k[gz]|
                    l[tv]|
                    md|
                    t[jm]|
                    u[az]|
                    ru
                )
        )/(?:[di]/|public.*?\bhash=)(?P<id>[^/?#&]+)'''

    _TESTS = [{
        'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
        'md5': 'a4a8d52958c8fddcf9845935070402ae',
        'info_dict': {
            'id': 'VdOeDou8eZs6Y',
            'ext': 'mp4',
            'title': '4.mp4',
            'duration': 168.6,
            'uploader': 'y.botova',
            'uploader_id': '300043621',
            'view_count': int,
        },
        'expected_warnings': ['Unable to download JSON metadata'],
    }, {
        'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
        'only_matching': True,
    }, {
        'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Returns the info dict (id, title, duration, uploader, uploader_id,
        # view_count, formats) for the resource behind ``url``.
        mobj = re.match(self._VALID_URL, url)
        domain, video_id = mobj.groups()

        # The page state is embedded as JSON inside the
        # <script id="store-prefetch"> element.
        webpage = self._download_webpage(url, video_id)
        store_json = self._search_regex(
            r'<script[^>]+id="store-prefetch"[^>]*>\s*({.+?})\s*</script>',
            webpage, 'store')
        store = self._parse_json(store_json, video_id)
        resource = store['resources'][store['rootResourceId']]

        title = resource['name']
        meta = resource.get('meta') or {}

        # When the metadata carries a short URL, the ID is taken from it
        # instead of from the requested URL.
        short_url = meta.get('short_url')
        if short_url:
            video_id = self._match_id(short_url)

        # First attempt at the source file link: the cloud API, queried
        # non-fatally so a failure only leaves source_url empty.
        download_info = self._download_json(
            'https://cloud-api.yandex.net/v1/disk/public/resources/download',
            video_id, query={'public_key': url}, fatal=False)
        source_url = (download_info or {}).get('href')

        video_streams = resource.get('videoStreams') or {}
        video_hash = resource.get('hash') or url

        environment = store.get('environment') or {}
        sk = environment.get('sk')
        yandexuid = environment.get('yandexuid')

        # Fall back to the site's own /public/api/ endpoints only when the
        # page supplied both tokens and something is still missing.
        something_missing = not source_url or not video_streams
        if sk and yandexuid and something_missing:
            self._set_cookie(domain, 'yandexuid', yandexuid)

            api_base = urljoin(url, '/public/api/')
            payload = json.dumps({
                'hash': video_hash,
                'sk': sk,
            }).encode()

            def call_api(action):
                # POST the hash/sk payload to the given action and return
                # the response's 'data' member ({} on any failure).
                response = self._download_json(
                    api_base + action, video_id, data=payload,
                    headers={
                        'Content-Type': 'text/plain',
                    }, fatal=False)
                return (response or {}).get('data') or {}

            if not source_url:
                # TODO: figure out how to detect if download limit has
                # been reached and then avoid unnecessary source format
                # extraction requests
                source_url = call_api('download-url').get('url')
            if not video_streams:
                video_streams = call_api('get-video-streams')

        formats = []
        if source_url:
            # Extension fallback chain used when the title has no usable
            # extension: meta 'ext', then the MIME type, then 'mp4'.
            default_ext = (
                meta.get('ext')
                or mimetype2ext(meta.get('mime_type'))
                or 'mp4')
            formats.append({
                'url': source_url,
                'format_id': 'source',
                'ext': determine_ext(title, default_ext),
                'quality': 1,
                'filesize': int_or_none(meta.get('size')),
            })

        for stream in (video_streams.get('videos') or []):
            stream_url = stream.get('url')
            if not stream_url:
                continue

            if stream.get('dimension') != 'adaptive':
                # Fixed-size rendition: described directly from its 'size'.
                dimensions = stream.get('size') or {}
                height = int_or_none(dimensions.get('height'))
                formats.append({
                    'ext': 'mp4',
                    'format_id': 'hls-%dp' % height if height else 'hls',
                    'height': height,
                    'protocol': 'm3u8_native',
                    'url': stream_url,
                    'width': int_or_none(dimensions.get('width')),
                })
                continue

            # 'adaptive' entries are expanded through the m3u8 helper.
            formats.extend(self._extract_m3u8_formats(
                stream_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        uid = resource.get('uid')
        uploader = try_get(store, lambda x: x['users'][uid]['displayName'])

        # NOTE(review): the 1000 passed to float_or_none presumably converts
        # a millisecond duration to seconds - confirm against the helper.
        duration = float_or_none(video_streams.get('duration'), 1000)

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'uploader': uploader,
            'uploader_id': uid,
            'view_count': int_or_none(meta.get('views_counter')),
            'formats': formats,
        }