youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

yandexdisk.py (5166B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import json
      5 import re
      6 
      7 from .common import InfoExtractor
      8 from ..utils import (
      9     determine_ext,
     10     float_or_none,
     11     int_or_none,
     12     mimetype2ext,
     13     try_get,
     14     urljoin,
     15 )
     16 
     17 
     18 class YandexDiskIE(InfoExtractor):
     19     _VALID_URL = r'''(?x)https?://
     20         (?P<domain>
     21             yadi\.sk|
     22             disk\.yandex\.
     23                 (?:
     24                     az|
     25                     by|
     26                     co(?:m(?:\.(?:am|ge|tr))?|\.il)|
     27                     ee|
     28                     fr|
     29                     k[gz]|
     30                     l[tv]|
     31                     md|
     32                     t[jm]|
     33                     u[az]|
     34                     ru
     35                 )
     36         )/(?:[di]/|public.*?\bhash=)(?P<id>[^/?#&]+)'''
     37 
     38     _TESTS = [{
     39         'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
     40         'md5': 'a4a8d52958c8fddcf9845935070402ae',
     41         'info_dict': {
     42             'id': 'VdOeDou8eZs6Y',
     43             'ext': 'mp4',
     44             'title': '4.mp4',
     45             'duration': 168.6,
     46             'uploader': 'y.botova',
     47             'uploader_id': '300043621',
     48             'view_count': int,
     49         },
     50         'expected_warnings': ['Unable to download JSON metadata'],
     51     }, {
     52         'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
     53         'only_matching': True,
     54     }, {
     55         'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
     56         'only_matching': True,
     57     }]
     58 
     59     def _real_extract(self, url):
     60         domain, video_id = re.match(self._VALID_URL, url).groups()
     61 
     62         webpage = self._download_webpage(url, video_id)
     63         store = self._parse_json(self._search_regex(
     64             r'<script[^>]+id="store-prefetch"[^>]*>\s*({.+?})\s*</script>',
     65             webpage, 'store'), video_id)
     66         resource = store['resources'][store['rootResourceId']]
     67 
     68         title = resource['name']
     69         meta = resource.get('meta') or {}
     70 
     71         public_url = meta.get('short_url')
     72         if public_url:
     73             video_id = self._match_id(public_url)
     74 
     75         source_url = (self._download_json(
     76             'https://cloud-api.yandex.net/v1/disk/public/resources/download',
     77             video_id, query={'public_key': url}, fatal=False) or {}).get('href')
     78         video_streams = resource.get('videoStreams') or {}
     79         video_hash = resource.get('hash') or url
     80         environment = store.get('environment') or {}
     81         sk = environment.get('sk')
     82         yandexuid = environment.get('yandexuid')
     83         if sk and yandexuid and not (source_url and video_streams):
     84             self._set_cookie(domain, 'yandexuid', yandexuid)
     85 
     86             def call_api(action):
     87                 return (self._download_json(
     88                     urljoin(url, '/public/api/') + action, video_id, data=json.dumps({
     89                         'hash': video_hash,
     90                         'sk': sk,
     91                     }).encode(), headers={
     92                         'Content-Type': 'text/plain',
     93                     }, fatal=False) or {}).get('data') or {}
     94             if not source_url:
     95                 # TODO: figure out how to detect if download limit has
     96                 # been reached and then avoid unnecessary source format
     97                 # extraction requests
     98                 source_url = call_api('download-url').get('url')
     99             if not video_streams:
    100                 video_streams = call_api('get-video-streams')
    101 
    102         formats = []
    103         if source_url:
    104             formats.append({
    105                 'url': source_url,
    106                 'format_id': 'source',
    107                 'ext': determine_ext(title, meta.get('ext') or mimetype2ext(meta.get('mime_type')) or 'mp4'),
    108                 'quality': 1,
    109                 'filesize': int_or_none(meta.get('size'))
    110             })
    111 
    112         for video in (video_streams.get('videos') or []):
    113             format_url = video.get('url')
    114             if not format_url:
    115                 continue
    116             if video.get('dimension') == 'adaptive':
    117                 formats.extend(self._extract_m3u8_formats(
    118                     format_url, video_id, 'mp4', 'm3u8_native',
    119                     m3u8_id='hls', fatal=False))
    120             else:
    121                 size = video.get('size') or {}
    122                 height = int_or_none(size.get('height'))
    123                 format_id = 'hls'
    124                 if height:
    125                     format_id += '-%dp' % height
    126                 formats.append({
    127                     'ext': 'mp4',
    128                     'format_id': format_id,
    129                     'height': height,
    130                     'protocol': 'm3u8_native',
    131                     'url': format_url,
    132                     'width': int_or_none(size.get('width')),
    133                 })
    134         self._sort_formats(formats)
    135 
    136         uid = resource.get('uid')
    137         display_name = try_get(store, lambda x: x['users'][uid]['displayName'])
    138 
    139         return {
    140             'id': video_id,
    141             'title': title,
    142             'duration': float_or_none(video_streams.get('duration'), 1000),
    143             'uploader': display_name,
    144             'uploader_id': uid,
    145             'view_count': int_or_none(meta.get('views_counter')),
    146             'formats': formats,
    147         }