youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

odnoklassniki.py (9492B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..compat import (
      8     compat_etree_fromstring,
      9     compat_parse_qs,
     10     compat_urllib_parse_unquote,
     11     compat_urllib_parse_urlparse,
     12 )
     13 from ..utils import (
     14     ExtractorError,
     15     unified_strdate,
     16     int_or_none,
     17     qualities,
     18     unescapeHTML,
     19     urlencode_postdata,
     20 )
     21 
     22 
     23 class OdnoklassnikiIE(InfoExtractor):
     24     _VALID_URL = r'''(?x)
     25                 https?://
     26                     (?:(?:www|m|mobile)\.)?
     27                     (?:odnoklassniki|ok)\.ru/
     28                     (?:
     29                         video(?:embed)?/|
     30                         web-api/video/moviePlayer/|
     31                         live/|
     32                         dk\?.*?st\.mvId=
     33                     )
     34                     (?P<id>[\d-]+)
     35                 '''
     36     _TESTS = [{
     37         # metadata in JSON
     38         'url': 'http://ok.ru/video/20079905452',
     39         'md5': '0b62089b479e06681abaaca9d204f152',
     40         'info_dict': {
     41             'id': '20079905452',
     42             'ext': 'mp4',
     43             'title': 'Культура меняет нас (прекрасный ролик!))',
     44             'duration': 100,
     45             'upload_date': '20141207',
     46             'uploader_id': '330537914540',
     47             'uploader': 'Виталий Добровольский',
     48             'like_count': int,
     49             'age_limit': 0,
     50         },
     51     }, {
     52         # metadataUrl
     53         'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
     54         'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
     55         'info_dict': {
     56             'id': '63567059965189-0',
     57             'ext': 'mp4',
     58             'title': 'Девушка без комплексов ...',
     59             'duration': 191,
     60             'upload_date': '20150518',
     61             'uploader_id': '534380003155',
     62             'uploader': '☭ Андрей Мещанинов ☭',
     63             'like_count': int,
     64             'age_limit': 0,
     65             'start_time': 5,
     66         },
     67     }, {
     68         # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
     69         'url': 'http://ok.ru/video/64211978996595-1',
     70         'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
     71         'info_dict': {
     72             'id': 'V_VztHT5BzY',
     73             'ext': 'mp4',
     74             'title': 'Космическая среда от 26 августа 2015',
     75             'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
     76             'duration': 440,
     77             'upload_date': '20150826',
     78             'uploader_id': 'tvroscosmos',
     79             'uploader': 'Телестудия Роскосмоса',
     80             'age_limit': 0,
     81         },
     82     }, {
     83         # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
     84         'url': 'http://ok.ru/video/62036049272859-0',
     85         'info_dict': {
     86             'id': '62036049272859-0',
     87             'ext': 'mp4',
     88             'title': 'МУЗЫКА     ДОЖДЯ .',
     89             'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
     90             'upload_date': '20120106',
     91             'uploader_id': '473534735899',
     92             'uploader': 'МARINA D',
     93             'age_limit': 0,
     94         },
     95         'params': {
     96             'skip_download': True,
     97         },
     98         'skip': 'Video has not been found',
     99     }, {
    100         'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
    101         'only_matching': True,
    102     }, {
    103         'url': 'http://www.ok.ru/video/20648036891',
    104         'only_matching': True,
    105     }, {
    106         'url': 'http://www.ok.ru/videoembed/20648036891',
    107         'only_matching': True,
    108     }, {
    109         'url': 'http://m.ok.ru/video/20079905452',
    110         'only_matching': True,
    111     }, {
    112         'url': 'http://mobile.ok.ru/video/20079905452',
    113         'only_matching': True,
    114     }, {
    115         'url': 'https://www.ok.ru/live/484531969818',
    116         'only_matching': True,
    117     }, {
    118         'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
    119         'only_matching': True,
    120     }, {
    121         # Paid video
    122         'url': 'https://ok.ru/video/954886983203',
    123         'only_matching': True,
    124     }]
    125 
    126     @staticmethod
    127     def _extract_url(webpage):
    128         mobj = re.search(
    129             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
    130         if mobj:
    131             return mobj.group('url')
    132 
    133     def _real_extract(self, url):
    134         start_time = int_or_none(compat_parse_qs(
    135             compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
    136 
    137         video_id = self._match_id(url)
    138 
    139         webpage = self._download_webpage(
    140             'http://ok.ru/video/%s' % video_id, video_id)
    141 
    142         error = self._search_regex(
    143             r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
    144             webpage, 'error', default=None)
    145         if error:
    146             raise ExtractorError(error, expected=True)
    147 
    148         player = self._parse_json(
    149             unescapeHTML(self._search_regex(
    150                 r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
    151                 webpage, 'player', group='player')),
    152             video_id)
    153 
    154         flashvars = player['flashvars']
    155 
    156         metadata = flashvars.get('metadata')
    157         if metadata:
    158             metadata = self._parse_json(metadata, video_id)
    159         else:
    160             data = {}
    161             st_location = flashvars.get('location')
    162             if st_location:
    163                 data['st.location'] = st_location
    164             metadata = self._download_json(
    165                 compat_urllib_parse_unquote(flashvars['metadataUrl']),
    166                 video_id, 'Downloading metadata JSON',
    167                 data=urlencode_postdata(data))
    168 
    169         movie = metadata['movie']
    170 
    171         # Some embedded videos may not contain title in movie dict (e.g.
    172         # http://ok.ru/video/62036049272859-0) thus we allow missing title
    173         # here and it's going to be extracted later by an extractor that
    174         # will process the actual embed.
    175         provider = metadata.get('provider')
    176         title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')
    177 
    178         thumbnail = movie.get('poster')
    179         duration = int_or_none(movie.get('duration'))
    180 
    181         author = metadata.get('author', {})
    182         uploader_id = author.get('id')
    183         uploader = author.get('name')
    184 
    185         upload_date = unified_strdate(self._html_search_meta(
    186             'ya:ovs:upload_date', webpage, 'upload date', default=None))
    187 
    188         age_limit = None
    189         adult = self._html_search_meta(
    190             'ya:ovs:adult', webpage, 'age limit', default=None)
    191         if adult:
    192             age_limit = 18 if adult == 'true' else 0
    193 
    194         like_count = int_or_none(metadata.get('likeCount'))
    195 
    196         info = {
    197             'id': video_id,
    198             'title': title,
    199             'thumbnail': thumbnail,
    200             'duration': duration,
    201             'upload_date': upload_date,
    202             'uploader': uploader,
    203             'uploader_id': uploader_id,
    204             'like_count': like_count,
    205             'age_limit': age_limit,
    206             'start_time': start_time,
    207         }
    208 
    209         if provider == 'USER_YOUTUBE':
    210             info.update({
    211                 '_type': 'url_transparent',
    212                 'url': movie['contentId'],
    213             })
    214             return info
    215 
    216         assert title
    217         if provider == 'LIVE_TV_APP':
    218             info['title'] = self._live_title(title)
    219 
    220         quality = qualities(('4', '0', '1', '2', '3', '5'))
    221 
    222         formats = [{
    223             'url': f['url'],
    224             'ext': 'mp4',
    225             'format_id': f['name'],
    226         } for f in metadata['videos']]
    227 
    228         m3u8_url = metadata.get('hlsManifestUrl')
    229         if m3u8_url:
    230             formats.extend(self._extract_m3u8_formats(
    231                 m3u8_url, video_id, 'mp4', 'm3u8_native',
    232                 m3u8_id='hls', fatal=False))
    233 
    234         dash_manifest = metadata.get('metadataEmbedded')
    235         if dash_manifest:
    236             formats.extend(self._parse_mpd_formats(
    237                 compat_etree_fromstring(dash_manifest), 'mpd'))
    238 
    239         for fmt in formats:
    240             fmt_type = self._search_regex(
    241                 r'\btype[/=](\d)', fmt['url'],
    242                 'format type', default=None)
    243             if fmt_type:
    244                 fmt['quality'] = quality(fmt_type)
    245 
    246         # Live formats
    247         m3u8_url = metadata.get('hlsMasterPlaylistUrl')
    248         if m3u8_url:
    249             formats.extend(self._extract_m3u8_formats(
    250                 m3u8_url, video_id, 'mp4', entry_protocol='m3u8',
    251                 m3u8_id='hls', fatal=False))
    252         rtmp_url = metadata.get('rtmpUrl')
    253         if rtmp_url:
    254             formats.append({
    255                 'url': rtmp_url,
    256                 'format_id': 'rtmp',
    257                 'ext': 'flv',
    258             })
    259 
    260         if not formats:
    261             payment_info = metadata.get('paymentInfo')
    262             if payment_info:
    263                 raise ExtractorError('This video is paid, subscribe to download it', expected=True)
    264 
    265         self._sort_formats(formats)
    266 
    267         info['formats'] = formats
    268         return info