youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

vevo.py (14131B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 import json
      5 
      6 from .common import InfoExtractor
      7 from ..compat import (
      8     compat_str,
      9     compat_urlparse,
     10     compat_HTTPError,
     11 )
     12 from ..utils import (
     13     ExtractorError,
     14     int_or_none,
     15     parse_iso8601,
     16 )
     17 
     18 
     19 class VevoBaseIE(InfoExtractor):
     20     def _extract_json(self, webpage, video_id):
     21         return self._parse_json(
     22             self._search_regex(
     23                 r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
     24                 webpage, 'initial store'),
     25             video_id)
     26 
     27 
     28 class VevoIE(VevoBaseIE):
     29     '''
     30     Accepts urls from vevo.com or in the format 'vevo:{id}'
     31     (currently used by MTVIE and MySpaceIE)
     32     '''
     33     _VALID_URL = r'''(?x)
     34         (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
     35            https?://cache\.vevo\.com/m/html/embed\.html\?video=|
     36            https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
     37            https?://embed\.vevo\.com/.*?[?&]isrc=|
     38            vevo:)
     39         (?P<id>[^&?#]+)'''
     40 
     41     _TESTS = [{
     42         'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
     43         'md5': '95ee28ee45e70130e3ab02b0f579ae23',
     44         'info_dict': {
     45             'id': 'GB1101300280',
     46             'ext': 'mp4',
     47             'title': 'Hurts - Somebody to Die For',
     48             'timestamp': 1372057200,
     49             'upload_date': '20130624',
     50             'uploader': 'Hurts',
     51             'track': 'Somebody to Die For',
     52             'artist': 'Hurts',
     53             'genre': 'Pop',
     54         },
     55         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
     56     }, {
     57         'note': 'v3 SMIL format',
     58         'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
     59         'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
     60         'info_dict': {
     61             'id': 'USUV71302923',
     62             'ext': 'mp4',
     63             'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
     64             'timestamp': 1392796919,
     65             'upload_date': '20140219',
     66             'uploader': 'Cassadee Pope',
     67             'track': 'I Wish I Could Break Your Heart',
     68             'artist': 'Cassadee Pope',
     69             'genre': 'Country',
     70         },
     71         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
     72     }, {
     73         'note': 'Age-limited video',
     74         'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
     75         'info_dict': {
     76             'id': 'USRV81300282',
     77             'ext': 'mp4',
     78             'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
     79             'age_limit': 18,
     80             'timestamp': 1372888800,
     81             'upload_date': '20130703',
     82             'uploader': 'Justin Timberlake',
     83             'track': 'Tunnel Vision (Explicit)',
     84             'artist': 'Justin Timberlake',
     85             'genre': 'Pop',
     86         },
     87         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
     88     }, {
     89         'note': 'No video_info',
     90         'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
     91         'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
     92         'info_dict': {
     93             'id': 'USUV71503000',
     94             'ext': 'mp4',
     95             'title': 'K Camp ft. T.I. - Till I Die',
     96             'age_limit': 18,
     97             'timestamp': 1449468000,
     98             'upload_date': '20151207',
     99             'uploader': 'K Camp',
    100             'track': 'Till I Die',
    101             'artist': 'K Camp',
    102             'genre': 'Hip-Hop',
    103         },
    104         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    105     }, {
    106         'note': 'Featured test',
    107         'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
    108         'md5': 'd28675e5e8805035d949dc5cf161071d',
    109         'info_dict': {
    110             'id': 'USUV71402190',
    111             'ext': 'mp4',
    112             'title': 'Lemaitre ft. LoLo - Wait',
    113             'age_limit': 0,
    114             'timestamp': 1413432000,
    115             'upload_date': '20141016',
    116             'uploader': 'Lemaitre',
    117             'track': 'Wait',
    118             'artist': 'Lemaitre',
    119             'genre': 'Electronic',
    120         },
    121         'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
    122     }, {
    123         'note': 'Only available via webpage',
    124         'url': 'http://www.vevo.com/watch/GBUV71600656',
    125         'md5': '67e79210613865b66a47c33baa5e37fe',
    126         'info_dict': {
    127             'id': 'GBUV71600656',
    128             'ext': 'mp4',
    129             'title': 'ABC - Viva Love',
    130             'age_limit': 0,
    131             'timestamp': 1461830400,
    132             'upload_date': '20160428',
    133             'uploader': 'ABC',
    134             'track': 'Viva Love',
    135             'artist': 'ABC',
    136             'genre': 'Pop',
    137         },
    138         'expected_warnings': ['Failed to download video versions info'],
    139     }, {
    140         # no genres available
    141         'url': 'http://www.vevo.com/watch/INS171400764',
    142         'only_matching': True,
    143     }, {
    144         # Another case available only via the webpage; using streams/streamsV3 formats
    145         # Geo-restricted to Netherlands/Germany
    146         'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
    147         'only_matching': True,
    148     }, {
    149         'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
    150         'only_matching': True,
    151     }]
    152     _VERSIONS = {
    153         0: 'youtube',  # only in AuthenticateVideo videoVersions
    154         1: 'level3',
    155         2: 'akamai',
    156         3: 'level3',
    157         4: 'amazon',
    158     }
    159 
    160     def _initialize_api(self, video_id):
    161         webpage = self._download_webpage(
    162             'https://accounts.vevo.com/token', None,
    163             note='Retrieving oauth token',
    164             errnote='Unable to retrieve oauth token',
    165             data=json.dumps({
    166                 'client_id': 'SPupX1tvqFEopQ1YS6SS',
    167                 'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous',
    168             }).encode('utf-8'),
    169             headers={
    170                 'Content-Type': 'application/json',
    171             })
    172 
    173         if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
    174             self.raise_geo_restricted(
    175                 '%s said: This page is currently unavailable in your region' % self.IE_NAME)
    176 
    177         auth_info = self._parse_json(webpage, video_id)
    178         self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token']
    179 
    180     def _call_api(self, path, *args, **kwargs):
    181         try:
    182             data = self._download_json(self._api_url_template % path, *args, **kwargs)
    183         except ExtractorError as e:
    184             if isinstance(e.cause, compat_HTTPError):
    185                 errors = self._parse_json(e.cause.read().decode(), None)['errors']
    186                 error_message = ', '.join([error['message'] for error in errors])
    187                 raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
    188             raise
    189         return data
    190 
    191     def _real_extract(self, url):
    192         video_id = self._match_id(url)
    193 
    194         self._initialize_api(video_id)
    195 
    196         video_info = self._call_api(
    197             'video/%s' % video_id, video_id, 'Downloading api video info',
    198             'Failed to download video info')
    199 
    200         video_versions = self._call_api(
    201             'video/%s/streams' % video_id, video_id,
    202             'Downloading video versions info',
    203             'Failed to download video versions info',
    204             fatal=False)
    205 
    206         # Some videos are only available via webpage (e.g.
    207         # https://github.com/ytdl-org/youtube-dl/issues/9366)
    208         if not video_versions:
    209             webpage = self._download_webpage(url, video_id)
    210             json_data = self._extract_json(webpage, video_id)
    211             if 'streams' in json_data.get('default', {}):
    212                 video_versions = json_data['default']['streams'][video_id][0]
    213             else:
    214                 video_versions = [
    215                     value
    216                     for key, value in json_data['apollo']['data'].items()
    217                     if key.startswith('%s.streams' % video_id)]
    218 
    219         uploader = None
    220         artist = None
    221         featured_artist = None
    222         artists = video_info.get('artists')
    223         for curr_artist in artists:
    224             if curr_artist.get('role') == 'Featured':
    225                 featured_artist = curr_artist['name']
    226             else:
    227                 artist = uploader = curr_artist['name']
    228 
    229         formats = []
    230         for video_version in video_versions:
    231             version = self._VERSIONS.get(video_version.get('version'), 'generic')
    232             version_url = video_version.get('url')
    233             if not version_url:
    234                 continue
    235 
    236             if '.ism' in version_url:
    237                 continue
    238             elif '.mpd' in version_url:
    239                 formats.extend(self._extract_mpd_formats(
    240                     version_url, video_id, mpd_id='dash-%s' % version,
    241                     note='Downloading %s MPD information' % version,
    242                     errnote='Failed to download %s MPD information' % version,
    243                     fatal=False))
    244             elif '.m3u8' in version_url:
    245                 formats.extend(self._extract_m3u8_formats(
    246                     version_url, video_id, 'mp4', 'm3u8_native',
    247                     m3u8_id='hls-%s' % version,
    248                     note='Downloading %s m3u8 information' % version,
    249                     errnote='Failed to download %s m3u8 information' % version,
    250                     fatal=False))
    251             else:
    252                 m = re.search(r'''(?xi)
    253                     _(?P<width>[0-9]+)x(?P<height>[0-9]+)
    254                     _(?P<vcodec>[a-z0-9]+)
    255                     _(?P<vbr>[0-9]+)
    256                     _(?P<acodec>[a-z0-9]+)
    257                     _(?P<abr>[0-9]+)
    258                     \.(?P<ext>[a-z0-9]+)''', version_url)
    259                 if not m:
    260                     continue
    261 
    262                 formats.append({
    263                     'url': version_url,
    264                     'format_id': 'http-%s-%s' % (version, video_version['quality']),
    265                     'vcodec': m.group('vcodec'),
    266                     'acodec': m.group('acodec'),
    267                     'vbr': int(m.group('vbr')),
    268                     'abr': int(m.group('abr')),
    269                     'ext': m.group('ext'),
    270                     'width': int(m.group('width')),
    271                     'height': int(m.group('height')),
    272                 })
    273         self._sort_formats(formats)
    274 
    275         track = video_info['title']
    276         if featured_artist:
    277             artist = '%s ft. %s' % (artist, featured_artist)
    278         title = '%s - %s' % (artist, track) if artist else track
    279 
    280         genres = video_info.get('genres')
    281         genre = (
    282             genres[0] if genres and isinstance(genres, list)
    283             and isinstance(genres[0], compat_str) else None)
    284 
    285         is_explicit = video_info.get('isExplicit')
    286         if is_explicit is True:
    287             age_limit = 18
    288         elif is_explicit is False:
    289             age_limit = 0
    290         else:
    291             age_limit = None
    292 
    293         return {
    294             'id': video_id,
    295             'title': title,
    296             'formats': formats,
    297             'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
    298             'timestamp': parse_iso8601(video_info.get('releaseDate')),
    299             'uploader': uploader,
    300             'duration': int_or_none(video_info.get('duration')),
    301             'view_count': int_or_none(video_info.get('views', {}).get('total')),
    302             'age_limit': age_limit,
    303             'track': track,
    304             'artist': uploader,
    305             'genre': genre,
    306         }
    307 
    308 
    309 class VevoPlaylistIE(VevoBaseIE):
    310     _VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)'
    311 
    312     _TESTS = [{
    313         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29',
    314         'info_dict': {
    315             'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29',
    316             'title': 'Best-Of: Birdman',
    317         },
    318         'playlist_count': 10,
    319     }, {
    320         'url': 'http://www.vevo.com/watch/genre/rock',
    321         'info_dict': {
    322             'id': 'rock',
    323             'title': 'Rock',
    324         },
    325         'playlist_count': 20,
    326     }, {
    327         'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0',
    328         'md5': '32dcdfddddf9ec6917fc88ca26d36282',
    329         'info_dict': {
    330             'id': 'USCMV1100073',
    331             'ext': 'mp4',
    332             'title': 'Birdman - Y.U. MAD',
    333             'timestamp': 1323417600,
    334             'upload_date': '20111209',
    335             'uploader': 'Birdman',
    336             'track': 'Y.U. MAD',
    337             'artist': 'Birdman',
    338             'genre': 'Rap/Hip-Hop',
    339         },
    340         'expected_warnings': ['Unable to download SMIL file'],
    341     }, {
    342         'url': 'http://www.vevo.com/watch/genre/rock?index=0',
    343         'only_matching': True,
    344     }]
    345 
    346     def _real_extract(self, url):
    347         mobj = re.match(self._VALID_URL, url)
    348         playlist_id = mobj.group('id')
    349         playlist_kind = mobj.group('kind')
    350 
    351         webpage = self._download_webpage(url, playlist_id)
    352 
    353         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    354         index = qs.get('index', [None])[0]
    355 
    356         if index:
    357             video_id = self._search_regex(
    358                 r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>',
    359                 webpage, 'video id', default=None, group='id')
    360             if video_id:
    361                 return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())
    362 
    363         playlists = self._extract_json(webpage, playlist_id)['default']['%ss' % playlist_kind]
    364 
    365         playlist = (list(playlists.values())[0]
    366                     if playlist_kind == 'playlist' else playlists[playlist_id])
    367 
    368         entries = [
    369             self.url_result('vevo:%s' % src, VevoIE.ie_key())
    370             for src in playlist['isrcs']]
    371 
    372         return self.playlist_result(
    373             entries, playlist.get('playlistId') or playlist_id,
    374             playlist.get('name'), playlist.get('description'))