youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

kinja.py (8568B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..compat import (
      8     compat_str,
      9     compat_urllib_parse_unquote,
     10 )
     11 from ..utils import (
     12     int_or_none,
     13     parse_iso8601,
     14     strip_or_none,
     15     try_get,
     16     unescapeHTML,
     17     urljoin,
     18 )
     19 
     20 
     21 class KinjaEmbedIE(InfoExtractor):
     22     IENAME = 'kinja:embed'
     23     _DOMAIN_REGEX = r'''(?:[^.]+\.)?
     24         (?:
     25             avclub|
     26             clickhole|
     27             deadspin|
     28             gizmodo|
     29             jalopnik|
     30             jezebel|
     31             kinja|
     32             kotaku|
     33             lifehacker|
     34             splinternews|
     35             the(?:inventory|onion|root|takeout)
     36         )\.com'''
     37     _COMMON_REGEX = r'''/
     38         (?:
     39             ajax/inset|
     40             embed/video
     41         )/iframe\?.*?\bid='''
     42     _VALID_URL = r'''(?x)https?://%s%s
     43         (?P<type>
     44             fb|
     45             imgur|
     46             instagram|
     47             jwp(?:layer)?-video|
     48             kinjavideo|
     49             mcp|
     50             megaphone|
     51             ooyala|
     52             soundcloud(?:-playlist)?|
     53             tumblr-post|
     54             twitch-stream|
     55             twitter|
     56             ustream-channel|
     57             vimeo|
     58             vine|
     59             youtube-(?:list|video)
     60         )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
     61     _TESTS = [{
     62         'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
     63         'only_matching': True,
     64     }, {
     65         'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
     66         'only_matching': True,
     67     }, {
     68         'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
     69         'only_matching': True,
     70     }, {
     71         'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
     72         'only_matching': True,
     73     }, {
     74         'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
     75         'only_matching': True,
     76     }, {
     77         'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
     78         'only_matching': True,
     79     }, {
     80         'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
     81         'only_matching': True,
     82     }, {
     83         'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
     84         'only_matching': True,
     85     }, {
     86         'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
     87         'only_matching': True,
     88     }, {
     89         'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
     90         'only_matching': True,
     91     }, {
     92         'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
     93         'only_matching': True,
     94     }, {
     95         'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
     96         'only_matching': True,
     97     }, {
     98         'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
     99         'only_matching': True,
    100     }, {
    101         'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
    102         'only_matching': True,
    103     }]
    104     _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
    105     _PROVIDER_MAP = {
    106         'fb': ('facebook.com/video.php?v=', 'Facebook'),
    107         'imgur': ('imgur.com/', 'Imgur'),
    108         'instagram': ('instagram.com/p/', 'Instagram'),
    109         'jwplayer-video': _JWPLATFORM_PROVIDER,
    110         'jwp-video': _JWPLATFORM_PROVIDER,
    111         'megaphone': ('player.megaphone.fm/', 'Generic'),
    112         'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
    113         'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
    114         'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
    115         'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
    116         'twitch-stream': ('twitch.tv/', 'TwitchStream'),
    117         'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
    118         'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
    119         'vimeo': ('vimeo.com/', 'Vimeo'),
    120         'vine': ('vine.co/v/', 'Vine'),
    121         'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
    122         'youtube-video': ('youtube.com/embed/', 'Youtube'),
    123     }
    124 
    125     @staticmethod
    126     def _extract_urls(webpage, url):
    127         return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
    128             r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
    129             webpage)]
    130 
    131     def _real_extract(self, url):
    132         video_type, video_id = re.match(self._VALID_URL, url).groups()
    133 
    134         provider = self._PROVIDER_MAP.get(video_type)
    135         if provider:
    136             video_id = compat_urllib_parse_unquote(video_id)
    137             if video_type == 'tumblr-post':
    138                 video_id, blog = video_id.split('-', 1)
    139                 result_url = provider[0] % (blog, video_id)
    140             elif video_type == 'youtube-list':
    141                 video_id, playlist_id = video_id.split('/')
    142                 result_url = provider[0] % (video_id, playlist_id)
    143             else:
    144                 if video_type == 'ooyala':
    145                     video_id = video_id.split('/')[0]
    146                 result_url = provider[0] + video_id
    147             return self.url_result('http://' + result_url, provider[1])
    148 
    149         if video_type == 'kinjavideo':
    150             data = self._download_json(
    151                 'https://kinja.com/api/core/video/views/videoById',
    152                 video_id, query={'videoId': video_id})['data']
    153             title = data['title']
    154 
    155             formats = []
    156             for k in ('signedPlaylist', 'streaming'):
    157                 m3u8_url = data.get(k + 'Url')
    158                 if m3u8_url:
    159                     formats.extend(self._extract_m3u8_formats(
    160                         m3u8_url, video_id, 'mp4', 'm3u8_native',
    161                         m3u8_id='hls', fatal=False))
    162             self._sort_formats(formats)
    163 
    164             thumbnail = None
    165             poster = data.get('poster') or {}
    166             poster_id = poster.get('id')
    167             if poster_id:
    168                 thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg')
    169 
    170             return {
    171                 'id': video_id,
    172                 'title': title,
    173                 'description': strip_or_none(data.get('description')),
    174                 'formats': formats,
    175                 'tags': data.get('tags'),
    176                 'timestamp': int_or_none(try_get(
    177                     data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
    178                 'thumbnail': thumbnail,
    179                 'uploader': data.get('network'),
    180             }
    181         else:
    182             video_data = self._download_json(
    183                 'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
    184                 video_id)['videoMetadata']
    185             iptc = video_data['photoVideoMetadataIPTC']
    186             title = iptc['title']['en']
    187             fmg = video_data.get('photoVideoMetadata_fmg') or {}
    188             tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
    189             data = self._download_json(
    190                 tvss_domain + '/api/v3/video-auth/url-signature-tokens',
    191                 video_id, query={'mcpids': video_id})['data'][0]
    192             formats = []
    193 
    194             rendition_url = data.get('renditionUrl')
    195             if rendition_url:
    196                 formats = self._extract_m3u8_formats(
    197                     rendition_url, video_id, 'mp4',
    198                     'm3u8_native', m3u8_id='hls', fatal=False)
    199 
    200             fallback_rendition_url = data.get('fallbackRenditionUrl')
    201             if fallback_rendition_url:
    202                 formats.append({
    203                     'format_id': 'fallback',
    204                     'tbr': int_or_none(self._search_regex(
    205                         r'_(\d+)\.mp4', fallback_rendition_url,
    206                         'bitrate', default=None)),
    207                     'url': fallback_rendition_url,
    208                 })
    209 
    210             self._sort_formats(formats)
    211 
    212             return {
    213                 'id': video_id,
    214                 'title': title,
    215                 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
    216                 'uploader': fmg.get('network'),
    217                 'duration': int_or_none(iptc.get('fileDuration')),
    218                 'formats': formats,
    219                 'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
    220                 'timestamp': parse_iso8601(iptc.get('dateReleased')),
    221             }