youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

viewlift.py (9992B)


      1 from __future__ import unicode_literals
      2 
      3 import json
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..compat import compat_HTTPError
      8 from ..utils import (
      9     ExtractorError,
     10     int_or_none,
     11     parse_age_limit,
     12 )
     13 
     14 
     15 class ViewLiftBaseIE(InfoExtractor):
     16     _API_BASE = 'https://prod-api.viewlift.com/'
     17     _DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
     18     _SITE_MAP = {
     19         'ftfnext': 'lax',
     20         'funnyforfree': 'snagfilms',
     21         'hoichoi': 'hoichoitv',
     22         'kiddovid': 'snagfilms',
     23         'laxsportsnetwork': 'lax',
     24         'legapallacanestro': 'lnp',
     25         'marquee': 'marquee-tv',
     26         'monumentalsportsnetwork': 'monumental-network',
     27         'moviespree': 'bingeflix',
     28         'pflmma': 'pfl',
     29         'snagxtreme': 'snagfilms',
     30         'theidentitytb': 'tampabay',
     31         'vayafilm': 'snagfilms',
     32     }
     33     _TOKENS = {}
     34 
     35     def _call_api(self, site, path, video_id, query):
     36         token = self._TOKENS.get(site)
     37         if not token:
     38             token_query = {'site': site}
     39             email, password = self._get_login_info(netrc_machine=site)
     40             if email:
     41                 resp = self._download_json(
     42                     self._API_BASE + 'identity/signin', video_id,
     43                     'Logging in', query=token_query, data=json.dumps({
     44                         'email': email,
     45                         'password': password,
     46                     }).encode())
     47             else:
     48                 resp = self._download_json(
     49                     self._API_BASE + 'identity/anonymous-token', video_id,
     50                     'Downloading authorization token', query=token_query)
     51             self._TOKENS[site] = token = resp['authorizationToken']
     52         return self._download_json(
     53             self._API_BASE + path, video_id,
     54             headers={'Authorization': token}, query=query)
     55 
     56 
     57 class ViewLiftEmbedIE(ViewLiftBaseIE):
     58     IE_NAME = 'viewlift:embed'
     59     _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?P<domain>%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
     60     _TESTS = [{
     61         'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
     62         'md5': '2924e9215c6eff7a55ed35b72276bd93',
     63         'info_dict': {
     64             'id': '74849a00-85a9-11e1-9660-123139220831',
     65             'ext': 'mp4',
     66             'title': '#whilewewatch',
     67             'description': 'md5:b542bef32a6f657dadd0df06e26fb0c8',
     68             'timestamp': 1334350096,
     69             'upload_date': '20120413',
     70         }
     71     }, {
     72         # invalid labels, 360p is better that 480p
     73         'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036',
     74         'md5': '882fca19b9eb27ef865efeeaed376a48',
     75         'info_dict': {
     76             'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
     77             'ext': 'mp4',
     78             'title': 'Life in Limbo',
     79         },
     80         'skip': 'The video does not exist',
     81     }, {
     82         'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
     83         'only_matching': True,
     84     }]
     85 
     86     @staticmethod
     87     def _extract_url(webpage):
     88         mobj = re.search(
     89             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
     90             webpage)
     91         if mobj:
     92             return mobj.group('url')
     93 
     94     def _real_extract(self, url):
     95         domain, film_id = re.match(self._VALID_URL, url).groups()
     96         site = domain.split('.')[-2]
     97         if site in self._SITE_MAP:
     98             site = self._SITE_MAP[site]
     99         try:
    100             content_data = self._call_api(
    101                 site, 'entitlement/video/status', film_id, {
    102                     'id': film_id
    103                 })['video']
    104         except ExtractorError as e:
    105             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
    106                 error_message = self._parse_json(e.cause.read().decode(), film_id).get('errorMessage')
    107                 if error_message == 'User does not have a valid subscription or has not purchased this content.':
    108                     self.raise_login_required()
    109                 raise ExtractorError(error_message, expected=True)
    110             raise
    111         gist = content_data['gist']
    112         title = gist['title']
    113         video_assets = content_data['streamingInfo']['videoAssets']
    114 
    115         formats = []
    116         mpeg_video_assets = video_assets.get('mpeg') or []
    117         for video_asset in mpeg_video_assets:
    118             video_asset_url = video_asset.get('url')
    119             if not video_asset:
    120                 continue
    121             bitrate = int_or_none(video_asset.get('bitrate'))
    122             height = int_or_none(self._search_regex(
    123                 r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
    124                 'height', default=None))
    125             formats.append({
    126                 'url': video_asset_url,
    127                 'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
    128                 'tbr': bitrate,
    129                 'height': height,
    130                 'vcodec': video_asset.get('codec'),
    131             })
    132 
    133         hls_url = video_assets.get('hls')
    134         if hls_url:
    135             formats.extend(self._extract_m3u8_formats(
    136                 hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
    137         self._sort_formats(formats, ('height', 'tbr', 'format_id'))
    138 
    139         info = {
    140             'id': film_id,
    141             'title': title,
    142             'description': gist.get('description'),
    143             'thumbnail': gist.get('videoImageUrl'),
    144             'duration': int_or_none(gist.get('runtime')),
    145             'age_limit': parse_age_limit(content_data.get('parentalRating')),
    146             'timestamp': int_or_none(gist.get('publishDate'), 1000),
    147             'formats': formats,
    148         }
    149         for k in ('categories', 'tags'):
    150             info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
    151         return info
    152 
    153 
    154 class ViewLiftIE(ViewLiftBaseIE):
    155     IE_NAME = 'viewlift'
    156     _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?P<path>(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P<id>[^?#]+))' % ViewLiftBaseIE._DOMAINS_REGEX
    157     _TESTS = [{
    158         'url': 'http://www.snagfilms.com/films/title/lost_for_life',
    159         'md5': '19844f897b35af219773fd63bdec2942',
    160         'info_dict': {
    161             'id': '0000014c-de2f-d5d6-abcf-ffef58af0017',
    162             'display_id': 'lost_for_life',
    163             'ext': 'mp4',
    164             'title': 'Lost for Life',
    165             'description': 'md5:ea10b5a50405ae1f7b5269a6ec594102',
    166             'thumbnail': r're:^https?://.*\.jpg',
    167             'duration': 4489,
    168             'categories': 'mincount:3',
    169             'age_limit': 14,
    170             'upload_date': '20150421',
    171             'timestamp': 1429656820,
    172         }
    173     }, {
    174         'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
    175         'md5': 'e6292e5b837642bbda82d7f8bf3fbdfd',
    176         'info_dict': {
    177             'id': '00000145-d75c-d96e-a9c7-ff5c67b20000',
    178             'display_id': 'the_world_cut_project/india',
    179             'ext': 'mp4',
    180             'title': 'India',
    181             'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
    182             'thumbnail': r're:^https?://.*\.jpg',
    183             'duration': 979,
    184             'timestamp': 1399478279,
    185             'upload_date': '20140507',
    186         }
    187     }, {
    188         'url': 'http://main.snagfilms.com/augie_alone/s_2_ep_12_love',
    189         'info_dict': {
    190             'id': '00000148-7b53-de26-a9fb-fbf306f70020',
    191             'display_id': 'augie_alone/s_2_ep_12_love',
    192             'ext': 'mp4',
    193             'title': 'S. 2 Ep. 12 - Love',
    194             'description': 'Augie finds love.',
    195             'thumbnail': r're:^https?://.*\.jpg',
    196             'duration': 107,
    197             'upload_date': '20141012',
    198             'timestamp': 1413129540,
    199             'age_limit': 17,
    200         },
    201         'params': {
    202             'skip_download': True,
    203         },
    204     }, {
    205         'url': 'http://main.snagfilms.com/films/title/the_freebie',
    206         'only_matching': True,
    207     }, {
    208         # Film is not playable in your area.
    209         'url': 'http://www.snagfilms.com/films/title/inside_mecca',
    210         'only_matching': True,
    211     }, {
    212         # Film is not available.
    213         'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
    214         'only_matching': True,
    215     }, {
    216         'url': 'http://www.winnersview.com/videos/the-good-son',
    217         'only_matching': True,
    218     }, {
    219         # Was once Kaltura embed
    220         'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
    221         'only_matching': True,
    222     }, {
    223         'url': 'https://www.marquee.tv/watch/sadlerswells-sacredmonsters',
    224         'only_matching': True,
    225     }]
    226 
    227     @classmethod
    228     def suitable(cls, url):
    229         return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url)
    230 
    231     def _real_extract(self, url):
    232         domain, path, display_id = re.match(self._VALID_URL, url).groups()
    233         site = domain.split('.')[-2]
    234         if site in self._SITE_MAP:
    235             site = self._SITE_MAP[site]
    236         modules = self._call_api(
    237             site, 'content/pages', display_id, {
    238                 'includeContent': 'true',
    239                 'moduleOffset': 1,
    240                 'path': path,
    241                 'site': site,
    242             })['modules']
    243         film_id = next(m['contentData'][0]['gist']['id'] for m in modules if m.get('moduleType') == 'VideoDetailModule')
    244         return {
    245             '_type': 'url_transparent',
    246             'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
    247             'id': film_id,
    248             'display_id': display_id,
    249             'ie_key': 'ViewLiftEmbed',
    250         }