youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

zype.py (5710B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..compat import compat_HTTPError
      8 from ..utils import (
      9     dict_get,
     10     ExtractorError,
     11     int_or_none,
     12     js_to_json,
     13     parse_iso8601,
     14 )
     15 
     16 
     17 class ZypeIE(InfoExtractor):
     18     _ID_RE = r'[\da-fA-F]+'
     19     _COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)='
     20     _VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE))
     21     _TEST = {
     22         'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
     23         'md5': 'eaee31d474c76a955bdaba02a505c595',
     24         'info_dict': {
     25             'id': '5b400b834b32992a310622b9',
     26             'ext': 'mp4',
     27             'title': 'Smoky Barbecue Favorites',
     28             'thumbnail': r're:^https?://.*\.jpe?g',
     29             'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
     30             'timestamp': 1504915200,
     31             'upload_date': '20170909',
     32         },
     33     }
     34 
     35     @staticmethod
     36     def _extract_urls(webpage):
     37         return [
     38             mobj.group('url')
     39             for mobj in re.finditer(
     40                 r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.+?)\1' % (ZypeIE._COMMON_RE % ZypeIE._ID_RE),
     41                 webpage)]
     42 
     43     def _real_extract(self, url):
     44         video_id = self._match_id(url)
     45 
     46         try:
     47             response = self._download_json(re.sub(
     48                 r'\.(?:js|html)\?', '.json?', url), video_id)['response']
     49         except ExtractorError as e:
     50             if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 403):
     51                 raise ExtractorError(self._parse_json(
     52                     e.cause.read().decode(), video_id)['message'], expected=True)
     53             raise
     54 
     55         body = response['body']
     56         video = response['video']
     57         title = video['title']
     58 
     59         if isinstance(body, dict):
     60             formats = []
     61             for output in body.get('outputs', []):
     62                 output_url = output.get('url')
     63                 if not output_url:
     64                     continue
     65                 name = output.get('name')
     66                 if name == 'm3u8':
     67                     formats = self._extract_m3u8_formats(
     68                         output_url, video_id, 'mp4',
     69                         'm3u8_native', m3u8_id='hls', fatal=False)
     70                 else:
     71                     f = {
     72                         'format_id': name,
     73                         'tbr': int_or_none(output.get('bitrate')),
     74                         'url': output_url,
     75                     }
     76                     if name in ('m4a', 'mp3'):
     77                         f['vcodec'] = 'none'
     78                     else:
     79                         f.update({
     80                             'height': int_or_none(output.get('height')),
     81                             'width': int_or_none(output.get('width')),
     82                         })
     83                     formats.append(f)
     84             text_tracks = body.get('subtitles') or []
     85         else:
     86             m3u8_url = self._search_regex(
     87                 r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
     88                 body, 'm3u8 url', group='url', default=None)
     89             if not m3u8_url:
     90                 source = self._search_regex(
     91                     r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, 'source')
     92 
     93                 def get_attr(key):
     94                     return self._search_regex(
     95                         r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key,
     96                         source, key, group='val')
     97 
     98                 if get_attr('integration') == 'verizon-media':
     99                     m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
    100             formats = self._extract_m3u8_formats(
    101                 m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
    102             text_tracks = self._search_regex(
    103                 r'textTracks\s*:\s*(\[[^]]+\])',
    104                 body, 'text tracks', default=None)
    105             if text_tracks:
    106                 text_tracks = self._parse_json(
    107                     text_tracks, video_id, js_to_json, False)
    108         self._sort_formats(formats)
    109 
    110         subtitles = {}
    111         if text_tracks:
    112             for text_track in text_tracks:
    113                 tt_url = dict_get(text_track, ('file', 'src'))
    114                 if not tt_url:
    115                     continue
    116                 subtitles.setdefault(text_track.get('label') or 'English', []).append({
    117                     'url': tt_url,
    118                 })
    119 
    120         thumbnails = []
    121         for thumbnail in video.get('thumbnails', []):
    122             thumbnail_url = thumbnail.get('url')
    123             if not thumbnail_url:
    124                 continue
    125             thumbnails.append({
    126                 'url': thumbnail_url,
    127                 'width': int_or_none(thumbnail.get('width')),
    128                 'height': int_or_none(thumbnail.get('height')),
    129             })
    130 
    131         return {
    132             'id': video_id,
    133             'display_id': video.get('friendly_title'),
    134             'title': title,
    135             'thumbnails': thumbnails,
    136             'description': dict_get(video, ('description', 'ott_description', 'short_description')),
    137             'timestamp': parse_iso8601(video.get('published_at')),
    138             'duration': int_or_none(video.get('duration')),
    139             'view_count': int_or_none(video.get('request_count')),
    140             'average_rating': int_or_none(video.get('rating')),
    141             'season_number': int_or_none(video.get('season')),
    142             'episode_number': int_or_none(video.get('episode')),
    143             'formats': formats,
    144             'subtitles': subtitles,
    145         }