youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

disney.py (7044B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import (
      8     int_or_none,
      9     unified_strdate,
     10     compat_str,
     11     determine_ext,
     12     ExtractorError,
     13     update_url_query,
     14 )
     15 
     16 
     17 class DisneyIE(InfoExtractor):
     18     _VALID_URL = r'''(?x)
     19         https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr|channel\.de)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
     20     _TESTS = [{
     21         # Disney.EmbedVideo
     22         'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
     23         'info_dict': {
     24             'id': '545ed1857afee5a0ec239977',
     25             'ext': 'mp4',
     26             'title': 'Moana - Trailer',
     27             'description': 'A fun adventure for the entire Family!  Bring home Moana on Digital HD Feb 21 & Blu-ray March 7',
     28             'upload_date': '20170112',
     29         },
     30         'params': {
     31             # m3u8 download
     32             'skip_download': True,
     33         }
     34     }, {
     35         # Grill.burger
     36         'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette',
     37         'info_dict': {
     38             'id': '5454e9f4e9804a552e3524c8',
     39             'ext': 'mp4',
     40             'title': '"Intro" Featurette: Rogue One: A Star Wars Story',
     41             'upload_date': '20170104',
     42             'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.',
     43         },
     44         'params': {
     45             # m3u8 download
     46             'skip_download': True,
     47         }
     48     }, {
     49         'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2',
     50         'only_matching': True,
     51     }, {
     52         'url': 'http://video.en.disneyme.com/watch/future-worm/robo-carp-2001-544b66002aa7353cdd3f5114',
     53         'only_matching': True,
     54     }, {
     55         'url': 'http://video.disneyturkiye.com.tr/izle/7c-7-cuceler/kimin-sesi-zaten-5456f3d015f6b36c8afdd0e2',
     56         'only_matching': True,
     57     }, {
     58         'url': 'http://disneyjunior.disney.com/embed/546a4798ddba3d1612e4005d',
     59         'only_matching': True,
     60     }, {
     61         'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097',
     62         'only_matching': True,
     63     }, {
     64         'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677',
     65         'only_matching': True,
     66     }, {
     67         'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1',
     68         'only_matching': True,
     69     }, {
     70         'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
     71         'only_matching': True,
     72     }, {
     73         'url': 'http://disneychannel.de/sehen/soy-luna-folge-118-5518518987ba27f3cc729268',
     74         'only_matching': True,
     75     }, {
     76         'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
     77         'only_matching': True,
     78     }]
     79 
     80     def _real_extract(self, url):
     81         domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
     82         if not video_id:
     83             webpage = self._download_webpage(url, display_id)
     84             grill = re.sub(r'"\s*\+\s*"', '', self._search_regex(
     85                 r'Grill\.burger\s*=\s*({.+})\s*:',
     86                 webpage, 'grill data'))
     87             page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video')
     88             video_data = page_data['data'][0]
     89         else:
     90             webpage = self._download_webpage(
     91                 'http://%s/embed/%s' % (domain, video_id), video_id)
     92             page_data = self._parse_json(self._search_regex(
     93                 r'Disney\.EmbedVideo\s*=\s*({.+});',
     94                 webpage, 'embed data'), video_id)
     95             video_data = page_data['video']
     96 
     97         for external in video_data.get('externals', []):
     98             if external.get('source') == 'vevo':
     99                 return self.url_result('vevo:' + external['data_id'], 'Vevo')
    100 
    101         video_id = video_data['id']
    102         title = video_data['title']
    103 
    104         formats = []
    105         for flavor in video_data.get('flavors', []):
    106             flavor_format = flavor.get('format')
    107             flavor_url = flavor.get('url')
    108             if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access':
    109                 continue
    110             tbr = int_or_none(flavor.get('bitrate'))
    111             if tbr == 99999:
    112                 # wrong ks(Kaltura Signature) causes 404 Error
    113                 flavor_url = update_url_query(flavor_url, {'ks': ''})
    114                 m3u8_formats = self._extract_m3u8_formats(
    115                     flavor_url, video_id, 'mp4',
    116                     m3u8_id=flavor_format, fatal=False)
    117                 for f in m3u8_formats:
    118                     # Apple FairPlay
    119                     if '/fpshls/' in f['url']:
    120                         continue
    121                     formats.append(f)
    122                 continue
    123             format_id = []
    124             if flavor_format:
    125                 format_id.append(flavor_format)
    126             if tbr:
    127                 format_id.append(compat_str(tbr))
    128             ext = determine_ext(flavor_url)
    129             if flavor_format == 'applehttp' or ext == 'm3u8':
    130                 ext = 'mp4'
    131             width = int_or_none(flavor.get('width'))
    132             height = int_or_none(flavor.get('height'))
    133             formats.append({
    134                 'format_id': '-'.join(format_id),
    135                 'url': flavor_url,
    136                 'width': width,
    137                 'height': height,
    138                 'tbr': tbr,
    139                 'ext': ext,
    140                 'vcodec': 'none' if (width == 0 and height == 0) else None,
    141             })
    142         if not formats and video_data.get('expired'):
    143             raise ExtractorError(
    144                 '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
    145                 expected=True)
    146         self._sort_formats(formats)
    147 
    148         subtitles = {}
    149         for caption in video_data.get('captions', []):
    150             caption_url = caption.get('url')
    151             caption_format = caption.get('format')
    152             if not caption_url or caption_format.startswith('unknown'):
    153                 continue
    154             subtitles.setdefault(caption.get('language', 'en'), []).append({
    155                 'url': caption_url,
    156                 'ext': {
    157                     'webvtt': 'vtt',
    158                 }.get(caption_format, caption_format),
    159             })
    160 
    161         return {
    162             'id': video_id,
    163             'title': title,
    164             'description': video_data.get('description') or video_data.get('short_desc'),
    165             'thumbnail': video_data.get('thumb') or video_data.get('thumb_secure'),
    166             'duration': int_or_none(video_data.get('duration_sec')),
    167             'upload_date': unified_strdate(video_data.get('publish_date')),
    168             'formats': formats,
    169             'subtitles': subtitles,
    170         }