youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

dispeak.py (5080B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 
      5 from .common import InfoExtractor
      6 from ..utils import (
      7     int_or_none,
      8     parse_duration,
      9     remove_end,
     10     xpath_element,
     11     xpath_text,
     12 )
     13 
     14 
     15 class DigitallySpeakingIE(InfoExtractor):
     16     _VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
     17 
     18     _TESTS = [{
     19         # From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
     20         'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml',
     21         'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
     22         'info_dict': {
     23             'id': '840376_BQRC',
     24             'ext': 'mp4',
     25             'title': 'Tenacious Design and The Interface of \'Destiny\'',
     26         },
     27     }, {
     28         # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
     29         'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
     30         'only_matching': True,
     31     }, {
     32         # From http://www.gdcvault.com/play/1013700/Advanced-Material
     33         'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
     34         'only_matching': True,
     35     }, {
     36         # From https://gdcvault.com/play/1016624, empty speakerVideo
     37         'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
     38         'info_dict': {
     39             'id': '201210-822101_1349794556671DDDD',
     40             'ext': 'flv',
     41             'title': 'Pre-launch - Preparing to Take the Plunge',
     42         },
     43     }, {
     44         # From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
     45         'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
     46         'only_matching': True,
     47     }]
     48 
     49     def _parse_mp4(self, metadata):
     50         video_formats = []
     51         video_root = None
     52 
     53         mp4_video = xpath_text(metadata, './mp4video', default=None)
     54         if mp4_video is not None:
     55             mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video)
     56             video_root = mobj.group('root')
     57         if video_root is None:
     58             http_host = xpath_text(metadata, 'httpHost', default=None)
     59             if http_host:
     60                 video_root = 'http://%s/' % http_host
     61         if video_root is None:
     62             # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js
     63             # Works for GPUTechConf, too
     64             video_root = 'http://s3-2u.digitallyspeaking.com/'
     65 
     66         formats = metadata.findall('./MBRVideos/MBRVideo')
     67         if not formats:
     68             return None
     69         for a_format in formats:
     70             stream_name = xpath_text(a_format, 'streamName', fatal=True)
     71             video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path')
     72             url = video_root + video_path
     73             bitrate = xpath_text(a_format, 'bitrate')
     74             tbr = int_or_none(bitrate)
     75             vbr = int_or_none(self._search_regex(
     76                 r'-(\d+)\.mp4', video_path, 'vbr', default=None))
     77             abr = tbr - vbr if tbr and vbr else None
     78             video_formats.append({
     79                 'format_id': bitrate,
     80                 'url': url,
     81                 'tbr': tbr,
     82                 'vbr': vbr,
     83                 'abr': abr,
     84             })
     85         return video_formats
     86 
     87     def _parse_flv(self, metadata):
     88         formats = []
     89         akamai_url = xpath_text(metadata, './akamaiHost', fatal=True)
     90         audios = metadata.findall('./audios/audio')
     91         for audio in audios:
     92             formats.append({
     93                 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
     94                 'play_path': remove_end(audio.get('url'), '.flv'),
     95                 'ext': 'flv',
     96                 'vcodec': 'none',
     97                 'format_id': audio.get('code'),
     98             })
     99         for video_key, format_id, preference in (
    100                 ('slide', 'slides', -2), ('speaker', 'speaker', -1)):
    101             video_path = xpath_text(metadata, './%sVideo' % video_key)
    102             if not video_path:
    103                 continue
    104             formats.append({
    105                 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
    106                 'play_path': remove_end(video_path, '.flv'),
    107                 'ext': 'flv',
    108                 'format_note': '%s video' % video_key,
    109                 'quality': preference,
    110                 'preference': preference,
    111                 'format_id': format_id,
    112             })
    113         return formats
    114 
    115     def _real_extract(self, url):
    116         video_id = self._match_id(url)
    117 
    118         xml_description = self._download_xml(url, video_id)
    119         metadata = xpath_element(xml_description, 'metadata')
    120 
    121         video_formats = self._parse_mp4(metadata)
    122         if video_formats is None:
    123             video_formats = self._parse_flv(metadata)
    124 
    125         return {
    126             'id': video_id,
    127             'formats': video_formats,
    128             'title': xpath_text(metadata, 'title', fatal=True),
    129             'duration': parse_duration(xpath_text(metadata, 'endTime')),
    130             'creator': xpath_text(metadata, 'speaker'),
    131         }