youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

mdr.py (7070B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..compat import (
      6     compat_str,
      7     compat_urlparse,
      8 )
      9 from ..utils import (
     10     determine_ext,
     11     int_or_none,
     12     parse_duration,
     13     parse_iso8601,
     14     url_or_none,
     15     xpath_text,
     16 )
     17 
     18 
     19 class MDRIE(InfoExtractor):
     20     IE_DESC = 'MDR.DE and KiKA'
     21     _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
     22 
     23     _GEO_COUNTRIES = ['DE']
     24 
     25     _TESTS = [{
     26         # MDR regularly deletes its videos
     27         'url': 'http://www.mdr.de/fakt/video189002.html',
     28         'only_matching': True,
     29     }, {
     30         # audio
     31         'url': 'http://www.mdr.de/kultur/audio1312272_zc-15948bad_zs-86171fdd.html',
     32         'md5': '64c4ee50f0a791deb9479cd7bbe9d2fa',
     33         'info_dict': {
     34             'id': '1312272',
     35             'ext': 'mp3',
     36             'title': 'Feuilleton vom 30. Oktober 2015',
     37             'duration': 250,
     38             'uploader': 'MITTELDEUTSCHER RUNDFUNK',
     39         },
     40         'skip': '404 not found',
     41     }, {
     42         'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
     43         'md5': '4930515e36b06c111213e80d1e4aad0e',
     44         'info_dict': {
     45             'id': '19636',
     46             'ext': 'mp4',
     47             'title': 'Baumhaus vom 30. Oktober 2015',
     48             'duration': 134,
     49             'uploader': 'KIKA',
     50         },
     51         'skip': '404 not found',
     52     }, {
     53         'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
     54         'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
     55         'info_dict': {
     56             'id': '8182',
     57             'ext': 'mp4',
     58             'title': 'Beutolomäus und der geheime Weihnachtswunsch',
     59             'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
     60             'timestamp': 1482541200,
     61             'upload_date': '20161224',
     62             'duration': 4628,
     63             'uploader': 'KIKA',
     64         },
     65     }, {
     66         # audio with alternative playerURL pattern
     67         'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
     68         'info_dict': {
     69             'id': '100',
     70             'ext': 'mp4',
     71             'title': 'Feature: Operation Mindfuck - Robert Anton Wilson',
     72             'duration': 3239,
     73             'uploader': 'MITTELDEUTSCHER RUNDFUNK',
     74         },
     75     }, {
     76         # empty bitrateVideo and bitrateAudio
     77         'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
     78         'info_dict': {
     79             'id': '128372',
     80             'ext': 'mp4',
     81             'title': 'Der kleine Wichtel kehrt zurück',
     82             'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
     83             'duration': 4876,
     84             'timestamp': 1607823300,
     85             'upload_date': '20201213',
     86             'uploader': 'ZDF',
     87         },
     88         'params': {
     89             'skip_download': True,
     90         },
     91     }, {
     92         'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
     93         'only_matching': True,
     94     }, {
     95         'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
     96         'only_matching': True,
     97     }, {
     98         'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
     99         'only_matching': True,
    100     }]
    101 
    102     def _real_extract(self, url):
    103         video_id = self._match_id(url)
    104 
    105         webpage = self._download_webpage(url, video_id)
    106 
    107         data_url = self._search_regex(
    108             r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+?-avCustom\.xml)\1',
    109             webpage, 'data url', group='url').replace(r'\/', '/')
    110 
    111         doc = self._download_xml(
    112             compat_urlparse.urljoin(url, data_url), video_id)
    113 
    114         title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
    115 
    116         type_ = xpath_text(doc, './type', default=None)
    117 
    118         formats = []
    119         processed_urls = []
    120         for asset in doc.findall('./assets/asset'):
    121             for source in (
    122                     'download',
    123                     'progressiveDownload',
    124                     'dynamicHttpStreamingRedirector',
    125                     'adaptiveHttpStreamingRedirector'):
    126                 url_el = asset.find('./%sUrl' % source)
    127                 if url_el is None:
    128                     continue
    129 
    130                 video_url = url_or_none(url_el.text)
    131                 if not video_url or video_url in processed_urls:
    132                     continue
    133 
    134                 processed_urls.append(video_url)
    135 
    136                 ext = determine_ext(video_url)
    137                 if ext == 'm3u8':
    138                     formats.extend(self._extract_m3u8_formats(
    139                         video_url, video_id, 'mp4', entry_protocol='m3u8_native',
    140                         preference=0, m3u8_id='HLS', fatal=False))
    141                 elif ext == 'f4m':
    142                     formats.extend(self._extract_f4m_formats(
    143                         video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
    144                         preference=0, f4m_id='HDS', fatal=False))
    145                 else:
    146                     media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
    147                     vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
    148                     abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
    149                     filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
    150 
    151                     format_id = [media_type]
    152                     if vbr or abr:
    153                         format_id.append(compat_str(vbr or abr))
    154 
    155                     f = {
    156                         'url': video_url,
    157                         'format_id': '-'.join(format_id),
    158                         'filesize': filesize,
    159                         'abr': abr,
    160                         'vbr': vbr,
    161                     }
    162 
    163                     if vbr:
    164                         f.update({
    165                             'width': int_or_none(xpath_text(asset, './frameWidth', 'width')),
    166                             'height': int_or_none(xpath_text(asset, './frameHeight', 'height')),
    167                         })
    168 
    169                     if type_ == 'audio':
    170                         f['vcodec'] = 'none'
    171 
    172                     formats.append(f)
    173 
    174         self._sort_formats(formats)
    175 
    176         description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
    177         timestamp = parse_iso8601(
    178             xpath_text(
    179                 doc, [
    180                     './broadcast/broadcastDate',
    181                     './broadcast/broadcastStartDate',
    182                     './broadcast/broadcastEndDate'],
    183                 'timestamp', default=None))
    184         duration = parse_duration(xpath_text(doc, './duration', 'duration'))
    185         uploader = xpath_text(doc, './rights', 'uploader')
    186 
    187         return {
    188             'id': video_id,
    189             'title': title,
    190             'description': description,
    191             'timestamp': timestamp,
    192             'duration': duration,
    193             'uploader': uploader,
    194             'formats': formats,
    195         }