youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

medialaan.py (4184B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 
      5 from .common import InfoExtractor
      6 from ..utils import (
      7     extract_attributes,
      8     int_or_none,
      9     mimetype2ext,
     10     parse_iso8601,
     11 )
     12 
     13 
     14 class MedialaanIE(InfoExtractor):
     15     _VALID_URL = r'''(?x)
     16                     https?://
     17                         (?:
     18                             (?:embed\.)?mychannels.video/embed/|
     19                             embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
     20                             (?:www\.)?(?:
     21                                 (?:
     22                                     7sur7|
     23                                     demorgen|
     24                                     hln|
     25                                     joe|
     26                                     qmusic
     27                                 )\.be|
     28                                 (?:
     29                                     [abe]d|
     30                                     bndestem|
     31                                     destentor|
     32                                     gelderlander|
     33                                     pzc|
     34                                     tubantia|
     35                                     volkskrant
     36                                 )\.nl
     37                             )/video/(?:[^/]+/)*[^/?&#]+~p
     38                         )
     39                         (?P<id>\d+)
     40                     '''
     41     _TESTS = [{
     42         'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
     43         'info_dict': {
     44             'id': '193993',
     45             'ext': 'mp4',
     46             'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
     47             'timestamp': 1611663540,
     48             'upload_date': '20210126',
     49             'duration': 238,
     50         },
     51         'params': {
     52             'skip_download': True,
     53         },
     54     }, {
     55         'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
     56         'only_matching': True,
     57     }, {
     58         'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
     59         'only_matching': True,
     60     }, {
     61         'url': 'https://embed.mychannels.video/script/production/193993',
     62         'only_matching': True,
     63     }, {
     64         'url': 'https://embed.mychannels.video/production/193993',
     65         'only_matching': True,
     66     }, {
     67         'url': 'https://mychannels.video/embed/193993',
     68         'only_matching': True,
     69     }, {
     70         'url': 'https://embed.mychannels.video/embed/193993',
     71         'only_matching': True,
     72     }]
     73 
     74     @staticmethod
     75     def _extract_urls(webpage):
     76         entries = []
     77         for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
     78             mychannels_id = extract_attributes(element).get('data-mychannels-id')
     79             if mychannels_id:
     80                 entries.append('https://mychannels.video/embed/' + mychannels_id)
     81         return entries
     82 
     83     def _real_extract(self, url):
     84         production_id = self._match_id(url)
     85         production = self._download_json(
     86             'https://embed.mychannels.video/sdk/production/' + production_id,
     87             production_id, query={'options': 'UUUU_default'})['productions'][0]
     88         title = production['title']
     89 
     90         formats = []
     91         for source in (production.get('sources') or []):
     92             src = source.get('src')
     93             if not src:
     94                 continue
     95             ext = mimetype2ext(source.get('type'))
     96             if ext == 'm3u8':
     97                 formats.extend(self._extract_m3u8_formats(
     98                     src, production_id, 'mp4', 'm3u8_native',
     99                     m3u8_id='hls', fatal=False))
    100             else:
    101                 formats.append({
    102                     'ext': ext,
    103                     'url': src,
    104                 })
    105         self._sort_formats(formats)
    106 
    107         return {
    108             'id': production_id,
    109             'title': title,
    110             'formats': formats,
    111             'thumbnail': production.get('posterUrl'),
    112             'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
    113             'duration': int_or_none(production.get('duration')) or None,
    114         }