youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

tv2dk.py (5670B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import json
      5 import re
      6 
      7 from .common import InfoExtractor
      8 from ..utils import (
      9     determine_ext,
     10     extract_attributes,
     11     js_to_json,
     12     url_or_none,
     13 )
     14 
     15 
     16 class TV2DKIE(InfoExtractor):
     17     _VALID_URL = r'''(?x)
     18                     https?://
     19                         (?:www\.)?
     20                         (?:
     21                             tvsyd|
     22                             tv2ostjylland|
     23                             tvmidtvest|
     24                             tv2fyn|
     25                             tv2east|
     26                             tv2lorry|
     27                             tv2nord
     28                         )\.dk/
     29                         (:[^/]+/)*
     30                         (?P<id>[^/?\#&]+)
     31                     '''
     32     _TESTS = [{
     33         'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player',
     34         'info_dict': {
     35             'id': '0_52jmwa0p',
     36             'ext': 'mp4',
     37             'title': '19:30 - 28. okt. 2019',
     38             'timestamp': 1572290248,
     39             'upload_date': '20191028',
     40             'uploader_id': 'tvsyd',
     41             'duration': 1347,
     42             'view_count': int,
     43         },
     44         'params': {
     45             'skip_download': True,
     46         },
     47         'add_ie': ['Kaltura'],
     48     }, {
     49         'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi',
     50         'only_matching': True,
     51     }, {
     52         'url': 'https://www.tv2ostjylland.dk/nyheder/28-10-2019/22/2200-nyhederne-mandag-d-28-oktober-2019?autoplay=1#player',
     53         'only_matching': True,
     54     }, {
     55         'url': 'https://www.tvmidtvest.dk/nyheder/27-10-2019/1930/1930-27-okt-2019',
     56         'only_matching': True,
     57     }, {
     58         'url': 'https://www.tv2fyn.dk/artikel/fyn-kan-faa-landets-foerste-fabrik-til-groent-jetbraendstof',
     59         'only_matching': True,
     60     }, {
     61         'url': 'https://www.tv2east.dk/artikel/gods-faar-indleveret-tonsvis-af-aebler-100-kilo-aebler-gaar-til-en-aeblebrandy',
     62         'only_matching': True,
     63     }, {
     64         'url': 'https://www.tv2lorry.dk/koebenhavn/rasmus-paludan-evakueret-til-egen-demonstration#player',
     65         'only_matching': True,
     66     }, {
     67         'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt',
     68         'only_matching': True,
     69     }]
     70 
     71     def _real_extract(self, url):
     72         video_id = self._match_id(url)
     73 
     74         webpage = self._download_webpage(url, video_id)
     75 
     76         entries = []
     77 
     78         def add_entry(partner_id, kaltura_id):
     79             entries.append(self.url_result(
     80                 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
     81                 video_id=kaltura_id))
     82 
     83         for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
     84             video = extract_attributes(video_el)
     85             kaltura_id = video.get('data-entryid')
     86             if not kaltura_id:
     87                 continue
     88             partner_id = video.get('data-partnerid')
     89             if not partner_id:
     90                 continue
     91             add_entry(partner_id, kaltura_id)
     92         if not entries:
     93             kaltura_id = self._search_regex(
     94                 r'entry_id\s*:\s*["\']([0-9a-z_]+)', webpage, 'kaltura id')
     95             partner_id = self._search_regex(
     96                 (r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
     97                 'partner id')
     98             add_entry(partner_id, kaltura_id)
     99         return self.playlist_result(entries)
    100 
    101 
    102 class TV2DKBornholmPlayIE(InfoExtractor):
    103     _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P<id>\d+)'
    104     _TEST = {
    105         'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021',
    106         'info_dict': {
    107             'id': '781021',
    108             'ext': 'mp4',
    109             'title': '12Nyheder-27.11.19',
    110         },
    111         'params': {
    112             'skip_download': True,
    113         },
    114     }
    115 
    116     def _real_extract(self, url):
    117         video_id = self._match_id(url)
    118 
    119         video = self._download_json(
    120             'https://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id,
    121             data=json.dumps({
    122                 'playlist_id': video_id,
    123                 'serienavn': '',
    124             }).encode(), headers={
    125                 'X-Requested-With': 'XMLHttpRequest',
    126                 'Content-Type': 'application/json; charset=UTF-8',
    127             })['d']
    128 
    129         # TODO: generalize flowplayer
    130         title = self._search_regex(
    131             r'title\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', video, 'title',
    132             group='value')
    133         sources = self._parse_json(self._search_regex(
    134             r'(?s)sources:\s*(\[.+?\]),', video, 'sources'),
    135             video_id, js_to_json)
    136 
    137         formats = []
    138         srcs = set()
    139         for source in sources:
    140             src = url_or_none(source.get('src'))
    141             if not src:
    142                 continue
    143             if src in srcs:
    144                 continue
    145             srcs.add(src)
    146             ext = determine_ext(src)
    147             src_type = source.get('type')
    148             if src_type == 'application/x-mpegurl' or ext == 'm3u8':
    149                 formats.extend(self._extract_m3u8_formats(
    150                     src, video_id, ext='mp4', entry_protocol='m3u8_native',
    151                     m3u8_id='hls', fatal=False))
    152             elif src_type == 'application/dash+xml' or ext == 'mpd':
    153                 formats.extend(self._extract_mpd_formats(
    154                     src, video_id, mpd_id='dash', fatal=False))
    155             else:
    156                 formats.append({
    157                     'url': src,
    158                 })
    159         self._sort_formats(formats)
    160 
    161         return {
    162             'id': video_id,
    163             'title': title,
    164             'formats': formats,
    165         }