[tv2dk:bornholm:play] Add extractor (closes #23291)
author Sergey M․ <dstftw@gmail.com>
Sun, 15 Dec 2019 17:08:18 +0000 (00:08 +0700)
committer Sergey M․ <dstftw@gmail.com>
Sun, 15 Dec 2019 17:08:18 +0000 (00:08 +0700)
youtube_dl/extractor/extractors.py
youtube_dl/extractor/tv2dk.py

index fd93730fae1ecb18363adb46b8952aaf2e010753..376d07727f6304dce175b05924f5d532a7165a00 100644 (file)
@@ -1168,7 +1168,10 @@ from .tv2 import (
     TV2ArticleIE,
     KatsomoIE,
 )
-from .tv2dk import TV2DKIE
+from .tv2dk import (
+    TV2DKIE,
+    TV2DKBornholmPlayIE,
+)
 from .tv2hu import TV2HuIE
 from .tv4 import TV4IE
 from .tv5mondeplus import TV5MondePlusIE
index eb39424df6af3366d7b2433d43947bdb5dc09151..611fdc0c6c7002c1669200c7ace75bf498a85c6c 100644 (file)
@@ -1,10 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .common import InfoExtractor
-from ..utils import extract_attributes
+from ..utils import (
+    determine_ext,
+    extract_attributes,
+    js_to_json,
+    url_or_none,
+)
 
 
 class TV2DKIE(InfoExtractor):
@@ -80,3 +86,69 @@ class TV2DKIE(InfoExtractor):
                 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
                 video_id=kaltura_id))
         return self.playlist_result(entries)
+
+
+class TV2DKBornholmPlayIE(InfoExtractor):  # extractor for video pages on play.tv2bornholm.dk
+    _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P<id>\d+)'  # `id` group: digits of the `id=` query parameter
+    _TEST = {
+        'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021',
+        'info_dict': {
+            'id': '781021',
+            'ext': 'mp4',
+            'title': '12Nyheder-27.11.19',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):  # returns a dict with the video's id, title and formats
+        video_id = self._match_id(url)  # presumably the `id` group of _VALID_URL -- confirm in InfoExtractor
+
+        video = self._download_json(  # only the 'd' member of the JSON response is kept
+            'http://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id,
+            data=json.dumps({  # request payload, serialized to JSON
+                'playlist_id': video_id,
+                'serienavn': '',
+            }).encode(), headers={  # payload is passed as bytes
+                'X-Requested-With': 'XMLHttpRequest',
+                'Content-Type': 'application/json; charset=UTF-8',
+            })['d']  # searched as text by the two regexes below
+
+        # TODO: generalize flowplayer
+        title = self._search_regex(  # quoted value that follows `title:` in the returned text
+            r'title\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', video, 'title',
+            group='value')
+        sources = self._parse_json(self._search_regex(  # first `sources: [...]` array followed by a comma
+            r'(?s)sources:\s*(\[.+?\]),', video, 'sources'),
+            video_id, js_to_json)  # js_to_json is handed to _parse_json, presumably as a pre-parse transform
+
+        formats = []
+        srcs = set()  # source URLs already handled, used to skip duplicates
+        for source in sources:
+            src = url_or_none(source.get('src'))
+            if not src:  # entry has no usable URL
+                continue
+            if src in srcs:  # same URL listed more than once
+                continue
+            srcs.add(src)
+            ext = determine_ext(src)
+            src_type = source.get('type')
+            if src_type == 'application/x-mpegurl' or ext == 'm3u8':  # HLS, by declared type or file extension
+                formats.extend(self._extract_m3u8_formats(
+                    src, video_id, ext='mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))  # NOTE(review): fatal=False presumably skips a failed manifest -- confirm
+            elif src_type == 'application/dash+xml' or ext == 'mpd':  # DASH, by declared type or file extension
+                formats.extend(self._extract_mpd_formats(
+                    src, video_id, mpd_id='dash', fatal=False))
+            else:  # anything else is added as a plain URL format
+                formats.append({
+                    'url': src,
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+        }