[karaoketv] Fix extraction
author: Sergey M․ <dstftw@gmail.com>
Fri, 15 Apr 2016 15:26:54 +0000 (21:26 +0600)
committer: Sergey M․ <dstftw@gmail.com>
Fri, 15 Apr 2016 15:26:54 +0000 (21:26 +0600)
youtube_dl/extractor/karaoketv.py

index dbc83cb6c712a42ddcc1e7bf7632b1d801ffdbff..a6050c4de3e1695ac26bd1a21bab981a52755c21 100644 (file)
@@ -5,12 +5,6 @@ from .common import InfoExtractor
 
 
 class KaraoketvIE(InfoExtractor):
-    '''
-    In api_play.php there's a video-cdn.com <iframe>. The latter plays an
-    unencrypted RTMP stream. However I can't download it with rtmpdump.
-    '''
-    _WORKING = False
-
     _VALID_URL = r'http://www.karaoketv.co.il/[^/]+/(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
@@ -18,20 +12,53 @@ class KaraoketvIE(InfoExtractor):
             'id': '58356',
             'ext': 'flv',
             'title': 'קריוקי של איזון',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
         }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
+        api_page_url = self._search_regex(
+            r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.karaoke\.co\.il/api_play\.php\?.+?)\1',
+            webpage, 'API play URL', group='url')
+
+        api_page = self._download_webpage(api_page_url, video_id)
+        video_cdn_url = self._search_regex(
+            r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.video-cdn\.com/embed/iframe/.+?)\1',
+            api_page, 'video cdn URL', group='url')
+
+        video_cdn = self._download_webpage(video_cdn_url, video_id)
+        play_path = self._parse_json(
+            self._search_regex(
+                r'var\s+options\s*=\s*({.+?});', video_cdn, 'options'),
+            video_id)['clip']['url']
 
-        api_page_url = self._html_search_regex(
-            r'<iframe[^>]+src="(http://www.karaoke.co.il/api_play.php?[^"]+)"',
-            webpage, 'API play URL')
+        settings = self._parse_json(
+            self._search_regex(
+                r'var\s+settings\s*=\s*({.+?});', video_cdn, 'servers', default='{}'),
+            video_id, fatal=False) or {}
+
+        servers = settings.get('servers')
+        if not servers or not isinstance(servers, list):
+            servers = ('wowzail.video-cdn.com:80/vodcdn', )
+
+        formats = [{
+            'url': 'rtmp://%s' % server if not server.startswith('rtmp') else server,
+            'play_path': play_path,
+            'app': 'vodcdn',
+            'page_url': video_cdn_url,
+            'player_url': 'http://www.video-cdn.com/assets/flowplayer/flowplayer.commercial-3.2.18.swf',
+            'rtmp_real_time': True,
+            'ext': 'flv',
+        } for server in servers]
 
         return {
-            '_type': 'url_transparent',
             'id': video_id,
             'title': self._og_search_title(webpage),
-            'url': api_page_url,
+            'formats': formats,
         }