[rtl2] Fix extraction for test_RTL2_1
author: Yen Chi Hsuan <yan12125@gmail.com>
Fri, 21 Aug 2015 05:20:32 +0000 (13:20 +0800)
committer: Yen Chi Hsuan <yan12125@gmail.com>
Fri, 21 Aug 2015 05:20:51 +0000 (13:20 +0800)
youtube_dl/extractor/rtl2.py

index 9e0c6890eee800f890b070c2236a72a175e8d327..276612fc726a25f141ac504dec2ca4947e767f71 100644 (file)
@@ -1,6 +1,7 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
+import re
 from .common import InfoExtractor
 
 
@@ -28,6 +29,10 @@ class RTL2IE(InfoExtractor):
             'title': 'Anna erwischt Alex!',
             'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
         },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
@@ -38,10 +43,17 @@ class RTL2IE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        vico_id = self._html_search_regex(
-            r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
-        vivi_id = self._html_search_regex(
-            r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
+        mobj = re.search(
+            r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
+            webpage)
+        if mobj:
+            vico_id = mobj.group('vico_id')
+            vivi_id = mobj.group('vivi_id')
+        else:
+            vico_id = self._html_search_regex(
+                r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
+            vivi_id = self._html_search_regex(
+                r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
         info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
 
         info = self._download_json(info_url, video_id)