[mtvde] Simplify (Closes #6673)
authorSergey M․ <dstftw@gmail.com>
Fri, 28 Aug 2015 15:24:54 +0000 (21:24 +0600)
committerSergey M․ <dstftw@gmail.com>
Fri, 28 Aug 2015 15:24:54 +0000 (21:24 +0600)
youtube_dl/extractor/mtv.py

index 15df62649ff3d1366ff7fefd4de396f2c263d648..f2d0638bf4d4a75fc29deb47a0a2367d9e315b70 100644 (file)
@@ -174,8 +174,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if self._LANG:
             info_url += 'lang=%s&' % self._LANG
         info_url += data
+        return self._get_videos_info_from_url(info_url, video_id)
+
+    def _get_videos_info_from_url(self, url, video_id):
         idoc = self._download_xml(
-            info_url, video_id,
+            url, video_id,
             'Downloading info', transform_source=fix_xml_ampersands)
         return self.playlist_result(
             [self._get_video_info(item) for item in idoc.findall('.//item')])
@@ -289,39 +292,35 @@ class MTVIggyIE(MTVServicesInfoExtractor):
     }
     _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'
 
+
 class MTVDEIE(MTVServicesInfoExtractor):
     IE_NAME = 'mtv.de'
-    _VALID_URL = r'''(?x)^https?://(?:www\.)?mtv\.de(?P<video_path>/artists/.*)'''
-    _TESTS = [
-        {
-            'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
-            'info_dict': {
-                'id': 'a50bc5f0b3aa4b3190aa',
-                'ext': 'mp4',
-                'title': 'cro-traum',
-                'description': 'Cro - Traum',
-            },
+    _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows)/(?:[^/]+/)+(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
+    _TESTS = [{
+        'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
+        'info_dict': {
+            'id': 'music_video-a50bc5f0b3aa4b3190aa',
+            'ext': 'mp4',
+            'title': 'MusicVideo_cro-traum',
+            'description': 'Cro - Traum',
         },
-    ]
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        return self._get_videos_info(url, mobj.group('video_path'))
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        playlist = self._parse_json(
+            self._search_regex(
+                r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
+            video_id)
 
-    def _get_videos_info(self, url, video_path):
-        webpage = self._download_webpage(url, video_path)
-        playlist_js = self._search_regex(r'<script>\s*window.pagePlaylist =(.*?\]);\s*window.trackingParams =', webpage, 'playlist', flags=re.DOTALL)
-        playlist = self._parse_json(playlist_js, video_path)
-        info = None
         for item in playlist:
-            if item['video_path'] == video_path:
-                info = item
-                break
-        if info == None:
-            raise ExtractorError('video not in playlist')
-        mrss_url = info['mrss']
-        idoc = self._download_xml(
-            mrss_url, video_path,
-            'Downloading info', transform_source=fix_xml_ampersands)
-        return self.playlist_result(
-            [self._get_video_info(item) for item in idoc.findall('.//item')])
+            item_id = item.get('id')
+            if item_id and compat_str(item_id) == video_id:
+                return self._get_videos_info_from_url(item['mrss'], video_id)