[rtve] Extract subtitles
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 23 Feb 2015 20:52:07 +0000 (21:52 +0100)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 23 Feb 2015 22:04:07 +0000 (23:04 +0100)
test/test_subtitles.py
youtube_dl/extractor/rtve.py

index 7f93f0a75f16961eacc06ff6b444b4ef855d2625..3f2d8a2ba74e6b4f04d4159a64deb1f69f9d105b 100644 (file)
@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
     RaiIE,
     VikiIE,
     ThePlatformIE,
+    RTVEALaCartaIE,
 )
 
 
@@ -305,5 +306,19 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
 
 
+class TestRtveSubtitles(BaseTestSubtitles):
+    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
+    IE = RTVEALaCartaIE
+
+    def test_allsubtitles(self):
+        print('Skipping, only available from Spain')
+        return
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['es']))
+        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
+
+
 if __name__ == '__main__':
     unittest.main()
index e60f85b5b4842d90b49aeec9aa87da8def92d4f9..27cd34b7db746d8017c77084b35becda280f0fd6 100644 (file)
@@ -102,14 +102,26 @@ class RTVEALaCartaIE(InfoExtractor):
             video_url = compat_urlparse.urljoin(
                 'http://mvod1.akcdn.rtve.es/', video_path)
 
+        subtitles = None
+        if info.get('sbtFile') is not None:
+            subtitles = self.extract_subtitles(video_id, info['sbtFile'])
+
         return {
             'id': video_id,
             'title': info['title'],
             'url': video_url,
             'thumbnail': info.get('image'),
             'page_url': url,
+            'subtitles': subtitles,
         }
 
+    def _get_subtitles(self, video_id, sub_file):
+        subs = self._download_json(
+            sub_file + '.json', video_id,
+            'Downloading subtitles info')['page']['items']
+        return dict((s['lang'], [{'ext': 'vtt', 'url': s['src']}])
+            for s in subs)
+
 
 class RTVELiveIE(InfoExtractor):
     IE_NAME = 'rtve.es:live'