youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 25ac63ed71bdc2a82842a593db9a150a0b8b7a6e
parent 99209c2916753799e9c68e8d466c5253113f25bc
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date:   Mon, 23 Feb 2015 21:52:07 +0100

[rtve] Extract subtitles

Diffstat:
M test/test_subtitles.py       | 15 +++++++++++++++
M youtube_dl/extractor/rtve.py | 12 ++++++++++++
2 files changed, 27 insertions(+), 0 deletions(-)

diff --git a/test/test_subtitles.py b/test/test_subtitles.py
@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
     RaiIE,
     VikiIE,
     ThePlatformIE,
+    RTVEALaCartaIE,
 )
 
 
@@ -305,5 +306,19 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
         self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
 
 
+class TestRtveSubtitles(BaseTestSubtitles):
+    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
+    IE = RTVEALaCartaIE
+
+    def test_allsubtitles(self):
+        print('Skipping, only available from Spain')
+        return
+        self.DL.params['writesubtitles'] = True
+        self.DL.params['allsubtitles'] = True
+        subtitles = self.getSubtitles()
+        self.assertEqual(set(subtitles.keys()), set(['es']))
+        self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py
@@ -102,14 +102,26 @@ class RTVEALaCartaIE(InfoExtractor):
         video_url = compat_urlparse.urljoin(
             'http://mvod1.akcdn.rtve.es/', video_path)
 
+        subtitles = None
+        if info.get('sbtFile') is not None:
+            subtitles = self.extract_subtitles(video_id, info['sbtFile'])
+
         return {
             'id': video_id,
             'title': info['title'],
             'url': video_url,
             'thumbnail': info.get('image'),
             'page_url': url,
+            'subtitles': subtitles,
         }
 
+    def _get_subtitles(self, video_id, sub_file):
+        subs = self._download_json(
+            sub_file + '.json', video_id,
+            'Downloading subtitles info')['page']['items']
+        return dict((s['lang'], [{'ext': 'vtt', 'url': s['src']}])
+                    for s in subs)
+
 
 class RTVELiveIE(InfoExtractor):
     IE_NAME = 'rtve.es:live'