youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 418f734a5877bb2d5d9cabe2ee158d076a1ef2c7
parent 1b2b22ed9f641eef34c05afb4230f2ff0aa57e0f
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date:   Sat,  1 Jun 2013 14:18:27 -0700

Merge pull request #854 from rg3/youtube_automatic_captions

YoutubeIE: fallback to automatic captions when subtitles aren't found
Diffstat:
Mtest/test_youtube_subtitles.py | 12+++++++++++-
Myoutube_dl/InfoExtractors.py | 37++++++++++++++++++++++++++++++++++++-
2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py @@ -28,7 +28,9 @@ compat_urllib_request.install_opener(opener) class FakeDownloader(FileDownloader): def __init__(self): self.result = [] - self.params = parameters + # Different instances of the downloader can't share the same dictionary + # some test set the "sublang" parameter, which would break the md5 checks. + self.params = dict(parameters) def to_screen(self, s): print(s) def trouble(self, s, tb=None): @@ -96,6 +98,14 @@ class TestYoutubeSubtitles(unittest.TestCase): IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') self.assertEqual(info_dict, None) + def test_youtube_automatic_captions(self): + DL = FakeDownloader() + DL.params['writesubtitles'] = True + DL.params['subtitleslang'] = 'it' + IE = YoutubeIE(DL) + info_dict = IE.extract('8YoUxe5ncPo') + sub = info_dict[0]['subtitles'][0] + self.assertTrue(sub[2] is not None) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py @@ -376,6 +376,34 @@ class YoutubeIE(InfoExtractor): return (u'Did not fetch video subtitles', None, None) return (None, sub_lang, sub) + def _request_automatic_caption(self, video_id, webpage): + """We need the webpage for getting the captions url, pass it as an + argument to speed up the process.""" + sub_lang = self._downloader.params.get('subtitleslang') + sub_format = self._downloader.params.get('subtitlesformat') + self.to_screen(u'%s: Looking for automatic captions' % video_id) + mobj = re.search(r';ytplayer.config = ({.*?});', webpage) + err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang + if mobj is None: + return [(err_msg, None, None)] + player_config = json.loads(mobj.group(1)) + try: + args = player_config[u'args'] + caption_url = args[u'ttsurl'] + timestamp = args[u'timestamp'] + params = compat_urllib_parse.urlencode({ + 'lang': 'en', + 'tlang': sub_lang, + 'fmt': sub_format, + 'ts': timestamp, + 'kind': 'asr', + }) + subtitles_url = caption_url + '&' + params + sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') + return [(None, sub_lang, sub)] + except KeyError: + return [(err_msg, None, None)] + def _extract_subtitle(self, video_id): """ Return a list with a tuple: @@ -623,7 +651,14 @@ class YoutubeIE(InfoExtractor): if video_subtitles: (sub_error, sub_lang, sub) = video_subtitles[0] if sub_error: - self._downloader.report_error(sub_error) + # We try with the automatic captions + video_subtitles = self._request_automatic_caption(video_id, video_webpage) + (sub_error_auto, sub_lang, sub) = video_subtitles[0] + if sub is not None: + pass + else: + # We report the original error + self._downloader.report_error(sub_error) if self._downloader.params.get('allsubtitles', False): video_subtitles = self._extract_all_subtitles(video_id)