From: Ismael Mejia Date: Mon, 26 Aug 2013 02:03:40 +0000 (+0200) Subject: Merge branch 'master' into subtitles_rework X-Git-Url: http://git.oshgnacknak.de/?a=commitdiff_plain;h=06a401c845289344bfe8998a0acad07f79fe7818;p=youtube-dl Merge branch 'master' into subtitles_rework --- 06a401c845289344bfe8998a0acad07f79fe7818 diff --cc youtube_dl/extractor/dailymotion.py index f54ecc569,fa8c630d0..003b1d8c3 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@@ -1,39 -1,17 +1,40 @@@ import re import json + import itertools +import socket from .common import InfoExtractor +from .subtitles import NoAutoSubtitlesIE + from ..utils import ( + compat_http_client, + compat_urllib_error, compat_urllib_request, + compat_str, get_element_by_attribute, get_element_by_id, ExtractorError, ) -class DailymotionIE(InfoExtractor): + +class DailyMotionSubtitlesIE(NoAutoSubtitlesIE): + + def _get_available_subtitles(self, video_id): + request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id) + try: + sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) + return {} + info = json.loads(sub_list) + if (info['total'] > 0): + sub_lang_list = dict((l['language'], l['url']) for l in info['list']) + return sub_lang_list + self._downloader.report_warning(u'video doesn\'t have subtitles') + return {} + - class DailymotionIE(DailyMotionSubtitlesIE): ++class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor): """Information Extractor for Dailymotion""" _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' diff --cc youtube_dl/extractor/youtube.py index 571c73889,446d53f64..370cc64cc --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@@ -24,67 -23,114 +24,172 @@@ from ..utils import orderedSet, ) + class YoutubeBaseInfoExtractor(InfoExtractor): + """Provide base functions for Youtube extractors""" + _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' + _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' + _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' + _NETRC_MACHINE = 'youtube' + # If True it will raise an error if no login info is provided + _LOGIN_REQUIRED = False + + def report_lang(self): + """Report attempt to set language.""" + self.to_screen(u'Setting language') + + def _set_language(self): + request = compat_urllib_request.Request(self._LANG_URL) + try: + self.report_lang() + compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.report_warning(u'unable to set language: %s' % compat_str(err)) + return False + return True + + def _login(self): + (username, password) = self._get_login_info() + # No authentication to be performed + if username is None: + if self._LOGIN_REQUIRED: + raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) + return False + + request = compat_urllib_request.Request(self._LOGIN_URL) + try: + login_page = compat_urllib_request.urlopen(request).read().decode('utf-8') + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err)) + return False + + galx = None + dsh = None + match = re.search(re.compile(r']* id="gaia_loginform"', login_results) is not None: + self._downloader.report_warning(u'unable to log in: bad username or password') + return False + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) + return False + return True + + def _confirm_age(self): + age_form = { + 'next_url': '/', + 'action_confirm': 'Confirm', + } + request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) + try: + self.report_age_confirmation() + compat_urllib_request.urlopen(request).read().decode('utf-8') + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err)) + return True + + def _real_initialize(self): + if self._downloader is None: + return + if not self._set_language(): + return + if not self._login(): + return + self._confirm_age() -class YoutubeIE(YoutubeBaseInfoExtractor): +class YoutubeSubtitlesIE(SubtitlesIE): + + def _get_available_subtitles(self, video_id): + request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) + try: + sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) + return {} + lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) + + sub_lang_list = {} + for l in lang_list: + lang = l[1] + params = compat_urllib_parse.urlencode({ + 'lang': lang, + 'v': video_id, + 'fmt': self._downloader.params.get('subtitlesformat'), + }) + url = u'http://www.youtube.com/api/timedtext?' + params + sub_lang_list[lang] = url + if not sub_lang_list: + self._downloader.report_warning(u'video doesn\'t have subtitles') + return {} + return sub_lang_list + + def _request_automatic_caption(self, video_id, webpage): + """We need the webpage for getting the captions url, pass it as an + argument to speed up the process.""" + sub_lang = self._downloader.params.get('subtitleslang') or 'en' + sub_format = self._downloader.params.get('subtitlesformat') + self.to_screen(u'%s: Looking for automatic captions' % video_id) + mobj = re.search(r';ytplayer.config = ({.*?});', webpage) + err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang + if mobj is None: + self._downloader.report_warning(err_msg) + return {} + player_config = json.loads(mobj.group(1)) + try: + args = player_config[u'args'] + caption_url = args[u'ttsurl'] + timestamp = args[u'timestamp'] + params = compat_urllib_parse.urlencode({ + 'lang': 'en', + 'tlang': sub_lang, + 'fmt': sub_format, + 'ts': timestamp, + 'kind': 'asr', + }) + subtitles_url = caption_url + '&' + params + sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') + return {sub_lang: sub} + # An extractor error can be raise by the download process if there are + # no automatic captions but there are subtitles + except (KeyError, ExtractorError): + self._downloader.report_warning(err_msg) + return {} + - - class YoutubeIE(YoutubeSubtitlesIE): ++class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor): IE_DESC = u'YouTube.com' _VALID_URL = r"""^ ( @@@ -390,105 -451,109 +497,6 @@@ # Fallback to the other algortihms return self._decrypt_signature(s) -- -- def _get_available_subtitles(self, video_id): -- self.report_video_subtitles_download(video_id) -- request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) -- try: -- sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') -- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - return (u'unable to download video subtitles: %s' % compat_str(err), None) - self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) - return {} -- sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) -- sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) -- if not sub_lang_list: - return (u'video doesn\'t have subtitles', None) - self._downloader.report_warning(u'video doesn\'t have subtitles') - return {} -- return sub_lang_list -- -- def _list_available_subtitles(self, video_id): -- sub_lang_list = self._get_available_subtitles(video_id) -- self.report_video_subtitles_available(video_id, sub_lang_list) -- -- def _request_subtitle(self, sub_lang, sub_name, video_id, format): -- """ - Return tuple: - (error_message, sub_lang, sub) - Return the subtitle as a string or None if they are not found -- """ -- self.report_video_subtitles_request(video_id, sub_lang, format) -- params = compat_urllib_parse.urlencode({ -- 'lang': sub_lang, -- 'name': sub_name, -- 'v': video_id, -- 'fmt': format, -- }) -- url = 'http://www.youtube.com/api/timedtext?' + params -- try: -- sub = compat_urllib_request.urlopen(url).read().decode('utf-8') -- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - return (u'unable to download video subtitles: %s' % compat_str(err), None, None) - self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err))) - return -- if not sub: - return (u'Did not fetch video subtitles', None, None) - return (None, sub_lang, sub) - self._downloader.report_warning(u'Did not fetch video subtitles') - return - return sub -- -- def _request_automatic_caption(self, video_id, webpage): -- """We need the webpage for getting the captions url, pass it as an -- argument to speed up the process.""" -- sub_lang = self._downloader.params.get('subtitleslang') or 'en' -- sub_format = self._downloader.params.get('subtitlesformat') -- self.to_screen(u'%s: Looking for automatic captions' % video_id) -- mobj = re.search(r';ytplayer.config = ({.*?});', webpage) -- err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang -- if mobj is None: - return [(err_msg, None, None)] - self._downloader.report_warning(err_msg) - return {} -- player_config = json.loads(mobj.group(1)) -- try: -- args = player_config[u'args'] -- caption_url = args[u'ttsurl'] -- timestamp = args[u'timestamp'] -- params = compat_urllib_parse.urlencode({ -- 'lang': 'en', -- 'tlang': sub_lang, -- 'fmt': sub_format, -- 'ts': timestamp, -- 'kind': 'asr', -- }) -- subtitles_url = caption_url + '&' + params -- sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') - return [(None, sub_lang, sub)] - except KeyError: - return [(err_msg, None, None)] - - def _extract_subtitle(self, video_id): - return {sub_lang: sub} - # An extractor error can be raise by the download process if there are - # no automatic captions but there are subtitles - except (KeyError, ExtractorError): - self._downloader.report_warning(err_msg) - return {} - - def _extract_subtitles(self, video_id): -- """ - Return a list with a tuple: - [(error_message, sub_lang, sub)] - Return a dictionary: {language: subtitles} or {} if the subtitles - couldn't be found -- """ -- sub_lang_list = self._get_available_subtitles(video_id) -- sub_format = self._downloader.params.get('subtitlesformat') - if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles - return [(sub_lang_list[0], None, None)] - if self._downloader.params.get('subtitleslang', False): - sub_lang = self._downloader.params.get('subtitleslang') - elif 'en' in sub_lang_list: - sub_lang = 'en' - if not sub_lang_list: #There was some error, it didn't get the available subtitles - return {} - if self._downloader.params.get('allsubtitles', False): - pass -- else: - sub_lang = list(sub_lang_list.keys())[0] - if not sub_lang in sub_lang_list: - return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)] - - subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) - return [subtitle] - - def _extract_all_subtitles(self, video_id): - sub_lang_list = self._get_available_subtitles(video_id) - sub_format = self._downloader.params.get('subtitlesformat') - if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles - return [(sub_lang_list[0], None, None)] - subtitles = [] - if self._downloader.params.get('subtitleslang', False): - sub_lang = self._downloader.params.get('subtitleslang') - elif 'en' in sub_lang_list: - sub_lang = 'en' - else: - sub_lang = list(sub_lang_list.keys())[0] - if not sub_lang in sub_lang_list: - self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) - return {} - sub_lang_list = {sub_lang: sub_lang_list[sub_lang]} - subtitles = {} -- for sub_lang in sub_lang_list: -- subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) - subtitles.append(subtitle) - if subtitle: - subtitles[sub_lang] = subtitle -- return subtitles -- def _print_formats(self, formats): print('Available formats:') for x in formats: