youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit d93e4dcbb7fb68666528e251623b90d832d9cffc
parent 73e79f2a1be3179edd8eebf4b7b6d56fe953a4a8
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Mon,  8 Jul 2013 01:15:19 +0200

Merge branch 'master' of github.com:rg3/youtube-dl

Diffstat:
Myoutube_dl/extractor/common.py | 35+++++++++++++++++++++++++++++++++++
Myoutube_dl/extractor/vimeo.py | 23+++++++++++++++++++++++
Myoutube_dl/extractor/youtube.py | 7++++++-
3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py @@ -3,6 +3,7 @@ import os import re import socket import sys +import netrc from ..utils import ( compat_http_client, @@ -163,6 +164,10 @@ class InfoExtractor(object): """Report attempt to confirm age.""" self.to_screen(u'Confirming age') + def report_login(self): + """Report attempt to log in.""" + self.to_screen(u'Logging in') + #Methods for following #608 #They set the correct value of the '_type' key def video_result(self, video_info): @@ -227,6 +232,36 @@ class InfoExtractor(object): else: return res + def _get_login_info(self): + """ + Get the the login info as (username, password) + It will look in the netrc file using the _NETRC_MACHINE value + If there's no info available, return (None, None) + """ + if self._downloader is None: + return (None, None) + + username = None + password = None + downloader_params = self._downloader.params + + # Attempt to use provided username and password or .netrc data + if downloader_params.get('username', None) is not None: + username = downloader_params['username'] + password = downloader_params['password'] + elif downloader_params.get('usenetrc', False): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + username = info[0] + password = info[2] + else: + raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) + except (IOError, netrc.NetrcParseError) as err: + self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err)) + + return (username, password) + class SearchInfoExtractor(InfoExtractor): """ Base class for paged search queries extractors. diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py @@ -17,6 +17,7 @@ class VimeoIE(InfoExtractor): # _VALID_URL matches Vimeo URLs _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$' + _NETRC_MACHINE = 'vimeo' IE_NAME = u'vimeo' _TEST = { u'url': u'http://vimeo.com/56015672', @@ -31,6 +32,25 @@ class VimeoIE(InfoExtractor): } } + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + self.report_login() + login_url = 'https://vimeo.com/log_in' + webpage = self._download_webpage(login_url, None, False) + token = re.search(r'xsrft: \'(.*?)\'', webpage).group(1) + data = compat_urllib_parse.urlencode({'email': username, + 'password': password, + 'action': 'login', + 'service': 'vimeo', + 'token': token, + }) + login_request = compat_urllib_request.Request(login_url, data) + login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') + login_request.add_header('Cookie', 'xsrft=%s' % token) + self._download_webpage(login_request, None, False, u'Wrong login info') + def _verify_video_password(self, url, video_id, webpage): password = self._downloader.params.get('videopassword', None) if password is None: @@ -50,6 +70,9 @@ class VimeoIE(InfoExtractor): u'Verifying the password', u'Wrong password') + def _real_initialize(self): + self._login() + def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py @@ -473,7 +473,12 @@ class YoutubeIE(InfoExtractor): video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) # thumbnail image - if 'thumbnail_url' not in video_info: + # We try first to get a high quality image: + m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">', + video_webpage, re.DOTALL) + if m_thumb is not None: + video_thumbnail = m_thumb.group(1) + elif 'thumbnail_url' not in video_info: self._downloader.report_warning(u'unable to extract video thumbnail') video_thumbnail = '' else: # don't panic if we can't find it