youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit b74fa8cd2c9deb412ac277c6cc44847c3839b844
parent 94eae04c94a43847e8ce7c9bf3d88dd029ef62f6
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date:   Fri,  7 Mar 2014 15:25:33 +0100

[facebook] Fix login process

It was broken and didn't work in python 3.
And use `_download_webpage` instead of `compat_urllib_request.urlopen`.

Diffstat:
M test/test_utils.py               |  5 +++++
M youtube_dl/extractor/facebook.py | 19 +++++++++++--------
M youtube_dl/utils.py              |  4 ++++
3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
@@ -33,6 +33,7 @@ from youtube_dl.utils import (
     unified_strdate,
     unsmuggle_url,
     url_basename,
+    urlencode_postdata,
     xpath_with_ns,
 )
 
@@ -261,5 +262,9 @@ class TestUtil(unittest.TestCase):
 bam''')
         self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
 
+    def test_urlencode_postdata(self):
+        data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
+        self.assertTrue(isinstance(data, bytes))
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
@@ -11,6 +11,7 @@ from ..utils import (
     compat_urllib_error,
     compat_urllib_parse,
     compat_urllib_request,
+    urlencode_postdata,
     ExtractorError,
 )
 
@@ -51,8 +52,8 @@ class FacebookIE(InfoExtractor):
 
         login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
         login_page_req.add_header('Cookie', 'locale=en_US')
-        self.report_login()
-        login_page = self._download_webpage(login_page_req, None, note=False,
+        login_page = self._download_webpage(login_page_req, None,
+            note='Downloading login page',
             errnote='Unable to download login page')
         lsd = self._search_regex(
             r'<input type="hidden" name="lsd" value="([^"]*)"',
@@ -70,23 +71,25 @@ class FacebookIE(InfoExtractor):
             'timezone': '-60',
             'trynum': '1',
         }
-        request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
+        request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         try:
-            login_results = compat_urllib_request.urlopen(request).read()
+            login_results = self._download_webpage(request, None,
+                note='Logging in', errnote='unable to fetch login page')
             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                 self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                 return
 
             check_form = {
-                'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, 'fb_dtsg'),
+                'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
                 'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
                 'name_action_selected': 'dont_save',
-                'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, 'continue'),
+                'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
             }
-            check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
+            check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
             check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            check_response = compat_urllib_request.urlopen(check_req).read()
+            check_response = self._download_webpage(check_req, None,
+                note='Confirming login')
             if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
                 self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -1263,3 +1263,7 @@ def read_batch_urls(batch_fd):
 
     with contextlib.closing(batch_fd) as fd:
         return [url for url in map(fixup, fd) if url]
+
+
+def urlencode_postdata(*args, **kargs):
+    return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')