Remove the calls to 'compat_urllib_request.urlopen' in a few extractors - youtube-dl

commit baa7b1978bc1d77858458c2b31aec3ff819a3e6a
parent ac5118bcb92a489eec0e9997e55e07945dc720db
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date:   Sun,  8 Dec 2013 22:24:55 +0100

Remove the calls to 'compat_urllib_request.urlopen' in a few extractors

Diffstat:
M youtube_dl/extractor/bliptv.py  | 39 ++++++++++++++++++---------------------
M youtube_dl/extractor/metacafe.py  | 18 ++++--------------
M youtube_dl/extractor/mixcloud.py  | 11 +++++------
M youtube_dl/extractor/stanfordoc.py  | 18 +++---------------
M youtube_dl/extractor/youtube.py  | 42 ++++++++++++------------------------------

5 files changed, 42 insertions(+), 86 deletions(-)
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
@@ -51,8 +51,7 @@ class BlipTVIE(InfoExtractor):
             url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
         urlp = compat_urllib_parse_urlparse(url)
         if urlp.path.startswith('/play/'):
-            request = compat_urllib_request.Request(url)
-            response = compat_urllib_request.urlopen(request)
+            response = self._request_webpage(url, None, False)
             redirecturl = response.geturl()
             rurlp = compat_urllib_parse_urlparse(redirecturl)
             file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
@@ -69,25 +68,23 @@ class BlipTVIE(InfoExtractor):
         request.add_header('User-Agent', 'iTunes/10.6.1')
         self.report_extraction(mobj.group(1))
         info = None
-        try:
-            urlh = compat_urllib_request.urlopen(request)
-            if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
-                basename = url.split('/')[-1]
-                title,ext = os.path.splitext(basename)
-                title = title.decode('UTF-8')
-                ext = ext.replace('.', '')
-                self.report_direct_download(title)
-                info = {
-                    'id': title,
-                    'url': url,
-                    'uploader': None,
-                    'upload_date': None,
-                    'title': title,
-                    'ext': ext,
-                    'urlhandle': urlh
-                }
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
+        urlh = self._request_webpage(request, None, False,
+            u'unable to download video info webpage')
+        if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
+            basename = url.split('/')[-1]
+            title,ext = os.path.splitext(basename)
+            title = title.decode('UTF-8')
+            ext = ext.replace('.', '')
+            self.report_direct_download(title)
+            info = {
+                'id': title,
+                'url': url,
+                'uploader': None,
+                'upload_date': None,
+                'title': title,
+                'ext': ext,
+                'urlhandle': urlh
+            }
         if info is None: # Regular URL
             try:
                 json_code_bytes = urlh.read()
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py
@@ -1,11 +1,8 @@
 import re
-import socket
 
 from .common import InfoExtractor
 from ..utils import (
-    compat_http_client,
     compat_parse_qs,
-    compat_urllib_error,
     compat_urllib_parse,
     compat_urllib_request,
     compat_str,
@@ -93,12 +90,8 @@ class MetacafeIE(InfoExtractor):
 
     def _real_initialize(self):
         # Retrieve disclaimer
-        request = compat_urllib_request.Request(self._DISCLAIMER)
-        try:
-            self.report_disclaimer()
-            compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to retrieve disclaimer: %s' % compat_str(err))
+        self.report_disclaimer()
+        self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
 
         # Confirm age
         disclaimer_form = {
@@ -107,11 +100,8 @@ class MetacafeIE(InfoExtractor):
             }
         request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        try:
-            self.report_age_confirmation()
-            compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
+        self.report_age_confirmation()
+        self._download_webpage(request, None, False, u'Unable to confirm age')
 
     def _real_extract(self, url):
         # Extract id and simplified title from URL
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
@@ -1,13 +1,10 @@
 import json
 import re
-import socket
 
 from .common import InfoExtractor
 from ..utils import (
-    compat_http_client,
-    compat_urllib_error,
-    compat_urllib_request,
     unified_strdate,
+    ExtractorError,
 )
 
 
@@ -31,9 +28,11 @@ class MixcloudIE(InfoExtractor):
         """Returns 1st active url from list"""
         for url in url_list:
             try:
-                compat_urllib_request.urlopen(url)
+                # We only want to know if the request succeed
+                # don't download the whole file
+                self._request_webpage(url, None, False)
                 return url
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
+            except ExtractorError:
                 url = None
 
         return None
diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py
@@ -1,13 +1,8 @@
 import re
-import socket
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
-    compat_http_client,
     compat_str,
-    compat_urllib_error,
-    compat_urllib_request,
 
     ExtractorError,
     orderedSet,
@@ -45,11 +40,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
             self.report_extraction(info['id'])
             baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
             xmlUrl = baseUrl + video + '.xml'
-            try:
-                metaXml = compat_urllib_request.urlopen(xmlUrl).read()
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
-            mdoc = xml.etree.ElementTree.fromstring(metaXml)
+            mdoc = self._download_xml(xmlUrl, info['id'])
             try:
                 info['title'] = mdoc.findall('./title')[0].text
                 info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
@@ -95,12 +86,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
                 'upload_date': None,
             }
 
-            self.report_download_webpage(info['id'])
             rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
-            try:
-                rootpage = compat_urllib_request.urlopen(rootURL).read()
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                raise ExtractorError(u'Unable to download course info page: ' + compat_str(err))
+            rootpage = self._download_webpage(rootURL, info['id'],
+                errnote=u'Unable to download course info page')
 
             info['title'] = info['id']
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
@@ -7,7 +7,6 @@ import itertools
 import json
 import os.path
 import re
-import socket
 import string
 import struct
 import traceback
@@ -17,9 +16,7 @@ from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
     compat_chr,
-    compat_http_client,
     compat_parse_qs,
-    compat_urllib_error,
     compat_urllib_parse,
     compat_urllib_request,
     compat_urlparse,
@@ -53,9 +50,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         request = compat_urllib_request.Request(self._LANG_URL)
         try:
             self.report_lang()
-            compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
+            self._download_webpage(self._LANG_URL, None, False)
+        except ExtractorError as err:
+            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err.cause))
             return False
         return True
 
@@ -67,12 +64,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
             return False
 
-        request = compat_urllib_request.Request(self._LOGIN_URL)
-        try:
-            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
-            return False
+        login_page = self._download_webpage(self._LOGIN_URL, None, False,
+            u'Unable to fetch login page')
 
         galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                   login_page, u'Login GALX parameter')
@@ -105,12 +98,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
         try:
             self.report_login()
-            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
+            login_results = self._download_webpage(request, None, False)
             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                 self._downloader.report_warning(u'unable to log in: bad username or password')
                 return False
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
+        except ExtractorError as err:
+            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err.cause))
             return False
         return True
 
@@ -120,11 +113,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                 'action_confirm':   'Confirm',
                 }
         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
-        try:
-            self.report_age_confirmation()
-            compat_urllib_request.urlopen(request).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
+        self.report_age_confirmation()
+        self._download_webpage(request, None, False, u'Unable to confirm age')
         return True
 
     def _real_initialize(self):
@@ -1737,10 +1727,6 @@ class YoutubeSearchIE(SearchInfoExtractor):
     IE_NAME = u'youtube:search'
     _SEARCH_KEY = 'ytsearch'
 
-    def report_download_page(self, query, pagenum):
-        """Report attempt to download search page with given number."""
-        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
-
     def _get_n_results(self, query, n):
         """Get a specified number of results for a query"""
 
@@ -1749,13 +1735,9 @@ class YoutubeSearchIE(SearchInfoExtractor):
         limit = n
 
         while (50 * pagenum) < limit:
-            self.report_download_page(query, pagenum+1)
             result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
-            request = compat_urllib_request.Request(result_url)
-            try:
-                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
+            data = self._download_webpage(result_url, u'query "%s"' % query,
+                u'Downloading page %s' % pagenum, u'Unable to download API page')
             api_response = json.loads(data)['data']
 
             if not 'items' in api_response:

	youtube-dl Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log \| Files \| Refs \| README \| LICENSE

M	youtube_dl/extractor/bliptv.py	\|	39	++++++++++++++++++---------------------
M	youtube_dl/extractor/metacafe.py	\|	18	++++--------------
M	youtube_dl/extractor/mixcloud.py	\|	11	+++++------
M	youtube_dl/extractor/stanfordoc.py	\|	18	+++---------------
M	youtube_dl/extractor/youtube.py	\|	42	++++++++++++------------------------------