[novamov] Generalize extractor
author Sergey M. <dstftw@gmail.com>
Mon, 24 Feb 2014 16:30:09 +0000 (23:30 +0700)
committer Sergey M. <dstftw@gmail.com>
Mon, 24 Feb 2014 16:30:09 +0000 (23:30 +0700)
youtube_dl/extractor/__init__.py
youtube_dl/extractor/novamov.py

index 6dccd5ae745a4aab7becb408220a93d3ef1abf20..8eff3df41827aa1894663715ff68df553f4c64f5 100644 (file)
@@ -161,7 +161,7 @@ from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
 from .ninegag import NineGagIE
 from .normalboots import NormalbootsIE
-from .novamov import NovamovIE
+from .novamov import NovaMovIE
 from .nowness import NownessIE
 from .nowvideo import NowVideoIE
 from .ooyala import OoyalaIE
index 22a3824573b2dab2283de2a643ba9442d2172c22..fd310e219c1eff92eea30ae7800afb8122df78af 100644 (file)
@@ -9,14 +9,25 @@ from ..utils import (
 )
 
 
-class NovamovIE(InfoExtractor):
-    _VALID_URL = r'http://(?:(?:www\.)?novamov\.com/video/|(?:(?:embed|www)\.)novamov\.com/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})'
+class NovaMovIE(InfoExtractor):
+    IE_NAME = 'novamov'
+    IE_DESC = 'NovaMov'
+
+    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'}
+
+    _HOST = 'www.novamov.com'
+
+    _FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>'
+    _FILEKEY_REGEX = r'flashvars\.filekey="(?P<filekey>[^"]+)";'
+    _TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'
+    _DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'
 
     _TEST = {
         'url': 'http://www.novamov.com/video/4rurhn9x446jj',
-        'file': '4rurhn9x446jj.flv',
         'md5': '7205f346a52bbeba427603ba10d4b935',
         'info_dict': {
+            'id': '4rurhn9x446jj',
+            'ext': 'flv',
             'title': 'search engine optimization',
             'description': 'search engine optimization is used to rank the web page in the google search engine'
         },
@@ -27,31 +38,26 @@ class NovamovIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('videoid')
 
-        page = self._download_webpage('http://www.novamov.com/video/%s' % video_id,
-                                      video_id, 'Downloading video page')
+        page = self._download_webpage(
+            'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
 
-        if re.search(r'This file no longer exists on our servers!</h2>', page) is not None:
+        if re.search(self._FILE_DELETED_REGEX, page) is not None:
             raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
 
-        filekey = self._search_regex(
-            r'flashvars\.filekey="(?P<filekey>[^"]+)";', page, 'filekey')
+        filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
 
-        title = self._html_search_regex(
-            r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>',
-            page, 'title', fatal=False)
+        title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
 
-        description = self._html_search_regex(
-            r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>',
-            page, 'description', fatal=False)
+        description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
 
         api_response = self._download_webpage(
-            'http://www.novamov.com/api/player.api.php?key=%s&file=%s' % (filekey, video_id),
-            video_id, 'Downloading video api response')
+            'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
+            'Downloading video api response')
 
         response = compat_urlparse.parse_qs(api_response)
 
         if 'error_msg' in response:
-            raise ExtractorError('novamov returned error: %s' % response['error_msg'][0], expected=True)
+            raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True)
 
         video_url = response['url'][0]
 
@@ -60,4 +66,4 @@ class NovamovIE(InfoExtractor):
             'url': video_url,
             'title': title,
             'description': description
-        }
+        }
\ No newline at end of file