[extractor/common] Improve _form_hidden_inputs and rename to _hidden_inputs
author Sergey M․ <dstftw@gmail.com>
Tue, 14 Jul 2015 16:36:30 +0000 (22:36 +0600)
committer Sergey M․ <dstftw@gmail.com>
Tue, 14 Jul 2015 16:36:30 +0000 (22:36 +0600)
youtube_dl/extractor/common.py
youtube_dl/extractor/gorillavid.py
youtube_dl/extractor/hostingbulk.py
youtube_dl/extractor/played.py
youtube_dl/extractor/primesharetv.py
youtube_dl/extractor/promptfile.py
youtube_dl/extractor/shared.py
youtube_dl/extractor/twitch.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/vodlocker.py

index 82f5de2d898c4238839b8ada7c43aea6b166fd5f..315fe4a723e844c4ee550f1fd67e7ef57eb61ad5 100644 (file)
@@ -706,10 +706,17 @@ class InfoExtractor(object):
                                       'twitter card player')
 
     @staticmethod
-    def _form_hidden_inputs(html):
-        return dict(re.findall(
-            r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
-            html))
+    def _hidden_inputs(html):
+        return dict([
+            (input.group('name'), input.group('value')) for input in re.finditer(
+                r'''(?x)
+                    <input\s+
+                        type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
+                        name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
+                        (?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
+                        value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
+                ''', html)
+        ])
 
     def _sort_formats(self, formats, field_preference=None):
         if not formats:
index aabf07a20677c6e940d609565e44dfd330512cd7..f006f0cb105dc9d7b0c1f495dbcd4c840597858a 100644 (file)
@@ -78,7 +78,7 @@ class GorillaVidIE(InfoExtractor):
         if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
-        fields = self._form_hidden_inputs(webpage)
+        fields = self._hidden_inputs(webpage)
 
         if fields['op'] == 'download1':
             countdown = int_or_none(self._search_regex(
index 63f579592cf4d9c78c00ef30176aceade4a50594..a3154cfdeccf9b4c18cf5a5b01f7944243fb1509 100644 (file)
@@ -58,7 +58,7 @@ class HostingBulkIE(InfoExtractor):
             r'<img src="([^"]+)".+?class="pic"',
             webpage, 'thumbnail', fatal=False)
 
-        fields = self._form_hidden_inputs(webpage)
+        fields = self._hidden_inputs(webpage)
 
         request = compat_urllib_request.Request(url, urlencode_postdata(fields))
         request.add_header('Content-type', 'application/x-www-form-urlencoded')
index 9fe1524f2505b16b13b020fa89b791bd504b3ed4..8a1c296dda8b57611a0e464387be43ab0fc9a370 100644 (file)
@@ -38,7 +38,7 @@ class PlayedIE(InfoExtractor):
         if m_error:
             raise ExtractorError(m_error.group('msg'), expected=True)
 
-        data = self._form_hidden_inputs(orig_webpage)
+        data = self._hidden_inputs(orig_webpage)
 
         self._sleep(2, video_id)
 
index 9aa0c862a4643873fddc0e560c35030c5b223aeb..304359dc5b189b8ce27c967c2d369b26db334532 100644 (file)
@@ -29,7 +29,7 @@ class PrimeShareTVIE(InfoExtractor):
         if '>File not exist<' in webpage:
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
-        fields = self._form_hidden_inputs(webpage)
+        fields = self._hidden_inputs(webpage)
 
         headers = {
             'Referer': url,
index 81a63c7fc40b21135b2b307ecb075967448a2d44..8190ed6766ce5c878fc82700524ec6d012d70a57 100644 (file)
@@ -35,7 +35,7 @@ class PromptFileIE(InfoExtractor):
             raise ExtractorError('Video %s does not exist' % video_id,
                                  expected=True)
 
-        fields = self._form_hidden_inputs(webpage)
+        fields = self._hidden_inputs(webpage)
         post = compat_urllib_parse.urlencode(fields)
         req = compat_urllib_request.Request(url, post)
         req.add_header('Content-type', 'application/x-www-form-urlencoded')
index 6e2b94e7d5b7f30076b910d12e7bf1b371062dc0..a07677686a4ecc2923b310c3aeeeaab610bb0868 100644 (file)
@@ -34,7 +34,7 @@ class SharedIE(InfoExtractor):
             raise ExtractorError(
                 'Video %s does not exist' % video_id, expected=True)
 
-        download_form = self._form_hidden_inputs(webpage)
+        download_form = self._hidden_inputs(webpage)
         request = compat_urllib_request.Request(
             url, compat_urllib_parse.urlencode(download_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
index af2b798fb41467ddb7c6db1a82f2a6d7e940ec30..92b6dc1b820eef8e6a7a0b42bd48bfc5d2422e38 100644 (file)
@@ -59,7 +59,7 @@ class TwitchBaseIE(InfoExtractor):
         login_page = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login page')
 
-        login_form = self._form_hidden_inputs(login_page)
+        login_form = self._hidden_inputs(login_page)
 
         login_form.update({
             'login': username.encode('utf-8'),
index d63c03183ce98c9c306677fbf92e3bd5921cb2ea..10d6745af703e00d6962d3e14c8b01f2419ad955 100644 (file)
@@ -452,7 +452,7 @@ class VimeoChannelIE(InfoExtractor):
         password = self._downloader.params.get('videopassword', None)
         if password is None:
             raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
-        fields = self._form_hidden_inputs(login_form)
+        fields = self._hidden_inputs(login_form)
         token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
         fields['token'] = token
         fields['password'] = password
index 8ac3aeac0ca5a4ebf41654a88953c5b975cfef4a..8f677cae3a503ef34230cf205c12378e3c2ecb66 100644 (file)
@@ -168,7 +168,7 @@ class VKIE(InfoExtractor):
         login_page = self._download_webpage(
             'https://vk.com', None, 'Downloading login page')
 
-        login_form = self._form_hidden_inputs(login_page)
+        login_form = self._hidden_inputs(login_page)
 
         login_form.update({
             'email': username.encode('cp1251'),
index 4804692bfda542c0e4175a67d230ad89698a3d33..ccf1928b5d323f277b4e8a47bd4d008e821b147c 100644 (file)
@@ -26,7 +26,7 @@ class VodlockerIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        fields = self._form_hidden_inputs(webpage)
+        fields = self._hidden_inputs(webpage)
 
         if fields['op'] == 'download1':
             self._sleep(3, video_id)  # they do detect when requests happen too fast!