[beeg] Improve extraction
authorSergey M․ <dstftw@gmail.com>
Thu, 7 Apr 2016 16:40:35 +0000 (22:40 +0600)
committerSergey M․ <dstftw@gmail.com>
Thu, 7 Apr 2016 16:40:35 +0000 (22:40 +0600)
youtube_dl/extractor/beeg.py

index 9072949ddcad6ccdc67a272f556687b202c616cc..956c7680e2ecc46a1df493947ed0be7b973d81b8 100644 (file)
@@ -33,8 +33,33 @@ class BeegIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        webpage = self._download_webpage(url, video_id)
+
+        cpl_url = self._search_regex(
+            r'<script[^>]+src=(["\'])(?P<url>(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1',
+            webpage, 'cpl', default=None, group='url')
+
+        beeg_version, beeg_salt = [None] * 2
+
+        if cpl_url:
+            cpl = self._download_webpage(
+                self._proto_relative_url(cpl_url), video_id,
+                'Downloading cpl JS', fatal=False)
+            if cpl:
+                beeg_version = self._search_regex(
+                    r'beeg_version\s*=\s*(\d+)', cpl,
+                    'beeg version', default=None) or self._search_regex(
+                    r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
+                beeg_salt = self._search_regex(
+                    r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg beeg_salt',
+                    default=None, group='beeg_salt')
+
+        beeg_version = beeg_version or '1750'
+        beeg_salt = beeg_salt or 'MIDtGaw96f0N1kMMAM1DE46EC9pmFr'
+
         video = self._download_json(
-            'https://api.beeg.com/api/v6/1738/video/%s' % video_id, video_id)
+            'http://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
+            video_id)
 
         def split(o, e):
             def cut(s, x):
@@ -51,7 +76,7 @@ class BeegIE(InfoExtractor):
 
         def decrypt_key(key):
             # Reverse engineered from http://static.beeg.com/cpl/1738.js
-            a = 'GUuyodcfS8FW8gQp4OKLMsZBcX0T7B'
+            a = beeg_salt
             e = compat_urllib_parse_unquote(key)
             o = ''.join([
                 compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
@@ -101,5 +126,5 @@ class BeegIE(InfoExtractor):
             'duration': duration,
             'tags': tags,
             'formats': formats,
-            'age_limit': 18,
+            'age_limit': self._rta_search(webpage),
         }