[mixcloud] Extract decrypt key
authorSergey M․ <dstftw@gmail.com>
Mon, 14 Aug 2017 15:39:05 +0000 (22:39 +0700)
committerSergey M․ <dstftw@gmail.com>
Mon, 14 Aug 2017 15:39:05 +0000 (22:39 +0700)
youtube_dl/extractor/mixcloud.py

index 52f7428e0155ad413316b7b7b1bf4bdd0c9d04b6..fcf7beeb25af5e5b9394e448519a891ce46d1f2d 100644 (file)
@@ -9,6 +9,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_chr,
     compat_ord,
+    compat_str,
     compat_urllib_parse_unquote,
     compat_urlparse,
 )
@@ -53,15 +54,18 @@ class MixcloudIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    _keys = [
+        'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };',
+        'pleasedontdownloadourmusictheartistswontgetpaid',
+        'window.addEventListener = window.addEventListener || function() {};',
+        '(function() { return new Date().toLocaleDateString(); })()'
+    ]
+    _current_key = None
+
     # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
     def _decrypt_play_info(self, play_info, video_id):
-        KEYS = (
-            'pleasedontdownloadourmusictheartistswontgetpaid',
-            'window.addEventListener = window.addEventListener || function() {};',
-            '(function() { return new Date().toLocaleDateString(); })()',
-        )
         play_info = base64.b64decode(play_info.encode('ascii'))
-        for num, key in enumerate(KEYS, start=1):
+        for num, key in enumerate(self._keys, start=1):
             try:
                 return self._parse_json(
                     ''.join([
@@ -69,7 +73,7 @@ class MixcloudIE(InfoExtractor):
                         for idx, ch in enumerate(play_info)]),
                     video_id)
             except ExtractorError:
-                if num == len(KEYS):
+                if num == len(self._keys):
                     raise
 
     def _real_extract(self, url):
@@ -80,6 +84,20 @@ class MixcloudIE(InfoExtractor):
 
         webpage = self._download_webpage(url, track_id)
 
+        if not self._current_key:
+            js_url = self._search_regex(
+                r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)',
+                webpage, 'js url', default=None)
+            if js_url:
+                js = self._download_webpage(js_url, track_id, fatal=False)
+                if js:
+                    key = self._search_regex(
+                        r'player\s*:\s*{.*?\bvalue\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1',
+                        js, 'key', default=None, group='key')
+                    if key and isinstance(key, compat_str):
+                        self._keys.insert(0, key)
+                        self._current_key = key
+
         message = self._html_search_regex(
             r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
             webpage, 'error message', default=None)