[nbc] fix NBCNews/Today/MSNBC extraction
author Remita Amine <remitamine@gmail.com>
Mon, 16 Nov 2020 15:18:37 +0000 (16:18 +0100)
committer Remita Amine <remitamine@gmail.com>
Mon, 16 Nov 2020 15:18:37 +0000 (16:18 +0100)
youtube_dl/extractor/nbc.py

index 6f3cb30034da7f5fcebb99fc6dec05f1ff3cd8e4..ea5f5a3157c3ac068a7236e9c6251c00b1017231 100644 (file)
@@ -10,7 +10,6 @@ from .adobepass import AdobePassIE
 from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     int_or_none,
-    js_to_json,
     parse_duration,
     smuggle_url,
     try_get,
@@ -394,8 +393,8 @@ class NBCNewsIE(ThePlatformIE):
         webpage = self._download_webpage(url, video_id)
 
         data = self._parse_json(self._search_regex(
-            r'window\.__data\s*=\s*({.+});', webpage,
-            'bootstrap json'), video_id, js_to_json)
+            r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
+            webpage, 'bootstrap json'), video_id)['props']['initialState']
         video_data = try_get(data, lambda x: x['video']['current'], dict)
         if not video_data:
             video_data = data['article']['content'][0]['primaryMedia']['video']