[sprout] Add support for Universal Kids (closes #22518)
author Remita Amine <remitamine@gmail.com>
Fri, 25 Dec 2020 20:17:44 +0000 (21:17 +0100)
committer Remita Amine <remitamine@gmail.com>
Fri, 25 Dec 2020 20:17:44 +0000 (21:17 +0100)
youtube_dl/extractor/sprout.py

index 8467bf49df5fd7ab669c61b2aa701fd55a7b318c..b1f8e05a2e9e807317c6c177795b5f21a09efac3 100644 (file)
@@ -3,50 +3,62 @@ from __future__ import unicode_literals
 
 from .adobepass import AdobePassIE
 from ..utils import (
-    extract_attributes,
-    update_url_query,
+    int_or_none,
     smuggle_url,
+    update_url_query,
 )
 
 
 class SproutIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)'
-    _TEST = {
-        'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
-        'md5': '74bf14128578d1e040c3ebc82088f45f',
+    _VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
         'info_dict': {
-            'id': '9dexnwtmh8_X',
+            'id': 'bm0foJFaTKqb',
             'ext': 'mp4',
-            'title': 'A Cowboy Adventure',
-            'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.',
-            'timestamp': 1437758640,
-            'upload_date': '20150724',
-            'uploader': 'NBCU-SPROUT-NEW',
-        }
-    }
+            'title': 'Robot Bike Race',
+            'description': 'md5:436b1d97117cc437f54c383f4debc66d',
+            'timestamp': 1606148940,
+            'upload_date': '20201123',
+            'uploader': 'NBCU-MPAT',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.universalkids.com/watch/robot-bike-race',
+        'only_matching': True,
+    }]
+    _GEO_COUNTRIES = ['US']
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        video_component = self._search_regex(
-            r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
-            webpage, 'video component', default=None)
-        if video_component:
-            options = self._parse_json(extract_attributes(
-                video_component)['data-options'], video_id)
-            theplatform_url = options['video']
-            query = {
-                'mbr': 'true',
-                'manifest': 'm3u',
-            }
-            if options.get('protected'):
-                query['auth'] = self._extract_mvpd_auth(url, options['pid'], 'sprout', 'sprout')
-            theplatform_url = smuggle_url(update_url_query(
-                theplatform_url, query), {'force_smil_url': True})
-        else:
-            iframe = self._search_regex(
-                r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
-                webpage, 'iframe')
-            theplatform_url = extract_attributes(iframe)['src']
-
-        return self.url_result(theplatform_url, 'ThePlatform')
+        display_id = self._match_id(url)
+        mpx_metadata = self._download_json(
+            # http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
+            'https://www.universalkids.com/_api/videos/' + display_id,
+            display_id)['mpxMetadata']
+        media_pid = mpx_metadata['mediaPid']
+        theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
+        query = {
+            'mbr': 'true',
+            'manifest': 'm3u',
+        }
+        if mpx_metadata.get('entitlement') == 'auth':
+            query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
+        theplatform_url = smuggle_url(
+            update_url_query(theplatform_url, query), {
+                'force_smil_url': True,
+                'geo_countries': self._GEO_COUNTRIES,
+            })
+        return {
+            '_type': 'url_transparent',
+            'id': media_pid,
+            'url': theplatform_url,
+            'series': mpx_metadata.get('seriesName'),
+            'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
+            'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
+            'ie_key': 'ThePlatform',
+        }