[nhk] fix video extraction (closes #22249) (closes #22353)
author Remita Amine <remitamine@gmail.com>
Tue, 24 Sep 2019 19:23:56 +0000 (20:23 +0100)
committer Remita Amine <remitamine@gmail.com>
Tue, 24 Sep 2019 19:24:17 +0000 (20:24 +0100)
youtube_dl/extractor/nhk.py
youtube_dl/extractor/piksel.py

index 241412f9849645674c5369e4a5d46a6ca0709358..cce4bb47283960878ab2200e036f61c976c4d85a 100644 (file)
@@ -60,8 +60,8 @@ class NhkVodIE(InfoExtractor):
         if is_video:
             info.update({
                 '_type': 'url_transparent',
-                'ie_key': 'Ooyala',
-                'url': 'ooyala:' + episode['vod_id'],
+                'ie_key': 'Piksel',
+                'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
             })
         else:
             audio = episode['audio']
index 401298cb877f59ec46e896ee0c087d21f2c93924..88b6859b01a7c51eebe9f129d759f68005c75ce6 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class PikselIE(InfoExtractor):
-    _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
+    _VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
     _TESTS = [
         {
             'url': 'http://player.piksel.com/v/ums2867l',
@@ -40,6 +40,11 @@ class PikselIE(InfoExtractor):
                 'timestamp': 1486171129,
                 'upload_date': '20170204'
             }
+        },
+        {
+            # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
+            'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
+            'only_matching': True,
         }
     ]
 
@@ -52,8 +57,11 @@ class PikselIE(InfoExtractor):
             return mobj.group('url')
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            r'data-de-program-uuid=[\'"]([a-z0-9]+)',
+            webpage, 'program uuid', default=display_id)
         app_token = self._search_regex([
             r'clientAPI\s*:\s*"([^"]+)"',
             r'data-de-api-key\s*=\s*"([^"]+)"'