[pbs] extract chapters information
author remitamine <remitamine@gmail.com>
Thu, 5 May 2016 20:42:37 +0000 (21:42 +0100)
committer Remita Amine <remitamine@gmail.com>
Tue, 2 May 2017 19:41:48 +0000 (20:41 +0100)
youtube_dl/extractor/pbs.py

index 6166dc2adf41a431c79244e01256ab94f0dcb704..0727e381b52e3c2b4e520d38c88762b285604c3b 100644 (file)
@@ -8,6 +8,7 @@ from ..utils import (
     ExtractorError,
     determine_ext,
     int_or_none,
+    float_or_none,
     js_to_json,
     strip_jsonp,
     strip_or_none,
@@ -464,6 +465,7 @@ class PBSIE(InfoExtractor):
                     redirects.append(redirect)
                     redirect_urls.add(redirect_url)
 
+        chapters = []
         # Player pages may also serve different qualities
         for page in ('widget/partnerplayer', 'portalplayer'):
             player = self._download_webpage(
@@ -479,6 +481,20 @@ class PBSIE(InfoExtractor):
                     extract_redirect_urls(video_info)
                     if not info:
                         info = video_info
+                if not chapters:
+                    for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
+                        chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
+                        if not chapter:
+                            continue
+                        start_time = float_or_none(chapter.get('start_time'), 1000)
+                        duration = float_or_none(chapter.get('duration'), 1000)
+                        if start_time is None or duration is None:
+                            continue
+                        chapters.append({
+                            'start_time': start_time,
+                            'end_time': start_time + duration,
+                            'title': chapter.get('title'),
+                        })
 
         formats = []
         http_url = None
@@ -588,4 +604,5 @@ class PBSIE(InfoExtractor):
             'upload_date': upload_date,
             'formats': formats,
             'subtitles': subtitles,
+            'chapters': chapters,
         }