[YoutubeDL] Support DASH manifest downloading
author Yen Chi Hsuan <yan12125@gmail.com>
Wed, 3 Jun 2015 15:10:18 +0000 (23:10 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Wed, 3 Jun 2015 15:10:18 +0000 (23:10 +0800)
youtube_dl/downloader/dash.py [new file with mode: 0644]
youtube_dl/downloader/http.py
youtube_dl/extractor/youtube.py

diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
new file mode 100644 (file)
index 0000000..18eca2c
--- /dev/null
+++ b/youtube_dl/downloader/dash.py
@@ -0,0 +1,73 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import FileDownloader
+from ..compat import compat_urllib_request
+
+
+class DashSegmentsFD(FileDownloader):
+    """
+    Download segments in a DASH manifest
+
+    The initialization segment and every media segment listed in the info
+    dict are fetched in order and concatenated into a single output file.
+    """
+    def real_download(self, filename, info_dict):
+        """
+        Fetch every segment of a DASH format and store the result as filename.
+
+        info_dict must provide 'id', 'url' (the base for relative segment
+        URLs), 'initialization_url' and 'segment_urls'.  Segments are
+        appended to the path given by self.temp_name(filename), which is
+        handed to self.try_rename() once all of them have been written.
+        Errors raised while fetching or writing are not caught here.
+
+        Returns True after the 'finished' progress hook has been fired.
+        """
+        self.report_destination(filename)
+        tmpfilename = self.temp_name(filename)
+        base_url = info_dict['url']
+        segment_urls = info_dict['segment_urls']
+
+        self.byte_counter = 0
+
+        def append_url_to_file(outf, target_url, target_name):
+            # Fetch one segment in full and append it to the open output file
+            self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
+            req = compat_urllib_request.Request(target_url)
+            response = self.ydl.urlopen(req)
+            try:
+                data = response.read()
+            finally:
+                # Release the connection even if reading fails
+                response.close()
+            outf.write(data)
+            self.byte_counter += len(data)
+
+        def combine_url(base_url, target_url):
+            # Absolute segment URLs are used as is; relative ones are joined
+            # to the base URL without doubling the separating slash
+            if re.match(r'^https?://', target_url):
+                return target_url
+            return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
+
+        with open(tmpfilename, 'wb') as outf:
+            append_url_to_file(
+                outf, combine_url(base_url, info_dict['initialization_url']),
+                'initialization segment')
+            for i, segment_url in enumerate(segment_urls):
+                append_url_to_file(
+                    outf, combine_url(base_url, segment_url),
+                    'segment %d / %d' % (i + 1, len(segment_urls)))
+
+        self.try_rename(tmpfilename, filename)
+
+        self._hook_progress({
+            'downloaded_bytes': self.byte_counter,
+            'total_bytes': self.byte_counter,
+            'filename': filename,
+            'status': 'finished',
+        })
+
+        return True
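diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index b7f144af9ea33a102246632e04e71707be3d98ad..ceacb8522b79ec4e4d3fbef83e242ed457be4a7c 100644 (file)
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py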
@@ -6,6 +6,7 @@ import socket
 import time
 
 from .common import FileDownloader
+from .dash import DashSegmentsFD
 from ..compat import (
     compat_urllib_request,
     compat_urllib_error,
@@ -19,6 +20,9 @@ from ..utils import (
 
 class HttpFD(FileDownloader):
     def real_download(self, filename, info_dict):
+        if info_dict.get('initialization_url') and list(filter(None, info_dict.get('segment_urls', []))):
+            return DashSegmentsFD(self.ydl, self.params).real_download(filename, info_dict)
+
         url = info_dict['url']
         tmpfilename = self.temp_name(filename)
         stream = None
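diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index aacb999ce9044ba07af7597d03b34e1aa37ebddd..5d1297e0d27260dd1e0f389d5add17061fad0644 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py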
@@ -802,6 +802,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     # TODO implement WebVTT downloading
                     pass
                 elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+                    segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
                     format_id = r.attrib['id']
                     video_url = url_el.text
                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
@@ -815,6 +816,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         'filesize': filesize,
                         'fps': int_or_none(r.attrib.get('frameRate')),
                     }
+                    if segment_list:
+                        f.update({
+                            'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
+                            'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')]
+                        })
                     try:
                         existing_format = next(
                             fo for fo in formats