Merge remote-tracking branch 'aajanki/wdr_live' - youtube-dl - Another place where youtube-dl lives on

commit feccc3ff37975a67ea90cefdf028632794f2e6ff
parent 265bfa2c79abc8f233132126be313ed2d4b18dc8
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Thu, 26 Feb 2015 01:34:01 +0100

Merge remote-tracking branch 'aajanki/wdr_live'

Diffstat:
M youtube_dl/downloader/f4m.py  | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M youtube_dl/extractor/common.py  | 76 +++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
M youtube_dl/extractor/wdr.py  | 30 ++++++++++++++++++++++++++++++

3 files changed, 155 insertions(+), 51 deletions(-)
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
@@ -11,6 +11,7 @@ from .common import FileDownloader
 from .http import HttpFD
 from ..compat import (
     compat_urlparse,
+    compat_urllib_error,
 )
 from ..utils import (
     struct_pack,
@@ -121,7 +122,8 @@ class FlvReader(io.BytesIO):
 
         self.read_unsigned_int()  # BootstrapinfoVersion
         # Profile,Live,Update,Reserved
-        self.read(1)
+        flags = self.read_unsigned_char()
+        live = flags & 0x20 != 0
         # time scale
         self.read_unsigned_int()
         # CurrentMediaTime
@@ -160,6 +162,7 @@ class FlvReader(io.BytesIO):
         return {
             'segments': segments,
             'fragments': fragments,
+            'live': live,
         }
 
     def read_bootstrap_info(self):
@@ -182,6 +185,10 @@ def build_fragments_list(boot_info):
     for segment, fragments_count in segment_run_table['segment_run']:
         for _ in range(fragments_count):
             res.append((segment, next(fragments_counter)))
+
+    if boot_info['live']:
+        res = res[-2:]
+
     return res
 
 
@@ -246,6 +253,38 @@ class F4mFD(FileDownloader):
             self.report_error('Unsupported DRM')
         return media
 
+    def _get_bootstrap_from_url(self, bootstrap_url):
+        bootstrap = self.ydl.urlopen(bootstrap_url).read()
+        return read_bootstrap_info(bootstrap)
+
+    def _update_live_fragments(self, bootstrap_url, latest_fragment):
+        fragments_list = []
+        retries = 30
+        while (not fragments_list) and (retries > 0):
+            boot_info = self._get_bootstrap_from_url(bootstrap_url)
+            fragments_list = build_fragments_list(boot_info)
+            fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
+            if not fragments_list:
+                # Retry after a while
+                time.sleep(5.0)
+                retries -= 1
+
+        if not fragments_list:
+            self.report_error('Failed to update fragments')
+
+        return fragments_list
+
+    def _parse_bootstrap_node(self, node, base_url):
+        if node.text is None:
+            bootstrap_url = compat_urlparse.urljoin(
+                base_url, node.attrib['url'])
+            boot_info = self._get_bootstrap_from_url(bootstrap_url)
+        else:
+            bootstrap_url = None
+            bootstrap = base64.b64decode(node.text)
+            boot_info = read_bootstrap_info(bootstrap)
+        return (boot_info, bootstrap_url)
+
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
         requested_bitrate = info_dict.get('tbr')
@@ -265,18 +304,13 @@ class F4mFD(FileDownloader):
 
         base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
         bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
-        if bootstrap_node.text is None:
-            bootstrap_url = compat_urlparse.urljoin(
-                base_url, bootstrap_node.attrib['url'])
-            bootstrap = self.ydl.urlopen(bootstrap_url).read()
-        else:
-            bootstrap = base64.b64decode(bootstrap_node.text)
+        boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
+        live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
             metadata = base64.b64decode(metadata_node.text)
         else:
             metadata = None
-        boot_info = read_bootstrap_info(bootstrap)
 
         fragments_list = build_fragments_list(boot_info)
         if self.params.get('test', False):
@@ -301,7 +335,8 @@ class F4mFD(FileDownloader):
         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
 
         write_flv_header(dest_stream)
-        write_metadata_tag(dest_stream, metadata)
+        if not live:
+            write_metadata_tag(dest_stream, metadata)
 
         # This dict stores the download progress, it's updated by the progress
         # hook
@@ -348,24 +383,45 @@ class F4mFD(FileDownloader):
         http_dl.add_progress_hook(frag_progress_hook)
 
         frags_filenames = []
-        for (seg_i, frag_i) in fragments_list:
+        while fragments_list:
+            seg_i, frag_i = fragments_list.pop(0)
             name = 'Seg%d-Frag%d' % (seg_i, frag_i)
             url = base_url + name
             if akamai_pv:
                 url += '?' + akamai_pv.strip(';')
             frag_filename = '%s-%s' % (tmpfilename, name)
-            success = http_dl.download(frag_filename, {'url': url})
-            if not success:
-                return False
-            with open(frag_filename, 'rb') as down:
-                down_data = down.read()
-                reader = FlvReader(down_data)
-                while True:
-                    _, box_type, box_data = reader.read_box_info()
-                    if box_type == b'mdat':
-                        dest_stream.write(box_data)
-                        break
-            frags_filenames.append(frag_filename)
+            try:
+                success = http_dl.download(frag_filename, {'url': url})
+                if not success:
+                    return False
+                with open(frag_filename, 'rb') as down:
+                    down_data = down.read()
+                    reader = FlvReader(down_data)
+                    while True:
+                        _, box_type, box_data = reader.read_box_info()
+                        if box_type == b'mdat':
+                            dest_stream.write(box_data)
+                            break
+                if live:
+                    os.remove(frag_filename)
+                else:
+                    frags_filenames.append(frag_filename)
+            except (compat_urllib_error.HTTPError, ) as err:
+                if live and (err.code == 404 or err.code == 410):
+                    # We didn't keep up with the live window. Continue
+                    # with the next available fragment.
+                    msg = 'Fragment %d unavailable' % frag_i
+                    self.report_warning(msg)
+                    fragments_list = []
+                else:
+                    raise
+
+            if not fragments_list and live and bootstrap_url:
+                fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
+                total_frags += len(fragments_list)
+                if fragments_list and (fragments_list[0][1] > frag_i + 1):
+                    msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
+                    self.report_warning(msg)
 
         dest_stream.close()
 
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
@@ -921,39 +921,57 @@ class InfoExtractor(object):
 
         formats = []
         rtmp_count = 0
-        for video in smil.findall('./body/switch/video'):
-            src = video.get('src')
-            if not src:
-                continue
-            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
-            width = int_or_none(video.get('width'))
-            height = int_or_none(video.get('height'))
-            proto = video.get('proto')
-            if not proto:
-                if base:
-                    if base.startswith('rtmp'):
-                        proto = 'rtmp'
-                    elif base.startswith('http'):
-                        proto = 'http'
-            ext = video.get('ext')
-            if proto == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(src, video_id, ext))
-            elif proto == 'rtmp':
-                rtmp_count += 1
-                streamer = video.get('streamer') or base
-                formats.append({
-                    'url': streamer,
-                    'play_path': src,
-                    'ext': 'flv',
-                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
-                    'tbr': bitrate,
-                    'width': width,
-                    'height': height,
-                })
+        if smil.findall('./body/seq/video'):
+            video = smil.findall('./body/seq/video')[0]
+            fmts, rtmp_count = self._parse_smil_video(video, base, rtmp_count)
+            formats.extend(fmts)
+        else:
+            for video in smil.findall('./body/switch/video'):
+                fmts, rtmp_count = self._parse_smil_video(video, base, rtmp_count)
+                formats.extend(fmts)
+
         self._sort_formats(formats)
 
         return formats
 
+    def _parse_smil_video(self, video, base, rtmp_count):
+        src = video.get('src')
+        if not src:
+            return ([], rtmp_count)
+        bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+        width = int_or_none(video.get('width'))
+        height = int_or_none(video.get('height'))
+        proto = video.get('proto')
+        if not proto:
+            if base:
+                if base.startswith('rtmp'):
+                    proto = 'rtmp'
+                elif base.startswith('http'):
+                    proto = 'http'
+        ext = video.get('ext')
+        if proto == 'm3u8':
+            return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
+        elif proto == 'rtmp':
+            rtmp_count += 1
+            streamer = video.get('streamer') or base
+            return ([{
+                'url': streamer,
+                'play_path': src,
+                'ext': 'flv',
+                'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+                'tbr': bitrate,
+                'width': width,
+                'height': height,
+            }], rtmp_count)
+        elif proto.startswith('http'):
+            return ([{
+                'url': base + src,
+                'ext': ext or 'flv',
+                'tbr': bitrate,
+                'width': width,
+                'height': height,
+            }], rtmp_count)
+
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
@@ -28,6 +28,7 @@ class WDRIE(InfoExtractor):
                 'title': 'Servicezeit',
                 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
                 'upload_date': '20140310',
+                'is_live': False
             },
             'params': {
                 'skip_download': True,
@@ -41,6 +42,7 @@ class WDRIE(InfoExtractor):
                 'title': 'Marga Spiegel ist tot',
                 'description': 'md5:2309992a6716c347891c045be50992e4',
                 'upload_date': '20140311',
+                'is_live': False
             },
             'params': {
                 'skip_download': True,
@@ -55,6 +57,7 @@ class WDRIE(InfoExtractor):
                 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
                 'description': 'md5:2309992a6716c347891c045be50992e4',
                 'upload_date': '20091129',
+                'is_live': False
             },
         },
         {
@@ -66,6 +69,7 @@ class WDRIE(InfoExtractor):
                 'title': 'Flavia Coelho: Amar é Amar',
                 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
                 'upload_date': '20140717',
+                'is_live': False
             },
         },
         {
@@ -74,6 +78,20 @@ class WDRIE(InfoExtractor):
             'info_dict': {
                 'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
             }
+        },
+        {
+            'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
+            'info_dict': {
+                'id': 'mdb-103364',
+                'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
+                'ext': 'flv',
+                'upload_date': '20150212',
+                'is_live': True
+            },
+            'params': {
+                'skip_download': True,
+            },
         }
     ]
 
@@ -119,6 +137,10 @@ class WDRIE(InfoExtractor):
         video_url = flashvars['dslSrc'][0]
         title = flashvars['trackerClipTitle'][0]
         thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
+        is_live = flashvars.get('isLive', ['0'])[0] == '1'
+
+        if is_live:
+            title = self._live_title(title)
 
         if 'trackerClipAirTime' in flashvars:
             upload_date = flashvars['trackerClipAirTime'][0]
@@ -131,6 +153,13 @@ class WDRIE(InfoExtractor):
         if video_url.endswith('.f4m'):
             video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
             ext = 'flv'
+        elif video_url.endswith('.smil'):
+            fmt = self._extract_smil_formats(video_url, page_id)[0]
+            video_url = fmt['url']
+            sep = '&' if '?' in video_url else '?'
+            video_url += sep
+            video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
+            ext = fmt['ext']
         else:
             ext = determine_ext(video_url)
 
@@ -144,6 +173,7 @@ class WDRIE(InfoExtractor):
             'description': description,
             'thumbnail': thumbnail,
             'upload_date': upload_date,
+            'is_live': is_live
         }

	youtube-dl Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log | Files | Refs | README | LICENSE

M	youtube_dl/downloader/f4m.py	|	100	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M	youtube_dl/extractor/common.py	|	76	+++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
M	youtube_dl/extractor/wdr.py	|	30	++++++++++++++++++++++++++++++