youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit feccc3ff37975a67ea90cefdf028632794f2e6ff
parent 265bfa2c79abc8f233132126be313ed2d4b18dc8
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Thu, 26 Feb 2015 01:34:01 +0100

Merge remote-tracking branch 'aajanki/wdr_live'

Diffstat:
Myoutube_dl/downloader/f4m.py | 100+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
Myoutube_dl/extractor/common.py | 76+++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
Myoutube_dl/extractor/wdr.py | 30++++++++++++++++++++++++++++++
3 files changed, 155 insertions(+), 51 deletions(-)

diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py @@ -11,6 +11,7 @@ from .common import FileDownloader from .http import HttpFD from ..compat import ( compat_urlparse, + compat_urllib_error, ) from ..utils import ( struct_pack, @@ -121,7 +122,8 @@ class FlvReader(io.BytesIO): self.read_unsigned_int() # BootstrapinfoVersion # Profile,Live,Update,Reserved - self.read(1) + flags = self.read_unsigned_char() + live = flags & 0x20 != 0 # time scale self.read_unsigned_int() # CurrentMediaTime @@ -160,6 +162,7 @@ class FlvReader(io.BytesIO): return { 'segments': segments, 'fragments': fragments, + 'live': live, } def read_bootstrap_info(self): @@ -182,6 +185,10 @@ def build_fragments_list(boot_info): for segment, fragments_count in segment_run_table['segment_run']: for _ in range(fragments_count): res.append((segment, next(fragments_counter))) + + if boot_info['live']: + res = res[-2:] + return res @@ -246,6 +253,38 @@ class F4mFD(FileDownloader): self.report_error('Unsupported DRM') return media + def _get_bootstrap_from_url(self, bootstrap_url): + bootstrap = self.ydl.urlopen(bootstrap_url).read() + return read_bootstrap_info(bootstrap) + + def _update_live_fragments(self, bootstrap_url, latest_fragment): + fragments_list = [] + retries = 30 + while (not fragments_list) and (retries > 0): + boot_info = self._get_bootstrap_from_url(bootstrap_url) + fragments_list = build_fragments_list(boot_info) + fragments_list = [f for f in fragments_list if f[1] > latest_fragment] + if not fragments_list: + # Retry after a while + time.sleep(5.0) + retries -= 1 + + if not fragments_list: + self.report_error('Failed to update fragments') + + return fragments_list + + def _parse_bootstrap_node(self, node, base_url): + if node.text is None: + bootstrap_url = compat_urlparse.urljoin( + base_url, node.attrib['url']) + boot_info = self._get_bootstrap_from_url(bootstrap_url) + else: + bootstrap_url = None + bootstrap = base64.b64decode(node.text) + boot_info = read_bootstrap_info(bootstrap) + return (boot_info, bootstrap_url) + def real_download(self, filename, info_dict): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') @@ -265,18 +304,13 @@ class F4mFD(FileDownloader): base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) bootstrap_node = doc.find(_add_ns('bootstrapInfo')) - if bootstrap_node.text is None: - bootstrap_url = compat_urlparse.urljoin( - base_url, bootstrap_node.attrib['url']) - bootstrap = self.ydl.urlopen(bootstrap_url).read() - else: - bootstrap = base64.b64decode(bootstrap_node.text) + boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url) + live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: metadata = base64.b64decode(metadata_node.text) else: metadata = None - boot_info = read_bootstrap_info(bootstrap) fragments_list = build_fragments_list(boot_info) if self.params.get('test', False): @@ -301,7 +335,8 @@ class F4mFD(FileDownloader): (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') write_flv_header(dest_stream) - write_metadata_tag(dest_stream, metadata) + if not live: + write_metadata_tag(dest_stream, metadata) # This dict stores the download progress, it's updated by the progress # hook @@ -348,24 +383,45 @@ class F4mFD(FileDownloader): http_dl.add_progress_hook(frag_progress_hook) frags_filenames = [] - for (seg_i, frag_i) in fragments_list: + while fragments_list: + seg_i, frag_i = fragments_list.pop(0) name = 'Seg%d-Frag%d' % (seg_i, frag_i) url = base_url + name if akamai_pv: url += '?' + akamai_pv.strip(';') frag_filename = '%s-%s' % (tmpfilename, name) - success = http_dl.download(frag_filename, {'url': url}) - if not success: - return False - with open(frag_filename, 'rb') as down: - down_data = down.read() - reader = FlvReader(down_data) - while True: - _, box_type, box_data = reader.read_box_info() - if box_type == b'mdat': - dest_stream.write(box_data) - break - frags_filenames.append(frag_filename) + try: + success = http_dl.download(frag_filename, {'url': url}) + if not success: + return False + with open(frag_filename, 'rb') as down: + down_data = down.read() + reader = FlvReader(down_data) + while True: + _, box_type, box_data = reader.read_box_info() + if box_type == b'mdat': + dest_stream.write(box_data) + break + if live: + os.remove(frag_filename) + else: + frags_filenames.append(frag_filename) + except (compat_urllib_error.HTTPError, ) as err: + if live and (err.code == 404 or err.code == 410): + # We didn't keep up with the live window. Continue + # with the next available fragment. + msg = 'Fragment %d unavailable' % frag_i + self.report_warning(msg) + fragments_list = [] + else: + raise + + if not fragments_list and live and bootstrap_url: + fragments_list = self._update_live_fragments(bootstrap_url, frag_i) + total_frags += len(fragments_list) + if fragments_list and (fragments_list[0][1] > frag_i + 1): + msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) + self.report_warning(msg) dest_stream.close() diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py @@ -921,39 +921,57 @@ class InfoExtractor(object): formats = [] rtmp_count = 0 - for video in smil.findall('./body/switch/video'): - src = video.get('src') - if not src: - continue - bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) - width = int_or_none(video.get('width')) - height = int_or_none(video.get('height')) - proto = video.get('proto') - if not proto: - if base: - if base.startswith('rtmp'): - proto = 'rtmp' - elif base.startswith('http'): - proto = 'http' - ext = video.get('ext') - if proto == 'm3u8': - formats.extend(self._extract_m3u8_formats(src, video_id, ext)) - elif proto == 'rtmp': - rtmp_count += 1 - streamer = video.get('streamer') or base - formats.append({ - 'url': streamer, - 'play_path': src, - 'ext': 'flv', - 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate), - 'tbr': bitrate, - 'width': width, - 'height': height, - }) + if smil.findall('./body/seq/video'): + video = smil.findall('./body/seq/video')[0] + fmts, rtmp_count = self._parse_smil_video(video, base, rtmp_count) + formats.extend(fmts) + else: + for video in smil.findall('./body/switch/video'): + fmts, rtmp_count = self._parse_smil_video(video, base, rtmp_count) + formats.extend(fmts) + self._sort_formats(formats) return formats + def _parse_smil_video(self, video, base, rtmp_count): + src = video.get('src') + if not src: + return ([], rtmp_count) + bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) + width = int_or_none(video.get('width')) + height = int_or_none(video.get('height')) + proto = video.get('proto') + if not proto: + if base: + if base.startswith('rtmp'): + proto = 'rtmp' + elif base.startswith('http'): + proto = 'http' + ext = video.get('ext') + if proto == 'm3u8': + return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count) + elif proto == 'rtmp': + rtmp_count += 1 + streamer = video.get('streamer') or base + return ([{ + 'url': streamer, + 'play_path': src, + 'ext': 'flv', + 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate), + 'tbr': bitrate, + 'width': width, + 'height': height, + }], rtmp_count) + elif proto.startswith('http'): + return ([{ + 'url': base + src, + 'ext': ext or 'flv', + 'tbr': bitrate, + 'width': width, + 'height': height, + }], rtmp_count) + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py @@ -28,6 +28,7 @@ class WDRIE(InfoExtractor): 'title': 'Servicezeit', 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb', 'upload_date': '20140310', + 'is_live': False }, 'params': { 'skip_download': True, @@ -41,6 +42,7 @@ class WDRIE(InfoExtractor): 'title': 'Marga Spiegel ist tot', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20140311', + 'is_live': False }, 'params': { 'skip_download': True, @@ -55,6 +57,7 @@ class WDRIE(InfoExtractor): 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20091129', + 'is_live': False }, }, { @@ -66,6 +69,7 @@ class WDRIE(InfoExtractor): 'title': 'Flavia Coelho: Amar é Amar', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', 'upload_date': '20140717', + 'is_live': False }, }, { @@ -74,6 +78,20 @@ class WDRIE(InfoExtractor): 'info_dict': { 'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100', } + }, + { + 'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html', + 'info_dict': { + 'id': 'mdb-103364', + 'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9', + 'ext': 'flv', + 'upload_date': '20150212', + 'is_live': True + }, + 'params': { + 'skip_download': True, + }, } ] @@ -119,6 +137,10 @@ class WDRIE(InfoExtractor): video_url = flashvars['dslSrc'][0] title = flashvars['trackerClipTitle'][0] thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None + is_live = flashvars.get('isLive', ['0'])[0] == '1' + + if is_live: + title = self._live_title(title) if 'trackerClipAirTime' in flashvars: upload_date = flashvars['trackerClipAirTime'][0] @@ -131,6 +153,13 @@ class WDRIE(InfoExtractor): if video_url.endswith('.f4m'): video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' ext = 'flv' + elif video_url.endswith('.smil'): + fmt = self._extract_smil_formats(video_url, page_id)[0] + video_url = fmt['url'] + sep = '&' if '?' in video_url else '?' + video_url += sep + video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43' + ext = fmt['ext'] else: ext = determine_ext(video_url) @@ -144,6 +173,7 @@ class WDRIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'upload_date': upload_date, + 'is_live': is_live }