youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 45c85d7ba1dbca09c7ded9130fa5670b302e099b
parent df8f53f752c0f01577dcc5d63c6d9a81d924770b
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Mon, 22 Sep 2014 12:53:41 +0200

Merge remote-tracking branch 'origin/master'

Diffstat:
M youtube_dl/downloader/f4m.py | 5 +++++
M youtube_dl/extractor/__init__.py | 5 ++++-
M youtube_dl/extractor/nbc.py | 4 ++--
M youtube_dl/extractor/npo.py | 30 ++++++++++++++++++++++++++++++
M youtube_dl/extractor/sbs.py | 2 +-
M youtube_dl/extractor/theplatform.py | 53 +++++++++++++++++++++++++++++++----------------------
6 files changed, 73 insertions(+), 26 deletions(-)

diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py @@ -16,6 +16,7 @@ from ..utils import ( format_bytes, encodeFilename, sanitize_open, + xpath_text, ) @@ -251,6 +252,8 @@ class F4mFD(FileDownloader): # We only download the first fragment fragments_list = fragments_list[:1] total_frags = len(fragments_list) + # For some akamai manifests we'll need to add a query to the fragment url + akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) tmpfilename = self.temp_name(filename) (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') @@ -290,6 +293,8 @@ class F4mFD(FileDownloader): for (seg_i, frag_i) in fragments_list: name = 'Seg%d-Frag%d' % (seg_i, frag_i) url = base_url + name + if akamai_pv: + url += '?' + akamai_pv.strip(';') frag_filename = '%s-%s' % (tmpfilename, name) success = http_dl.download(frag_filename, {'url': url}) if not success: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py @@ -249,7 +249,10 @@ from .nosvideo import NosVideoIE from .novamov import NovaMovIE from .nowness import NownessIE from .nowvideo import NowVideoIE -from .npo import NPOIE +from .npo import ( + NPOIE, + TegenlichtVproIE, +) from .nrk import ( NRKIE, NRKTVIE, diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py @@ -16,9 +16,9 @@ class NBCIE(InfoExtractor): _TEST = { 'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', - 'md5': '54d0fbc33e0b853a65d7b4de5c06d64e', + # md5 checksum is not stable 'info_dict': { - 'id': 'u1RInQZRN7QJ', + 'id': 'bTmnLCvIbaaH', 'ext': 'flv', 'title': 'I Am a Firefighter', 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py @@ -7,6 +7,7 @@ from ..utils import ( unified_strdate, parse_duration, qualities, + url_basename, ) @@ -55,7 +56,9 @@ class NPOIE(InfoExtractor): def _real_extract(self, url): mobj = 
re.match(self._VALID_URL, url) video_id = mobj.group('id') + return self._get_info(video_id) + def _get_info(self, video_id): metadata = self._download_json( 'http://e.omroep.nl/metadata/aflevering/%s' % video_id, video_id, @@ -106,3 +109,30 @@ class NPOIE(InfoExtractor): 'duration': parse_duration(metadata.get('tijdsduur')), 'formats': formats, } + + +class TegenlichtVproIE(NPOIE): + IE_NAME = 'tegenlicht.vpro.nl' + _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?' + + _TESTS = [ + { + 'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html', + 'md5': 'f8065e4e5a7824068ed3c7e783178f2c', + 'info_dict': { + 'id': 'VPWON_1169289', + 'ext': 'm4v', + 'title': 'Tegenlicht', + 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1', + 'upload_date': '20130225', + }, + }, + ] + + def _real_extract(self, url): + name = url_basename(url) + webpage = self._download_webpage(url, name) + urn = self._html_search_meta('mediaurn', webpage) + info_page = self._download_json( + 'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name) + return self._get_info(info_page['mid']) diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py @@ -21,7 +21,7 @@ class SBSIE(InfoExtractor): 'md5': '3150cf278965eeabb5b4cea1c963fe0a', 'info_dict': { 'id': '320403011771', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Dingo Conservation', 'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. 
This family run a dingo conservation park to prevent their extinction', 'thumbnail': 're:http://.*\.jpg', diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py @@ -5,6 +5,7 @@ import json from .common import InfoExtractor from ..utils import ( + compat_str, ExtractorError, xpath_with_ns, ) @@ -55,36 +56,44 @@ class ThePlatformIE(InfoExtractor): body = meta.find(_x('smil:body')) f4m_node = body.find(_x('smil:seq//smil:video')) - if f4m_node is not None: + if f4m_node is not None and '.f4m' in f4m_node.attrib['src']: f4m_url = f4m_node.attrib['src'] if 'manifest.f4m?' not in f4m_url: f4m_url += '?' # the parameters are from syfy.com, other sites may use others, # they also work for nbc.com f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' - formats = [{ - 'ext': 'flv', - 'url': f4m_url, - }] + formats = self._extract_f4m_formats(f4m_url, video_id) else: - base_url = head.find(_x('smil:meta')).attrib['base'] - switch = body.find(_x('smil:switch')) formats = [] - for f in switch.findall(_x('smil:video')): - attr = f.attrib - width = int(attr['width']) - height = int(attr['height']) - vbr = int(attr['system-bitrate']) // 1000 - format_id = '%dx%d_%dk' % (width, height, vbr) - formats.append({ - 'format_id': format_id, - 'url': base_url, - 'play_path': 'mp4:' + attr['src'], - 'ext': 'flv', - 'width': width, - 'height': height, - 'vbr': vbr, - }) + switch = body.find(_x('smil:switch')) + if switch is not None: + base_url = head.find(_x('smil:meta')).attrib['base'] + for f in switch.findall(_x('smil:video')): + attr = f.attrib + width = int(attr['width']) + height = int(attr['height']) + vbr = int(attr['system-bitrate']) // 1000 + format_id = '%dx%d_%dk' % (width, height, vbr) + formats.append({ + 'format_id': format_id, + 'url': base_url, + 'play_path': 'mp4:' + attr['src'], + 'ext': 'flv', + 'width': width, + 'height': height, + 'vbr': vbr, + }) + else: + switch = body.find(_x('smil:seq//smil:switch')) + for f in switch.findall(_x('smil:video')): 
+ attr = f.attrib + vbr = int(attr['system-bitrate']) // 1000 + formats.append({ + 'format_id': compat_str(vbr), + 'url': attr['src'], + 'vbr': vbr, + }) self._sort_formats(formats) return {