youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit fa35cdad02e1c40094f01c9f8e6529da2f021563
parent d1b9c912a42de3b99ae73553d38fbfa50b8ebc52
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Mon, 21 Apr 2014 05:47:52 +0200

[condenast|generic] Add support for condenast embeds (Fixes #2783)

Diffstat:
M youtube_dl/extractor/condenast.py | 30 ++++++++++++++++++------------
M youtube_dl/extractor/generic.py | 26 ++++++++++++++++++++++++++
2 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py
@@ -28,16 +28,18 @@ class CondeNastIE(InfoExtractor):
         'glamour': 'Glamour',
         'wmagazine': 'W Magazine',
         'vanityfair': 'Vanity Fair',
+        'cnevids': 'Condé Nast',
     }
 
-    _VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
+    _VALID_URL = r'http://(video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
     IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
 
     _TEST = {
         'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
-        'file': '5171b343c2b4c00dd0c1ccb3.mp4',
         'md5': '1921f713ed48aabd715691f774c451f7',
         'info_dict': {
+            'id': '5171b343c2b4c00dd0c1ccb3',
+            'ext': 'mp4',
             'title': '3D Printed Speakers Lit With LED',
             'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
         }
@@ -55,12 +57,16 @@ class CondeNastIE(InfoExtractor):
         entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
         return self.playlist_result(entries, playlist_title=title)
 
-    def _extract_video(self, webpage):
-        description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
-                                               r'<div class="video-post-content">(.+?)</div>',
-                                               ],
-                                              webpage, 'description',
-                                              fatal=False, flags=re.DOTALL)
+    def _extract_video(self, webpage, url_type):
+        if url_type != 'embed':
+            description = self._html_search_regex(
+                [
+                    r'<div class="cne-video-description">(.+?)</div>',
+                    r'<div class="video-post-content">(.+?)</div>',
+                ],
+                webpage, 'description', fatal=False, flags=re.DOTALL)
+        else:
+            description = None
         params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
                                     'player params', flags=re.DOTALL)
         video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
@@ -99,12 +105,12 @@ class CondeNastIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         site = mobj.group('site')
         url_type = mobj.group('type')
-        id = mobj.group('id')
+        item_id = mobj.group('id')
 
-        self.to_screen(u'Extracting from %s with the Condé Nast extractor' % self._SITES[site])
-        webpage = self._download_webpage(url, id)
+        self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
+        webpage = self._download_webpage(url, item_id)
 
         if url_type == 'series':
             return self._extract_series(url, webpage)
         else:
-            return self._extract_video(webpage)
+            return self._extract_video(webpage, url_type)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
@@ -239,6 +239,16 @@ class GenericIE(InfoExtractor):
                 'uploader_id': 'rbctv_2012_4',
             },
         },
+        # Condé Nast embed
+        {
+            'url': 'http://www.wired.com/2014/04/honda-asimo/',
+            'md5': 'ba0dfe966fa007657bd1443ee672db0f',
+            'info_dict': {
+                'id': '53501be369702d3275860000',
+                'ext': 'mp4',
+                'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
+            }
+        }
     ]
 
     def report_download_webpage(self, video_id):
@@ -485,6 +495,22 @@ class GenericIE(InfoExtractor):
         if mobj:
             return self.url_result(mobj.group(1), 'BlipTV')
 
+        # Look for embedded condenast player
+        matches = re.findall(
+            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
+            webpage)
+        if matches:
+            return {
+                '_type': 'playlist',
+                'entries': [{
+                    '_type': 'url',
+                    'ie_key': 'CondeNast',
+                    'url': ma,
+                } for ma in matches],
+                'title': video_title,
+                'id': video_id,
+            }
+
         # Look for Bandcamp pages with custom domain
         mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
         if mobj is not None: