youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 1afe753462f0293122dc7a9b534b4f5cdb1e5c4e
parent 524c2c716a55d16b3cf4f2ed344e3a063bb63ce8
Author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date:   Sun,  9 Feb 2014 14:22:56 +0100

[slideshare] Fix description extraction and modernize

The ‘og:description’  property doesn’t contain the full description

Diffstat:
Myoutube_dl/extractor/slideshare.py | 23++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json @@ -12,11 +14,12 @@ class SlideshareIE(InfoExtractor): _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' _TEST = { - u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity', - u'file': u'25665706.mp4', - u'info_dict': { - u'title': u'Managing Scale and Complexity', - u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix', + 'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity', + 'info_dict': { + 'id': '25665706', + 'ext': 'mp4', + 'title': 'Managing Scale and Complexity', + 'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.', }, } @@ -26,15 +29,17 @@ class SlideshareIE(InfoExtractor): webpage = self._download_webpage(url, page_title) slideshare_obj = self._search_regex( r'var slideshare_object = ({.*?}); var user_info =', - webpage, u'slideshare object') + webpage, 'slideshare object') info = json.loads(slideshare_obj) - if info['slideshow']['type'] != u'video': - raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True) + if info['slideshow']['type'] != 'video': + raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True) doc = info['doc'] bucket = info['jsplayer']['video_bucket'] ext = info['jsplayer']['video_extension'] video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext) + description = self._html_search_regex( + r'<p class="description.*?"[^>]*>(.*?)</p>', webpage, 'description') return { '_type': 'video', @@ -43,5 +48,5 @@ class SlideshareIE(InfoExtractor): 'ext': ext, 'url': video_url, 'thumbnail': info['slideshow']['pin_image_url'], - 'description': self._og_search_description(webpage), + 'description': description, }