youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit da634d0a8b409673ea095710c642bbd61ffec429
parent 86f2541695ba0280982e47e52c5cf26946d5d7c6
Author: Sergey M․ <dstftw@gmail.com>
Date:   Sat, 11 Jul 2015 04:43:55 +0600

Merge branch 'dufferzafar-webofstories'

Diffstat:
M youtube_dl/extractor/__init__.py     |  5 ++++-
M youtube_dl/extractor/webofstories.py | 41 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -721,7 +721,10 @@ from .wdr import (
     WDRMobileIE,
     WDRMausIE,
 )
-from .webofstories import WebOfStoriesIE
+from .webofstories import (
+    WebOfStoriesIE,
+    WebOfStoriesPlaylistIE,
+)
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wistia import WistiaIE
diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import int_or_none
 
@@ -98,3 +100,42 @@ class WebOfStoriesIE(InfoExtractor):
             'description': description,
             'duration': duration,
         }
+
+
+class WebOfStoriesPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?webofstories\.com/playAll/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://www.webofstories.com/playAll/donald.knuth',
+        'info_dict': {
+            'id': 'donald.knuth',
+            'title': 'Donald Knuth (Scientist)',
+        },
+        'playlist_mincount': 97,
+    }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories')
+            for video_number in set(re.findall('href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage))
+        ]
+
+        title = self._search_regex(
+            r'<div id="speakerName">\s*<span>([^<]+)</span>',
+            webpage, 'speaker', default=None)
+        if title:
+            field = self._search_regex(
+                r'<span id="primaryField">([^<]+)</span>',
+                webpage, 'field', default=None)
+            if field:
+                title += ' (%s)' % field
+
+        if not title:
+            title = self._search_regex(
+                r'<title>Play\s+all\s+stories\s*-\s*([^<]+)\s*-\s*Web\s+of\s+Stories</title>',
+                webpage, 'title')
+
+        return self.playlist_result(entries, playlist_id, title)