youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 03f32a7eadf9d832aef55673edf38023a8daff95
parent 50ea2bb20d3a3e219910e87b8b30fc79ce534595
Author: Sergey M․ <dstftw@gmail.com>
Date:   Fri, 17 Jul 2015 00:14:38 +0600

[wnl] Add extractor for omroepwnl playlists

Diffstat:
M youtube_dl/extractor/__init__.py | 1 +
M youtube_dl/extractor/npo.py | 45 +++++++++++++++++++++++++++++++++++++++------
2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -390,6 +390,7 @@ from .npo import (
     NPORadioIE,
     NPORadioFragmentIE,
     TegenlichtVproIE,
+    WNLIE
 )
 from .nrk import (
     NRKIE,
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
@@ -46,12 +46,15 @@ class NPOIE(NPOBaseIE):
     IE_NAME = 'npo'
     IE_DESC = 'npo.nl and ntr.nl'
     _VALID_URL = r'''(?x)
-                    https?://
-                        (?:www\.)?
-                        (?:
-                            npo\.nl/(?!live|radio)(?:[^/]+/){2}|
-                            ntr\.nl/(?:[^/]+/){2,}|
-                            omroepwnl\.nl/video/fragment/[^/]+__
+                    (?:
+                        npo:|
+                        https?://
+                            (?:www\.)?
+                            (?:
+                                npo\.nl/(?!live|radio)(?:[^/]+/){2}|
+                                ntr\.nl/(?:[^/]+/){2,}|
+                                omroepwnl\.nl/video/fragment/[^/]+__
+                            )
                         )
                         (?P<id>[^/?#]+)
                 '''
@@ -426,3 +429,33 @@ class TegenlichtVproIE(NPOIE):
         info_page = self._download_json(
             'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
         return self._get_info(info_page['mid'])
+
+
+class WNLIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
+
+    _TEST = {
+        'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
+        'info_dict': {
+            'id': 'vandaag-de-dag-6-mei',
+            'title': 'Vandaag de Dag 6 mei',
+        },
+        'playlist_count': 4,
+    }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('npo:%s' % video_id, 'NPO')
+            for video_id, part in re.findall(
+                r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage)
+        ]
+
+        playlist_title = self._html_search_regex(
+            r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>',
+            webpage, 'playlist title')
+
+        return self.playlist_result(entries, playlist_id, playlist_title)