nzz.py (1409B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import ( 8 extract_attributes, 9 ) 10 11 12 class NZZIE(InfoExtractor): 13 _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)' 14 _TESTS = [{ 15 'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153', 16 'info_dict': { 17 'id': '9153', 18 }, 19 'playlist_mincount': 6, 20 }, { 21 'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112', 22 'info_dict': { 23 'id': '1368112', 24 }, 25 'playlist_count': 1, 26 }] 27 28 def _real_extract(self, url): 29 page_id = self._match_id(url) 30 webpage = self._download_webpage(url, page_id) 31 32 entries = [] 33 for player_element in re.findall( 34 r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage): 35 player_params = extract_attributes(player_element) 36 if player_params.get('data-type') not in ('kaltura_singleArticle',): 37 self.report_warning('Unsupported player type') 38 continue 39 entry_id = player_params['data-id'] 40 entries.append(self.url_result( 41 'kaltura:1750922:' + entry_id, 'Kaltura', entry_id)) 42 43 return self.playlist_result(entries, page_id)