hgtv.py (1435B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor


class HGTVComShowIE(InfoExtractor):
    """Playlist extractor for hgtv.com show pages.

    Matches URLs of the form ``hgtv.com/shows/<show>/<page>`` and uses the
    ``<page>`` path segment as the playlist identifier.
    """

    IE_NAME = 'hgtv.com:show'
    _VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # data-module="video"
        'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos',
        'info_dict': {
            'id': 'flip-or-flop-full-episodes-season-4-videos',
            'title': 'Flip or Flop Full Episodes',
        },
        'playlist_mincount': 15,
    }, {
        # data-deferred-module="video"
        'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the video config embedded in a show page.

        The page is downloaded, the first ``text/x-config`` script that
        follows a ``data-module="video"`` (or ``data-deferred-module``)
        element is parsed as JSON, and every video of its first channel
        that carries a non-empty ``releaseUrl`` becomes a playlist entry.

        A missing ``channels`` or ``videos`` key (or an empty channel list)
        is not handled here and propagates as KeyError / IndexError.
        """
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        # (?s) lets ".*?" and "(.+?)" span newlines between the module
        # element and the config script that follows it.
        config_pattern = r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script'
        raw_config = self._search_regex(config_pattern, page, 'video config')
        parsed = self._parse_json(raw_config, playlist_id)

        # Only the first channel is used; any further channels are ignored.
        channel = parsed['channels'][0]

        results = []
        for item in channel['videos']:
            release_url = item.get('releaseUrl')
            if not release_url:
                # Skip videos without a usable release URL.
                continue
            results.append(self.url_result(release_url))

        playlist_title = channel.get('title')
        playlist_description = channel.get('description')
        return self.playlist_result(
            results, playlist_id, playlist_title, playlist_description)