watchbox.py (5942B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..compat import compat_str 8 from ..utils import ( 9 int_or_none, 10 js_to_json, 11 strip_or_none, 12 try_get, 13 unescapeHTML, 14 unified_timestamp, 15 ) 16 17 18 class WatchBoxIE(InfoExtractor): 19 _VALID_URL = r'https?://(?:www\.)?watchbox\.de/(?P<kind>serien|filme)/(?:[^/]+/)*[^/]+-(?P<id>\d+)' 20 _TESTS = [{ 21 # film 22 'url': 'https://www.watchbox.de/filme/free-jimmy-12325.html', 23 'info_dict': { 24 'id': '341368', 25 'ext': 'mp4', 26 'title': 'Free Jimmy', 27 'description': 'md5:bcd8bafbbf9dc0ef98063d344d7cc5f6', 28 'thumbnail': r're:^https?://.*\.jpg$', 29 'duration': 4890, 30 'age_limit': 16, 31 'release_year': 2009, 32 }, 33 'params': { 34 'format': 'bestvideo', 35 'skip_download': True, 36 }, 37 'expected_warnings': ['Failed to download m3u8 information'], 38 }, { 39 # episode 40 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-1/date-in-der-hoelle-328286.html', 41 'info_dict': { 42 'id': '328286', 43 'ext': 'mp4', 44 'title': 'S01 E01 - Date in der Hölle', 45 'description': 'md5:2f31c74a8186899f33cb5114491dae2b', 46 'thumbnail': r're:^https?://.*\.jpg$', 47 'duration': 1291, 48 'age_limit': 12, 49 'release_year': 2010, 50 'series': 'Ugly Americans', 51 'season_number': 1, 52 'episode': 'Date in der Hölle', 53 'episode_number': 1, 54 }, 55 'params': { 56 'format': 'bestvideo', 57 'skip_download': True, 58 }, 59 'expected_warnings': ['Failed to download m3u8 information'], 60 }, { 61 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-2/der-ring-des-powers-328270', 62 'only_matching': True, 63 }] 64 65 def _real_extract(self, url): 66 mobj = re.match(self._VALID_URL, url) 67 kind, video_id = mobj.group('kind', 'id') 68 69 webpage = self._download_webpage(url, video_id) 70 71 player_config = self._parse_json( 72 self._search_regex( 73 r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage, 74 'player config', default='{}', group='data'), 75 video_id, transform_source=unescapeHTML, fatal=False) 76 77 if not player_config: 78 player_config = self._parse_json( 79 self._search_regex( 80 r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config', 81 default='{}'), 82 video_id, transform_source=js_to_json, fatal=False) or {} 83 84 source = player_config.get('source') or {} 85 86 video_id = compat_str(source.get('videoId') or video_id) 87 88 devapi = self._download_json( 89 'http://api.watchbox.de/devapi/id/%s' % video_id, video_id, query={ 90 'format': 'json', 91 'apikey': 'hbbtv', 92 }, fatal=False) 93 94 item = try_get(devapi, lambda x: x['items'][0], dict) or {} 95 96 title = item.get('title') or try_get( 97 item, lambda x: x['movie']['headline_movie'], 98 compat_str) or source['title'] 99 100 formats = [] 101 hls_url = item.get('media_videourl_hls') or source.get('hls') 102 if hls_url: 103 formats.extend(self._extract_m3u8_formats( 104 hls_url, video_id, 'mp4', entry_protocol='m3u8_native', 105 m3u8_id='hls', fatal=False)) 106 dash_url = item.get('media_videourl_wv') or source.get('dash') 107 if dash_url: 108 formats.extend(self._extract_mpd_formats( 109 dash_url, video_id, mpd_id='dash', fatal=False)) 110 mp4_url = item.get('media_videourl') 111 if mp4_url: 112 formats.append({ 113 'url': mp4_url, 114 'format_id': 'mp4', 115 'width': int_or_none(item.get('width')), 116 'height': int_or_none(item.get('height')), 117 'tbr': int_or_none(item.get('bitrate')), 118 }) 119 self._sort_formats(formats) 120 121 description = strip_or_none(item.get('descr')) 122 thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail') 123 duration = int_or_none(item.get('media_length') or source.get('length')) 124 timestamp = unified_timestamp(item.get('pubDate')) 125 view_count = int_or_none(item.get('media_views')) 126 age_limit = int_or_none(try_get(item, lambda x: x['movie']['fsk'])) 127 release_year = int_or_none(try_get(item, lambda x: x['movie']['rel_year'])) 128 129 info = { 130 'id': video_id, 131 'title': title, 132 'description': description, 133 'thumbnail': thumbnail, 134 'duration': duration, 135 'timestamp': timestamp, 136 'view_count': view_count, 137 'age_limit': age_limit, 138 'release_year': release_year, 139 'formats': formats, 140 } 141 142 if kind.lower() == 'serien': 143 series = try_get( 144 item, lambda x: x['special']['title'], 145 compat_str) or source.get('format') 146 season_number = int_or_none(self._search_regex( 147 r'^S(\d{1,2})\s*E\d{1,2}', title, 'season number', 148 default=None) or self._search_regex( 149 r'/staffel-(\d+)/', url, 'season number', default=None)) 150 episode = source.get('title') 151 episode_number = int_or_none(self._search_regex( 152 r'^S\d{1,2}\s*E(\d{1,2})', title, 'episode number', 153 default=None)) 154 info.update({ 155 'series': series, 156 'season_number': season_number, 157 'episode': episode, 158 'episode_number': episode_number, 159 }) 160 161 return info