iprima.py (5246B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 import time 6 7 from .common import InfoExtractor 8 from ..utils import ( 9 determine_ext, 10 js_to_json, 11 ) 12 13 14 class IPrimaIE(InfoExtractor): 15 _VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)' 16 _GEO_BYPASS = False 17 18 _TESTS = [{ 19 'url': 'https://prima.iprima.cz/particka/92-epizoda', 20 'info_dict': { 21 'id': 'p51388', 22 'ext': 'mp4', 23 'title': 'Partička (92)', 24 'description': 'md5:859d53beae4609e6dd7796413f1b6cac', 25 }, 26 'params': { 27 'skip_download': True, # m3u8 download 28 }, 29 }, { 30 'url': 'https://cnn.iprima.cz/videa/70-epizoda', 31 'info_dict': { 32 'id': 'p681554', 33 'ext': 'mp4', 34 'title': 'HLAVNÍ ZPRÁVY 3.5.2020', 35 }, 36 'params': { 37 'skip_download': True, # m3u8 download 38 }, 39 }, { 40 'url': 'http://play.iprima.cz/particka/particka-92', 41 'only_matching': True, 42 }, { 43 # geo restricted 44 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1', 45 'only_matching': True, 46 }, { 47 # iframe api.play-backend.iprima.cz 48 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2', 49 'only_matching': True, 50 }, { 51 # iframe prima.iprima.cz 52 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha', 53 'only_matching': True, 54 }, { 55 'url': 'http://www.iprima.cz/filmy/desne-rande', 56 'only_matching': True, 57 }, { 58 'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby', 59 'only_matching': True, 60 }, { 61 'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy', 62 'only_matching': True, 63 }, { 64 'url': 'https://cool.iprima.cz/derava-silnice-nevadi', 65 'only_matching': True, 66 }, { 67 'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi', 68 'only_matching': True, 69 }, { 70 'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1', 71 'only_matching': True, 72 }] 73 74 def _real_extract(self, url): 75 video_id = self._match_id(url) 76 77 self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1') 78 79 webpage = self._download_webpage(url, video_id) 80 81 title = self._og_search_title( 82 webpage, default=None) or self._search_regex( 83 r'<h1>([^<]+)', webpage, 'title') 84 85 video_id = self._search_regex( 86 (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)', 87 r'data-product="([^"]+)">', 88 r'id=["\']player-(p\d+)"', 89 r'playerId\s*:\s*["\']player-(p\d+)', 90 r'\bvideos\s*=\s*["\'](p\d+)'), 91 webpage, 'real id') 92 93 playerpage = self._download_webpage( 94 'http://play.iprima.cz/prehravac/init', 95 video_id, note='Downloading player', query={ 96 '_infuse': 1, 97 '_ts': round(time.time()), 98 'productId': video_id, 99 }, headers={'Referer': url}) 100 101 formats = [] 102 103 def extract_formats(format_url, format_key=None, lang=None): 104 ext = determine_ext(format_url) 105 new_formats = [] 106 if format_key == 'hls' or ext == 'm3u8': 107 new_formats = self._extract_m3u8_formats( 108 format_url, video_id, 'mp4', entry_protocol='m3u8_native', 109 m3u8_id='hls', fatal=False) 110 elif format_key == 'dash' or ext == 'mpd': 111 return 112 new_formats = self._extract_mpd_formats( 113 format_url, video_id, mpd_id='dash', fatal=False) 114 if lang: 115 for f in new_formats: 116 if not f.get('language'): 117 f['language'] = lang 118 formats.extend(new_formats) 119 120 options = self._parse_json( 121 self._search_regex( 122 r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]', 123 playerpage, 'player options', default='{}'), 124 video_id, transform_source=js_to_json, fatal=False) 125 if options: 126 for key, tracks in options.get('tracks', {}).items(): 127 if not isinstance(tracks, list): 128 continue 129 for track in tracks: 130 src = track.get('src') 131 if src: 132 extract_formats(src, key.lower(), track.get('lang')) 133 134 if not formats: 135 for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage): 136 extract_formats(src) 137 138 if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage: 139 self.raise_geo_restricted(countries=['CZ']) 140 141 self._sort_formats(formats) 142 143 return { 144 'id': video_id, 145 'title': title, 146 'thumbnail': self._og_search_thumbnail(webpage, default=None), 147 'formats': formats, 148 'description': self._og_search_description(webpage, default=None), 149 }