joj.py (3748B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..compat import compat_str 8 from ..utils import ( 9 int_or_none, 10 js_to_json, 11 try_get, 12 ) 13 14 15 class JojIE(InfoExtractor): 16 _VALID_URL = r'''(?x) 17 (?: 18 joj:| 19 https?://media\.joj\.sk/embed/ 20 ) 21 (?P<id>[^/?#^]+) 22 ''' 23 _TESTS = [{ 24 'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932', 25 'info_dict': { 26 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932', 27 'ext': 'mp4', 28 'title': 'NOVÉ BÝVANIE', 29 'thumbnail': r're:^https?://.*\.jpg$', 30 'duration': 3118, 31 } 32 }, { 33 'url': 'https://media.joj.sk/embed/9i1cxv', 34 'only_matching': True, 35 }, { 36 'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932', 37 'only_matching': True, 38 }, { 39 'url': 'joj:9i1cxv', 40 'only_matching': True, 41 }] 42 43 @staticmethod 44 def _extract_urls(webpage): 45 return [ 46 mobj.group('url') 47 for mobj in re.finditer( 48 r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1', 49 webpage)] 50 51 def _real_extract(self, url): 52 video_id = self._match_id(url) 53 54 webpage = self._download_webpage( 55 'https://media.joj.sk/embed/%s' % video_id, video_id) 56 57 title = self._search_regex( 58 (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1', 59 r'<title>(?P<title>[^<]+)'), webpage, 'title', 60 default=None, group='title') or self._og_search_title(webpage) 61 62 bitrates = self._parse_json( 63 self._search_regex( 64 r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates', 65 default='{}'), 66 video_id, transform_source=js_to_json, fatal=False) 67 68 formats = [] 69 for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []: 70 if isinstance(format_url, compat_str): 71 height = self._search_regex( 72 r'(\d+)[pP]\.', format_url, 'height', default=None) 73 formats.append({ 74 'url': format_url, 75 'format_id': '%sp' % height if height else None, 76 'height': int(height), 77 }) 78 if not formats: 79 playlist = self._download_xml( 80 'https://media.joj.sk/services/Video.php?clip=%s' % video_id, 81 video_id) 82 for file_el in playlist.findall('./files/file'): 83 path = file_el.get('path') 84 if not path: 85 continue 86 format_id = file_el.get('id') or file_el.get('label') 87 formats.append({ 88 'url': 'http://n16.joj.sk/storage/%s' % path.replace( 89 'dat/', '', 1), 90 'format_id': format_id, 91 'height': int_or_none(self._search_regex( 92 r'(\d+)[pP]', format_id or path, 'height', 93 default=None)), 94 }) 95 self._sort_formats(formats) 96 97 thumbnail = self._og_search_thumbnail(webpage) 98 99 duration = int_or_none(self._search_regex( 100 r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) 101 102 return { 103 'id': video_id, 104 'title': title, 105 'thumbnail': thumbnail, 106 'duration': duration, 107 'formats': formats, 108 }