youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

iprima.py (5246B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 import time
      6 
      7 from .common import InfoExtractor
      8 from ..utils import (
      9     determine_ext,
     10     js_to_json,
     11 )
     12 
     13 
     14 class IPrimaIE(InfoExtractor):
     15     _VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     16     _GEO_BYPASS = False
     17 
     18     _TESTS = [{
     19         'url': 'https://prima.iprima.cz/particka/92-epizoda',
     20         'info_dict': {
     21             'id': 'p51388',
     22             'ext': 'mp4',
     23             'title': 'Partička (92)',
     24             'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
     25         },
     26         'params': {
     27             'skip_download': True,  # m3u8 download
     28         },
     29     }, {
     30         'url': 'https://cnn.iprima.cz/videa/70-epizoda',
     31         'info_dict': {
     32             'id': 'p681554',
     33             'ext': 'mp4',
     34             'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
     35         },
     36         'params': {
     37             'skip_download': True,  # m3u8 download
     38         },
     39     }, {
     40         'url': 'http://play.iprima.cz/particka/particka-92',
     41         'only_matching': True,
     42     }, {
     43         # geo restricted
     44         'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
     45         'only_matching': True,
     46     }, {
     47         # iframe api.play-backend.iprima.cz
     48         'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
     49         'only_matching': True,
     50     }, {
     51         # iframe prima.iprima.cz
     52         'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
     53         'only_matching': True,
     54     }, {
     55         'url': 'http://www.iprima.cz/filmy/desne-rande',
     56         'only_matching': True,
     57     }, {
     58         'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
     59         'only_matching': True,
     60     }, {
     61         'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
     62         'only_matching': True,
     63     }, {
     64         'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
     65         'only_matching': True,
     66     }, {
     67         'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
     68         'only_matching': True,
     69     }, {
     70         'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1',
     71         'only_matching': True,
     72     }]
     73 
     74     def _real_extract(self, url):
     75         video_id = self._match_id(url)
     76 
     77         self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')
     78 
     79         webpage = self._download_webpage(url, video_id)
     80 
     81         title = self._og_search_title(
     82             webpage, default=None) or self._search_regex(
     83             r'<h1>([^<]+)', webpage, 'title')
     84 
     85         video_id = self._search_regex(
     86             (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
     87              r'data-product="([^"]+)">',
     88              r'id=["\']player-(p\d+)"',
     89              r'playerId\s*:\s*["\']player-(p\d+)',
     90              r'\bvideos\s*=\s*["\'](p\d+)'),
     91             webpage, 'real id')
     92 
     93         playerpage = self._download_webpage(
     94             'http://play.iprima.cz/prehravac/init',
     95             video_id, note='Downloading player', query={
     96                 '_infuse': 1,
     97                 '_ts': round(time.time()),
     98                 'productId': video_id,
     99             }, headers={'Referer': url})
    100 
    101         formats = []
    102 
    103         def extract_formats(format_url, format_key=None, lang=None):
    104             ext = determine_ext(format_url)
    105             new_formats = []
    106             if format_key == 'hls' or ext == 'm3u8':
    107                 new_formats = self._extract_m3u8_formats(
    108                     format_url, video_id, 'mp4', entry_protocol='m3u8_native',
    109                     m3u8_id='hls', fatal=False)
    110             elif format_key == 'dash' or ext == 'mpd':
    111                 return
    112                 new_formats = self._extract_mpd_formats(
    113                     format_url, video_id, mpd_id='dash', fatal=False)
    114             if lang:
    115                 for f in new_formats:
    116                     if not f.get('language'):
    117                         f['language'] = lang
    118             formats.extend(new_formats)
    119 
    120         options = self._parse_json(
    121             self._search_regex(
    122                 r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
    123                 playerpage, 'player options', default='{}'),
    124             video_id, transform_source=js_to_json, fatal=False)
    125         if options:
    126             for key, tracks in options.get('tracks', {}).items():
    127                 if not isinstance(tracks, list):
    128                     continue
    129                 for track in tracks:
    130                     src = track.get('src')
    131                     if src:
    132                         extract_formats(src, key.lower(), track.get('lang'))
    133 
    134         if not formats:
    135             for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
    136                 extract_formats(src)
    137 
    138         if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
    139             self.raise_geo_restricted(countries=['CZ'])
    140 
    141         self._sort_formats(formats)
    142 
    143         return {
    144             'id': video_id,
    145             'title': title,
    146             'thumbnail': self._og_search_thumbnail(webpage, default=None),
    147             'formats': formats,
    148             'description': self._og_search_description(webpage, default=None),
    149         }