youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

puhutv.py (8458B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..compat import (
      6     compat_HTTPError,
      7     compat_str,
      8 )
      9 from ..utils import (
     10     ExtractorError,
     11     int_or_none,
     12     float_or_none,
     13     parse_resolution,
     14     str_or_none,
     15     try_get,
     16     unified_timestamp,
     17     url_or_none,
     18     urljoin,
     19 )
     20 
     21 
     22 class PuhuTVIE(InfoExtractor):
     23     _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
     24     IE_NAME = 'puhutv'
     25     _TESTS = [{
     26         # film
     27         'url': 'https://puhutv.com/sut-kardesler-izle',
     28         'md5': 'a347470371d56e1585d1b2c8dab01c96',
     29         'info_dict': {
     30             'id': '5085',
     31             'display_id': 'sut-kardesler',
     32             'ext': 'mp4',
     33             'title': 'Süt Kardeşler',
     34             'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa',
     35             'thumbnail': r're:^https?://.*\.jpg$',
     36             'duration': 4832.44,
     37             'creator': 'Arzu Film',
     38             'timestamp': 1561062602,
     39             'upload_date': '20190620',
     40             'release_year': 1976,
     41             'view_count': int,
     42             'tags': list,
     43         },
     44     }, {
     45         # episode, geo restricted, bypassable with --geo-verification-proxy
     46         'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
     47         'only_matching': True,
     48     }, {
     49         # 4k, with subtitles
     50         'url': 'https://puhutv.com/dip-1-bolum-izle',
     51         'only_matching': True,
     52     }]
     53     _SUBTITLE_LANGS = {
     54         'English': 'en',
     55         'Deutsch': 'de',
     56         'عربى': 'ar'
     57     }
     58 
     59     def _real_extract(self, url):
     60         display_id = self._match_id(url)
     61 
     62         info = self._download_json(
     63             urljoin(url, '/api/slug/%s-izle' % display_id),
     64             display_id)['data']
     65 
     66         video_id = compat_str(info['id'])
     67         show = info.get('title') or {}
     68         title = info.get('name') or show['name']
     69         if info.get('display_name'):
     70             title = '%s %s' % (title, info['display_name'])
     71 
     72         try:
     73             videos = self._download_json(
     74                 'https://puhutv.com/api/assets/%s/videos' % video_id,
     75                 display_id, 'Downloading video JSON',
     76                 headers=self.geo_verification_headers())
     77         except ExtractorError as e:
     78             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
     79                 self.raise_geo_restricted()
     80             raise
     81 
     82         urls = []
     83         formats = []
     84 
     85         for video in videos['data']['videos']:
     86             media_url = url_or_none(video.get('url'))
     87             if not media_url or media_url in urls:
     88                 continue
     89             urls.append(media_url)
     90 
     91             playlist = video.get('is_playlist')
     92             if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
     93                 formats.extend(self._extract_m3u8_formats(
     94                     media_url, video_id, 'mp4', entry_protocol='m3u8_native',
     95                     m3u8_id='hls', fatal=False))
     96                 continue
     97 
     98             quality = int_or_none(video.get('quality'))
     99             f = {
    100                 'url': media_url,
    101                 'ext': 'mp4',
    102                 'height': quality
    103             }
    104             video_format = video.get('video_format')
    105             is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False
    106             if is_hls:
    107                 format_id = 'hls'
    108                 f['protocol'] = 'm3u8_native'
    109             elif video_format == 'mp4':
    110                 format_id = 'http'
    111             else:
    112                 continue
    113             if quality:
    114                 format_id += '-%sp' % quality
    115             f['format_id'] = format_id
    116             formats.append(f)
    117         self._sort_formats(formats)
    118 
    119         creator = try_get(
    120             show, lambda x: x['producer']['name'], compat_str)
    121 
    122         content = info.get('content') or {}
    123 
    124         images = try_get(
    125             content, lambda x: x['images']['wide'], dict) or {}
    126         thumbnails = []
    127         for image_id, image_url in images.items():
    128             if not isinstance(image_url, compat_str):
    129                 continue
    130             if not image_url.startswith(('http', '//')):
    131                 image_url = 'https://%s' % image_url
    132             t = parse_resolution(image_id)
    133             t.update({
    134                 'id': image_id,
    135                 'url': image_url
    136             })
    137             thumbnails.append(t)
    138 
    139         tags = []
    140         for genre in show.get('genres') or []:
    141             if not isinstance(genre, dict):
    142                 continue
    143             genre_name = genre.get('name')
    144             if genre_name and isinstance(genre_name, compat_str):
    145                 tags.append(genre_name)
    146 
    147         subtitles = {}
    148         for subtitle in content.get('subtitles') or []:
    149             if not isinstance(subtitle, dict):
    150                 continue
    151             lang = subtitle.get('language')
    152             sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
    153             if not lang or not isinstance(lang, compat_str) or not sub_url:
    154                 continue
    155             subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
    156                 'url': sub_url
    157             }]
    158 
    159         return {
    160             'id': video_id,
    161             'display_id': display_id,
    162             'title': title,
    163             'description': info.get('description') or show.get('description'),
    164             'season_id': str_or_none(info.get('season_id')),
    165             'season_number': int_or_none(info.get('season_number')),
    166             'episode_number': int_or_none(info.get('episode_number')),
    167             'release_year': int_or_none(show.get('released_at')),
    168             'timestamp': unified_timestamp(info.get('created_at')),
    169             'creator': creator,
    170             'view_count': int_or_none(content.get('watch_count')),
    171             'duration': float_or_none(content.get('duration_in_ms'), 1000),
    172             'tags': tags,
    173             'subtitles': subtitles,
    174             'thumbnails': thumbnails,
    175             'formats': formats
    176         }
    177 
    178 
    179 class PuhuTVSerieIE(InfoExtractor):
    180     _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
    181     IE_NAME = 'puhutv:serie'
    182     _TESTS = [{
    183         'url': 'https://puhutv.com/deniz-yildizi-detay',
    184         'info_dict': {
    185             'title': 'Deniz Yıldızı',
    186             'id': 'deniz-yildizi',
    187         },
    188         'playlist_mincount': 205,
    189     }, {
    190         # a film detail page which is using same url with serie page
    191         'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
    192         'only_matching': True,
    193     }]
    194 
    195     def _extract_entries(self, seasons):
    196         for season in seasons:
    197             season_id = season.get('id')
    198             if not season_id:
    199                 continue
    200             page = 1
    201             has_more = True
    202             while has_more is True:
    203                 season = self._download_json(
    204                     'https://galadriel.puhutv.com/seasons/%s' % season_id,
    205                     season_id, 'Downloading page %s' % page, query={
    206                         'page': page,
    207                         'per': 40,
    208                     })
    209                 episodes = season.get('episodes')
    210                 if isinstance(episodes, list):
    211                     for ep in episodes:
    212                         slug_path = str_or_none(ep.get('slugPath'))
    213                         if not slug_path:
    214                             continue
    215                         video_id = str_or_none(int_or_none(ep.get('id')))
    216                         yield self.url_result(
    217                             'https://puhutv.com/%s' % slug_path,
    218                             ie=PuhuTVIE.ie_key(), video_id=video_id,
    219                             video_title=ep.get('name') or ep.get('eventLabel'))
    220                 page += 1
    221                 has_more = season.get('hasMore')
    222 
    223     def _real_extract(self, url):
    224         playlist_id = self._match_id(url)
    225 
    226         info = self._download_json(
    227             urljoin(url, '/api/slug/%s-detay' % playlist_id),
    228             playlist_id)['data']
    229 
    230         seasons = info.get('seasons')
    231         if seasons:
    232             return self.playlist_result(
    233                 self._extract_entries(seasons), playlist_id, info.get('name'))
    234 
    235         # For films, these are using same url with series
    236         video_id = info.get('slug') or info['assets'][0]['slug']
    237         return self.url_result(
    238             'https://puhutv.com/%s-izle' % video_id,
    239             PuhuTVIE.ie_key(), video_id)