youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

malltv.py (3298B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..utils import (
      6     clean_html,
      7     dict_get,
      8     float_or_none,
      9     int_or_none,
     10     merge_dicts,
     11     parse_duration,
     12     try_get,
     13 )
     14 
     15 
     16 class MallTVIE(InfoExtractor):
     17     _VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     18     _TESTS = [{
     19         'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
     20         'md5': '1c4a37f080e1f3023103a7b43458e518',
     21         'info_dict': {
     22             'id': 't0zzt0',
     23             'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
     24             'ext': 'mp4',
     25             'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
     26             'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
     27             'duration': 216,
     28             'timestamp': 1538870400,
     29             'upload_date': '20181007',
     30             'view_count': int,
     31         }
     32     }, {
     33         'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
     34         'only_matching': True,
     35     }, {
     36         'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
     37         'only_matching': True,
     38     }]
     39 
     40     def _real_extract(self, url):
     41         display_id = self._match_id(url)
     42 
     43         webpage = self._download_webpage(
     44             url, display_id, headers=self.geo_verification_headers())
     45 
     46         video = self._parse_json(self._search_regex(
     47             r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
     48             webpage, 'video object'), display_id)
     49         video_source = video['VideoSource']
     50         video_id = self._search_regex(
     51             r'/([\da-z]+)/index\b', video_source, 'video id')
     52 
     53         formats = self._extract_m3u8_formats(
     54             video_source + '.m3u8', video_id, 'mp4', 'm3u8_native')
     55         self._sort_formats(formats)
     56 
     57         subtitles = {}
     58         for s in (video.get('Subtitles') or {}):
     59             s_url = s.get('Url')
     60             if not s_url:
     61                 continue
     62             subtitles.setdefault(s.get('Language') or 'cz', []).append({
     63                 'url': s_url,
     64             })
     65 
     66         entity_counts = video.get('EntityCounts') or {}
     67 
     68         def get_count(k):
     69             v = entity_counts.get(k + 's') or {}
     70             return int_or_none(dict_get(v, ('Count', 'StrCount')))
     71 
     72         info = self._search_json_ld(webpage, video_id, default={})
     73 
     74         return merge_dicts({
     75             'id': video_id,
     76             'display_id': display_id,
     77             'title': video.get('Title'),
     78             'description': clean_html(video.get('Description')),
     79             'thumbnail': video.get('ThumbnailUrl'),
     80             'formats': formats,
     81             'subtitles': subtitles,
     82             'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')),
     83             'view_count': get_count('View'),
     84             'like_count': get_count('Like'),
     85             'dislike_count': get_count('Dislike'),
     86             'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
     87             'comment_count': get_count('Comment'),
     88         }, info)