youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

spreaker.py (6030B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import itertools
      5 
      6 from .common import InfoExtractor
      7 from ..compat import compat_str
      8 from ..utils import (
      9     float_or_none,
     10     int_or_none,
     11     str_or_none,
     12     try_get,
     13     unified_timestamp,
     14     url_or_none,
     15 )
     16 
     17 
     18 def _extract_episode(data, episode_id=None):
     19     title = data['title']
     20     download_url = data['download_url']
     21 
     22     series = try_get(data, lambda x: x['show']['title'], compat_str)
     23     uploader = try_get(data, lambda x: x['author']['fullname'], compat_str)
     24 
     25     thumbnails = []
     26     for image in ('image_original', 'image_medium', 'image'):
     27         image_url = url_or_none(data.get('%s_url' % image))
     28         if image_url:
     29             thumbnails.append({'url': image_url})
     30 
     31     def stats(key):
     32         return int_or_none(try_get(
     33             data,
     34             (lambda x: x['%ss_count' % key],
     35              lambda x: x['stats']['%ss' % key])))
     36 
     37     def duration(key):
     38         return float_or_none(data.get(key), scale=1000)
     39 
     40     return {
     41         'id': compat_str(episode_id or data['episode_id']),
     42         'url': download_url,
     43         'display_id': data.get('permalink'),
     44         'title': title,
     45         'description': data.get('description'),
     46         'timestamp': unified_timestamp(data.get('published_at')),
     47         'uploader': uploader,
     48         'uploader_id': str_or_none(data.get('author_id')),
     49         'creator': uploader,
     50         'duration': duration('duration') or duration('length'),
     51         'view_count': stats('play'),
     52         'like_count': stats('like'),
     53         'comment_count': stats('message'),
     54         'format': 'MPEG Layer 3',
     55         'format_id': 'mp3',
     56         'container': 'mp3',
     57         'ext': 'mp3',
     58         'thumbnails': thumbnails,
     59         'series': series,
     60         'extractor_key': SpreakerIE.ie_key(),
     61     }
     62 
     63 
     64 class SpreakerIE(InfoExtractor):
     65     _VALID_URL = r'''(?x)
     66                     https?://
     67                         api\.spreaker\.com/
     68                         (?:
     69                             (?:download/)?episode|
     70                             v2/episodes
     71                         )/
     72                         (?P<id>\d+)
     73                     '''
     74     _TESTS = [{
     75         'url': 'https://api.spreaker.com/episode/12534508',
     76         'info_dict': {
     77             'id': '12534508',
     78             'display_id': 'swm-ep15-how-to-market-your-music-part-2',
     79             'ext': 'mp3',
     80             'title': 'EP:15 | Music Marketing (Likes) - Part 2',
     81             'description': 'md5:0588c43e27be46423e183076fa071177',
     82             'timestamp': 1502250336,
     83             'upload_date': '20170809',
     84             'uploader': 'SWM',
     85             'uploader_id': '9780658',
     86             'duration': 1063.42,
     87             'view_count': int,
     88             'like_count': int,
     89             'comment_count': int,
     90             'series': 'Success With Music (SWM)',
     91         },
     92     }, {
     93         'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
     94         'only_matching': True,
     95     }, {
     96         'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
     97         'only_matching': True,
     98     }]
     99 
    100     def _real_extract(self, url):
    101         episode_id = self._match_id(url)
    102         data = self._download_json(
    103             'https://api.spreaker.com/v2/episodes/%s' % episode_id,
    104             episode_id)['response']['episode']
    105         return _extract_episode(data, episode_id)
    106 
    107 
    108 class SpreakerPageIE(InfoExtractor):
    109     _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
    110     _TESTS = [{
    111         'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
    112         'only_matching': True,
    113     }]
    114 
    115     def _real_extract(self, url):
    116         display_id = self._match_id(url)
    117         webpage = self._download_webpage(url, display_id)
    118         episode_id = self._search_regex(
    119             (r'data-episode_id=["\'](?P<id>\d+)',
    120              r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id')
    121         return self.url_result(
    122             'https://api.spreaker.com/episode/%s' % episode_id,
    123             ie=SpreakerIE.ie_key(), video_id=episode_id)
    124 
    125 
    126 class SpreakerShowIE(InfoExtractor):
    127     _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
    128     _TESTS = [{
    129         'url': 'https://api.spreaker.com/show/4652058',
    130         'info_dict': {
    131             'id': '4652058',
    132         },
    133         'playlist_mincount': 118,
    134     }]
    135 
    136     def _entries(self, show_id):
    137         for page_num in itertools.count(1):
    138             episodes = self._download_json(
    139                 'https://api.spreaker.com/show/%s/episodes' % show_id,
    140                 show_id, note='Downloading JSON page %d' % page_num, query={
    141                     'page': page_num,
    142                     'max_per_page': 100,
    143                 })
    144             pager = try_get(episodes, lambda x: x['response']['pager'], dict)
    145             if not pager:
    146                 break
    147             results = pager.get('results')
    148             if not results or not isinstance(results, list):
    149                 break
    150             for result in results:
    151                 if not isinstance(result, dict):
    152                     continue
    153                 yield _extract_episode(result)
    154             if page_num == pager.get('last_page'):
    155                 break
    156 
    157     def _real_extract(self, url):
    158         show_id = self._match_id(url)
    159         return self.playlist_result(self._entries(show_id), playlist_id=show_id)
    160 
    161 
    162 class SpreakerShowPageIE(InfoExtractor):
    163     _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
    164     _TESTS = [{
    165         'url': 'https://www.spreaker.com/show/success-with-music',
    166         'only_matching': True,
    167     }]
    168 
    169     def _real_extract(self, url):
    170         display_id = self._match_id(url)
    171         webpage = self._download_webpage(url, display_id)
    172         show_id = self._search_regex(
    173             r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
    174         return self.url_result(
    175             'https://api.spreaker.com/show/%s' % show_id,
    176             ie=SpreakerShowIE.ie_key(), video_id=show_id)