youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

iheart.py (3465B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..utils import (
      6     clean_html,
      7     clean_podcast_url,
      8     int_or_none,
      9     str_or_none,
     10 )
     11 
     12 
     13 class IHeartRadioBaseIE(InfoExtractor):
     14     def _call_api(self, path, video_id, fatal=True, query=None):
     15         return self._download_json(
     16             'https://api.iheart.com/api/v3/podcast/' + path,
     17             video_id, fatal=fatal, query=query)
     18 
     19     def _extract_episode(self, episode):
     20         return {
     21             'thumbnail': episode.get('imageUrl'),
     22             'description': clean_html(episode.get('description')),
     23             'timestamp': int_or_none(episode.get('startDate'), 1000),
     24             'duration': int_or_none(episode.get('duration')),
     25         }
     26 
     27 
     28 class IHeartRadioIE(IHeartRadioBaseIE):
     29     IENAME = 'iheartradio'
     30     _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
     31     _TEST = {
     32         'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
     33         'md5': 'c8609c92c8688dcb69d8541042b8abca',
     34         'info_dict': {
     35             'id': '70346499',
     36             'ext': 'mp3',
     37             'title': 'Part One: Alexander Lukashenko: The Dictator of Belarus',
     38             'description': 'md5:96cc7297b3a5a9ebae28643801c96fae',
     39             'timestamp': 1597741200,
     40             'upload_date': '20200818',
     41         }
     42     }
     43 
     44     def _real_extract(self, url):
     45         episode_id = self._match_id(url)
     46         episode = self._call_api(
     47             'episodes/' + episode_id, episode_id)['episode']
     48         info = self._extract_episode(episode)
     49         info.update({
     50             'id': episode_id,
     51             'title': episode['title'],
     52             'url': clean_podcast_url(episode['mediaUrl']),
     53         })
     54         return info
     55 
     56 
     57 class IHeartRadioPodcastIE(IHeartRadioBaseIE):
     58     IE_NAME = 'iheartradio:podcast'
     59     _VALID_URL = r'https?://(?:www\.)?iheart(?:podcastnetwork)?\.com/podcast/[^/?&#]+-(?P<id>\d+)/?(?:[?#&]|$)'
     60     _TESTS = [{
     61         'url': 'https://www.iheart.com/podcast/1119-it-could-happen-here-30717896/',
     62         'info_dict': {
     63             'id': '30717896',
     64             'title': 'It Could Happen Here',
     65             'description': 'md5:5842117412a967eb0b01f8088eb663e2',
     66         },
     67         'playlist_mincount': 11,
     68     }, {
     69         'url': 'https://www.iheartpodcastnetwork.com/podcast/105-stuff-you-should-know-26940277',
     70         'only_matching': True,
     71     }]
     72 
     73     def _real_extract(self, url):
     74         podcast_id = self._match_id(url)
     75         path = 'podcasts/' + podcast_id
     76         episodes = self._call_api(
     77             path + '/episodes', podcast_id, query={'limit': 1000000000})['data']
     78 
     79         entries = []
     80         for episode in episodes:
     81             episode_id = str_or_none(episode.get('id'))
     82             if not episode_id:
     83                 continue
     84             info = self._extract_episode(episode)
     85             info.update({
     86                 '_type': 'url',
     87                 'id': episode_id,
     88                 'title': episode.get('title'),
     89                 'url': 'iheartradio:' + episode_id,
     90                 'ie_key': IHeartRadioIE.ie_key(),
     91             })
     92             entries.append(info)
     93 
     94         podcast = self._call_api(path, podcast_id, False) or {}
     95 
     96         return self.playlist_result(
     97             entries, podcast_id, podcast.get('title'), podcast.get('description'))