youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

applepodcasts.py (2378B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..utils import (
      6     clean_podcast_url,
      7     int_or_none,
      8     parse_iso8601,
      9     try_get,
     10 )
     11 
     12 
     13 class ApplePodcastsIE(InfoExtractor):
     14     _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
     15     _TESTS = [{
     16         'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
     17         'md5': 'df02e6acb11c10e844946a39e7222b08',
     18         'info_dict': {
     19             'id': '1000482637777',
     20             'ext': 'mp3',
     21             'title': '207 - Whitney Webb Returns',
     22             'description': 'md5:13a73bade02d2e43737751e3987e1399',
     23             'upload_date': '20200705',
     24             'timestamp': 1593921600,
     25             'duration': 6425,
     26             'series': 'The Tim Dillon Show',
     27         }
     28     }, {
     29         'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
     30         'only_matching': True,
     31     }, {
     32         'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns?i=1000482637777',
     33         'only_matching': True,
     34     }, {
     35         'url': 'https://podcasts.apple.com/podcast/id1135137367?i=1000482637777',
     36         'only_matching': True,
     37     }]
     38 
     39     def _real_extract(self, url):
     40         episode_id = self._match_id(url)
     41         webpage = self._download_webpage(url, episode_id)
     42         ember_data = self._parse_json(self._search_regex(
     43             r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
     44             webpage, 'ember data'), episode_id)
     45         ember_data = ember_data.get(episode_id) or ember_data
     46         episode = ember_data['data']['attributes']
     47         description = episode.get('description') or {}
     48 
     49         series = None
     50         for inc in (ember_data.get('included') or []):
     51             if inc.get('type') == 'media/podcast':
     52                 series = try_get(inc, lambda x: x['attributes']['name'])
     53 
     54         return {
     55             'id': episode_id,
     56             'title': episode['name'],
     57             'url': clean_podcast_url(episode['assetUrl']),
     58             'description': description.get('standard') or description.get('short'),
     59             'timestamp': parse_iso8601(episode.get('releaseDateTime')),
     60             'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
     61             'series': series,
     62         }