youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

googlepodcasts.py (3412B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import json
      5 import re
      6 
      7 from .common import InfoExtractor
      8 from ..utils import (
      9     clean_podcast_url,
     10     int_or_none,
     11     try_get,
     12     urlencode_postdata,
     13 )
     14 
     15 
     16 class GooglePodcastsBaseIE(InfoExtractor):
     17     _VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'
     18 
     19     def _batch_execute(self, func_id, video_id, params):
     20         return json.loads(self._download_json(
     21             'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
     22             video_id, data=urlencode_postdata({
     23                 'f.req': json.dumps([[[func_id, json.dumps(params), None, '1']]]),
     24             }), transform_source=lambda x: self._search_regex(r'(?s)(\[.+\])', x, 'data'))[0][2])
     25 
     26     def _extract_episode(self, episode):
     27         return {
     28             'id': episode[4][3],
     29             'title': episode[8],
     30             'url': clean_podcast_url(episode[13]),
     31             'thumbnail': episode[2],
     32             'description': episode[9],
     33             'creator': try_get(episode, lambda x: x[14]),
     34             'timestamp': int_or_none(episode[11]),
     35             'duration': int_or_none(episode[12]),
     36             'series': episode[1],
     37         }
     38 
     39 
     40 class GooglePodcastsIE(GooglePodcastsBaseIE):
     41     IE_NAME = 'google:podcasts'
     42     _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
     43     _TEST = {
     44         'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
     45         'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
     46         'info_dict': {
     47             'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
     48             'ext': 'mp3',
     49             'title': 'WWDTM New Year 2021',
     50             'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
     51             'upload_date': '20210102',
     52             'timestamp': 1609606800,
     53             'duration': 2901,
     54             'series': "Wait Wait... Don't Tell Me!",
     55         }
     56     }
     57 
     58     def _real_extract(self, url):
     59         b64_feed_url, b64_guid = re.match(self._VALID_URL, url).groups()
     60         episode = self._batch_execute(
     61             'oNjqVe', b64_guid, [b64_feed_url, b64_guid])[1]
     62         return self._extract_episode(episode)
     63 
     64 
     65 class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
     66     IE_NAME = 'google:podcasts:feed'
     67     _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
     68     _TEST = {
     69         'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
     70         'info_dict': {
     71             'title': "Wait Wait... Don't Tell Me!",
     72             'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
     73         },
     74         'playlist_mincount': 20,
     75     }
     76 
     77     def _real_extract(self, url):
     78         b64_feed_url = self._match_id(url)
     79         data = self._batch_execute('ncqJEe', b64_feed_url, [b64_feed_url])
     80 
     81         entries = []
     82         for episode in (try_get(data, lambda x: x[1][0]) or []):
     83             entries.append(self._extract_episode(episode))
     84 
     85         feed = try_get(data, lambda x: x[3]) or []
     86         return self.playlist_result(
     87             entries, playlist_title=try_get(feed, lambda x: x[0]),
     88             playlist_description=try_get(feed, lambda x: x[2]))