megaphone.py (1770B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..utils import js_to_json 8 9 10 class MegaphoneIE(InfoExtractor): 11 IE_NAME = 'megaphone.fm' 12 IE_DESC = 'megaphone.fm embedded players' 13 _VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)' 14 _TEST = { 15 'url': 'https://player.megaphone.fm/GLT9749789991?"', 16 'md5': '4816a0de523eb3e972dc0dda2c191f96', 17 'info_dict': { 18 'id': 'GLT9749789991', 19 'ext': 'mp3', 20 'title': '#97 What Kind Of Idiot Gets Phished?', 21 'thumbnail': r're:^https://.*\.png.*$', 22 'duration': 1776.26375, 23 'author': 'Reply All', 24 }, 25 } 26 27 def _real_extract(self, url): 28 video_id = self._match_id(url) 29 webpage = self._download_webpage(url, video_id) 30 31 title = self._og_search_property('audio:title', webpage) 32 author = self._og_search_property('audio:artist', webpage) 33 thumbnail = self._og_search_thumbnail(webpage) 34 35 episode_json = self._search_regex(r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON') 36 episode_data = self._parse_json(episode_json, video_id, js_to_json) 37 video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:') 38 39 formats = [{ 40 'url': video_url, 41 }] 42 43 return { 44 'id': video_id, 45 'thumbnail': thumbnail, 46 'title': title, 47 'author': author, 48 'duration': episode_data['duration'], 49 'formats': formats, 50 } 51 52 @classmethod 53 def _extract_urls(cls, webpage): 54 return [m[0] for m in re.findall( 55 r'<iframe[^>]*?\ssrc=["\'](%s)' % cls._VALID_URL, webpage)]