youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

megaphone.py (1770B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import js_to_json
      8 
      9 
     10 class MegaphoneIE(InfoExtractor):
     11     IE_NAME = 'megaphone.fm'
     12     IE_DESC = 'megaphone.fm embedded players'
     13     _VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
     14     _TEST = {
     15         'url': 'https://player.megaphone.fm/GLT9749789991?"',
     16         'md5': '4816a0de523eb3e972dc0dda2c191f96',
     17         'info_dict': {
     18             'id': 'GLT9749789991',
     19             'ext': 'mp3',
     20             'title': '#97 What Kind Of Idiot Gets Phished?',
     21             'thumbnail': r're:^https://.*\.png.*$',
     22             'duration': 1776.26375,
     23             'author': 'Reply All',
     24         },
     25     }
     26 
     27     def _real_extract(self, url):
     28         video_id = self._match_id(url)
     29         webpage = self._download_webpage(url, video_id)
     30 
     31         title = self._og_search_property('audio:title', webpage)
     32         author = self._og_search_property('audio:artist', webpage)
     33         thumbnail = self._og_search_thumbnail(webpage)
     34 
     35         episode_json = self._search_regex(r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON')
     36         episode_data = self._parse_json(episode_json, video_id, js_to_json)
     37         video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:')
     38 
     39         formats = [{
     40             'url': video_url,
     41         }]
     42 
     43         return {
     44             'id': video_id,
     45             'thumbnail': thumbnail,
     46             'title': title,
     47             'author': author,
     48             'duration': episode_data['duration'],
     49             'formats': formats,
     50         }
     51 
     52     @classmethod
     53     def _extract_urls(cls, webpage):
     54         return [m[0] for m in re.findall(
     55             r'<iframe[^>]*?\ssrc=["\'](%s)' % cls._VALID_URL, webpage)]