youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

morningstar.py (1862B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 
      8 
      9 class MorningstarIE(InfoExtractor):
     10     IE_DESC = 'morningstar.com'
     11     _VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
     12     _TESTS = [{
     13         'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
     14         'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
     15         'info_dict': {
     16             'id': '615869',
     17             'ext': 'mp4',
     18             'title': 'Get Ahead of the Curve on 2013 Taxes',
     19             'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
     20             'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
     21         }
     22     }, {
     23         'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
     24         'only_matching': True,
     25     }]
     26 
     27     def _real_extract(self, url):
     28         mobj = re.match(self._VALID_URL, url)
     29         video_id = mobj.group('id')
     30 
     31         webpage = self._download_webpage(url, video_id)
     32         title = self._html_search_regex(
     33             r'<h1 id="titleLink">(.*?)</h1>', webpage, 'title')
     34         video_url = self._html_search_regex(
     35             r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
     36             webpage, 'video URL')
     37         thumbnail = self._html_search_regex(
     38             r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
     39             webpage, 'thumbnail', fatal=False)
     40         description = self._html_search_regex(
     41             r'<div id="mstarDeck".*?>(.*?)</div>',
     42             webpage, 'description', fatal=False)
     43 
     44         return {
     45             'id': video_id,
     46             'title': title,
     47             'url': video_url,
     48             'thumbnail': thumbnail,
     49             'description': description,
     50         }