youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

businessinsider.py (1998B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from .jwplatform import JWPlatformIE
      6 
      7 
      8 class BusinessInsiderIE(InfoExtractor):
      9     _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     10     _TESTS = [{
     11         'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
     12         'md5': 'ffed3e1e12a6f950aa2f7d83851b497a',
     13         'info_dict': {
     14             'id': 'cjGDb0X9',
     15             'ext': 'mp4',
     16             'title': "Bananas give you more radiation exposure than living next to a nuclear power plant",
     17             'description': 'md5:0175a3baf200dd8fa658f94cade841b3',
     18             'upload_date': '20160611',
     19             'timestamp': 1465675620,
     20         },
     21     }, {
     22         'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
     23         'md5': '43f438dbc6da0b89f5ac42f68529d84a',
     24         'info_dict': {
     25             'id': '5zJwd4FK',
     26             'ext': 'mp4',
     27             'title': 'Deze dingen zorgen ervoor dat je minder snel een date scoort',
     28             'description': 'md5:2af8975825d38a4fed24717bbe51db49',
     29             'upload_date': '20170705',
     30             'timestamp': 1499270528,
     31         },
     32     }, {
     33         'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
     34         'only_matching': True,
     35     }]
     36 
     37     def _real_extract(self, url):
     38         video_id = self._match_id(url)
     39         webpage = self._download_webpage(url, video_id)
     40         jwplatform_id = self._search_regex(
     41             (r'data-media-id=["\']([a-zA-Z0-9]{8})',
     42              r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
     43              r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
     44              r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'),
     45             webpage, 'jwplatform id')
     46         return self.url_result(
     47             'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
     48             video_id=video_id)