youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

snotr.py (2501B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import (
      8     parse_duration,
      9     parse_filesize,
     10     str_to_int,
     11 )
     12 
     13 
     14 class SnotrIE(InfoExtractor):
     15     _VALID_URL = r'http?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
     16     _TESTS = [{
     17         'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
     18         'info_dict': {
     19             'id': '13708',
     20             'ext': 'mp4',
     21             'title': 'Drone flying through fireworks!',
     22             'duration': 248,
     23             'filesize_approx': 40700000,
     24             'description': 'A drone flying through Fourth of July Fireworks',
     25             'thumbnail': r're:^https?://.*\.jpg$',
     26         },
     27         'expected_warnings': ['description'],
     28     }, {
     29         'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
     30         'info_dict': {
     31             'id': '530',
     32             'ext': 'mp4',
     33             'title': 'David Letteman - George W. Bush Top 10',
     34             'duration': 126,
     35             'filesize_approx': 8500000,
     36             'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
     37             'thumbnail': r're:^https?://.*\.jpg$',
     38         }
     39     }]
     40 
     41     def _real_extract(self, url):
     42         mobj = re.match(self._VALID_URL, url)
     43         video_id = mobj.group('id')
     44 
     45         webpage = self._download_webpage(url, video_id)
     46         title = self._og_search_title(webpage)
     47 
     48         description = self._og_search_description(webpage)
     49         info_dict = self._parse_html5_media_entries(
     50             url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0]
     51 
     52         view_count = str_to_int(self._html_search_regex(
     53             r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)',
     54             webpage, 'view count', fatal=False))
     55 
     56         duration = parse_duration(self._html_search_regex(
     57             r'<p[^>]*>\s*<strong[^>]*>Length:</strong>\s*<span[^>]*>([\d:]+)',
     58             webpage, 'duration', fatal=False))
     59 
     60         filesize_approx = parse_filesize(self._html_search_regex(
     61             r'<p[^>]*>\s*<strong[^>]*>Filesize:</strong>\s*<span[^>]*>([^<]+)',
     62             webpage, 'filesize', fatal=False))
     63 
     64         info_dict.update({
     65             'id': video_id,
     66             'description': description,
     67             'title': title,
     68             'view_count': view_count,
     69             'duration': duration,
     70             'filesize_approx': filesize_approx,
     71         })
     72 
     73         return info_dict