youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

vodlocker.py (2796B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..utils import (
      6     ExtractorError,
      7     NO_DEFAULT,
      8     sanitized_Request,
      9     urlencode_postdata,
     10 )
     11 
     12 
     13 class VodlockerIE(InfoExtractor):
     14     _VALID_URL = r'https?://(?:www\.)?vodlocker\.(?:com|city)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
     15 
     16     _TESTS = [{
     17         'url': 'http://vodlocker.com/e8wvyzz4sl42',
     18         'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf',
     19         'info_dict': {
     20             'id': 'e8wvyzz4sl42',
     21             'ext': 'mp4',
     22             'title': 'Germany vs Brazil',
     23             'thumbnail': r're:http://.*\.jpg',
     24         },
     25     }]
     26 
     27     def _real_extract(self, url):
     28         video_id = self._match_id(url)
     29         webpage = self._download_webpage(url, video_id)
     30 
     31         if any(p in webpage for p in (
     32                 '>THIS FILE WAS DELETED<',
     33                 '>File Not Found<',
     34                 'The file you were looking for could not be found, sorry for any inconvenience.<',
     35                 '>The file was removed')):
     36             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
     37 
     38         fields = self._hidden_inputs(webpage)
     39 
     40         if fields['op'] == 'download1':
     41             self._sleep(3, video_id)  # they do detect when requests happen too fast!
     42             post = urlencode_postdata(fields)
     43             req = sanitized_Request(url, post)
     44             req.add_header('Content-type', 'application/x-www-form-urlencoded')
     45             webpage = self._download_webpage(
     46                 req, video_id, 'Downloading video page')
     47 
     48         def extract_file_url(html, default=NO_DEFAULT):
     49             return self._search_regex(
     50                 r'file:\s*"(http[^\"]+)",', html, 'file url', default=default)
     51 
     52         video_url = extract_file_url(webpage, default=None)
     53 
     54         if not video_url:
     55             embed_url = self._search_regex(
     56                 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?vodlocker\.(?:com|city)/embed-.+?)\1',
     57                 webpage, 'embed url', group='url')
     58             embed_webpage = self._download_webpage(
     59                 embed_url, video_id, 'Downloading embed webpage')
     60             video_url = extract_file_url(embed_webpage)
     61             thumbnail_webpage = embed_webpage
     62         else:
     63             thumbnail_webpage = webpage
     64 
     65         title = self._search_regex(
     66             r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
     67         thumbnail = self._search_regex(
     68             r'image:\s*"(http[^\"]+)",', thumbnail_webpage, 'thumbnail', fatal=False)
     69 
     70         formats = [{
     71             'format_id': 'sd',
     72             'url': video_url,
     73         }]
     74 
     75         return {
     76             'id': video_id,
     77             'title': title,
     78             'thumbnail': thumbnail,
     79             'formats': formats,
     80         }