youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

shared.py (4385B)


      1 from __future__ import unicode_literals
      2 
      3 from .common import InfoExtractor
      4 from ..compat import (
      5     compat_b64decode,
      6     compat_urllib_parse_unquote_plus,
      7 )
      8 from ..utils import (
      9     determine_ext,
     10     ExtractorError,
     11     int_or_none,
     12     js_to_json,
     13     KNOWN_EXTENSIONS,
     14     parse_filesize,
     15     rot47,
     16     url_or_none,
     17     urlencode_postdata,
     18 )
     19 
     20 
     21 class SharedBaseIE(InfoExtractor):
     22     def _real_extract(self, url):
     23         video_id = self._match_id(url)
     24 
     25         webpage, urlh = self._download_webpage_handle(url, video_id)
     26 
     27         if self._FILE_NOT_FOUND in webpage:
     28             raise ExtractorError(
     29                 'Video %s does not exist' % video_id, expected=True)
     30 
     31         video_url = self._extract_video_url(webpage, video_id, url)
     32 
     33         title = self._extract_title(webpage)
     34         filesize = int_or_none(self._extract_filesize(webpage))
     35 
     36         return {
     37             'id': video_id,
     38             'url': video_url,
     39             'ext': 'mp4',
     40             'filesize': filesize,
     41             'title': title,
     42         }
     43 
     44     def _extract_title(self, webpage):
     45         return compat_b64decode(self._html_search_meta(
     46             'full:title', webpage, 'title')).decode('utf-8')
     47 
     48     def _extract_filesize(self, webpage):
     49         return self._html_search_meta(
     50             'full:size', webpage, 'file size', fatal=False)
     51 
     52 
     53 class SharedIE(SharedBaseIE):
     54     IE_DESC = 'shared.sx'
     55     _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
     56     _FILE_NOT_FOUND = '>File does not exist<'
     57 
     58     _TEST = {
     59         'url': 'http://shared.sx/0060718775',
     60         'md5': '106fefed92a8a2adb8c98e6a0652f49b',
     61         'info_dict': {
     62             'id': '0060718775',
     63             'ext': 'mp4',
     64             'title': 'Bmp4',
     65             'filesize': 1720110,
     66         },
     67     }
     68 
     69     def _extract_video_url(self, webpage, video_id, url):
     70         download_form = self._hidden_inputs(webpage)
     71 
     72         video_page = self._download_webpage(
     73             url, video_id, 'Downloading video page',
     74             data=urlencode_postdata(download_form),
     75             headers={
     76                 'Content-Type': 'application/x-www-form-urlencoded',
     77                 'Referer': url,
     78             })
     79 
     80         video_url = self._html_search_regex(
     81             r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
     82             video_page, 'video URL', group='url')
     83 
     84         return video_url
     85 
     86 
     87 class VivoIE(SharedBaseIE):
     88     IE_DESC = 'vivo.sx'
     89     _VALID_URL = r'https?://vivo\.s[xt]/(?P<id>[\da-z]{10})'
     90     _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
     91 
     92     _TESTS = [{
     93         'url': 'http://vivo.sx/d7ddda0e78',
     94         'md5': '15b3af41be0b4fe01f4df075c2678b2c',
     95         'info_dict': {
     96             'id': 'd7ddda0e78',
     97             'ext': 'mp4',
     98             'title': 'Chicken',
     99             'filesize': 515659,
    100         },
    101     }, {
    102         'url': 'http://vivo.st/d7ddda0e78',
    103         'only_matching': True,
    104     }]
    105 
    106     def _extract_title(self, webpage):
    107         title = self._html_search_regex(
    108             r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
    109             'title', default=None, group='title')
    110         if title:
    111             ext = determine_ext(title)
    112             if ext.lower() in KNOWN_EXTENSIONS:
    113                 title = title.rpartition('.' + ext)[0]
    114             return title
    115         return self._og_search_title(webpage)
    116 
    117     def _extract_filesize(self, webpage):
    118         return parse_filesize(self._search_regex(
    119             r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
    120             webpage, 'filesize', fatal=False))
    121 
    122     def _extract_video_url(self, webpage, video_id, url):
    123         def decode_url_old(encoded_url):
    124             return compat_b64decode(encoded_url).decode('utf-8')
    125 
    126         stream_url = self._search_regex(
    127             r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
    128             'stream url', default=None, group='url')
    129         if stream_url:
    130             stream_url = url_or_none(decode_url_old(stream_url))
    131         if stream_url:
    132             return stream_url
    133 
    134         def decode_url(encoded_url):
    135             return rot47(compat_urllib_parse_unquote_plus(encoded_url))
    136 
    137         return decode_url(self._parse_json(
    138             self._search_regex(
    139                 r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage,
    140                 'stream'),
    141             video_id, transform_source=js_to_json)['source'])