youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

vuclip.py (2254B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 
      5 from .common import InfoExtractor
      6 from ..compat import (
      7     compat_urllib_parse_urlparse,
      8 )
      9 from ..utils import (
     10     ExtractorError,
     11     parse_duration,
     12     remove_end,
     13 )
     14 
     15 
     16 class VuClipIE(InfoExtractor):
     17     _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
     18 
     19     _TEST = {
     20         'url': 'http://m.vuclip.com/w?cid=1129900602&bu=8589892792&frm=w&z=34801&op=0&oc=843169247&section=recommend',
     21         'info_dict': {
     22             'id': '1129900602',
     23             'ext': '3gp',
     24             'title': 'Top 10 TV Convicts',
     25             'duration': 733,
     26         }
     27     }
     28 
     29     def _real_extract(self, url):
     30         video_id = self._match_id(url)
     31         webpage = self._download_webpage(url, video_id)
     32 
     33         ad_m = re.search(
     34             r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
     35         if ad_m:
     36             urlr = compat_urllib_parse_urlparse(url)
     37             adfree_url = urlr.scheme + '://' + urlr.netloc + ad_m.group(1)
     38             webpage = self._download_webpage(
     39                 adfree_url, video_id, note='Download post-ad page')
     40 
     41         error_msg = self._html_search_regex(
     42             r'<p class="message">(.*?)</p>', webpage, 'error message',
     43             default=None)
     44         if error_msg:
     45             raise ExtractorError(
     46                 '%s said: %s' % (self.IE_NAME, error_msg), expected=True)
     47 
     48         # These clowns alternate between two page types
     49         video_url = self._search_regex(
     50             r'<a[^>]+href="([^"]+)"[^>]*><img[^>]+src="[^"]*/play\.gif',
     51             webpage, 'video URL', default=None)
     52         if video_url:
     53             formats = [{
     54                 'url': video_url,
     55             }]
     56         else:
     57             formats = self._parse_html5_media_entries(url, webpage, video_id)[0]['formats']
     58 
     59         title = remove_end(self._html_search_regex(
     60             r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip(), ' - Video')
     61 
     62         duration = parse_duration(self._html_search_regex(
     63             r'[(>]([0-9]+:[0-9]+)(?:<span|\))', webpage, 'duration', fatal=False))
     64 
     65         return {
     66             'id': video_id,
     67             'formats': formats,
     68             'title': title,
     69             'duration': duration,
     70         }