youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

manyvids.py (3121B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..utils import (
      6     determine_ext,
      7     int_or_none,
      8     str_to_int,
      9     urlencode_postdata,
     10 )
     11 
     12 
     13 class ManyVidsIE(InfoExtractor):
     14     _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
     15     _TESTS = [{
     16         # preview video
     17         'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
     18         'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
     19         'info_dict': {
     20             'id': '133957',
     21             'ext': 'mp4',
     22             'title': 'everthing about me (Preview)',
     23             'view_count': int,
     24             'like_count': int,
     25         },
     26     }, {
     27         # full video
     28         'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
     29         'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
     30         'info_dict': {
     31             'id': '935718',
     32             'ext': 'mp4',
     33             'title': 'MY FACE REVEAL',
     34             'view_count': int,
     35             'like_count': int,
     36         },
     37     }]
     38 
     39     def _real_extract(self, url):
     40         video_id = self._match_id(url)
     41 
     42         webpage = self._download_webpage(url, video_id)
     43 
     44         video_url = self._search_regex(
     45             r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
     46             webpage, 'video URL', group='url')
     47 
     48         title = self._html_search_regex(
     49             (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
     50              r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
     51             webpage, 'title', default=None) or self._html_search_meta(
     52             'twitter:title', webpage, 'title', fatal=True)
     53 
     54         if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
     55             title += ' (Preview)'
     56 
     57         mv_token = self._search_regex(
     58             r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
     59             'mv token', default=None, group='value')
     60 
     61         if mv_token:
     62             # Sets some cookies
     63             self._download_webpage(
     64                 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
     65                 video_id, fatal=False, data=urlencode_postdata({
     66                     'mvtoken': mv_token,
     67                     'vid': video_id,
     68                 }), headers={
     69                     'Referer': url,
     70                     'X-Requested-With': 'XMLHttpRequest'
     71                 })
     72 
     73         if determine_ext(video_url) == 'm3u8':
     74             formats = self._extract_m3u8_formats(
     75                 video_url, video_id, 'mp4', entry_protocol='m3u8_native',
     76                 m3u8_id='hls')
     77         else:
     78             formats = [{'url': video_url}]
     79 
     80         like_count = int_or_none(self._search_regex(
     81             r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
     82         view_count = str_to_int(self._html_search_regex(
     83             r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
     84             'view count', default=None))
     85 
     86         return {
     87             'id': video_id,
     88             'title': title,
     89             'view_count': view_count,
     90             'like_count': like_count,
     91             'formats': formats,
     92         }