youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

bitchute.py (5326B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import itertools
      5 import re
      6 
      7 from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    orderedSet,
    unified_strdate,
    urlencode_postdata,
)
     13 
     14 
     15 class BitChuteIE(InfoExtractor):
     16     _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
     17     _TESTS = [{
     18         'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
     19         'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
     20         'info_dict': {
     21             'id': 'szoMrox2JEI',
     22             'ext': 'mp4',
     23             'title': 'Fuck bitches get money',
     24             'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
     25             'thumbnail': r're:^https?://.*\.jpg$',
     26             'uploader': 'Victoria X Rave',
     27             'upload_date': '20170813',
     28         },
     29     }, {
     30         'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
     31         'only_matching': True,
     32     }, {
     33         'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
     34         'only_matching': True,
     35     }]
     36 
     37     def _real_extract(self, url):
     38         video_id = self._match_id(url)
     39 
     40         webpage = self._download_webpage(
     41             'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
     42                 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
     43             })
     44 
     45         title = self._html_search_regex(
     46             (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
     47             webpage, 'title', default=None) or self._html_search_meta(
     48             'description', webpage, 'title',
     49             default=None) or self._og_search_description(webpage)
     50 
     51         format_urls = []
     52         for mobj in re.finditer(
     53                 r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
     54             format_urls.append(mobj.group('url'))
     55         format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
     56 
     57         formats = [
     58             {'url': format_url}
     59             for format_url in orderedSet(format_urls)]
     60 
     61         if not formats:
     62             formats = self._parse_html5_media_entries(
     63                 url, webpage, video_id)[0]['formats']
     64 
     65         self._check_formats(formats, video_id)
     66         self._sort_formats(formats)
     67 
     68         description = self._html_search_regex(
     69             r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
     70             webpage, 'description', fatal=False)
     71         thumbnail = self._og_search_thumbnail(
     72             webpage, default=None) or self._html_search_meta(
     73             'twitter:image:src', webpage, 'thumbnail')
     74         uploader = self._html_search_regex(
     75             (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
     76              r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
     77             webpage, 'uploader', fatal=False)
     78 
     79         upload_date = unified_strdate(self._search_regex(
     80             r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
     81             webpage, 'upload date', fatal=False))
     82 
     83         return {
     84             'id': video_id,
     85             'title': title,
     86             'description': description,
     87             'thumbnail': thumbnail,
     88             'uploader': uploader,
     89             'upload_date': upload_date,
     90             'formats': formats,
     91         }
     92 
     93 
     94 class BitChuteChannelIE(InfoExtractor):
     95     _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
     96     _TEST = {
     97         'url': 'https://www.bitchute.com/channel/victoriaxrave/',
     98         'playlist_mincount': 185,
     99         'info_dict': {
    100             'id': 'victoriaxrave',
    101         },
    102     }
    103 
    104     _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
    105 
    106     def _entries(self, channel_id):
    107         channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
    108         offset = 0
    109         for page_num in itertools.count(1):
    110             data = self._download_json(
    111                 '%sextend/' % channel_url, channel_id,
    112                 'Downloading channel page %d' % page_num,
    113                 data=urlencode_postdata({
    114                     'csrfmiddlewaretoken': self._TOKEN,
    115                     'name': '',
    116                     'offset': offset,
    117                 }), headers={
    118                     'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    119                     'Referer': channel_url,
    120                     'X-Requested-With': 'XMLHttpRequest',
    121                     'Cookie': 'csrftoken=%s' % self._TOKEN,
    122                 })
    123             if data.get('success') is False:
    124                 break
    125             html = data.get('html')
    126             if not html:
    127                 break
    128             video_ids = re.findall(
    129                 r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
    130                 html)
    131             if not video_ids:
    132                 break
    133             offset += len(video_ids)
    134             for video_id in video_ids:
    135                 yield self.url_result(
    136                     'https://www.bitchute.com/video/%s' % video_id,
    137                     ie=BitChuteIE.ie_key(), video_id=video_id)
    138 
    139     def _real_extract(self, url):
    140         channel_id = self._match_id(url)
    141         return self.playlist_result(
    142             self._entries(channel_id), playlist_id=channel_id)