youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

reddit.py (5331B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 
      5 from .common import InfoExtractor
      6 from ..utils import (
      7     ExtractorError,
      8     int_or_none,
      9     float_or_none,
     10     try_get,
     11     unescapeHTML,
     12     url_or_none,
     13 )
     14 
     15 
     16 class RedditIE(InfoExtractor):
     17     _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
     18     _TEST = {
     19         # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
     20         'url': 'https://v.redd.it/zv89llsvexdz',
     21         'md5': '0a070c53eba7ec4534d95a5a1259e253',
     22         'info_dict': {
     23             'id': 'zv89llsvexdz',
     24             'ext': 'mp4',
     25             'title': 'zv89llsvexdz',
     26         },
     27         'params': {
     28             'format': 'bestvideo',
     29         },
     30     }
     31 
     32     def _real_extract(self, url):
     33         video_id = self._match_id(url)
     34 
     35         formats = self._extract_m3u8_formats(
     36             'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
     37             'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
     38 
     39         formats.extend(self._extract_mpd_formats(
     40             'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
     41             mpd_id='dash', fatal=False))
     42 
     43         self._sort_formats(formats)
     44 
     45         return {
     46             'id': video_id,
     47             'title': video_id,
     48             'formats': formats,
     49         }
     50 
     51 
     52 class RedditRIE(InfoExtractor):
     53     _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
     54     _TESTS = [{
     55         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
     56         'info_dict': {
     57             'id': 'zv89llsvexdz',
     58             'ext': 'mp4',
     59             'title': 'That small heart attack.',
     60             'thumbnail': r're:^https?://.*\.(?:jpg|png)',
     61             'thumbnails': 'count:4',
     62             'timestamp': 1501941939,
     63             'upload_date': '20170805',
     64             'uploader': 'Antw87',
     65             'duration': 12,
     66             'like_count': int,
     67             'dislike_count': int,
     68             'comment_count': int,
     69             'age_limit': 0,
     70         },
     71         'params': {
     72             'format': 'bestvideo',
     73             'skip_download': True,
     74         },
     75     }, {
     76         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
     77         'only_matching': True,
     78     }, {
     79         # imgur
     80         'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
     81         'only_matching': True,
     82     }, {
     83         # imgur @ old reddit
     84         'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
     85         'only_matching': True,
     86     }, {
     87         # streamable
     88         'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
     89         'only_matching': True,
     90     }, {
     91         # youtube
     92         'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
     93         'only_matching': True,
     94     }, {
     95         # reddit video @ nm reddit
     96         'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
     97         'only_matching': True,
     98     }]
     99 
    100     def _real_extract(self, url):
    101         mobj = re.match(self._VALID_URL, url)
    102         url, video_id = mobj.group('url', 'id')
    103 
    104         video_id = self._match_id(url)
    105 
    106         data = self._download_json(
    107             url + '/.json', video_id)[0]['data']['children'][0]['data']
    108 
    109         video_url = data['url']
    110 
    111         # Avoid recursing into the same reddit URL
    112         if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
    113             raise ExtractorError('No media found', expected=True)
    114 
    115         over_18 = data.get('over_18')
    116         if over_18 is True:
    117             age_limit = 18
    118         elif over_18 is False:
    119             age_limit = 0
    120         else:
    121             age_limit = None
    122 
    123         thumbnails = []
    124 
    125         def add_thumbnail(src):
    126             if not isinstance(src, dict):
    127                 return
    128             thumbnail_url = url_or_none(src.get('url'))
    129             if not thumbnail_url:
    130                 return
    131             thumbnails.append({
    132                 'url': unescapeHTML(thumbnail_url),
    133                 'width': int_or_none(src.get('width')),
    134                 'height': int_or_none(src.get('height')),
    135             })
    136 
    137         for image in try_get(data, lambda x: x['preview']['images']) or []:
    138             if not isinstance(image, dict):
    139                 continue
    140             add_thumbnail(image.get('source'))
    141             resolutions = image.get('resolutions')
    142             if isinstance(resolutions, list):
    143                 for resolution in resolutions:
    144                     add_thumbnail(resolution)
    145 
    146         return {
    147             '_type': 'url_transparent',
    148             'url': video_url,
    149             'title': data.get('title'),
    150             'thumbnails': thumbnails,
    151             'timestamp': float_or_none(data.get('created_utc')),
    152             'uploader': data.get('author'),
    153             'duration': int_or_none(try_get(
    154                 data,
    155                 (lambda x: x['media']['reddit_video']['duration'],
    156                  lambda x: x['secure_media']['reddit_video']['duration']))),
    157             'like_count': int_or_none(data.get('ups')),
    158             'dislike_count': int_or_none(data.get('downs')),
    159             'comment_count': int_or_none(data.get('num_comments')),
    160             'age_limit': age_limit,
    161         }