youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

patreon.py (5646B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..utils import (
      6     clean_html,
      7     determine_ext,
      8     int_or_none,
      9     KNOWN_EXTENSIONS,
     10     mimetype2ext,
     11     parse_iso8601,
     12     str_or_none,
     13     try_get,
     14 )
     15 
     16 
     17 class PatreonIE(InfoExtractor):
     18     _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
     19     _TESTS = [{
     20         'url': 'http://www.patreon.com/creation?hid=743933',
     21         'md5': 'e25505eec1053a6e6813b8ed369875cc',
     22         'info_dict': {
     23             'id': '743933',
     24             'ext': 'mp3',
     25             'title': 'Episode 166: David Smalley of Dogma Debate',
     26             'description': 'md5:713b08b772cd6271b9f3906683cfacdf',
     27             'uploader': 'Cognitive Dissonance Podcast',
     28             'thumbnail': 're:^https?://.*$',
     29             'timestamp': 1406473987,
     30             'upload_date': '20140727',
     31             'uploader_id': '87145',
     32         },
     33     }, {
     34         'url': 'http://www.patreon.com/creation?hid=754133',
     35         'md5': '3eb09345bf44bf60451b8b0b81759d0a',
     36         'info_dict': {
     37             'id': '754133',
     38             'ext': 'mp3',
     39             'title': 'CD 167 Extra',
     40             'uploader': 'Cognitive Dissonance Podcast',
     41             'thumbnail': 're:^https?://.*$',
     42         },
     43         'skip': 'Patron-only content',
     44     }, {
     45         'url': 'https://www.patreon.com/creation?hid=1682498',
     46         'info_dict': {
     47             'id': 'SU4fj_aEMVw',
     48             'ext': 'mp4',
     49             'title': 'I\'m on Patreon!',
     50             'uploader': 'TraciJHines',
     51             'thumbnail': 're:^https?://.*$',
     52             'upload_date': '20150211',
     53             'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
     54             'uploader_id': 'TraciJHines',
     55         },
     56         'params': {
     57             'noplaylist': True,
     58             'skip_download': True,
     59         }
     60     }, {
     61         'url': 'https://www.patreon.com/posts/episode-166-of-743933',
     62         'only_matching': True,
     63     }, {
     64         'url': 'https://www.patreon.com/posts/743933',
     65         'only_matching': True,
     66     }]
     67 
     68     # Currently Patreon exposes download URL via hidden CSS, so login is not
     69     # needed. Keeping this commented for when this inevitably changes.
     70     '''
     71     def _login(self):
     72         username, password = self._get_login_info()
     73         if username is None:
     74             return
     75 
     76         login_form = {
     77             'redirectUrl': 'http://www.patreon.com/',
     78             'email': username,
     79             'password': password,
     80         }
     81 
     82         request = sanitized_Request(
     83             'https://www.patreon.com/processLogin',
     84             compat_urllib_parse_urlencode(login_form).encode('utf-8')
     85         )
     86         login_page = self._download_webpage(request, None, note='Logging in')
     87 
     88         if re.search(r'onLoginFailed', login_page):
     89             raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
     90 
     91     def _real_initialize(self):
     92         self._login()
     93     '''
     94 
     95     def _real_extract(self, url):
     96         video_id = self._match_id(url)
     97         post = self._download_json(
     98             'https://www.patreon.com/api/posts/' + video_id, video_id, query={
     99                 'fields[media]': 'download_url,mimetype,size_bytes',
    100                 'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title',
    101                 'fields[user]': 'full_name,url',
    102                 'json-api-use-default-includes': 'false',
    103                 'include': 'media,user',
    104             })
    105         attributes = post['data']['attributes']
    106         title = attributes['title'].strip()
    107         image = attributes.get('image') or {}
    108         info = {
    109             'id': video_id,
    110             'title': title,
    111             'description': clean_html(attributes.get('content')),
    112             'thumbnail': image.get('large_url') or image.get('url'),
    113             'timestamp': parse_iso8601(attributes.get('published_at')),
    114             'like_count': int_or_none(attributes.get('like_count')),
    115             'comment_count': int_or_none(attributes.get('comment_count')),
    116         }
    117 
    118         for i in post.get('included', []):
    119             i_type = i.get('type')
    120             if i_type == 'media':
    121                 media_attributes = i.get('attributes') or {}
    122                 download_url = media_attributes.get('download_url')
    123                 ext = mimetype2ext(media_attributes.get('mimetype'))
    124                 if download_url and ext in KNOWN_EXTENSIONS:
    125                     info.update({
    126                         'ext': ext,
    127                         'filesize': int_or_none(media_attributes.get('size_bytes')),
    128                         'url': download_url,
    129                     })
    130             elif i_type == 'user':
    131                 user_attributes = i.get('attributes')
    132                 if user_attributes:
    133                     info.update({
    134                         'uploader': user_attributes.get('full_name'),
    135                         'uploader_id': str_or_none(i.get('id')),
    136                         'uploader_url': user_attributes.get('url'),
    137                     })
    138 
    139         if not info.get('url'):
    140             embed_url = try_get(attributes, lambda x: x['embed']['url'])
    141             if embed_url:
    142                 info.update({
    143                     '_type': 'url',
    144                     'url': embed_url,
    145                 })
    146 
    147         if not info.get('url'):
    148             post_file = attributes['post_file']
    149             ext = determine_ext(post_file.get('name'))
    150             if ext in KNOWN_EXTENSIONS:
    151                 info.update({
    152                     'ext': ext,
    153                     'url': post_file['url'],
    154                 })
    155 
    156         return info