twitch.py - youtube-dl - Another place where youtube-dl lives on

twitch.py (34150B)
      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import collections
      5 import itertools
      6 import json
      7 import random
      8 import re
      9 
     10 from .common import InfoExtractor
     11 from ..compat import (
     12     compat_parse_qs,
     13     compat_str,
     14     compat_urlparse,
     15     compat_urllib_parse_urlencode,
     16     compat_urllib_parse_urlparse,
     17 )
     18 from ..utils import (
     19     clean_html,
     20     dict_get,
     21     ExtractorError,
     22     float_or_none,
     23     int_or_none,
     24     parse_duration,
     25     parse_iso8601,
     26     qualities,
     27     try_get,
     28     unified_timestamp,
     29     update_url_query,
     30     url_or_none,
     31     urljoin,
     32 )
     33 
     34 
     35 class TwitchBaseIE(InfoExtractor):
     36     _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
     37 
     38     _API_BASE = 'https://api.twitch.tv'
     39     _USHER_BASE = 'https://usher.ttvnw.net'
     40     _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
     41     _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
     42     _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
     43     _NETRC_MACHINE = 'twitch'
     44 
     45     _OPERATION_HASHES = {
     46         'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
     47         'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
     48         'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
     49         'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
     50         'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
     51         'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
     52         'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11',
     53         'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
     54         'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
     55     }
     56 
     57     def _real_initialize(self):
     58         self._login()
     59 
     60     def _login(self):
     61         username, password = self._get_login_info()
     62         if username is None:
     63             return
     64 
     65         def fail(message):
     66             raise ExtractorError(
     67                 'Unable to login. Twitch said: %s' % message, expected=True)
     68 
     69         def login_step(page, urlh, note, data):
     70             form = self._hidden_inputs(page)
     71             form.update(data)
     72 
     73             page_url = urlh.geturl()
     74             post_url = self._search_regex(
     75                 r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
     76                 'post url', default=self._LOGIN_POST_URL, group='url')
     77             post_url = urljoin(page_url, post_url)
     78 
     79             headers = {
     80                 'Referer': page_url,
     81                 'Origin': 'https://www.twitch.tv',
     82                 'Content-Type': 'text/plain;charset=UTF-8',
     83             }
     84 
     85             response = self._download_json(
     86                 post_url, None, note, data=json.dumps(form).encode(),
     87                 headers=headers, expected_status=400)
     88             error = dict_get(response, ('error', 'error_description', 'error_code'))
     89             if error:
     90                 fail(error)
     91 
     92             if 'Authenticated successfully' in response.get('message', ''):
     93                 return None, None
     94 
     95             redirect_url = urljoin(
     96                 post_url,
     97                 response.get('redirect') or response['redirect_path'])
     98             return self._download_webpage_handle(
     99                 redirect_url, None, 'Downloading login redirect page',
    100                 headers=headers)
    101 
    102         login_page, handle = self._download_webpage_handle(
    103             self._LOGIN_FORM_URL, None, 'Downloading login page')
    104 
    105         # Some TOR nodes and public proxies are blocked completely
    106         if 'blacklist_message' in login_page:
    107             fail(clean_html(login_page))
    108 
    109         redirect_page, handle = login_step(
    110             login_page, handle, 'Logging in', {
    111                 'username': username,
    112                 'password': password,
    113                 'client_id': self._CLIENT_ID,
    114             })
    115 
    116         # Successful login
    117         if not redirect_page:
    118             return
    119 
    120         if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
    121             # TODO: Add mechanism to request an SMS or phone call
    122             tfa_token = self._get_tfa_info('two-factor authentication token')
    123             login_step(redirect_page, handle, 'Submitting TFA token', {
    124                 'authy_token': tfa_token,
    125                 'remember_2fa': 'true',
    126             })
    127 
    128     def _prefer_source(self, formats):
    129         try:
    130             source = next(f for f in formats if f['format_id'] == 'Source')
    131             source['quality'] = 10
    132         except StopIteration:
    133             for f in formats:
    134                 if '/chunked/' in f['url']:
    135                     f.update({
    136                         'quality': 10,
    137                         'format_note': 'Source',
    138                     })
    139         self._sort_formats(formats)
    140 
    141     def _download_base_gql(self, video_id, ops, note, fatal=True):
    142         headers = {
    143             'Content-Type': 'text/plain;charset=UTF-8',
    144             'Client-ID': self._CLIENT_ID,
    145         }
    146         gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
    147         if gql_auth:
    148             headers['Authorization'] = 'OAuth ' + gql_auth.value
    149         return self._download_json(
    150             'https://gql.twitch.tv/gql', video_id, note,
    151             data=json.dumps(ops).encode(),
    152             headers=headers, fatal=fatal)
    153 
    154     def _download_gql(self, video_id, ops, note, fatal=True):
    155         for op in ops:
    156             op['extensions'] = {
    157                 'persistedQuery': {
    158                     'version': 1,
    159                     'sha256Hash': self._OPERATION_HASHES[op['operationName']],
    160                 }
    161             }
    162         return self._download_base_gql(video_id, ops, note)
    163 
    164     def _download_access_token(self, video_id, token_kind, param_name):
    165         method = '%sPlaybackAccessToken' % token_kind
    166         ops = {
    167             'query': '''{
    168               %s(
    169                 %s: "%s",
    170                 params: {
    171                   platform: "web",
    172                   playerBackend: "mediaplayer",
    173                   playerType: "site"
    174                 }
    175               )
    176               {
    177                 value
    178                 signature
    179               }
    180             }''' % (method, param_name, video_id),
    181         }
    182         return self._download_base_gql(
    183             video_id, ops,
    184             'Downloading %s access token GraphQL' % token_kind)['data'][method]
    185 
    186 
    187 class TwitchVodIE(TwitchBaseIE):
    188     IE_NAME = 'twitch:vod'
    189     _VALID_URL = r'''(?x)
    190                     https?://
    191                         (?:
    192                             (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
    193                             player\.twitch\.tv/\?.*?\bvideo=v?
    194                         )
    195                         (?P<id>\d+)
    196                     '''
    197 
    198     _TESTS = [{
    199         'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
    200         'info_dict': {
    201             'id': 'v6528877',
    202             'ext': 'mp4',
    203             'title': 'LCK Summer Split - Week 6 Day 1',
    204             'thumbnail': r're:^https?://.*\.jpg$',
    205             'duration': 17208,
    206             'timestamp': 1435131734,
    207             'upload_date': '20150624',
    208             'uploader': 'Riot Games',
    209             'uploader_id': 'riotgames',
    210             'view_count': int,
    211             'start_time': 310,
    212         },
    213         'params': {
    214             # m3u8 download
    215             'skip_download': True,
    216         },
    217     }, {
    218         # Untitled broadcast (title is None)
    219         'url': 'http://www.twitch.tv/belkao_o/v/11230755',
    220         'info_dict': {
    221             'id': 'v11230755',
    222             'ext': 'mp4',
    223             'title': 'Untitled Broadcast',
    224             'thumbnail': r're:^https?://.*\.jpg$',
    225             'duration': 1638,
    226             'timestamp': 1439746708,
    227             'upload_date': '20150816',
    228             'uploader': 'BelkAO_o',
    229             'uploader_id': 'belkao_o',
    230             'view_count': int,
    231         },
    232         'params': {
    233             # m3u8 download
    234             'skip_download': True,
    235         },
    236         'skip': 'HTTP Error 404: Not Found',
    237     }, {
    238         'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
    239         'only_matching': True,
    240     }, {
    241         'url': 'https://www.twitch.tv/videos/6528877',
    242         'only_matching': True,
    243     }, {
    244         'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
    245         'only_matching': True,
    246     }, {
    247         'url': 'https://www.twitch.tv/northernlion/video/291940395',
    248         'only_matching': True,
    249     }, {
    250         'url': 'https://player.twitch.tv/?video=480452374',
    251         'only_matching': True,
    252     }]
    253 
    254     def _download_info(self, item_id):
    255         data = self._download_gql(
    256             item_id, [{
    257                 'operationName': 'VideoMetadata',
    258                 'variables': {
    259                     'channelLogin': '',
    260                     'videoID': item_id,
    261                 },
    262             }],
    263             'Downloading stream metadata GraphQL')[0]['data']
    264         video = data.get('video')
    265         if video is None:
    266             raise ExtractorError(
    267                 'Video %s does not exist' % item_id, expected=True)
    268         return self._extract_info_gql(video, item_id)
    269 
    270     @staticmethod
    271     def _extract_info(info):
    272         status = info.get('status')
    273         if status == 'recording':
    274             is_live = True
    275         elif status == 'recorded':
    276             is_live = False
    277         else:
    278             is_live = None
    279         _QUALITIES = ('small', 'medium', 'large')
    280         quality_key = qualities(_QUALITIES)
    281         thumbnails = []
    282         preview = info.get('preview')
    283         if isinstance(preview, dict):
    284             for thumbnail_id, thumbnail_url in preview.items():
    285                 thumbnail_url = url_or_none(thumbnail_url)
    286                 if not thumbnail_url:
    287                     continue
    288                 if thumbnail_id not in _QUALITIES:
    289                     continue
    290                 thumbnails.append({
    291                     'url': thumbnail_url,
    292                     'preference': quality_key(thumbnail_id),
    293                 })
    294         return {
    295             'id': info['_id'],
    296             'title': info.get('title') or 'Untitled Broadcast',
    297             'description': info.get('description'),
    298             'duration': int_or_none(info.get('length')),
    299             'thumbnails': thumbnails,
    300             'uploader': info.get('channel', {}).get('display_name'),
    301             'uploader_id': info.get('channel', {}).get('name'),
    302             'timestamp': parse_iso8601(info.get('recorded_at')),
    303             'view_count': int_or_none(info.get('views')),
    304             'is_live': is_live,
    305         }
    306 
    307     @staticmethod
    308     def _extract_info_gql(info, item_id):
    309         vod_id = info.get('id') or item_id
    310         # id backward compatibility for download archives
    311         if vod_id[0] != 'v':
    312             vod_id = 'v%s' % vod_id
    313         thumbnail = url_or_none(info.get('previewThumbnailURL'))
    314         if thumbnail:
    315             for p in ('width', 'height'):
    316                 thumbnail = thumbnail.replace('{%s}' % p, '0')
    317         return {
    318             'id': vod_id,
    319             'title': info.get('title') or 'Untitled Broadcast',
    320             'description': info.get('description'),
    321             'duration': int_or_none(info.get('lengthSeconds')),
    322             'thumbnail': thumbnail,
    323             'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
    324             'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
    325             'timestamp': unified_timestamp(info.get('publishedAt')),
    326             'view_count': int_or_none(info.get('viewCount')),
    327         }
    328 
    329     def _real_extract(self, url):
    330         vod_id = self._match_id(url)
    331 
    332         info = self._download_info(vod_id)
    333         access_token = self._download_access_token(vod_id, 'video', 'id')
    334 
    335         formats = self._extract_m3u8_formats(
    336             '%s/vod/%s.m3u8?%s' % (
    337                 self._USHER_BASE, vod_id,
    338                 compat_urllib_parse_urlencode({
    339                     'allow_source': 'true',
    340                     'allow_audio_only': 'true',
    341                     'allow_spectre': 'true',
    342                     'player': 'twitchweb',
    343                     'playlist_include_framerate': 'true',
    344                     'nauth': access_token['value'],
    345                     'nauthsig': access_token['signature'],
    346                 })),
    347             vod_id, 'mp4', entry_protocol='m3u8_native')
    348 
    349         self._prefer_source(formats)
    350         info['formats'] = formats
    351 
    352         parsed_url = compat_urllib_parse_urlparse(url)
    353         query = compat_parse_qs(parsed_url.query)
    354         if 't' in query:
    355             info['start_time'] = parse_duration(query['t'][0])
    356 
    357         if info.get('timestamp') is not None:
    358             info['subtitles'] = {
    359                 'rechat': [{
    360                     'url': update_url_query(
    361                         'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, {
    362                             'client_id': self._CLIENT_ID,
    363                         }),
    364                     'ext': 'json',
    365                 }],
    366             }
    367 
    368         return info
    369 
    370 
    371 def _make_video_result(node):
    372     assert isinstance(node, dict)
    373     video_id = node.get('id')
    374     if not video_id:
    375         return
    376     return {
    377         '_type': 'url_transparent',
    378         'ie_key': TwitchVodIE.ie_key(),
    379         'id': video_id,
    380         'url': 'https://www.twitch.tv/videos/%s' % video_id,
    381         'title': node.get('title'),
    382         'thumbnail': node.get('previewThumbnailURL'),
    383         'duration': float_or_none(node.get('lengthSeconds')),
    384         'view_count': int_or_none(node.get('viewCount')),
    385     }
    386 
    387 
    388 class TwitchCollectionIE(TwitchBaseIE):
    389     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'
    390 
    391     _TESTS = [{
    392         'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ',
    393         'info_dict': {
    394             'id': 'wlDCoH0zEBZZbQ',
    395             'title': 'Overthrow Nook, capitalism for children',
    396         },
    397         'playlist_mincount': 13,
    398     }]
    399 
    400     _OPERATION_NAME = 'CollectionSideBar'
    401 
    402     def _real_extract(self, url):
    403         collection_id = self._match_id(url)
    404         collection = self._download_gql(
    405             collection_id, [{
    406                 'operationName': self._OPERATION_NAME,
    407                 'variables': {'collectionID': collection_id},
    408             }],
    409             'Downloading collection GraphQL')[0]['data']['collection']
    410         title = collection.get('title')
    411         entries = []
    412         for edge in collection['items']['edges']:
    413             if not isinstance(edge, dict):
    414                 continue
    415             node = edge.get('node')
    416             if not isinstance(node, dict):
    417                 continue
    418             video = _make_video_result(node)
    419             if video:
    420                 entries.append(video)
    421         return self.playlist_result(
    422             entries, playlist_id=collection_id, playlist_title=title)
    423 
    424 
    425 class TwitchPlaylistBaseIE(TwitchBaseIE):
    426     _PAGE_LIMIT = 100
    427 
    428     def _entries(self, channel_name, *args):
    429         cursor = None
    430         variables_common = self._make_variables(channel_name, *args)
    431         entries_key = '%ss' % self._ENTRY_KIND
    432         for page_num in itertools.count(1):
    433             variables = variables_common.copy()
    434             variables['limit'] = self._PAGE_LIMIT
    435             if cursor:
    436                 variables['cursor'] = cursor
    437             page = self._download_gql(
    438                 channel_name, [{
    439                     'operationName': self._OPERATION_NAME,
    440                     'variables': variables,
    441                 }],
    442                 'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num),
    443                 fatal=False)
    444             if not page:
    445                 break
    446             edges = try_get(
    447                 page, lambda x: x[0]['data']['user'][entries_key]['edges'], list)
    448             if not edges:
    449                 break
    450             for edge in edges:
    451                 if not isinstance(edge, dict):
    452                     continue
    453                 if edge.get('__typename') != self._EDGE_KIND:
    454                     continue
    455                 node = edge.get('node')
    456                 if not isinstance(node, dict):
    457                     continue
    458                 if node.get('__typename') != self._NODE_KIND:
    459                     continue
    460                 entry = self._extract_entry(node)
    461                 if entry:
    462                     cursor = edge.get('cursor')
    463                     yield entry
    464             if not cursor or not isinstance(cursor, compat_str):
    465                 break
    466 
    467 
    468 class TwitchVideosIE(TwitchPlaylistBaseIE):
    469     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'
    470 
    471     _TESTS = [{
    472         # All Videos sorted by Date
    473         'url': 'https://www.twitch.tv/spamfish/videos?filter=all',
    474         'info_dict': {
    475             'id': 'spamfish',
    476             'title': 'spamfish - All Videos sorted by Date',
    477         },
    478         'playlist_mincount': 924,
    479     }, {
    480         # All Videos sorted by Popular
    481         'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views',
    482         'info_dict': {
    483             'id': 'spamfish',
    484             'title': 'spamfish - All Videos sorted by Popular',
    485         },
    486         'playlist_mincount': 931,
    487     }, {
    488         # Past Broadcasts sorted by Date
    489         'url': 'https://www.twitch.tv/spamfish/videos?filter=archives',
    490         'info_dict': {
    491             'id': 'spamfish',
    492             'title': 'spamfish - Past Broadcasts sorted by Date',
    493         },
    494         'playlist_mincount': 27,
    495     }, {
    496         # Highlights sorted by Date
    497         'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights',
    498         'info_dict': {
    499             'id': 'spamfish',
    500             'title': 'spamfish - Highlights sorted by Date',
    501         },
    502         'playlist_mincount': 901,
    503     }, {
    504         # Uploads sorted by Date
    505         'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time',
    506         'info_dict': {
    507             'id': 'esl_csgo',
    508             'title': 'esl_csgo - Uploads sorted by Date',
    509         },
    510         'playlist_mincount': 5,
    511     }, {
    512         # Past Premieres sorted by Date
    513         'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres',
    514         'info_dict': {
    515             'id': 'spamfish',
    516             'title': 'spamfish - Past Premieres sorted by Date',
    517         },
    518         'playlist_mincount': 1,
    519     }, {
    520         'url': 'https://www.twitch.tv/spamfish/videos/all',
    521         'only_matching': True,
    522     }, {
    523         'url': 'https://m.twitch.tv/spamfish/videos/all',
    524         'only_matching': True,
    525     }, {
    526         'url': 'https://www.twitch.tv/spamfish/videos',
    527         'only_matching': True,
    528     }]
    529 
    530     Broadcast = collections.namedtuple('Broadcast', ['type', 'label'])
    531 
    532     _DEFAULT_BROADCAST = Broadcast(None, 'All Videos')
    533     _BROADCASTS = {
    534         'archives': Broadcast('ARCHIVE', 'Past Broadcasts'),
    535         'highlights': Broadcast('HIGHLIGHT', 'Highlights'),
    536         'uploads': Broadcast('UPLOAD', 'Uploads'),
    537         'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'),
    538         'all': _DEFAULT_BROADCAST,
    539     }
    540 
    541     _DEFAULT_SORTED_BY = 'Date'
    542     _SORTED_BY = {
    543         'time': _DEFAULT_SORTED_BY,
    544         'views': 'Popular',
    545     }
    546 
    547     _OPERATION_NAME = 'FilterableVideoTower_Videos'
    548     _ENTRY_KIND = 'video'
    549     _EDGE_KIND = 'VideoEdge'
    550     _NODE_KIND = 'Video'
    551 
    552     @classmethod
    553     def suitable(cls, url):
    554         return (False
    555                 if any(ie.suitable(url) for ie in (
    556                     TwitchVideosClipsIE,
    557                     TwitchVideosCollectionsIE))
    558                 else super(TwitchVideosIE, cls).suitable(url))
    559 
    560     @staticmethod
    561     def _make_variables(channel_name, broadcast_type, sort):
    562         return {
    563             'channelOwnerLogin': channel_name,
    564             'broadcastType': broadcast_type,
    565             'videoSort': sort.upper(),
    566         }
    567 
    568     @staticmethod
    569     def _extract_entry(node):
    570         return _make_video_result(node)
    571 
    572     def _real_extract(self, url):
    573         channel_name = self._match_id(url)
    574         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    575         filter = qs.get('filter', ['all'])[0]
    576         sort = qs.get('sort', ['time'])[0]
    577         broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST)
    578         return self.playlist_result(
    579             self._entries(channel_name, broadcast.type, sort),
    580             playlist_id=channel_name,
    581             playlist_title='%s - %s sorted by %s'
    582             % (channel_name, broadcast.label,
    583                self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY)))
    584 
    585 
    586 class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
    587     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)'
    588 
    589     _TESTS = [{
    590         # Clips
    591         'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all',
    592         'info_dict': {
    593             'id': 'vanillatv',
    594             'title': 'vanillatv - Clips Top All',
    595         },
    596         'playlist_mincount': 1,
    597     }, {
    598         'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d',
    599         'only_matching': True,
    600     }]
    601 
    602     Clip = collections.namedtuple('Clip', ['filter', 'label'])
    603 
    604     _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D')
    605     _RANGE = {
    606         '24hr': Clip('LAST_DAY', 'Top 24H'),
    607         '7d': _DEFAULT_CLIP,
    608         '30d': Clip('LAST_MONTH', 'Top 30D'),
    609         'all': Clip('ALL_TIME', 'Top All'),
    610     }
    611 
    612     # NB: values other than 20 result in skipped videos
    613     _PAGE_LIMIT = 20
    614 
    615     _OPERATION_NAME = 'ClipsCards__User'
    616     _ENTRY_KIND = 'clip'
    617     _EDGE_KIND = 'ClipEdge'
    618     _NODE_KIND = 'Clip'
    619 
    620     @staticmethod
    621     def _make_variables(channel_name, filter):
    622         return {
    623             'login': channel_name,
    624             'criteria': {
    625                 'filter': filter,
    626             },
    627         }
    628 
    629     @staticmethod
    630     def _extract_entry(node):
    631         assert isinstance(node, dict)
    632         clip_url = url_or_none(node.get('url'))
    633         if not clip_url:
    634             return
    635         return {
    636             '_type': 'url_transparent',
    637             'ie_key': TwitchClipsIE.ie_key(),
    638             'id': node.get('id'),
    639             'url': clip_url,
    640             'title': node.get('title'),
    641             'thumbnail': node.get('thumbnailURL'),
    642             'duration': float_or_none(node.get('durationSeconds')),
    643             'timestamp': unified_timestamp(node.get('createdAt')),
    644             'view_count': int_or_none(node.get('viewCount')),
    645             'language': node.get('language'),
    646         }
    647 
    648     def _real_extract(self, url):
    649         channel_name = self._match_id(url)
    650         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    651         range = qs.get('range', ['7d'])[0]
    652         clip = self._RANGE.get(range, self._DEFAULT_CLIP)
    653         return self.playlist_result(
    654             self._entries(channel_name, clip.filter),
    655             playlist_id=channel_name,
    656             playlist_title='%s - Clips %s' % (channel_name, clip.label))
    657 
    658 
    659 class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
    660     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections'
    661 
    662     _TESTS = [{
    663         # Collections
    664         'url': 'https://www.twitch.tv/spamfish/videos?filter=collections',
    665         'info_dict': {
    666             'id': 'spamfish',
    667             'title': 'spamfish - Collections',
    668         },
    669         'playlist_mincount': 3,
    670     }]
    671 
    672     _OPERATION_NAME = 'ChannelCollectionsContent'
    673     _ENTRY_KIND = 'collection'
    674     _EDGE_KIND = 'CollectionsItemEdge'
    675     _NODE_KIND = 'Collection'
    676 
    677     @staticmethod
    678     def _make_variables(channel_name):
    679         return {
    680             'ownerLogin': channel_name,
    681         }
    682 
    683     @staticmethod
    684     def _extract_entry(node):
    685         assert isinstance(node, dict)
    686         collection_id = node.get('id')
    687         if not collection_id:
    688             return
    689         return {
    690             '_type': 'url_transparent',
    691             'ie_key': TwitchCollectionIE.ie_key(),
    692             'id': collection_id,
    693             'url': 'https://www.twitch.tv/collections/%s' % collection_id,
    694             'title': node.get('title'),
    695             'thumbnail': node.get('thumbnailURL'),
    696             'duration': float_or_none(node.get('lengthSeconds')),
    697             'timestamp': unified_timestamp(node.get('updatedAt')),
    698             'view_count': int_or_none(node.get('viewCount')),
    699         }
    700 
    701     def _real_extract(self, url):
    702         channel_name = self._match_id(url)
    703         return self.playlist_result(
    704             self._entries(channel_name), playlist_id=channel_name,
    705             playlist_title='%s - Collections' % channel_name)
    706 
    707 
    708 class TwitchStreamIE(TwitchBaseIE):
    709     IE_NAME = 'twitch:stream'
    710     _VALID_URL = r'''(?x)
    711                     https?://
    712                         (?:
    713                             (?:(?:www|go|m)\.)?twitch\.tv/|
    714                             player\.twitch\.tv/\?.*?\bchannel=
    715                         )
    716                         (?P<id>[^/#?]+)
    717                     '''
    718 
    719     _TESTS = [{
    720         'url': 'http://www.twitch.tv/shroomztv',
    721         'info_dict': {
    722             'id': '12772022048',
    723             'display_id': 'shroomztv',
    724             'ext': 'mp4',
    725             'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
    726             'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
    727             'is_live': True,
    728             'timestamp': 1421928037,
    729             'upload_date': '20150122',
    730             'uploader': 'ShroomzTV',
    731             'uploader_id': 'shroomztv',
    732             'view_count': int,
    733         },
    734         'params': {
    735             # m3u8 download
    736             'skip_download': True,
    737         },
    738     }, {
    739         'url': 'http://www.twitch.tv/miracle_doto#profile-0',
    740         'only_matching': True,
    741     }, {
    742         'url': 'https://player.twitch.tv/?channel=lotsofs',
    743         'only_matching': True,
    744     }, {
    745         'url': 'https://go.twitch.tv/food',
    746         'only_matching': True,
    747     }, {
    748         'url': 'https://m.twitch.tv/food',
    749         'only_matching': True,
    750     }]
    751 
    752     @classmethod
    753     def suitable(cls, url):
    754         return (False
    755                 if any(ie.suitable(url) for ie in (
    756                     TwitchVodIE,
    757                     TwitchCollectionIE,
    758                     TwitchVideosIE,
    759                     TwitchVideosClipsIE,
    760                     TwitchVideosCollectionsIE,
    761                     TwitchClipsIE))
    762                 else super(TwitchStreamIE, cls).suitable(url))
    763 
    764     def _real_extract(self, url):
    765         channel_name = self._match_id(url).lower()
    766 
    767         gql = self._download_gql(
    768             channel_name, [{
    769                 'operationName': 'StreamMetadata',
    770                 'variables': {'channelLogin': channel_name},
    771             }, {
    772                 'operationName': 'ComscoreStreamingQuery',
    773                 'variables': {
    774                     'channel': channel_name,
    775                     'clipSlug': '',
    776                     'isClip': False,
    777                     'isLive': True,
    778                     'isVodOrCollection': False,
    779                     'vodID': '',
    780                 },
    781             }, {
    782                 'operationName': 'VideoPreviewOverlay',
    783                 'variables': {'login': channel_name},
    784             }],
    785             'Downloading stream GraphQL')
    786 
    787         user = gql[0]['data']['user']
    788 
    789         if not user:
    790             raise ExtractorError(
    791                 '%s does not exist' % channel_name, expected=True)
    792 
    793         stream = user['stream']
    794 
    795         if not stream:
    796             raise ExtractorError('%s is offline' % channel_name, expected=True)
    797 
    798         access_token = self._download_access_token(
    799             channel_name, 'stream', 'channelName')
    800         token = access_token['value']
    801 
    802         stream_id = stream.get('id') or channel_name
    803         query = {
    804             'allow_source': 'true',
    805             'allow_audio_only': 'true',
    806             'allow_spectre': 'true',
    807             'p': random.randint(1000000, 10000000),
    808             'player': 'twitchweb',
    809             'playlist_include_framerate': 'true',
    810             'segment_preference': '4',
    811             'sig': access_token['signature'].encode('utf-8'),
    812             'token': token.encode('utf-8'),
    813         }
    814         formats = self._extract_m3u8_formats(
    815             '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name),
    816             stream_id, 'mp4', query=query)
    817         self._prefer_source(formats)
    818 
    819         view_count = stream.get('viewers')
    820         timestamp = unified_timestamp(stream.get('createdAt'))
    821 
    822         sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
    823         uploader = sq_user.get('displayName')
    824         description = try_get(
    825             sq_user, lambda x: x['broadcastSettings']['title'], compat_str)
    826 
    827         thumbnail = url_or_none(try_get(
    828             gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'],
    829             compat_str))
    830 
    831         title = uploader or channel_name
    832         stream_type = stream.get('type')
    833         if stream_type in ['rerun', 'live']:
    834             title += ' (%s)' % stream_type
    835 
    836         return {
    837             'id': stream_id,
    838             'display_id': channel_name,
    839             'title': self._live_title(title),
    840             'description': description,
    841             'thumbnail': thumbnail,
    842             'uploader': uploader,
    843             'uploader_id': channel_name,
    844             'timestamp': timestamp,
    845             'view_count': view_count,
    846             'formats': formats,
    847             'is_live': stream_type == 'live',
    848         }
    849 
    850 
    851 class TwitchClipsIE(TwitchBaseIE):
    852     IE_NAME = 'twitch:clips'
    853     _VALID_URL = r'''(?x)
    854                     https?://
    855                         (?:
    856                             clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
    857                             (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
    858                         )
    859                         (?P<id>[^/?#&]+)
    860                     '''
    861 
    862     _TESTS = [{
    863         'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
    864         'md5': '761769e1eafce0ffebfb4089cb3847cd',
    865         'info_dict': {
    866             'id': '42850523',
    867             'ext': 'mp4',
    868             'title': 'EA Play 2016 Live from the Novo Theatre',
    869             'thumbnail': r're:^https?://.*\.jpg',
    870             'timestamp': 1465767393,
    871             'upload_date': '20160612',
    872             'creator': 'EA',
    873             'uploader': 'stereotype_',
    874             'uploader_id': '43566419',
    875         },
    876     }, {
    877         # multiple formats
    878         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
    879         'only_matching': True,
    880     }, {
    881         'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
    882         'only_matching': True,
    883     }, {
    884         'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
    885         'only_matching': True,
    886     }, {
    887         'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
    888         'only_matching': True,
    889     }, {
    890         'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
    891         'only_matching': True,
    892     }]
    893 
    894     def _real_extract(self, url):
    895         video_id = self._match_id(url)
    896 
    897         clip = self._download_gql(
    898             video_id, [{
    899                 'operationName': 'VideoAccessToken_Clip',
    900                 'variables': {
    901                     'slug': video_id,
    902                 },
    903             }],
    904             'Downloading clip access token GraphQL')[0]['data']['clip']
    905 
    906         if not clip:
    907             raise ExtractorError(
    908                 'This clip is no longer available', expected=True)
    909 
    910         access_query = {
    911             'sig': clip['playbackAccessToken']['signature'],
    912             'token': clip['playbackAccessToken']['value'],
    913         }
    914 
    915         data = self._download_base_gql(
    916             video_id, {
    917                 'query': '''{
    918   clip(slug: "%s") {
    919     broadcaster {
    920       displayName
    921     }
    922     createdAt
    923     curator {
    924       displayName
    925       id
    926     }
    927     durationSeconds
    928     id
    929     tiny: thumbnailURL(width: 86, height: 45)
    930     small: thumbnailURL(width: 260, height: 147)
    931     medium: thumbnailURL(width: 480, height: 272)
    932     title
    933     videoQualities {
    934       frameRate
    935       quality
    936       sourceURL
    937     }
    938     viewCount
    939   }
    940 }''' % video_id}, 'Downloading clip GraphQL', fatal=False)
    941 
    942         if data:
    943             clip = try_get(data, lambda x: x['data']['clip'], dict) or clip
    944 
    945         formats = []
    946         for option in clip.get('videoQualities', []):
    947             if not isinstance(option, dict):
    948                 continue
    949             source = url_or_none(option.get('sourceURL'))
    950             if not source:
    951                 continue
    952             formats.append({
    953                 'url': update_url_query(source, access_query),
    954                 'format_id': option.get('quality'),
    955                 'height': int_or_none(option.get('quality')),
    956                 'fps': int_or_none(option.get('frameRate')),
    957             })
    958         self._sort_formats(formats)
    959 
    960         thumbnails = []
    961         for thumbnail_id in ('tiny', 'small', 'medium'):
    962             thumbnail_url = clip.get(thumbnail_id)
    963             if not thumbnail_url:
    964                 continue
    965             thumb = {
    966                 'id': thumbnail_id,
    967                 'url': thumbnail_url,
    968             }
    969             mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
    970             if mobj:
    971                 thumb.update({
    972                     'height': int(mobj.group(2)),
    973                     'width': int(mobj.group(1)),
    974                 })
    975             thumbnails.append(thumb)
    976 
    977         return {
    978             'id': clip.get('id') or video_id,
    979             'title': clip.get('title') or video_id,
    980             'formats': formats,
    981             'duration': int_or_none(clip.get('durationSeconds')),
    982             'views': int_or_none(clip.get('viewCount')),
    983             'timestamp': unified_timestamp(clip.get('createdAt')),
    984             'thumbnails': thumbnails,
    985             'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
    986             'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
    987             'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
    988         }
	youtube-dl Another place where youtube-dl lives on
	git clone git://git.oshgnacknak.de/youtube-dl.git
	Log \| Files \| Refs \| README \| LICENSE