youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

vvvvid.py (10300B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from .youtube import YoutubeIE
      8 from ..utils import (
      9     ExtractorError,
     10     int_or_none,
     11     str_or_none,
     12 )
     13 
     14 
     15 class VVVVIDIE(InfoExtractor):
     16     _VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/'
     17     _VALID_URL = r'%s(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' % _VALID_URL_BASE
     18     _TESTS = [{
     19         # video_type == 'video/vvvvid'
     20         'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
     21         'md5': 'b8d3cecc2e981adc3835adf07f6df91b',
     22         'info_dict': {
     23             'id': '489048',
     24             'ext': 'mp4',
     25             'title': 'Ping Pong',
     26             'duration': 239,
     27             'series': '"Perché dovrei guardarlo?" di Dario Moccia',
     28             'season_id': '437',
     29             'episode': 'Ping Pong',
     30             'episode_number': 1,
     31             'episode_id': '3334',
     32             'view_count': int,
     33             'like_count': int,
     34             'repost_count': int,
     35         },
     36         'params': {
     37             'skip_download': True,
     38         },
     39     }, {
     40         # video_type == 'video/rcs'
     41         'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
     42         'md5': '33e0edfba720ad73a8782157fdebc648',
     43         'info_dict': {
     44             'id': '482493',
     45             'ext': 'mp4',
     46             'title': 'Episodio 01',
     47         },
     48         'params': {
     49             'skip_download': True,
     50         },
     51     }, {
     52         # video_type == 'video/youtube'
     53         'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
     54         'md5': '33e0edfba720ad73a8782157fdebc648',
     55         'info_dict': {
     56             'id': 'RzmFKUDOUgw',
     57             'ext': 'mp4',
     58             'title': 'Trailer',
     59             'upload_date': '20150906',
     60             'description': 'md5:a5e802558d35247fee285875328c0b80',
     61             'uploader_id': 'BandaiVisual',
     62             'uploader': 'BANDAI NAMCO Arts Channel',
     63         },
     64         'params': {
     65             'skip_download': True,
     66         },
     67     }, {
     68         'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
     69         'only_matching': True
     70     }]
     71     _conn_id = None
     72 
     73     def _real_initialize(self):
     74         self._conn_id = self._download_json(
     75             'https://www.vvvvid.it/user/login',
     76             None, headers=self.geo_verification_headers())['data']['conn_id']
     77 
     78     def _download_info(self, show_id, path, video_id, fatal=True, query=None):
     79         q = {
     80             'conn_id': self._conn_id,
     81         }
     82         if query:
     83             q.update(query)
     84         response = self._download_json(
     85             'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path),
     86             video_id, headers=self.geo_verification_headers(), query=q, fatal=fatal)
     87         if not (response or fatal):
     88             return
     89         if response.get('result') == 'error':
     90             raise ExtractorError('%s said: %s' % (
     91                 self.IE_NAME, response['message']), expected=True)
     92         return response['data']
     93 
     94     def _extract_common_video_info(self, video_data):
     95         return {
     96             'thumbnail': video_data.get('thumbnail'),
     97             'episode_id': str_or_none(video_data.get('id')),
     98         }
     99 
    100     def _real_extract(self, url):
    101         show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
    102 
    103         response = self._download_info(
    104             show_id, 'season/%s' % season_id,
    105             video_id, query={'video_id': video_id})
    106 
    107         vid = int(video_id)
    108         video_data = list(filter(
    109             lambda episode: episode.get('video_id') == vid, response))[0]
    110         title = video_data['title']
    111         formats = []
    112 
    113         # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
    114         def ds(h):
    115             g = "MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij"
    116 
    117             def f(m):
    118                 l = []
    119                 o = 0
    120                 b = False
    121                 m_len = len(m)
    122                 while ((not b) and o < m_len):
    123                     n = m[o] << 2
    124                     o += 1
    125                     k = -1
    126                     j = -1
    127                     if o < m_len:
    128                         n += m[o] >> 4
    129                         o += 1
    130                         if o < m_len:
    131                             k = (m[o - 1] << 4) & 255
    132                             k += m[o] >> 2
    133                             o += 1
    134                             if o < m_len:
    135                                 j = (m[o - 1] << 6) & 255
    136                                 j += m[o]
    137                                 o += 1
    138                             else:
    139                                 b = True
    140                         else:
    141                             b = True
    142                     else:
    143                         b = True
    144                     l.append(n)
    145                     if k != -1:
    146                         l.append(k)
    147                     if j != -1:
    148                         l.append(j)
    149                 return l
    150 
    151             c = []
    152             for e in h:
    153                 c.append(g.index(e))
    154 
    155             c_len = len(c)
    156             for e in range(c_len * 2 - 1, -1, -1):
    157                 a = c[e % c_len] ^ c[(e + 1) % c_len]
    158                 c[e % c_len] = a
    159 
    160             c = f(c)
    161             d = ''
    162             for e in c:
    163                 d += chr(e)
    164 
    165             return d
    166 
    167         info = {}
    168 
    169         def metadata_from_url(r_url):
    170             if not info and r_url:
    171                 mobj = re.search(r'_(?:S(\d+))?Ep(\d+)', r_url)
    172                 if mobj:
    173                     info['episode_number'] = int(mobj.group(2))
    174                     season_number = mobj.group(1)
    175                     if season_number:
    176                         info['season_number'] = int(season_number)
    177 
    178         video_type = video_data.get('video_type')
    179         is_youtube = False
    180         for quality in ('', '_sd'):
    181             embed_code = video_data.get('embed_info' + quality)
    182             if not embed_code:
    183                 continue
    184             embed_code = ds(embed_code)
    185             if video_type == 'video/kenc':
    186                 embed_code = re.sub(r'https?(://[^/]+)/z/', r'https\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
    187                 kenc = self._download_json(
    188                     'https://www.vvvvid.it/kenc', video_id, query={
    189                         'action': 'kt',
    190                         'conn_id': self._conn_id,
    191                         'url': embed_code,
    192                     }, fatal=False) or {}
    193                 kenc_message = kenc.get('message')
    194                 if kenc_message:
    195                     embed_code += '?' + ds(kenc_message)
    196                 formats.extend(self._extract_m3u8_formats(
    197                     embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
    198             elif video_type == 'video/rcs':
    199                 formats.extend(self._extract_akamai_formats(embed_code, video_id))
    200             elif video_type == 'video/youtube':
    201                 info.update({
    202                     '_type': 'url_transparent',
    203                     'ie_key': YoutubeIE.ie_key(),
    204                     'url': embed_code,
    205                 })
    206                 is_youtube = True
    207                 break
    208             else:
    209                 formats.extend(self._extract_wowza_formats(
    210                     'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
    211             metadata_from_url(embed_code)
    212 
    213         if not is_youtube:
    214             self._sort_formats(formats)
    215             info['formats'] = formats
    216 
    217         metadata_from_url(video_data.get('thumbnail'))
    218         info.update(self._extract_common_video_info(video_data))
    219         info.update({
    220             'id': video_id,
    221             'title': title,
    222             'duration': int_or_none(video_data.get('length')),
    223             'series': video_data.get('show_title'),
    224             'season_id': season_id,
    225             'episode': title,
    226             'view_count': int_or_none(video_data.get('views')),
    227             'like_count': int_or_none(video_data.get('video_likes')),
    228             'repost_count': int_or_none(video_data.get('video_shares')),
    229         })
    230         return info
    231 
    232 
    233 class VVVVIDShowIE(VVVVIDIE):
    234     _VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE
    235     _TESTS = [{
    236         'url': 'https://www.vvvvid.it/show/156/psyco-pass',
    237         'info_dict': {
    238             'id': '156',
    239             'title': 'Psycho-Pass',
    240             'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
    241         },
    242         'playlist_count': 46,
    243     }, {
    244         'url': 'https://www.vvvvid.it/show/156',
    245         'only_matching': True,
    246     }]
    247 
    248     def _real_extract(self, url):
    249         base_url, show_id, show_title = re.match(self._VALID_URL, url).groups()
    250 
    251         seasons = self._download_info(
    252             show_id, 'seasons/', show_title)
    253 
    254         show_info = self._download_info(
    255             show_id, 'info/', show_title, fatal=False)
    256 
    257         if not show_title:
    258             base_url += "/title"
    259 
    260         entries = []
    261         for season in (seasons or []):
    262             episodes = season.get('episodes') or []
    263             playlist_title = season.get('name') or show_info.get('title')
    264             for episode in episodes:
    265                 if episode.get('playable') is False:
    266                     continue
    267                 season_id = str_or_none(episode.get('season_id'))
    268                 video_id = str_or_none(episode.get('video_id'))
    269                 if not (season_id and video_id):
    270                     continue
    271                 info = self._extract_common_video_info(episode)
    272                 info.update({
    273                     '_type': 'url_transparent',
    274                     'ie_key': VVVVIDIE.ie_key(),
    275                     'url': '/'.join([base_url, season_id, video_id]),
    276                     'title': episode.get('title'),
    277                     'description': episode.get('description'),
    278                     'season_id': season_id,
    279                     'playlist_title': playlist_title,
    280                 })
    281                 entries.append(info)
    282 
    283         return self.playlist_result(
    284             entries, show_id, show_info.get('title'), show_info.get('description'))