youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

weibo.py (4493B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 
      6 import json
      7 import random
      8 import re
      9 
     10 from ..compat import (
     11     compat_parse_qs,
     12     compat_str,
     13 )
     14 from ..utils import (
     15     js_to_json,
     16     strip_jsonp,
     17     urlencode_postdata,
     18 )
     19 
     20 
     21 class WeiboIE(InfoExtractor):
     22     _VALID_URL = r'https?://(?:www\.)?weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
     23     _TEST = {
     24         'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment',
     25         'info_dict': {
     26             'id': 'Fp6RGfbff',
     27             'ext': 'mp4',
     28             'title': 'You should have servants to massage you,... 来自Hosico_猫 - 微博',
     29         }
     30     }
     31 
     32     def _real_extract(self, url):
     33         video_id = self._match_id(url)
     34         # to get Referer url for genvisitor
     35         webpage, urlh = self._download_webpage_handle(url, video_id)
     36 
     37         visitor_url = urlh.geturl()
     38 
     39         if 'passport.weibo.com' in visitor_url:
     40             # first visit
     41             visitor_data = self._download_json(
     42                 'https://passport.weibo.com/visitor/genvisitor', video_id,
     43                 note='Generating first-visit data',
     44                 transform_source=strip_jsonp,
     45                 headers={'Referer': visitor_url},
     46                 data=urlencode_postdata({
     47                     'cb': 'gen_callback',
     48                     'fp': json.dumps({
     49                         'os': '2',
     50                         'browser': 'Gecko57,0,0,0',
     51                         'fonts': 'undefined',
     52                         'screenInfo': '1440*900*24',
     53                         'plugins': '',
     54                     }),
     55                 }))
     56 
     57             tid = visitor_data['data']['tid']
     58             cnfd = '%03d' % visitor_data['data']['confidence']
     59 
     60             self._download_webpage(
     61                 'https://passport.weibo.com/visitor/visitor', video_id,
     62                 note='Running first-visit callback',
     63                 query={
     64                     'a': 'incarnate',
     65                     't': tid,
     66                     'w': 2,
     67                     'c': cnfd,
     68                     'cb': 'cross_domain',
     69                     'from': 'weibo',
     70                     '_rand': random.random(),
     71                 })
     72 
     73             webpage = self._download_webpage(
     74                 url, video_id, note='Revisiting webpage')
     75 
     76         title = self._html_search_regex(
     77             r'<title>(.+?)</title>', webpage, 'title')
     78 
     79         video_formats = compat_parse_qs(self._search_regex(
     80             r'video-sources=\\\"(.+?)\"', webpage, 'video_sources'))
     81 
     82         formats = []
     83         supported_resolutions = (480, 720)
     84         for res in supported_resolutions:
     85             vid_urls = video_formats.get(compat_str(res))
     86             if not vid_urls or not isinstance(vid_urls, list):
     87                 continue
     88 
     89             vid_url = vid_urls[0]
     90             formats.append({
     91                 'url': vid_url,
     92                 'height': res,
     93             })
     94 
     95         self._sort_formats(formats)
     96 
     97         uploader = self._og_search_property(
     98             'nick-name', webpage, 'uploader', default=None)
     99 
    100         return {
    101             'id': video_id,
    102             'title': title,
    103             'uploader': uploader,
    104             'formats': formats
    105         }
    106 
    107 
    108 class WeiboMobileIE(InfoExtractor):
    109     _VALID_URL = r'https?://m\.weibo\.cn/status/(?P<id>[0-9]+)(\?.+)?'
    110     _TEST = {
    111         'url': 'https://m.weibo.cn/status/4189191225395228?wm=3333_2001&sourcetype=weixin&featurecode=newtitle&from=singlemessage&isappinstalled=0',
    112         'info_dict': {
    113             'id': '4189191225395228',
    114             'ext': 'mp4',
    115             'title': '午睡当然是要甜甜蜜蜜的啦',
    116             'uploader': '柴犬柴犬'
    117         }
    118     }
    119 
    120     def _real_extract(self, url):
    121         video_id = self._match_id(url)
    122         # to get Referer url for genvisitor
    123         webpage = self._download_webpage(url, video_id, note='visit the page')
    124 
    125         weibo_info = self._parse_json(self._search_regex(
    126             r'var\s+\$render_data\s*=\s*\[({.*})\]\[0\]\s*\|\|\s*{};',
    127             webpage, 'js_code', flags=re.DOTALL),
    128             video_id, transform_source=js_to_json)
    129 
    130         status_data = weibo_info.get('status', {})
    131         page_info = status_data.get('page_info')
    132         title = status_data['status_title']
    133         uploader = status_data.get('user', {}).get('screen_name')
    134 
    135         return {
    136             'id': video_id,
    137             'title': title,
    138             'uploader': uploader,
    139             'url': page_info['media_info']['stream_url']
    140         }