zhihu.py (2626B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import float_or_none, int_or_none


class ZhihuIE(InfoExtractor):
    """Extractor for zhihu.com ``/zvideo/<numeric id>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?zhihu\.com/zvideo/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.zhihu.com/zvideo/1342930761977176064',
        'md5': 'c8d4c9cd72dd58e6f9bc9c2c84266464',
        'info_dict': {
            'id': '1342930761977176064',
            'ext': 'mp4',
            'title': '写春联也太难了吧!',
            'thumbnail': r're:^https?://.*\.jpg',
            'uploader': '桥半舫',
            'timestamp': 1612959715,
            'upload_date': '20210210',
            'uploader_id': '244ecb13b0fd7daf92235288c8ca3365',
            'duration': 146.333,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single zvideo URL.

        The numeric id is taken from ``url`` and the metadata is fetched as
        JSON from ``https://www.zhihu.com/api/v4/zvideos/<id>``.

        ``title`` is the only field read with a hard lookup, so a response
        without it raises ``KeyError``. Every other field is optional and
        becomes ``None`` (or is left out of a format) when absent.
        """
        video_id = self._match_id(url)
        zvideo = self._download_json(
            'https://www.zhihu.com/api/v4/zvideos/' + video_id, video_id)
        title = zvideo['title']
        # `or {}` also covers an explicit null, not just a missing key.
        video = zvideo.get('video') or {}

        formats = []
        for format_id, q in (video.get('playlist') or {}).items():
            # A null or otherwise malformed entry must not abort the whole
            # extraction; skip it and keep whatever usable formats remain.
            if not isinstance(q, dict):
                continue
            play_url = q.get('url') or q.get('play_url')
            if not play_url:
                continue
            formats.append({
                'asr': int_or_none(q.get('sample_rate')),
                'filesize': int_or_none(q.get('size')),
                'format_id': format_id,
                'fps': int_or_none(q.get('fps')),
                'height': int_or_none(q.get('height')),
                # NOTE(review): passed through unscaled; assumes the API
                # reports bitrate in the unit `tbr` expects - confirm.
                'tbr': float_or_none(q.get('bitrate')),
                'url': play_url,
                'width': int_or_none(q.get('width')),
            })
        self._sort_formats(formats)

        author = zvideo.get('author') or {}
        url_token = author.get('url_token')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video.get('thumbnail') or zvideo.get('image_url'),
            'uploader': author.get('name'),
            'timestamp': int_or_none(zvideo.get('published_at')),
            'uploader_id': author.get('id'),
            # The profile URL is only built when the author has a url_token.
            'uploader_url': 'https://www.zhihu.com/people/' + url_token if url_token else None,
            'duration': float_or_none(video.get('duration')),
            'view_count': int_or_none(zvideo.get('play_count')),
            'like_count': int_or_none(zvideo.get('liked_count')),
            'comment_count': int_or_none(zvideo.get('comment_count')),
        }