tiktok.py (5002B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    compat_str,
    ExtractorError,
    float_or_none,
    int_or_none,
    str_or_none,
    try_get,
    url_or_none,
)


class TikTokBaseIE(InfoExtractor):
    """Shared helper for turning a TikTok item dict into an info dict."""

    def _extract_video(self, data, video_id=None):
        """Build the info dict for a single TikTok item.

        :param data: item metadata; the code reads its ``video``, ``desc``,
            ``author``, ``stats``, ``createTime`` and ``id`` keys.
        :param video_id: identifier used when ``data`` carries no ``id``.
        :returns: dict with ``id``, ``title``, ``formats`` and the optional
            metadata fields gathered below.
        :raises ExtractorError: when ``data`` has no ``video`` dict (this
            includes ``data`` being ``None``), so that callers which skip
            entries on ExtractorError do not abort on a malformed item.
        """
        # try_get swallows the KeyError/TypeError that a missing key or a
        # non-dict ``data`` would raise, and rejects a non-dict ``video``.
        video = try_get(data, lambda x: x['video'], dict)
        if not video:
            raise ExtractorError('Unable to extract video data')

        description = str_or_none(data.get('desc'))
        width = int_or_none(video.get('width'))
        height = int_or_none(video.get('height'))

        # 'downloadAddr' and 'playAddr' may carry the same URL; keep each
        # distinct URL only once.
        format_urls = set()
        formats = []
        for format_id in ('download', 'play'):
            format_url = url_or_none(video.get('%sAddr' % format_id))
            if not format_url or format_url in format_urls:
                continue
            format_urls.add(format_url)
            formats.append({
                'url': format_url,
                'ext': 'mp4',
                'height': height,
                'width': width,
                'http_headers': {
                    'Referer': 'https://www.tiktok.com/',
                }
            })
        self._sort_formats(formats)

        thumbnail = url_or_none(video.get('cover'))
        duration = float_or_none(video.get('duration'))

        uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
        uploader_id = try_get(data, lambda x: x['author']['id'], compat_str)

        timestamp = int_or_none(data.get('createTime'))

        def stats(key):
            # Counters are stored under data['stats'] as '<key>Count'.
            return int_or_none(try_get(
                data, lambda x: x['stats']['%sCount' % key]))

        view_count = stats('play')
        like_count = stats('digg')
        comment_count = stats('comment')
        repost_count = stats('share')

        aweme_id = data.get('id') or video_id

        return {
            'id': aweme_id,
            # Fall back to the id when the uploader nickname is unavailable.
            'title': uploader or aweme_id,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'timestamp': timestamp,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
            'repost_count': repost_count,
            'formats': formats,
        }


class TikTokIE(TikTokBaseIE):
    """Extractor for single video pages (``/@<user>/video/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@[^/]+/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.tiktok.com/@zureeal/video/6606727368545406213',
        'md5': '163ceff303bb52de60e6887fe399e6cd',
        'info_dict': {
            'id': '6606727368545406213',
            'ext': 'mp4',
            'title': 'Zureeal',
            'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
            'thumbnail': r're:^https?://.*',
            'duration': 15,
            'uploader': 'Zureeal',
            'uploader_id': '188294915489964032',
            'timestamp': 1538248586,
            'upload_date': '20180929',
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'repost_count': int,
        }
    }]

    def _real_initialize(self):
        # Setup session (will set necessary cookies)
        self._request_webpage(
            'https://www.tiktok.com/', None, note='Setting up session')

    def _real_extract(self, url):
        """Download the video page and extract the item embedded in it.

        The item is read from the JSON inside the ``__NEXT_DATA__`` script
        tag, under ``props.pageProps.itemInfo.itemStruct``.

        :raises ExtractorError: with ``expected=True`` when the page reports
            status code 10216 (reported to the user as a private video);
            otherwise by ``_extract_video`` when no item data is present.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        page_props = self._parse_json(self._search_regex(
            r'<script[^>]+\bid=["\']__NEXT_DATA__[^>]+>\s*({.+?})\s*</script',
            webpage, 'data'), video_id)['props']['pageProps']
        data = try_get(page_props, lambda x: x['itemInfo']['itemStruct'], dict)
        if not data and page_props.get('statusCode') == 10216:
            raise ExtractorError('This video is private', expected=True)
        # A missing item with any other status is rejected inside
        # _extract_video with an ExtractorError.
        return self._extract_video(data, video_id)


class TikTokUserIE(TikTokBaseIE):
    """Extractor for user pages (``/@<user>``); flagged as not working."""

    _VALID_URL = r'https://(?:www\.)?tiktok\.com/@(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.tiktok.com/@zureeal',
        'info_dict': {
            'id': '188294915489964032',
        },
        'playlist_mincount': 24,
    }]
    _WORKING = False

    @classmethod
    def suitable(cls, url):
        # Single video URLs also match _VALID_URL above; leave those to
        # TikTokIE.
        return False if TikTokIE.suitable(url) else super(TikTokUserIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist of the items listed for the user in ``url``.

        Items for which ``_extract_video`` raises ExtractorError are skipped.
        """
        user_id = self._match_id(url)
        data = self._download_json(
            'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
            query={'_signature': '_'})
        entries = []
        for aweme in data['aweme_list']:
            try:
                entry = self._extract_video(aweme)
            except ExtractorError:
                continue
            # Attribute each entry to the single-video extractor.
            entry['extractor_key'] = TikTokIE.ie_key()
            entries.append(entry)
        return self.playlist_result(entries, user_id)