line.py (8423B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..compat import compat_str 8 from ..utils import ( 9 ExtractorError, 10 int_or_none, 11 js_to_json, 12 str_or_none, 13 ) 14 15 16 class LineTVIE(InfoExtractor): 17 _VALID_URL = r'https?://tv\.line\.me/v/(?P<id>\d+)_[^/]+-(?P<segment>ep\d+-\d+)' 18 19 _TESTS = [{ 20 'url': 'https://tv.line.me/v/793123_goodbye-mrblack-ep1-1/list/69246', 21 'info_dict': { 22 'id': '793123_ep1-1', 23 'ext': 'mp4', 24 'title': 'Goodbye Mr.Black | EP.1-1', 25 'thumbnail': r're:^https?://.*\.jpg$', 26 'duration': 998.509, 27 'view_count': int, 28 }, 29 }, { 30 'url': 'https://tv.line.me/v/2587507_%E6%B4%BE%E9%81%A3%E5%A5%B3%E9%86%ABx-ep1-02/list/185245', 31 'only_matching': True, 32 }] 33 34 def _real_extract(self, url): 35 series_id, segment = re.match(self._VALID_URL, url).groups() 36 video_id = '%s_%s' % (series_id, segment) 37 38 webpage = self._download_webpage(url, video_id) 39 40 player_params = self._parse_json(self._search_regex( 41 r'naver\.WebPlayer\(({[^}]+})\)', webpage, 'player parameters'), 42 video_id, transform_source=js_to_json) 43 44 video_info = self._download_json( 45 'https://global-nvapis.line.me/linetv/rmcnmv/vod_play_videoInfo.json', 46 video_id, query={ 47 'videoId': player_params['videoId'], 48 'key': player_params['key'], 49 }) 50 51 stream = video_info['streams'][0] 52 extra_query = '?__gda__=' + stream['key']['value'] 53 formats = self._extract_m3u8_formats( 54 stream['source'] + extra_query, video_id, ext='mp4', 55 entry_protocol='m3u8_native', m3u8_id='hls') 56 57 for a_format in formats: 58 a_format['url'] += extra_query 59 60 duration = None 61 for video in video_info.get('videos', {}).get('list', []): 62 encoding_option = video.get('encodingOption', {}) 63 abr = video['bitrate']['audio'] 64 vbr = video['bitrate']['video'] 65 tbr = abr + vbr 66 formats.append({ 67 'url': video['source'], 68 'format_id': 'http-%d' % int(tbr), 69 'height': encoding_option.get('height'), 70 'width': encoding_option.get('width'), 71 'abr': abr, 72 'vbr': vbr, 73 'filesize': video.get('size'), 74 }) 75 if video.get('duration') and duration is None: 76 duration = video['duration'] 77 78 self._sort_formats(formats) 79 80 if not formats[0].get('width'): 81 formats[0]['vcodec'] = 'none' 82 83 title = self._og_search_title(webpage) 84 85 # like_count requires an additional API request https://tv.line.me/api/likeit/getCount 86 87 return { 88 'id': video_id, 89 'title': title, 90 'formats': formats, 91 'extra_param_to_segment_url': extra_query[1:], 92 'duration': duration, 93 'thumbnails': [{'url': thumbnail['source']} 94 for thumbnail in video_info.get('thumbnails', {}).get('list', [])], 95 'view_count': video_info.get('meta', {}).get('count'), 96 } 97 98 99 class LineLiveBaseIE(InfoExtractor): 100 _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/' 101 102 def _parse_broadcast_item(self, item): 103 broadcast_id = compat_str(item['id']) 104 title = item['title'] 105 is_live = item.get('isBroadcastingNow') 106 107 thumbnails = [] 108 for thumbnail_id, thumbnail_url in (item.get('thumbnailURLs') or {}).items(): 109 if not thumbnail_url: 110 continue 111 thumbnails.append({ 112 'id': thumbnail_id, 113 'url': thumbnail_url, 114 }) 115 116 channel = item.get('channel') or {} 117 channel_id = str_or_none(channel.get('id')) 118 119 return { 120 'id': broadcast_id, 121 'title': self._live_title(title) if is_live else title, 122 'thumbnails': thumbnails, 123 'timestamp': int_or_none(item.get('createdAt')), 124 'channel': channel.get('name'), 125 'channel_id': channel_id, 126 'channel_url': 'https://live.line.me/channels/' + channel_id if channel_id else None, 127 'duration': int_or_none(item.get('archiveDuration')), 128 'view_count': int_or_none(item.get('viewerCount')), 129 'comment_count': int_or_none(item.get('chatCount')), 130 'is_live': is_live, 131 } 132 133 134 class LineLiveIE(LineLiveBaseIE): 135 _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)' 136 _TESTS = [{ 137 'url': 'https://live.line.me/channels/4867368/broadcast/16331360', 138 'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3', 139 'info_dict': { 140 'id': '16331360', 141 'title': '振りコピ講座😙😙😙', 142 'ext': 'mp4', 143 'timestamp': 1617095132, 144 'upload_date': '20210330', 145 'channel': '白川ゆめか', 146 'channel_id': '4867368', 147 'view_count': int, 148 'comment_count': int, 149 'is_live': False, 150 } 151 }, { 152 # archiveStatus == 'DELETED' 153 'url': 'https://live.line.me/channels/4778159/broadcast/16378488', 154 'only_matching': True, 155 }] 156 157 def _real_extract(self, url): 158 channel_id, broadcast_id = re.match(self._VALID_URL, url).groups() 159 broadcast = self._download_json( 160 self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id), 161 broadcast_id) 162 item = broadcast['item'] 163 info = self._parse_broadcast_item(item) 164 protocol = 'm3u8' if info['is_live'] else 'm3u8_native' 165 formats = [] 166 for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items(): 167 if not v: 168 continue 169 if k == 'abr': 170 formats.extend(self._extract_m3u8_formats( 171 v, broadcast_id, 'mp4', protocol, 172 m3u8_id='hls', fatal=False)) 173 continue 174 f = { 175 'ext': 'mp4', 176 'format_id': 'hls-' + k, 177 'protocol': protocol, 178 'url': v, 179 } 180 if not k.isdigit(): 181 f['vcodec'] = 'none' 182 formats.append(f) 183 if not formats: 184 archive_status = item.get('archiveStatus') 185 if archive_status != 'ARCHIVED': 186 raise ExtractorError('this video has been ' + archive_status.lower(), expected=True) 187 self._sort_formats(formats) 188 info['formats'] = formats 189 return info 190 191 192 class LineLiveChannelIE(LineLiveBaseIE): 193 _VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)' 194 _TEST = { 195 'url': 'https://live.line.me/channels/5893542', 196 'info_dict': { 197 'id': '5893542', 198 'title': 'いくらちゃん', 199 'description': 'md5:c3a4af801f43b2fac0b02294976580be', 200 }, 201 'playlist_mincount': 29 202 } 203 204 def _archived_broadcasts_entries(self, archived_broadcasts, channel_id): 205 while True: 206 for row in (archived_broadcasts.get('rows') or []): 207 share_url = str_or_none(row.get('shareURL')) 208 if not share_url: 209 continue 210 info = self._parse_broadcast_item(row) 211 info.update({ 212 '_type': 'url', 213 'url': share_url, 214 'ie_key': LineLiveIE.ie_key(), 215 }) 216 yield info 217 if not archived_broadcasts.get('hasNextPage'): 218 return 219 archived_broadcasts = self._download_json( 220 self._API_BASE_URL + channel_id + '/archived_broadcasts', 221 channel_id, query={ 222 'lastId': info['id'], 223 }) 224 225 def _real_extract(self, url): 226 channel_id = self._match_id(url) 227 channel = self._download_json(self._API_BASE_URL + channel_id, channel_id) 228 return self.playlist_result( 229 self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id), 230 channel_id, channel.get('title'), channel.get('information'))