youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

line.py (8423B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..compat import compat_str
      8 from ..utils import (
      9     ExtractorError,
     10     int_or_none,
     11     js_to_json,
     12     str_or_none,
     13 )
     14 
     15 
     16 class LineTVIE(InfoExtractor):
     17     _VALID_URL = r'https?://tv\.line\.me/v/(?P<id>\d+)_[^/]+-(?P<segment>ep\d+-\d+)'
     18 
     19     _TESTS = [{
     20         'url': 'https://tv.line.me/v/793123_goodbye-mrblack-ep1-1/list/69246',
     21         'info_dict': {
     22             'id': '793123_ep1-1',
     23             'ext': 'mp4',
     24             'title': 'Goodbye Mr.Black | EP.1-1',
     25             'thumbnail': r're:^https?://.*\.jpg$',
     26             'duration': 998.509,
     27             'view_count': int,
     28         },
     29     }, {
     30         'url': 'https://tv.line.me/v/2587507_%E6%B4%BE%E9%81%A3%E5%A5%B3%E9%86%ABx-ep1-02/list/185245',
     31         'only_matching': True,
     32     }]
     33 
     34     def _real_extract(self, url):
     35         series_id, segment = re.match(self._VALID_URL, url).groups()
     36         video_id = '%s_%s' % (series_id, segment)
     37 
     38         webpage = self._download_webpage(url, video_id)
     39 
     40         player_params = self._parse_json(self._search_regex(
     41             r'naver\.WebPlayer\(({[^}]+})\)', webpage, 'player parameters'),
     42             video_id, transform_source=js_to_json)
     43 
     44         video_info = self._download_json(
     45             'https://global-nvapis.line.me/linetv/rmcnmv/vod_play_videoInfo.json',
     46             video_id, query={
     47                 'videoId': player_params['videoId'],
     48                 'key': player_params['key'],
     49             })
     50 
     51         stream = video_info['streams'][0]
     52         extra_query = '?__gda__=' + stream['key']['value']
     53         formats = self._extract_m3u8_formats(
     54             stream['source'] + extra_query, video_id, ext='mp4',
     55             entry_protocol='m3u8_native', m3u8_id='hls')
     56 
     57         for a_format in formats:
     58             a_format['url'] += extra_query
     59 
     60         duration = None
     61         for video in video_info.get('videos', {}).get('list', []):
     62             encoding_option = video.get('encodingOption', {})
     63             abr = video['bitrate']['audio']
     64             vbr = video['bitrate']['video']
     65             tbr = abr + vbr
     66             formats.append({
     67                 'url': video['source'],
     68                 'format_id': 'http-%d' % int(tbr),
     69                 'height': encoding_option.get('height'),
     70                 'width': encoding_option.get('width'),
     71                 'abr': abr,
     72                 'vbr': vbr,
     73                 'filesize': video.get('size'),
     74             })
     75             if video.get('duration') and duration is None:
     76                 duration = video['duration']
     77 
     78         self._sort_formats(formats)
     79 
     80         if not formats[0].get('width'):
     81             formats[0]['vcodec'] = 'none'
     82 
     83         title = self._og_search_title(webpage)
     84 
     85         # like_count requires an additional API request https://tv.line.me/api/likeit/getCount
     86 
     87         return {
     88             'id': video_id,
     89             'title': title,
     90             'formats': formats,
     91             'extra_param_to_segment_url': extra_query[1:],
     92             'duration': duration,
     93             'thumbnails': [{'url': thumbnail['source']}
     94                            for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
     95             'view_count': video_info.get('meta', {}).get('count'),
     96         }
     97 
     98 
     99 class LineLiveBaseIE(InfoExtractor):
    100     _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'
    101 
    102     def _parse_broadcast_item(self, item):
    103         broadcast_id = compat_str(item['id'])
    104         title = item['title']
    105         is_live = item.get('isBroadcastingNow')
    106 
    107         thumbnails = []
    108         for thumbnail_id, thumbnail_url in (item.get('thumbnailURLs') or {}).items():
    109             if not thumbnail_url:
    110                 continue
    111             thumbnails.append({
    112                 'id': thumbnail_id,
    113                 'url': thumbnail_url,
    114             })
    115 
    116         channel = item.get('channel') or {}
    117         channel_id = str_or_none(channel.get('id'))
    118 
    119         return {
    120             'id': broadcast_id,
    121             'title': self._live_title(title) if is_live else title,
    122             'thumbnails': thumbnails,
    123             'timestamp': int_or_none(item.get('createdAt')),
    124             'channel': channel.get('name'),
    125             'channel_id': channel_id,
    126             'channel_url': 'https://live.line.me/channels/' + channel_id if channel_id else None,
    127             'duration': int_or_none(item.get('archiveDuration')),
    128             'view_count': int_or_none(item.get('viewerCount')),
    129             'comment_count': int_or_none(item.get('chatCount')),
    130             'is_live': is_live,
    131         }
    132 
    133 
    134 class LineLiveIE(LineLiveBaseIE):
    135     _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
    136     _TESTS = [{
    137         'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
    138         'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
    139         'info_dict': {
    140             'id': '16331360',
    141             'title': '振りコピ講座😙😙😙',
    142             'ext': 'mp4',
    143             'timestamp': 1617095132,
    144             'upload_date': '20210330',
    145             'channel': '白川ゆめか',
    146             'channel_id': '4867368',
    147             'view_count': int,
    148             'comment_count': int,
    149             'is_live': False,
    150         }
    151     }, {
    152         # archiveStatus == 'DELETED'
    153         'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
    154         'only_matching': True,
    155     }]
    156 
    157     def _real_extract(self, url):
    158         channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
    159         broadcast = self._download_json(
    160             self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
    161             broadcast_id)
    162         item = broadcast['item']
    163         info = self._parse_broadcast_item(item)
    164         protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
    165         formats = []
    166         for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
    167             if not v:
    168                 continue
    169             if k == 'abr':
    170                 formats.extend(self._extract_m3u8_formats(
    171                     v, broadcast_id, 'mp4', protocol,
    172                     m3u8_id='hls', fatal=False))
    173                 continue
    174             f = {
    175                 'ext': 'mp4',
    176                 'format_id': 'hls-' + k,
    177                 'protocol': protocol,
    178                 'url': v,
    179             }
    180             if not k.isdigit():
    181                 f['vcodec'] = 'none'
    182             formats.append(f)
    183         if not formats:
    184             archive_status = item.get('archiveStatus')
    185             if archive_status != 'ARCHIVED':
    186                 raise ExtractorError('this video has been ' + archive_status.lower(), expected=True)
    187         self._sort_formats(formats)
    188         info['formats'] = formats
    189         return info
    190 
    191 
    192 class LineLiveChannelIE(LineLiveBaseIE):
    193     _VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
    194     _TEST = {
    195         'url': 'https://live.line.me/channels/5893542',
    196         'info_dict': {
    197             'id': '5893542',
    198             'title': 'いくらちゃん',
    199             'description': 'md5:c3a4af801f43b2fac0b02294976580be',
    200         },
    201         'playlist_mincount': 29
    202     }
    203 
    204     def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
    205         while True:
    206             for row in (archived_broadcasts.get('rows') or []):
    207                 share_url = str_or_none(row.get('shareURL'))
    208                 if not share_url:
    209                     continue
    210                 info = self._parse_broadcast_item(row)
    211                 info.update({
    212                     '_type': 'url',
    213                     'url': share_url,
    214                     'ie_key': LineLiveIE.ie_key(),
    215                 })
    216                 yield info
    217             if not archived_broadcasts.get('hasNextPage'):
    218                 return
    219             archived_broadcasts = self._download_json(
    220                 self._API_BASE_URL + channel_id + '/archived_broadcasts',
    221                 channel_id, query={
    222                     'lastId': info['id'],
    223                 })
    224 
    225     def _real_extract(self, url):
    226         channel_id = self._match_id(url)
    227         channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
    228         return self.playlist_result(
    229             self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
    230             channel_id, channel.get('title'), channel.get('information'))