njpwworld.py (3444B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .common import InfoExtractor 7 from ..compat import compat_urlparse 8 from ..utils import ( 9 get_element_by_class, 10 urlencode_postdata, 11 ) 12 13 14 class NJPWWorldIE(InfoExtractor): 15 _VALID_URL = r'https?://(front\.)?njpwworld\.com/p/(?P<id>[a-z0-9_]+)' 16 IE_DESC = '新日本プロレスワールド' 17 _NETRC_MACHINE = 'njpwworld' 18 19 _TESTS = [{ 20 'url': 'http://njpwworld.com/p/s_series_00155_1_9/', 21 'info_dict': { 22 'id': 's_series_00155_1_9', 23 'ext': 'mp4', 24 'title': '闘強導夢2000 2000年1月4日 東京ドーム 第9試合 ランディ・サベージ VS リック・スタイナー', 25 'tags': list, 26 }, 27 'params': { 28 'skip_download': True, # AES-encrypted m3u8 29 }, 30 'skip': 'Requires login', 31 }, { 32 'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs', 33 'info_dict': { 34 'id': 's_series_00563_16_bs', 35 'ext': 'mp4', 36 'title': 'WORLD TAG LEAGUE 2020 & BEST OF THE SUPER Jr.27 2020年12月6日 福岡・福岡国際センター バックステージコメント(字幕あり)', 37 'tags': ["福岡・福岡国際センター", "バックステージコメント", "2020", "20年代"], 38 }, 39 'params': { 40 'skip_download': True, 41 }, 42 }] 43 44 _LOGIN_URL = 'https://front.njpwworld.com/auth/login' 45 46 def _real_initialize(self): 47 self._login() 48 49 def _login(self): 50 username, password = self._get_login_info() 51 # No authentication to be performed 52 if not username: 53 return True 54 55 # Setup session (will set necessary cookies) 56 self._request_webpage( 57 'https://njpwworld.com/', None, note='Setting up session') 58 59 webpage, urlh = self._download_webpage_handle( 60 self._LOGIN_URL, None, 61 note='Logging in', errnote='Unable to login', 62 data=urlencode_postdata({'login_id': username, 'pw': password}), 63 headers={'Referer': 'https://front.njpwworld.com/auth'}) 64 # /auth/login will return 302 for successful logins 65 if urlh.geturl() == self._LOGIN_URL: 66 self.report_warning('unable to login') 67 return False 68 69 return True 70 71 def _real_extract(self, url): 72 video_id = self._match_id(url) 73 74 webpage = self._download_webpage(url, video_id) 75 76 formats = [] 77 for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage): 78 player_path = '/intent?id=%s&type=url' % vid 79 player_url = compat_urlparse.urljoin(url, player_path) 80 formats.append({ 81 'url': player_url, 82 'format_id': kind, 83 'ext': 'mp4', 84 'protocol': 'm3u8', 85 'quality': 2 if kind == 'high' else 1, 86 }) 87 88 self._sort_formats(formats) 89 90 tag_block = get_element_by_class('tag-block', webpage) 91 tags = re.findall( 92 r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block 93 ) if tag_block else None 94 95 return { 96 'id': video_id, 97 'title': get_element_by_class('article-title', webpage) or self._og_search_title(webpage), 98 'formats': formats, 99 'tags': tags, 100 }