raywenderlich.py (6169B)
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from .vimeo import VimeoIE
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    int_or_none,
    merge_dicts,
    try_get,
    unescapeHTML,
    unified_timestamp,
    urljoin,
)


class RayWenderlichIE(InfoExtractor):
    """Extract a single lesson page and hand playback over to VimeoIE."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            videos\.raywenderlich\.com/courses|
                            (?:www\.)?raywenderlich\.com
                        )/
                        (?P<course_id>[^/]+)/lessons/(?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
        'info_dict': {
            'id': '248377018',
            'ext': 'mp4',
            'title': 'Introduction',
            'description': 'md5:804d031b3efa9fcb49777d512d74f722',
            'timestamp': 1513906277,
            'upload_date': '20171222',
            'duration': 133,
            'uploader': 'Ray Wenderlich',
            'uploader_id': 'user3304672',
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
        'add_ie': [VimeoIE.ie_key()],
        'expected_warnings': ['HTTP Error 403: Forbidden'],
    }, {
        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_video_id(data, lesson_id):
        """Look up the video identifier of a lesson in the collection data.

        Walks ``data['groups'][*]['contents'][*]`` and returns the
        ``identifier`` of the first content entry whose ``ordinal`` equals
        the lesson number.

        :param data: parsed ``data-collection`` JSON (may be None or empty).
        :param lesson_id: lesson number; accepted as int or numeric string,
            since the caller passes the string captured from the URL.
        :return: the identifier as a text string, or None when no entry
            matches or the data is missing or malformed.
        """
        if not data:
            return
        # The URL regex yields a string while ordinals are normalised to
        # int below; compare like with like, otherwise nothing ever matches.
        lesson_ordinal = int_or_none(lesson_id)
        if lesson_ordinal is None:
            return
        groups = try_get(data, lambda x: x['groups'], list) or []
        for group in groups:
            if not isinstance(group, dict):
                continue
            # Contents are read from each group, not from the top-level data.
            contents = try_get(group, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                if int_or_none(content.get('ordinal')) != lesson_ordinal:
                    continue
                video_id = content.get('identifier')
                if video_id:
                    return compat_str(video_id)

    def _real_extract(self, url):
        """Build a ``url_transparent`` result pointing at the Vimeo player.

        The Vimeo id is taken from a ``data-vimeo-id`` attribute when the
        page has one; otherwise the site's video JSON API is queried, which
        also supplies title, description, duration and timestamp.

        :raises ExtractorError: (expected) when the page shows the
            subscriber-only marker.
        """
        mobj = re.match(self._VALID_URL, url)
        course_id, lesson_id = mobj.group('course_id', 'id')
        display_id = '%s/%s' % (course_id, lesson_id)

        webpage = self._download_webpage(url, display_id)

        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'twitter:image', webpage, 'thumbnail')

        if '>Subscribe to unlock' in webpage:
            raise ExtractorError(
                'This content is only available for subscribers',
                expected=True)

        info = {
            'thumbnail': thumbnail,
        }

        vimeo_id = self._search_regex(
            r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)

        if not vimeo_id:
            data = self._parse_json(
                self._search_regex(
                    r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
                    'data collection', default='{}', group='data'),
                display_id, transform_source=unescapeHTML, fatal=False)
            # Fall back to the numeric id embedded in the thumbnail URL.
            # `thumbnail` may be None; searching an empty string instead lets
            # _search_regex report the failure rather than re raising a
            # TypeError on a non-string argument.
            video_id = self._extract_video_id(
                data, lesson_id) or self._search_regex(
                r'/videos/(\d+)/', thumbnail or '', 'video id')
            headers = {
                'Referer': url,
                'X-Requested-With': 'XMLHttpRequest',
            }
            csrf_token = self._html_search_meta(
                'csrf-token', webpage, 'csrf token', default=None)
            if csrf_token:
                headers['X-CSRF-Token'] = csrf_token
            video = self._download_json(
                'https://videos.raywenderlich.com/api/v1/videos/%s.json'
                % video_id, display_id, headers=headers)['video']
            vimeo_id = video['clips'][0]['provider_id']
            info.update({
                '_type': 'url_transparent',
                'title': video.get('name'),
                'description': video.get('description') or video.get(
                    'meta_description'),
                'duration': int_or_none(video.get('duration')),
                'timestamp': unified_timestamp(video.get('created_at')),
            })

        return merge_dicts(info, self.url_result(
            VimeoIE._smuggle_referrer(
                'https://player.vimeo.com/video/%s' % vimeo_id, url),
            ie=VimeoIE.ie_key(), video_id=vimeo_id))


class RayWenderlichCourseIE(InfoExtractor):
    """Extract a course page as a playlist of its lesson pages."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            videos\.raywenderlich\.com/courses|
                            (?:www\.)?raywenderlich\.com
                        )/
                        (?P<id>[^/]+)
                    '''

    _TEST = {
        'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
        'info_dict': {
            'title': 'Testing in iOS',
            'id': '3530-testing-in-ios',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 29,
    }

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the single-lesson extractor already matches."""
        return False if RayWenderlichIE.suitable(url) else super(
            RayWenderlichCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect the lesson links on the course page into a playlist."""
        course_id = self._match_id(url)

        webpage = self._download_webpage(url, course_id)

        entries = []
        # Track links already emitted so each lesson appears once, in the
        # order it is first found on the page.
        lesson_urls = set()
        for lesson_url in re.findall(
                r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage):
            if lesson_url in lesson_urls:
                continue
            lesson_urls.add(lesson_url)
            entries.append(self.url_result(
                urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))

        title = self._og_search_title(
            webpage, default=None) or self._html_search_meta(
            'twitter:title', webpage, 'title', default=None)

        return self.playlist_result(entries, course_id, title)