appleconnect.py (1909B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    str_to_int,
    ExtractorError
)


class AppleConnectIE(InfoExtractor):
    """Extractor for video posts served from itunes.apple.com ``post`` URLs."""

    # Accepts an optional path segment of up to two word characters (e.g.
    # ``us``) before ``post/``, and both the ``idsa.<id>`` and ``sa.<id>``
    # spellings of the post identifier.
    _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
        'md5': 'c1d41f72c8bcaf222e089434619316e4',
        'info_dict': {
            'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
            'ext': 'm4v',
            'title': 'Energy',
            'uploader': 'Drake',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150710',
            'timestamp': 1436545535,
        },
    }, {
        'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the video embedded in the post at *url*.

        Returns an info dict holding the media URL and title, plus any
        optional metadata (description, uploader, thumbnail, timestamp,
        like count) that the page provides; absent optional values are
        reported as None.

        Raises ExtractorError (marked as expected) when the page carries no
        ``auc-video-data`` JSON blob, i.e. the post does not contain a video.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # default=None turns a failed match into a None result, so the
        # generic "unable to extract" error can be replaced with a message
        # that tells the user what is actually wrong with the post.
        video_json = self._html_search_regex(
            r'class="auc-video-data">(\{.*?\})', webpage, 'json', default=None)
        if not video_json:
            raise ExtractorError('This post doesn\'t contain a video', expected=True)

        video_data = self._parse_json(video_json, video_id)

        # Timestamp and like count are optional metadata: a page lacking
        # either must not abort an otherwise successful extraction.
        timestamp = str_to_int(self._html_search_regex(
            r'data-timestamp="(\d+)"', webpage, 'timestamp', default=None))
        like_count = str_to_int(self._html_search_regex(
            r'(\d+) Loves', webpage, 'like count', default=None))

        return {
            'id': video_id,
            # 'url' and 'title' are mandatory, so a missing key should fail.
            'url': video_data['sslSrc'],
            'title': video_data['title'],
            # The remaining fields are optional; .get() avoids a KeyError
            # when the JSON blob omits them.
            'description': video_data.get('description'),
            'uploader': video_data.get('artistName'),
            'thumbnail': video_data.get('artworkUrl'),
            'timestamp': timestamp,
            'like_count': like_count,
        }