radiojavan.py (2761B)
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    parse_resolution,
    str_to_int,
    unified_strdate,
    urlencode_postdata,
    urljoin,
)


class RadioJavanIE(InfoExtractor):
    """Extractor for video pages under radiojavan.com/videos/video/<id>."""

    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
    _TEST = {
        'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
        'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
        'info_dict': {
            'id': 'chaartaar-ashoobam',
            'ext': 'mp4',
            'title': 'Chaartaar - Ashoobam',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'upload_date': '20150215',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        }
    }

    # Host used to build media URLs when the video_host endpoint does not
    # supply a usable one.
    _DEFAULT_HOST = 'https://host1.rjmusicmedia.com'

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        The video id is taken from the URL. The media host is requested from
        the site's ``video_host`` endpoint, the per-quality paths are read
        from ``RJ.video<quality>`` assignments in the page, and the title and
        thumbnail come from the page's Open Graph data. The upload date and
        the play/like/dislike counters are looked up non-fatally.
        """
        video_id = self._match_id(url)

        host_info = self._download_json(
            'https://www.radiojavan.com/videos/video_host', video_id,
            data=urlencode_postdata({'id': video_id}),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'Referer': url,
            })
        # dict.get(key, default) only falls back when the key is missing, so
        # a null/empty "host" value (or a reply that is not a JSON object)
        # must be handled explicitly to still end up on the default host.
        reported_host = (
            host_info.get('host') if isinstance(host_info, dict) else None)
        download_host = reported_host or self._DEFAULT_HOST

        webpage = self._download_webpage(url, video_id)

        formats = []
        # Each match is (format_id, quote_char, path); the quote character is
        # captured only so the pattern can find the matching closing quote.
        for format_id, _, video_path in re.findall(
                r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage):
            video_url = urljoin(download_host, video_path)
            if not video_url:
                # Never hand a format without a URL to the sorter/downloader.
                continue
            f = parse_resolution(format_id)
            f.update({
                'url': video_url,
                'format_id': format_id,
            })
            formats.append(f)
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        upload_date = unified_strdate(self._search_regex(
            r'class="date_added">Date added: ([^<]+)<',
            webpage, 'upload date', fatal=False))

        view_count = str_to_int(self._search_regex(
            r'class="views">Plays: ([\d,]+)',
            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) likes',
            webpage, 'like count', fatal=False))
        dislike_count = str_to_int(self._search_regex(
            r'class="rating">([\d,]+) dislikes',
            webpage, 'dislike count', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }