zingmp3.py (5546B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
)


class ZingMp3BaseIE(InfoExtractor):
    """Shared logic for the Zing MP3 extractors.

    Subclasses provide ``_VALID_URL`` (built from ``_VALID_URL_TMPL``) and a
    ``_process_data(data)`` method, which receives the ``data`` member of the
    XHR JSON response and returns the final extraction result.
    """

    # ``%s`` is filled in by subclasses with the URL path segment(s) they handle.
    _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?:%s)/[^/]+/(?P<id>\w+)\.html'
    _GEO_COUNTRIES = ['VN']

    def _extract_item(self, item, fatal):
        """Build an info dict from a single item of the XHR response.

        :param item: dict describing one track/video; must contain ``id`` and
            either ``name`` or ``title``.
        :param fatal: when truthy, an item without any usable format raises;
            when falsy, such an item yields ``None`` so callers can skip it.
        :returns: an info dict, or ``None`` (only when ``fatal`` is falsy and
            no formats were found).
        :raises ExtractorError: when ``fatal`` is truthy and no formats were
            found (a geo restriction error if the server message says so).
        """
        item_id = item['id']
        title = item.get('name') or item['title']

        formats = []
        for k, v in (item.get('source') or {}).items():
            if not v:
                continue
            if k in ('mp4', 'hls'):
                # Video sources are mappings of resolution label -> URL.
                for res, video_url in v.items():
                    if not video_url:
                        continue
                    if k == 'hls':
                        formats.extend(self._extract_m3u8_formats(
                            video_url, item_id, 'mp4',
                            'm3u8_native', m3u8_id=k, fatal=False))
                    else:
                        # k == 'mp4': progressive download; the height is
                        # taken from labels that start with digits + 'p'.
                        formats.append({
                            'format_id': 'mp4-' + res,
                            'url': video_url,
                            'height': int_or_none(self._search_regex(
                                r'^(\d+)p', res, 'resolution', default=None)),
                        })
            else:
                # Any other key is treated as an audio source whose value is
                # the URL; a numeric key doubles as the bitrate.
                formats.append({
                    'ext': 'mp3',
                    'format_id': k,
                    'tbr': int_or_none(k),
                    'url': self._proto_relative_url(v),
                    'vcodec': 'none',
                })
        if not formats:
            if not fatal:
                return
            # ``msg`` is optional: read it defensively so that a response
            # lacking both formats and a message still produces a meaningful
            # extractor error instead of a KeyError.
            msg = item.get('msg')
            if msg == 'Sorry, this content is not available in your country.':
                self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
            if msg:
                raise ExtractorError(msg, expected=True)
            raise ExtractorError('No formats found for %s' % item_id)
        self._sort_formats(formats)

        subtitles = None
        lyric = item.get('lyric')
        if lyric:
            subtitles = {
                'origin': [{
                    'url': lyric,
                }],
            }

        album = item.get('album') or {}

        return {
            'id': item_id,
            'title': title,
            'formats': formats,
            'thumbnail': item.get('thumbnail'),
            'subtitles': subtitles,
            'duration': int_or_none(item.get('duration')),
            'track': title,
            'artist': item.get('artists_names'),
            'album': album.get('name') or album.get('title'),
            'album_artist': album.get('artists_names'),
        }

    def _real_extract(self, url):
        """Download the page, locate its XHR data path and process the JSON.

        The page is always requested from the ``mp3.zing.vn`` host; the
        ``data-xml`` attribute found in it is appended to the XHR endpoint and
        the ``data`` member of the JSON response is handed to the subclass's
        ``_process_data``.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(
            url.replace('://zingmp3.vn/', '://mp3.zing.vn/'),
            page_id, query={'play_song': 1})
        data_path = self._search_regex(
            r'data-xml="([^"]+)', webpage, 'data path')
        return self._process_data(self._download_json(
            'https://mp3.zing.vn/xhr' + data_path, page_id)['data'])


class ZingMp3IE(ZingMp3BaseIE):
    """Extractor for single songs (``bai-hat``) and video clips (``video-clip``)."""

    _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip'
    _TESTS = [{
        'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
        'md5': 'ead7ae13693b3205cbc89536a077daed',
        'info_dict': {
            'id': 'ZWZB9WAB',
            'title': 'Xa Mãi Xa',
            'ext': 'mp3',
            'thumbnail': r're:^https?://.+\.jpg',
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }]
            },
            'duration': 255,
            'track': 'Xa Mãi Xa',
            'artist': 'Bảo Thy',
            'album': 'Special Album',
            'album_artist': 'Bảo Thy',
        },
    }, {
        'url': 'https://mp3.zing.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html',
        'md5': 'e9c972b693aa88301ef981c8151c4343',
        'info_dict': {
            'id': 'ZO8ZF7C7',
            'title': 'Sương Hoa Đưa Lối',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.+\.jpg',
            'duration': 207,
            'track': 'Sương Hoa Đưa Lối',
            'artist': 'K-ICM, RYO',
        },
    }, {
        'url': 'https://zingmp3.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html',
        'only_matching': True,
    }]
    IE_NAME = 'zingmp3'
    IE_DESC = 'mp3.zing.vn'

    def _process_data(self, data):
        """Treat ``data`` as a single item; a missing format is fatal."""
        return self._extract_item(data, True)


class ZingMp3AlbumIE(ZingMp3BaseIE):
    """Extractor for ``album`` and ``playlist`` pages."""

    _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'album|playlist'
    _TESTS = [{
        'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
        'info_dict': {
            '_type': 'playlist',
            'id': 'ZWZBWDAF',
            'title': 'Lâu Đài Tình Ái',
        },
        'playlist_count': 10,
    }, {
        'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
        'only_matching': True,
    }, {
        'url': 'https://zingmp3.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html',
        'only_matching': True,
    }]
    IE_NAME = 'zingmp3:album'

    def _process_data(self, data):
        """Build a playlist result from ``data['items']`` and ``data['info']``.

        Items for which no formats could be extracted are skipped rather than
        aborting the playlist.
        """
        def entries():
            for item in (data.get('items') or []):
                entry = self._extract_item(item, False)
                if entry:
                    yield entry
        info = data.get('info') or {}
        return self.playlist_result(
            entries(), info.get('id'), info.get('name') or info.get('title'))