ynet.py (1807B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 import json 6 7 from .common import InfoExtractor 8 from ..compat import compat_urllib_parse_unquote_plus 9 10 11 class YnetIE(InfoExtractor): 12 _VALID_URL = r'https?://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html' 13 _TESTS = [ 14 { 15 'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html', 16 'info_dict': { 17 'id': 'L-11659-99244', 18 'ext': 'flv', 19 'title': 'איש לא יודע מאיפה באנו', 20 'thumbnail': r're:^https?://.*\.jpg', 21 } 22 }, { 23 'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html', 24 'info_dict': { 25 'id': 'L-8859-84418', 26 'ext': 'flv', 27 'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין", 28 'thumbnail': r're:^https?://.*\.jpg', 29 } 30 } 31 ] 32 33 def _real_extract(self, url): 34 video_id = self._match_id(url) 35 webpage = self._download_webpage(url, video_id) 36 37 content = compat_urllib_parse_unquote_plus(self._og_search_video_url(webpage)) 38 config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config')) 39 f4m_url = config['clip']['url'] 40 title = self._og_search_title(webpage) 41 m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title) 42 if m: 43 title = m.group('title') 44 formats = self._extract_f4m_formats(f4m_url, video_id) 45 self._sort_formats(formats) 46 47 return { 48 'id': video_id, 49 'title': title, 50 'formats': formats, 51 'thumbnail': self._og_search_thumbnail(webpage), 52 }