# abcnews.py (6400B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import re 5 6 from .amp import AMPIE 7 from .common import InfoExtractor 8 from ..utils import ( 9 parse_duration, 10 parse_iso8601, 11 try_get, 12 ) 13 14 15 class AbcNewsVideoIE(AMPIE): 16 IE_NAME = 'abcnews:video' 17 _VALID_URL = r'''(?x) 18 https?:// 19 (?: 20 abcnews\.go\.com/ 21 (?: 22 (?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-| 23 video/(?:embed|itemfeed)\?.*?\bid= 24 )| 25 fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/ 26 ) 27 (?P<id>\d+) 28 ''' 29 30 _TESTS = [{ 31 'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932', 32 'info_dict': { 33 'id': '20411932', 34 'ext': 'mp4', 35 'display_id': 'week-exclusive-irans-foreign-minister-zarif', 36 'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif', 37 'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.', 38 'duration': 180, 39 'thumbnail': r're:^https?://.*\.jpg$', 40 'timestamp': 1380454200, 41 'upload_date': '20130929', 42 }, 43 'params': { 44 # m3u8 download 45 'skip_download': True, 46 }, 47 }, { 48 'url': 'http://abcnews.go.com/video/embed?id=46979033', 49 'only_matching': True, 50 }, { 51 'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478', 52 'only_matching': True, 53 }, { 54 'url': 'http://abcnews.go.com/video/itemfeed?id=46979033', 55 'only_matching': True, 56 }, { 57 'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761', 58 'only_matching': True, 59 }] 60 61 def _real_extract(self, url): 62 mobj = re.match(self._VALID_URL, url) 63 display_id = mobj.group('display_id') 64 video_id = mobj.group('id') 65 info_dict = self._extract_feed_info( 66 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id) 67 info_dict.update({ 68 'id': video_id, 69 'display_id': display_id, 70 }) 71 return info_dict 72 73 74 class AbcNewsIE(InfoExtractor): 75 IE_NAME = 'abcnews' 
76 _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)' 77 78 _TESTS = [{ 79 # Youtube Embeds 80 'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501', 81 'info_dict': { 82 'id': '51286501', 83 'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player", 84 'description': 'Billingsley went from a child actor to Hollywood power player.', 85 }, 86 'playlist_count': 5, 87 }, { 88 'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818', 89 'info_dict': { 90 'id': '38897857', 91 'ext': 'mp4', 92 'title': 'Justin Timberlake Drops Hints For Secret Single', 93 'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.', 94 'upload_date': '20160505', 95 'timestamp': 1462442280, 96 }, 97 'params': { 98 # m3u8 download 99 'skip_download': True, 100 # The embedded YouTube video is blocked due to copyright issues 101 'playlist_items': '1', 102 }, 103 'add_ie': ['AbcNewsVideo'], 104 }, { 105 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343', 106 'only_matching': True, 107 }, { 108 # inline.type == 'video' 109 'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343', 110 'only_matching': True, 111 }] 112 113 def _real_extract(self, url): 114 story_id = self._match_id(url) 115 webpage = self._download_webpage(url, story_id) 116 story = self._parse_json(self._search_regex( 117 r"window\['__abcnews__'\]\s*=\s*({.+?});", 118 webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0] 119 article_contents = story.get('articleContents') or {} 120 121 def entries(): 122 featured_video = story.get('featuredVideo') or {} 123 feed = try_get(featured_video, lambda x: x['video']['feed']) 124 if feed: 125 yield { 126 
'_type': 'url', 127 'id': featured_video.get('id'), 128 'title': featured_video.get('name'), 129 'url': feed, 130 'thumbnail': featured_video.get('images'), 131 'description': featured_video.get('description'), 132 'timestamp': parse_iso8601(featured_video.get('uploadDate')), 133 'duration': parse_duration(featured_video.get('duration')), 134 'ie_key': AbcNewsVideoIE.ie_key(), 135 } 136 137 for inline in (article_contents.get('inlines') or []): 138 inline_type = inline.get('type') 139 if inline_type == 'iframe': 140 iframe_url = try_get(inline, lambda x: x['attrs']['src']) 141 if iframe_url: 142 yield self.url_result(iframe_url) 143 elif inline_type == 'video': 144 video_id = inline.get('id') 145 if video_id: 146 yield { 147 '_type': 'url', 148 'id': video_id, 149 'url': 'http://abcnews.go.com/video/embed?id=' + video_id, 150 'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'), 151 'description': inline.get('description'), 152 'duration': parse_duration(inline.get('duration')), 153 'ie_key': AbcNewsVideoIE.ie_key(), 154 } 155 156 return self.playlist_result( 157 entries(), story_id, article_contents.get('headline'), 158 article_contents.get('subHead'))