theintercept.py (1801B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    parse_iso8601,
    int_or_none,
    ExtractorError,
)


# Extractor for pages matching theintercept.com/fieldofvision/<slug>.
class TheInterceptIE(InfoExtractor):
    # The ``id`` group captures the path segment after /fieldofvision/; it is
    # compared against each post's ``slug`` in _real_extract.
    _VALID_URL = r'https?://theintercept\.com/fieldofvision/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/',
        'md5': '145f28b41d44aab2f87c0a4ac8ec95bd',
        'info_dict': {
            'id': '46214',
            'ext': 'mp4',
            'title': '#ThisIsACoup – Episode Four: Surrender or Die',
            'description': 'md5:74dd27f0e2fbd50817829f97eaa33140',
            'timestamp': 1450429239,
            'upload_date': '20151218',
            'comment_count': int,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the post whose slug appears in ``url``.

        Downloads the page, parses the JSON object assigned to
        ``initialStoreTree`` and looks through ``resources.posts`` for the
        entry whose ``slug`` equals the id captured by ``_VALID_URL``.

        Returns a ``url_transparent`` result pointing at a ``jwplatform:``
        URL built from the post's ``fov_videoid``, together with the metadata
        read from the post.

        Raises ExtractorError when no post carries the requested slug.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # Pull the raw JSON text out of the page first, then decode it.
        store_tree_json = self._search_regex(
            r'initialStoreTree\s*=\s*(?P<json_data>{.+})', page,
            'initialStoreTree')
        store_tree = self._parse_json(store_tree_json, slug)

        # First post whose slug matches, or None when there is no such post.
        posts = store_tree['resources']['posts']
        entry = next(
            (candidate for candidate in posts.values()
             if candidate['slug'] == slug),
            None)
        if entry is None:
            raise ExtractorError('Unable to find the current post')

        # 'fov_videoid', 'ID' and 'title' are required keys; the remaining
        # fields are optional and read with .get().
        return {
            '_type': 'url_transparent',
            'url': 'jwplatform:%s' % entry['fov_videoid'],
            'id': compat_str(entry['ID']),
            'display_id': slug,
            'title': entry['title'],
            'description': entry.get('excerpt'),
            'timestamp': parse_iso8601(entry.get('date')),
            'comment_count': int_or_none(entry.get('comments_number')),
        }