amara.py (3583B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from .youtube import YoutubeIE
from .vimeo import VimeoIE
from ..utils import (
    int_or_none,
    parse_iso8601,
    update_url_query,
)


class AmaraIE(InfoExtractor):
    """Extractor for amara.org video pages.

    Metadata and subtitle links are read from Amara's JSON API. When the
    first URL listed for the video is one that the YouTube or Vimeo
    extractor declares suitable, the result is tagged as 'url_transparent'
    with that extractor's key; otherwise the URL is returned as-is.
    """

    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
    _TESTS = [{
        # Video hosted on YouTube
        'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
        'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
        'info_dict': {
            'id': 'h6ZuVdvYnfE',
            'ext': 'mp4',
            'title': 'Why jury trials are becoming less common',
            'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20160813',
            'uploader': 'PBS NewsHour',
            'uploader_id': 'PBSNewsHour',
            'timestamp': 1549639570,
        }
    }, {
        # Video hosted on Vimeo
        'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
        'md5': '99392c75fa05d432a8f11df03612195e',
        'info_dict': {
            'id': '18622084',
            'ext': 'mov',
            'title': 'Vimeo at CES 2011!',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'timestamp': 1294763658,
            'upload_date': '20110111',
            'uploader': 'Sam Morrill',
            'uploader_id': 'sammorrill'
        }
    }, {
        # Direct media link (handled without delegating to another extractor)
        'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
        'md5': 'd3970f08512738ee60c5807311ff5d3f',
        'info_dict': {
            'id': 's8KL7I3jLmh6',
            'ext': 'mp4',
            'title': 'The danger of a single story',
            'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20091007',
            'timestamp': 1254942511,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the Amara video referenced by *url*.

        The video id is taken from the URL, the matching API record is
        downloaded as JSON, and the first entry of its 'all_urls' list is
        used as the media URL. Every language entry that has both a
        'subtitles_uri' and a truthy 'published' flag contributes one
        subtitle entry per format (json, srt, vtt), grouped under the
        language code ('en' when the code is missing or empty).

        Raises KeyError if the record lacks 'title' or 'all_urls', and
        IndexError if 'all_urls' is an empty list.
        """
        video_id = self._match_id(url)
        api_url = 'https://amara.org/api/videos/%s/' % video_id
        meta = self._download_json(api_url, video_id, query={'format': 'json'})

        # Mandatory fields: plain indexing so a missing value fails loudly.
        title = meta['title']
        video_url = meta['all_urls'][0]

        subtitles = {}
        for lang_entry in meta.get('languages') or ():
            uri = lang_entry.get('subtitles_uri')
            if not uri or not lang_entry.get('published'):
                continue
            lang_code = lang_entry.get('code') or 'en'
            # Entries sharing a language code accumulate in the same list.
            subtitles.setdefault(lang_code, []).extend([
                {
                    'ext': fmt,
                    'url': update_url_query(uri, {'format': fmt}),
                }
                for fmt in ('json', 'srt', 'vtt')
            ])

        info = {
            'url': video_url,
            'id': video_id,
            'subtitles': subtitles,
            'title': title,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail'),
            'duration': int_or_none(meta.get('duration')),
            'timestamp': parse_iso8601(meta.get('created')),
        }

        # First extractor (YouTube is checked before Vimeo) that accepts the
        # media URL, or None when it should be treated as a direct link.
        delegate = next(
            (ie for ie in (YoutubeIE, VimeoIE) if ie.suitable(video_url)),
            None)
        if delegate is not None:
            info['_type'] = 'url_transparent'
            info['ie_key'] = delegate.ie_key()

        return info