youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

amara.py (3583B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from .youtube import YoutubeIE
      6 from .vimeo import VimeoIE
      7 from ..utils import (
      8     int_or_none,
      9     parse_iso8601,
     10     update_url_query,
     11 )
     12 
     13 
     14 class AmaraIE(InfoExtractor):
     15     _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
     16     _TESTS = [{
     17         # Youtube
     18         'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
     19         'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
     20         'info_dict': {
     21             'id': 'h6ZuVdvYnfE',
     22             'ext': 'mp4',
     23             'title': 'Why jury trials are becoming less common',
     24             'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
     25             'thumbnail': r're:^https?://.*\.jpg$',
     26             'subtitles': dict,
     27             'upload_date': '20160813',
     28             'uploader': 'PBS NewsHour',
     29             'uploader_id': 'PBSNewsHour',
     30             'timestamp': 1549639570,
     31         }
     32     }, {
     33         # Vimeo
     34         'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
     35         'md5': '99392c75fa05d432a8f11df03612195e',
     36         'info_dict': {
     37             'id': '18622084',
     38             'ext': 'mov',
     39             'title': 'Vimeo at CES 2011!',
     40             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
     41             'thumbnail': r're:^https?://.*\.jpg$',
     42             'subtitles': dict,
     43             'timestamp': 1294763658,
     44             'upload_date': '20110111',
     45             'uploader': 'Sam Morrill',
     46             'uploader_id': 'sammorrill'
     47         }
     48     }, {
     49         # Direct Link
     50         'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
     51         'md5': 'd3970f08512738ee60c5807311ff5d3f',
     52         'info_dict': {
     53             'id': 's8KL7I3jLmh6',
     54             'ext': 'mp4',
     55             'title': 'The danger of a single story',
     56             'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
     57             'thumbnail': r're:^https?://.*\.jpg$',
     58             'subtitles': dict,
     59             'upload_date': '20091007',
     60             'timestamp': 1254942511,
     61         }
     62     }]
     63 
     64     def _real_extract(self, url):
     65         video_id = self._match_id(url)
     66         meta = self._download_json(
     67             'https://amara.org/api/videos/%s/' % video_id,
     68             video_id, query={'format': 'json'})
     69         title = meta['title']
     70         video_url = meta['all_urls'][0]
     71 
     72         subtitles = {}
     73         for language in (meta.get('languages') or []):
     74             subtitles_uri = language.get('subtitles_uri')
     75             if not (subtitles_uri and language.get('published')):
     76                 continue
     77             subtitle = subtitles.setdefault(language.get('code') or 'en', [])
     78             for f in ('json', 'srt', 'vtt'):
     79                 subtitle.append({
     80                     'ext': f,
     81                     'url': update_url_query(subtitles_uri, {'format': f}),
     82                 })
     83 
     84         info = {
     85             'url': video_url,
     86             'id': video_id,
     87             'subtitles': subtitles,
     88             'title': title,
     89             'description': meta.get('description'),
     90             'thumbnail': meta.get('thumbnail'),
     91             'duration': int_or_none(meta.get('duration')),
     92             'timestamp': parse_iso8601(meta.get('created')),
     93         }
     94 
     95         for ie in (YoutubeIE, VimeoIE):
     96             if ie.suitable(video_url):
     97                 info.update({
     98                     '_type': 'url_transparent',
     99                     'ie_key': ie.ie_key(),
    100                 })
    101                 break
    102 
    103         return info