youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

ccc.py (3875B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..utils import (
      6     int_or_none,
      7     parse_iso8601,
      8     try_get,
      9     url_or_none,
     10 )
     11 
     12 
     13 class CCCIE(InfoExtractor):
     14     IE_NAME = 'media.ccc.de'
     15     _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/v/(?P<id>[^/?#&]+)'
     16 
     17     _TESTS = [{
     18         'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
     19         'md5': '3a1eda8f3a29515d27f5adb967d7e740',
     20         'info_dict': {
     21             'id': '1839',
     22             'ext': 'mp4',
     23             'title': 'Introduction to Processor Design',
     24             'creator': 'byterazor',
     25             'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
     26             'thumbnail': r're:^https?://.*\.jpg$',
     27             'upload_date': '20131228',
     28             'timestamp': 1388188800,
     29             'duration': 3710,
     30             'tags': list,
     31         }
     32     }, {
     33         'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
     34         'only_matching': True,
     35     }]
     36 
     37     def _real_extract(self, url):
     38         display_id = self._match_id(url)
     39         webpage = self._download_webpage(url, display_id)
     40         event_id = self._search_regex(r"data-id='(\d+)'", webpage, 'event id')
     41         event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id)
     42 
     43         formats = []
     44         for recording in event_data.get('recordings', []):
     45             recording_url = recording.get('recording_url')
     46             if not recording_url:
     47                 continue
     48             language = recording.get('language')
     49             folder = recording.get('folder')
     50             format_id = None
     51             if language:
     52                 format_id = language
     53             if folder:
     54                 if language:
     55                     format_id += '-' + folder
     56                 else:
     57                     format_id = folder
     58             vcodec = 'h264' if 'h264' in folder else (
     59                 'none' if folder in ('mp3', 'opus') else None
     60             )
     61             formats.append({
     62                 'format_id': format_id,
     63                 'url': recording_url,
     64                 'width': int_or_none(recording.get('width')),
     65                 'height': int_or_none(recording.get('height')),
     66                 'filesize': int_or_none(recording.get('size'), invscale=1024 * 1024),
     67                 'language': language,
     68                 'vcodec': vcodec,
     69             })
     70         self._sort_formats(formats)
     71 
     72         return {
     73             'id': event_id,
     74             'display_id': display_id,
     75             'title': event_data['title'],
     76             'creator': try_get(event_data, lambda x: ', '.join(x['persons'])),
     77             'description': event_data.get('description'),
     78             'thumbnail': event_data.get('thumb_url'),
     79             'timestamp': parse_iso8601(event_data.get('date')),
     80             'duration': int_or_none(event_data.get('length')),
     81             'tags': event_data.get('tags'),
     82             'formats': formats,
     83         }
     84 
     85 
     86 class CCCPlaylistIE(InfoExtractor):
     87     IE_NAME = 'media.ccc.de:lists'
     88     _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/c/(?P<id>[^/?#&]+)'
     89     _TESTS = [{
     90         'url': 'https://media.ccc.de/c/30c3',
     91         'info_dict': {
     92             'title': '30C3',
     93             'id': '30c3',
     94         },
     95         'playlist_count': 135,
     96     }]
     97 
     98     def _real_extract(self, url):
     99         playlist_id = self._match_id(url).lower()
    100 
    101         conf = self._download_json(
    102             'https://media.ccc.de/public/conferences/' + playlist_id,
    103             playlist_id)
    104 
    105         entries = []
    106         for e in conf['events']:
    107             event_url = url_or_none(e.get('frontend_link'))
    108             if event_url:
    109                 entries.append(self.url_result(event_url, ie=CCCIE.ie_key()))
    110 
    111         return self.playlist_result(entries, playlist_id, conf.get('title'))