ccc.py (3875B)
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_iso8601,
    try_get,
    url_or_none,
)


class CCCIE(InfoExtractor):
    """Extractor for single talk pages under media.ccc.de/v/<slug>."""

    IE_NAME = 'media.ccc.de'
    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/v/(?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',
        'md5': '3a1eda8f3a29515d27f5adb967d7e740',
        'info_dict': {
            'id': '1839',
            'ext': 'mp4',
            'title': 'Introduction to Processor Design',
            'creator': 'byterazor',
            'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20131228',
            'timestamp': 1388188800,
            'duration': 3710,
            'tags': list,
        }
    }, {
        'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for one talk.

        The slug matched from ``url`` is used as ``display_id``; the numeric
        event id is scraped from the ``data-id`` attribute of the web page
        and then used to fetch the event's JSON from ``/public/events/<id>``.

        Returns a dict with ``id``, ``display_id``, ``title``, ``creator``,
        ``description``, ``thumbnail``, ``timestamp``, ``duration``, ``tags``
        and ``formats``. A missing ``title`` in the event JSON raises
        ``KeyError``; all other metadata fields are optional.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        event_id = self._search_regex(
            r"data-id='(\d+)'", webpage, 'event id')
        event_data = self._download_json(
            'https://media.ccc.de/public/events/%s' % event_id, event_id)

        formats = []
        # `or []` also covers a 'recordings' key that is present but null.
        for recording in event_data.get('recordings') or []:
            recording_url = recording.get('recording_url')
            if not recording_url:
                continue
            language = recording.get('language')
            folder = recording.get('folder')

            # format_id is "<language>-<folder>", or whichever of the two
            # parts is present, or None when neither is.
            format_id = '-'.join(
                part for part in (language, folder) if part) or None

            # Only inspect `folder` when it is set: a recording without a
            # folder must not abort the extraction with a TypeError.
            vcodec = None
            if folder:
                if 'h264' in folder:
                    vcodec = 'h264'
                elif folder in ('mp3', 'opus'):
                    # Audio-only folders: mark as having no video stream.
                    vcodec = 'none'

            formats.append({
                'format_id': format_id,
                'url': recording_url,
                'width': int_or_none(recording.get('width')),
                'height': int_or_none(recording.get('height')),
                # NOTE(review): the invscale suggests the API reports 'size'
                # in MiB and this converts it to bytes - confirm.
                'filesize': int_or_none(
                    recording.get('size'), invscale=1024 * 1024),
                'language': language,
                'vcodec': vcodec,
            })
        self._sort_formats(formats)

        return {
            'id': event_id,
            'display_id': display_id,
            'title': event_data['title'],
            # try_get guards the lookup/join when 'persons' is missing or
            # not joinable.
            'creator': try_get(event_data, lambda x: ', '.join(x['persons'])),
            'description': event_data.get('description'),
            'thumbnail': event_data.get('thumb_url'),
            'timestamp': parse_iso8601(event_data.get('date')),
            'duration': int_or_none(event_data.get('length')),
            'tags': event_data.get('tags'),
            'formats': formats,
        }


class CCCPlaylistIE(InfoExtractor):
    """Extractor for conference listings under media.ccc.de/c/<acronym>."""

    IE_NAME = 'media.ccc.de:lists'
    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/c/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://media.ccc.de/c/30c3',
        'info_dict': {
            'title': '30C3',
            'id': '30c3',
        },
        'playlist_count': 135,
    }]

    def _real_extract(self, url):
        """Return a playlist with one CCCIE url_result per conference event.

        The id matched from ``url`` is lower-cased before it is used both as
        the playlist id and in the ``/public/conferences/<id>`` request.
        Events whose ``frontend_link`` does not pass ``url_or_none`` are
        skipped. A conference JSON without an ``events`` key raises
        ``KeyError``.
        """
        playlist_id = self._match_id(url).lower()

        conf = self._download_json(
            'https://media.ccc.de/public/conferences/' + playlist_id,
            playlist_id)

        entries = []
        for e in conf['events']:
            event_url = url_or_none(e.get('frontend_link'))
            if event_url:
                entries.append(self.url_result(event_url, ie=CCCIE.ie_key()))

        return self.playlist_result(entries, playlist_id, conf.get('title'))