youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

netzkino.py (3050B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import (
      8     clean_html,
      9     int_or_none,
     10     js_to_json,
     11     parse_iso8601,
     12 )
     13 
     14 
     15 class NetzkinoIE(InfoExtractor):
     16     _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'
     17 
     18     _TEST = {
     19         'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
     20         'md5': '92a3f8b76f8d7220acce5377ea5d4873',
     21         'info_dict': {
     22             'id': 'rakete-zum-mond',
     23             'ext': 'mp4',
     24             'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
     25             'comments': 'mincount:3',
     26             'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
     27             'upload_date': '20120813',
     28             'thumbnail': r're:https?://.*\.jpg$',
     29             'timestamp': 1344858571,
     30             'age_limit': 12,
     31         },
     32         'params': {
     33             'skip_download': 'Download only works from Germany',
     34         }
     35     }
     36 
     37     def _real_extract(self, url):
     38         mobj = re.match(self._VALID_URL, url)
     39         category_id = mobj.group('category')
     40         video_id = mobj.group('id')
     41 
     42         api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
     43         api_info = self._download_json(api_url, video_id)
     44         info = next(
     45             p for p in api_info['posts'] if p['slug'] == video_id)
     46         custom_fields = info['custom_fields']
     47 
     48         production_js = self._download_webpage(
     49             'http://www.netzkino.de/beta/dist/production.min.js', video_id,
     50             note='Downloading player code')
     51         avo_js = self._search_regex(
     52             r'var urlTemplate=(\{.*?"\})',
     53             production_js, 'URL templates')
     54         templates = self._parse_json(
     55             avo_js, video_id, transform_source=js_to_json)
     56 
     57         suffix = {
     58             'hds': '.mp4/manifest.f4m',
     59             'hls': '.mp4/master.m3u8',
     60             'pmd': '.mp4',
     61         }
     62         film_fn = custom_fields['Streaming'][0]
     63         formats = [{
     64             'format_id': key,
     65             'ext': 'mp4',
     66             'url': tpl.replace('{}', film_fn) + suffix[key],
     67         } for key, tpl in templates.items()]
     68         self._sort_formats(formats)
     69 
     70         comments = [{
     71             'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
     72             'id': c['id'],
     73             'author': c['name'],
     74             'html': c['content'],
     75             'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
     76         } for c in info.get('comments', [])]
     77 
     78         return {
     79             'id': video_id,
     80             'formats': formats,
     81             'comments': comments,
     82             'title': info['title'],
     83             'age_limit': int_or_none(custom_fields.get('FSK')[0]),
     84             'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
     85             'description': clean_html(info.get('content')),
     86             'thumbnail': info.get('thumbnail'),
     87             'playlist_title': api_info.get('title'),
     88             'playlist_id': category_id,
     89         }