youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

xstream.py (3981B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import (
      8     int_or_none,
      9     parse_iso8601,
     10     xpath_with_ns,
     11     xpath_text,
     12     find_xpath_attr,
     13 )
     14 
     15 
     16 class XstreamIE(InfoExtractor):
     17     _VALID_URL = r'''(?x)
     18                     (?:
     19                         xstream:|
     20                         https?://frontend\.xstream\.(?:dk|net)/
     21                     )
     22                     (?P<partner_id>[^/]+)
     23                     (?:
     24                         :|
     25                         /feed/video/\?.*?\bid=
     26                     )
     27                     (?P<id>\d+)
     28                     '''
     29     _TESTS = [{
     30         'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588',
     31         'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
     32         'info_dict': {
     33             'id': '86588',
     34             'ext': 'mov',
     35             'title': 'Otto Wollertsen',
     36             'description': 'Vestlendingen Otto Fredrik Wollertsen',
     37             'timestamp': 1430473209,
     38             'upload_date': '20150501',
     39         },
     40     }, {
     41         'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039',
     42         'only_matching': True,
     43     }]
     44 
     45     def _extract_video_info(self, partner_id, video_id):
     46         data = self._download_xml(
     47             'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
     48             % (partner_id, video_id),
     49             video_id)
     50 
     51         NS_MAP = {
     52             'atom': 'http://www.w3.org/2005/Atom',
     53             'xt': 'http://xstream.dk/',
     54             'media': 'http://search.yahoo.com/mrss/',
     55         }
     56 
     57         entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
     58 
     59         title = xpath_text(
     60             entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
     61         description = xpath_text(
     62             entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
     63         timestamp = parse_iso8601(xpath_text(
     64             entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
     65 
     66         formats = []
     67         media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
     68         for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
     69             media_url = media_content.get('url')
     70             if not media_url:
     71                 continue
     72             tbr = int_or_none(media_content.get('bitrate'))
     73             mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
     74             if mobj:
     75                 formats.append({
     76                     'url': mobj.group('url'),
     77                     'play_path': 'mp4:%s' % mobj.group('playpath'),
     78                     'app': mobj.group('app'),
     79                     'ext': 'flv',
     80                     'tbr': tbr,
     81                     'format_id': 'rtmp-%d' % tbr,
     82                 })
     83             else:
     84                 formats.append({
     85                     'url': media_url,
     86                     'tbr': tbr,
     87                 })
     88         self._sort_formats(formats)
     89 
     90         link = find_xpath_attr(
     91             entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
     92         if link is not None:
     93             formats.append({
     94                 'url': link.get('href'),
     95                 'format_id': link.get('rel'),
     96                 'preference': 1,
     97             })
     98 
     99         thumbnails = [{
    100             'url': splash.get('url'),
    101             'width': int_or_none(splash.get('width')),
    102             'height': int_or_none(splash.get('height')),
    103         } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
    104 
    105         return {
    106             'id': video_id,
    107             'title': title,
    108             'description': description,
    109             'timestamp': timestamp,
    110             'formats': formats,
    111             'thumbnails': thumbnails,
    112         }
    113 
    114     def _real_extract(self, url):
    115         mobj = re.match(self._VALID_URL, url)
    116         partner_id = mobj.group('partner_id')
    117         video_id = mobj.group('id')
    118 
    119         return self._extract_video_info(partner_id, video_id)