youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

folketinget.py (2643B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .common import InfoExtractor
      5 from ..compat import compat_parse_qs
      6 from ..utils import (
      7     int_or_none,
      8     parse_duration,
      9     parse_iso8601,
     10     xpath_text,
     11 )
     12 
     13 
     14 class FolketingetIE(InfoExtractor):
     15     IE_DESC = 'Folketinget (ft.dk; Danish parliament)'
     16     _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
     17     _TEST = {
     18         'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
     19         'md5': '6269e8626fa1a891bf5369b386ae996a',
     20         'info_dict': {
     21             'id': '1165642',
     22             'ext': 'mp4',
     23             'title': 'Åbent samråd i Erhvervsudvalget',
     24             'description': 'Åbent samråd med erhvervs- og vækstministeren om regeringens politik på teleområdet',
     25             'view_count': int,
     26             'width': 768,
     27             'height': 432,
     28             'tbr': 928000,
     29             'timestamp': 1416493800,
     30             'upload_date': '20141120',
     31             'duration': 3960,
     32         },
     33         'params': {
     34             # rtmp download
     35             'skip_download': True,
     36         },
     37     }
     38 
     39     def _real_extract(self, url):
     40         video_id = self._match_id(url)
     41         webpage = self._download_webpage(url, video_id)
     42 
     43         title = self._og_search_title(webpage)
     44         description = self._html_search_regex(
     45             r'(?s)<div class="video-item-agenda"[^>]*>(.*?)<',
     46             webpage, 'description', fatal=False)
     47 
     48         player_params = compat_parse_qs(self._search_regex(
     49             r'<embed src="http://ft\.arkena\.tv/flash/ftplayer\.swf\?([^"]+)"',
     50             webpage, 'player params'))
     51         xml_url = player_params['xml'][0]
     52         doc = self._download_xml(xml_url, video_id)
     53 
     54         timestamp = parse_iso8601(xpath_text(doc, './/date'))
     55         duration = parse_duration(xpath_text(doc, './/duration'))
     56         width = int_or_none(xpath_text(doc, './/width'))
     57         height = int_or_none(xpath_text(doc, './/height'))
     58         view_count = int_or_none(xpath_text(doc, './/views'))
     59 
     60         formats = [{
     61             'format_id': n.attrib['bitrate'],
     62             'url': xpath_text(n, './url', fatal=True),
     63             'tbr': int_or_none(n.attrib['bitrate']),
     64         } for n in doc.findall('.//streams/stream')]
     65         self._sort_formats(formats)
     66 
     67         return {
     68             'id': video_id,
     69             'title': title,
     70             'formats': formats,
     71             'description': description,
     72             'timestamp': timestamp,
     73             'width': width,
     74             'height': height,
     75             'duration': duration,
     76             'view_count': view_count,
     77         }