youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

zapiks.py (3832B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import (
      8     parse_duration,
      9     parse_iso8601,
     10     xpath_with_ns,
     11     xpath_text,
     12     int_or_none,
     13 )
     14 
     15 
     16 class ZapiksIE(InfoExtractor):
     17     _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
     18     _TESTS = [
     19         {
     20             'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
     21             'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
     22             'info_dict': {
     23                 'id': '80798',
     24                 'ext': 'mp4',
     25                 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
     26                 'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
     27                 'thumbnail': r're:^https?://.*\.jpg$',
     28                 'duration': 528,
     29                 'timestamp': 1359044972,
     30                 'upload_date': '20130124',
     31                 'view_count': int,
     32             },
     33         },
     34         {
     35             'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
     36             'only_matching': True,
     37         },
     38         {
     39             'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
     40             'only_matching': True,
     41         },
     42         {
     43             'url': 'http://www.zapiks.fr/index.php?action=playerIframe&amp;media_id=118046&amp;width=640&amp;height=360&amp;autoStart=false&amp;language=fr',
     44             'only_matching': True,
     45         },
     46     ]
     47 
     48     def _real_extract(self, url):
     49         mobj = re.match(self._VALID_URL, url)
     50         video_id = mobj.group('id')
     51         display_id = mobj.group('display_id') or video_id
     52 
     53         webpage = self._download_webpage(url, display_id)
     54 
     55         if not video_id:
     56             video_id = self._search_regex(
     57                 r'data-media-id="(\d+)"', webpage, 'video id')
     58 
     59         playlist = self._download_xml(
     60             'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
     61             display_id)
     62 
     63         NS_MAP = {
     64             'jwplayer': 'http://rss.jwpcdn.com/'
     65         }
     66 
     67         def ns(path):
     68             return xpath_with_ns(path, NS_MAP)
     69 
     70         item = playlist.find('./channel/item')
     71 
     72         title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
     73         description = self._og_search_description(webpage, default=None)
     74         thumbnail = xpath_text(
     75             item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
     76         duration = parse_duration(self._html_search_meta(
     77             'duration', webpage, 'duration', default=None))
     78         timestamp = parse_iso8601(self._html_search_meta(
     79             'uploadDate', webpage, 'upload date', default=None), ' ')
     80 
     81         view_count = int_or_none(self._search_regex(
     82             r'UserPlays:(\d+)', webpage, 'view count', default=None))
     83         comment_count = int_or_none(self._search_regex(
     84             r'UserComments:(\d+)', webpage, 'comment count', default=None))
     85 
     86         formats = []
     87         for source in item.findall(ns('./jwplayer:source')):
     88             format_id = source.attrib['label']
     89             f = {
     90                 'url': source.attrib['file'],
     91                 'format_id': format_id,
     92             }
     93             m = re.search(r'^(?P<height>\d+)[pP]', format_id)
     94             if m:
     95                 f['height'] = int(m.group('height'))
     96             formats.append(f)
     97         self._sort_formats(formats)
     98 
     99         return {
    100             'id': video_id,
    101             'title': title,
    102             'description': description,
    103             'thumbnail': thumbnail,
    104             'duration': duration,
    105             'timestamp': timestamp,
    106             'view_count': view_count,
    107             'comment_count': comment_count,
    108             'formats': formats,
    109         }