youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

indavideo.py (4415B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..compat import compat_str
      8 from ..utils import (
      9     int_or_none,
     10     parse_age_limit,
     11     parse_iso8601,
     12     update_url_query,
     13 )
     14 
     15 
     16 class IndavideoEmbedIE(InfoExtractor):
     17     _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
     18     _TESTS = [{
     19         'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
     20         'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
     21         'info_dict': {
     22             'id': '1837039',
     23             'ext': 'mp4',
     24             'title': 'Cicatánc',
     25             'description': '',
     26             'thumbnail': r're:^https?://.*\.jpg$',
     27             'uploader': 'cukiajanlo',
     28             'uploader_id': '83729',
     29             'timestamp': 1439193826,
     30             'upload_date': '20150810',
     31             'duration': 72,
     32             'age_limit': 0,
     33             'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
     34         },
     35     }, {
     36         'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
     37         'only_matching': True,
     38     }, {
     39         'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
     40         'only_matching': True,
     41     }]
     42 
     43     # Some example URLs covered by generic extractor:
     44     #   http://indavideo.hu/video/Vicces_cica_1
     45     #   http://index.indavideo.hu/video/2015_0728_beregszasz
     46     #   http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
     47     #   http://erotika.indavideo.hu/video/Amator_tini_punci
     48     #   http://film.indavideo.hu/video/f_hrom_nagymamm_volt
     49     #   http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
     50 
     51     @staticmethod
     52     def _extract_urls(webpage):
     53         return re.findall(
     54             r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)',
     55             webpage)
     56 
     57     def _real_extract(self, url):
     58         video_id = self._match_id(url)
     59 
     60         video = self._download_json(
     61             'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
     62             video_id)['data']
     63 
     64         title = video['title']
     65 
     66         video_urls = []
     67 
     68         video_files = video.get('video_files')
     69         if isinstance(video_files, list):
     70             video_urls.extend(video_files)
     71         elif isinstance(video_files, dict):
     72             video_urls.extend(video_files.values())
     73 
     74         video_file = video.get('video_file')
     75         if video:
     76             video_urls.append(video_file)
     77         video_urls = list(set(video_urls))
     78 
     79         video_prefix = video_urls[0].rsplit('/', 1)[0]
     80 
     81         for flv_file in video.get('flv_files', []):
     82             flv_url = '%s/%s' % (video_prefix, flv_file)
     83             if flv_url not in video_urls:
     84                 video_urls.append(flv_url)
     85 
     86         filesh = video.get('filesh')
     87 
     88         formats = []
     89         for video_url in video_urls:
     90             height = int_or_none(self._search_regex(
     91                 r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
     92             if filesh:
     93                 if not height:
     94                     continue
     95                 token = filesh.get(compat_str(height))
     96                 if token is None:
     97                     continue
     98                 video_url = update_url_query(video_url, {'token': token})
     99             formats.append({
    100                 'url': video_url,
    101                 'height': height,
    102             })
    103         self._sort_formats(formats)
    104 
    105         timestamp = video.get('date')
    106         if timestamp:
    107             # upload date is in CEST
    108             timestamp = parse_iso8601(timestamp + ' +0200', ' ')
    109 
    110         thumbnails = [{
    111             'url': self._proto_relative_url(thumbnail)
    112         } for thumbnail in video.get('thumbnails', [])]
    113 
    114         tags = [tag['title'] for tag in video.get('tags') or []]
    115 
    116         return {
    117             'id': video.get('id') or video_id,
    118             'title': title,
    119             'description': video.get('description'),
    120             'thumbnails': thumbnails,
    121             'uploader': video.get('user_name'),
    122             'uploader_id': video.get('user_id'),
    123             'timestamp': timestamp,
    124             'duration': int_or_none(video.get('length')),
    125             'age_limit': parse_age_limit(video.get('age_limit')),
    126             'tags': tags,
    127             'formats': formats,
    128         }