youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

ustudio.py (4392B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 
      5 from .common import InfoExtractor
      6 from ..utils import (
      7     int_or_none,
      8     unified_strdate,
      9     unescapeHTML,
     10 )
     11 
     12 
     13 class UstudioIE(InfoExtractor):
     14     IE_NAME = 'ustudio'
     15     _VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
     16     _TEST = {
     17         'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge',
     18         'md5': '58bbfca62125378742df01fc2abbdef6',
     19         'info_dict': {
     20             'id': 'Uxu2my9bgSph',
     21             'display_id': 'san_francisco_golden_gate_bridge',
     22             'ext': 'mp4',
     23             'title': 'San Francisco: Golden Gate Bridge',
     24             'description': 'md5:23925500697f2c6d4830e387ba51a9be',
     25             'thumbnail': r're:^https?://.*\.jpg$',
     26             'upload_date': '20111107',
     27             'uploader': 'Tony Farley',
     28         }
     29     }
     30 
     31     def _real_extract(self, url):
     32         video_id, display_id = re.match(self._VALID_URL, url).groups()
     33 
     34         config = self._download_xml(
     35             'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id,
     36             display_id)
     37 
     38         def extract(kind):
     39             return [{
     40                 'url': unescapeHTML(item.attrib['url']),
     41                 'width': int_or_none(item.get('width')),
     42                 'height': int_or_none(item.get('height')),
     43             } for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]
     44 
     45         formats = extract('video')
     46         self._sort_formats(formats)
     47 
     48         webpage = self._download_webpage(url, display_id)
     49 
     50         title = self._og_search_title(webpage)
     51         upload_date = unified_strdate(self._search_regex(
     52             r'(?s)Uploaded by\s*.+?\s*on\s*<span>([^<]+)</span>',
     53             webpage, 'upload date', fatal=False))
     54         uploader = self._search_regex(
     55             r'Uploaded by\s*<a[^>]*>([^<]+)<',
     56             webpage, 'uploader', fatal=False)
     57 
     58         return {
     59             'id': video_id,
     60             'display_id': display_id,
     61             'title': title,
     62             'description': self._og_search_description(webpage),
     63             'thumbnails': extract('image'),
     64             'upload_date': upload_date,
     65             'uploader': uploader,
     66             'formats': formats,
     67         }
     68 
     69 
     70 class UstudioEmbedIE(InfoExtractor):
     71     IE_NAME = 'ustudio:embed'
     72     _VALID_URL = r'https?://(?:(?:app|embed)\.)?ustudio\.com/embed/(?P<uid>[^/]+)/(?P<id>[^/]+)'
     73     _TEST = {
     74         'url': 'http://app.ustudio.com/embed/DeN7VdYRDKhP/Uw7G1kMCe65T',
     75         'md5': '47c0be52a09b23a7f40de9469cec58f4',
     76         'info_dict': {
     77             'id': 'Uw7G1kMCe65T',
     78             'ext': 'mp4',
     79             'title': '5 Things IT Should Know About Video',
     80             'description': 'md5:93d32650884b500115e158c5677d25ad',
     81             'uploader_id': 'DeN7VdYRDKhP',
     82         }
     83     }
     84 
     85     def _real_extract(self, url):
     86         uploader_id, video_id = re.match(self._VALID_URL, url).groups()
     87         video_data = self._download_json(
     88             'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id),
     89             video_id)['videos'][0]
     90         title = video_data['name']
     91 
     92         formats = []
     93         for ext, qualities in video_data.get('transcodes', {}).items():
     94             for quality in qualities:
     95                 quality_url = quality.get('url')
     96                 if not quality_url:
     97                     continue
     98                 height = int_or_none(quality.get('height'))
     99                 formats.append({
    100                     'format_id': '%s-%dp' % (ext, height) if height else ext,
    101                     'url': quality_url,
    102                     'width': int_or_none(quality.get('width')),
    103                     'height': height,
    104                 })
    105         self._sort_formats(formats)
    106 
    107         thumbnails = []
    108         for image in video_data.get('images', []):
    109             image_url = image.get('url')
    110             if not image_url:
    111                 continue
    112             thumbnails.append({
    113                 'url': image_url,
    114             })
    115 
    116         return {
    117             'id': video_id,
    118             'title': title,
    119             'description': video_data.get('description'),
    120             'duration': int_or_none(video_data.get('duration')),
    121             'uploader_id': uploader_id,
    122             'tags': video_data.get('keywords'),
    123             'thumbnails': thumbnails,
    124             'formats': formats,
    125         }