youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

twitcasting.py (4146B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 import re
      5 
      6 from .common import InfoExtractor
      7 from ..utils import (
      8     clean_html,
      9     float_or_none,
     10     get_element_by_class,
     11     get_element_by_id,
     12     parse_duration,
     13     str_to_int,
     14     unified_timestamp,
     15     urlencode_postdata,
     16 )
     17 
     18 
     19 class TwitCastingIE(InfoExtractor):
     20     _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
     21     _TESTS = [{
     22         'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
     23         'md5': '745243cad58c4681dc752490f7540d7f',
     24         'info_dict': {
     25             'id': '2357609',
     26             'ext': 'mp4',
     27             'title': 'Live #2357609',
     28             'uploader_id': 'ivetesangalo',
     29             'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.',
     30             'thumbnail': r're:^https?://.*\.jpg$',
     31             'upload_date': '20110822',
     32             'timestamp': 1314010824,
     33             'duration': 32,
     34             'view_count': int,
     35         },
     36         'params': {
     37             'skip_download': True,
     38         },
     39     }, {
     40         'url': 'https://twitcasting.tv/mttbernardini/movie/3689740',
     41         'info_dict': {
     42             'id': '3689740',
     43             'ext': 'mp4',
     44             'title': 'Live playing something #3689740',
     45             'uploader_id': 'mttbernardini',
     46             'description': 'Salve, io sono Matto (ma con la e). Questa è la mia presentazione, in quanto sono letteralmente matto (nel senso di strano), con qualcosa in più.',
     47             'thumbnail': r're:^https?://.*\.jpg$',
     48             'upload_date': '20120212',
     49             'timestamp': 1329028024,
     50             'duration': 681,
     51             'view_count': int,
     52         },
     53         'params': {
     54             'skip_download': True,
     55             'videopassword': 'abc',
     56         },
     57     }]
     58 
     59     def _real_extract(self, url):
     60         uploader_id, video_id = re.match(self._VALID_URL, url).groups()
     61 
     62         video_password = self._downloader.params.get('videopassword')
     63         request_data = None
     64         if video_password:
     65             request_data = urlencode_postdata({
     66                 'password': video_password,
     67             })
     68         webpage = self._download_webpage(url, video_id, data=request_data)
     69 
     70         title = clean_html(get_element_by_id(
     71             'movietitle', webpage)) or self._html_search_meta(
     72             ['og:title', 'twitter:title'], webpage, fatal=True)
     73 
     74         video_js_data = {}
     75         m3u8_url = self._search_regex(
     76             r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
     77             webpage, 'm3u8 url', group='url', default=None)
     78         if not m3u8_url:
     79             video_js_data = self._parse_json(self._search_regex(
     80                 r"data-movie-playlist='(\[[^']+\])'",
     81                 webpage, 'movie playlist'), video_id)[0]
     82             m3u8_url = video_js_data['source']['url']
     83 
     84         # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
     85         formats = self._extract_m3u8_formats(
     86             m3u8_url, video_id, 'mp4', m3u8_id='hls')
     87 
     88         thumbnail = video_js_data.get('thumbnailUrl') or self._og_search_thumbnail(webpage)
     89         description = clean_html(get_element_by_id(
     90             'authorcomment', webpage)) or self._html_search_meta(
     91             ['description', 'og:description', 'twitter:description'], webpage)
     92         duration = float_or_none(video_js_data.get(
     93             'duration'), 1000) or parse_duration(clean_html(
     94                 get_element_by_class('tw-player-duration-time', webpage)))
     95         view_count = str_to_int(self._search_regex(
     96             r'Total\s*:\s*([\d,]+)\s*Views', webpage, 'views', None))
     97         timestamp = unified_timestamp(self._search_regex(
     98             r'data-toggle="true"[^>]+datetime="([^"]+)"',
     99             webpage, 'datetime', None))
    100 
    101         return {
    102             'id': video_id,
    103             'title': title,
    104             'description': description,
    105             'thumbnail': thumbnail,
    106             'timestamp': timestamp,
    107             'uploader_id': uploader_id,
    108             'duration': duration,
    109             'view_count': view_count,
    110             'formats': formats,
    111         }