youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

cartoonnetwork.py (2374B)


      1 # coding: utf-8
      2 from __future__ import unicode_literals
      3 
      4 from .turner import TurnerBaseIE
      5 from ..utils import int_or_none
      6 
      7 
      8 class CartoonNetworkIE(TurnerBaseIE):
      9     _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
     10     _TEST = {
     11         'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
     12         'info_dict': {
     13             'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
     14             'ext': 'mp4',
     15             'title': 'How to Draw Upgrade',
     16             'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
     17         },
     18         'params': {
     19             # m3u8 download
     20             'skip_download': True,
     21         },
     22     }
     23 
     24     def _real_extract(self, url):
     25         display_id = self._match_id(url)
     26         webpage = self._download_webpage(url, display_id)
     27 
     28         def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
     29             metadata_re = ''
     30             if content_re:
     31                 metadata_re = r'|video_metadata\.content_' + content_re
     32             return self._search_regex(
     33                 r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re),
     34                 webpage, name, fatal=fatal)
     35 
     36         media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
     37         title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
     38 
     39         info = self._extract_ngtv_info(
     40             media_id, {'networkId': 'cartoonnetwork'}, {
     41                 'url': url,
     42                 'site_name': 'CartoonNetwork',
     43                 'auth_required': find_field('authType', 'auth type') != 'unauth',
     44             })
     45 
     46         series = find_field(
     47             'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
     48         info.update({
     49             'id': media_id,
     50             'display_id': display_id,
     51             'title': title,
     52             'description': self._html_search_meta('description', webpage),
     53             'series': series,
     54             'episode': title,
     55         })
     56 
     57         for field in ('season', 'episode'):
     58             field_name = field + 'Number'
     59             info[field + '_number'] = int_or_none(find_field(
     60                 field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
     61 
     62         return info