youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

mlb.py (9355B)


      1 from __future__ import unicode_literals
      2 
      3 import re
      4 
      5 from .common import InfoExtractor
      6 from ..utils import (
      7     determine_ext,
      8     int_or_none,
      9     parse_duration,
     10     parse_iso8601,
     11     try_get,
     12 )
     13 
     14 
     15 class MLBBaseIE(InfoExtractor):
     16     def _real_extract(self, url):
     17         display_id = self._match_id(url)
     18         video = self._download_video_data(display_id)
     19         video_id = video['id']
     20         title = video['title']
     21         feed = self._get_feed(video)
     22 
     23         formats = []
     24         for playback in (feed.get('playbacks') or []):
     25             playback_url = playback.get('url')
     26             if not playback_url:
     27                 continue
     28             name = playback.get('name')
     29             ext = determine_ext(playback_url)
     30             if ext == 'm3u8':
     31                 formats.extend(self._extract_m3u8_formats(
     32                     playback_url, video_id, 'mp4',
     33                     'm3u8_native', m3u8_id=name, fatal=False))
     34             else:
     35                 f = {
     36                     'format_id': name,
     37                     'url': playback_url,
     38                 }
     39                 mobj = re.search(r'_(\d+)K_(\d+)X(\d+)', name)
     40                 if mobj:
     41                     f.update({
     42                         'height': int(mobj.group(3)),
     43                         'tbr': int(mobj.group(1)),
     44                         'width': int(mobj.group(2)),
     45                     })
     46                 mobj = re.search(r'_(\d+)x(\d+)_(\d+)_(\d+)K\.mp4', playback_url)
     47                 if mobj:
     48                     f.update({
     49                         'fps': int(mobj.group(3)),
     50                         'height': int(mobj.group(2)),
     51                         'tbr': int(mobj.group(4)),
     52                         'width': int(mobj.group(1)),
     53                     })
     54                 formats.append(f)
     55         self._sort_formats(formats)
     56 
     57         thumbnails = []
     58         for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
     59             src = cut.get('src')
     60             if not src:
     61                 continue
     62             thumbnails.append({
     63                 'height': int_or_none(cut.get('height')),
     64                 'url': src,
     65                 'width': int_or_none(cut.get('width')),
     66             })
     67 
     68         language = (video.get('language') or 'EN').lower()
     69 
     70         return {
     71             'id': video_id,
     72             'title': title,
     73             'formats': formats,
     74             'description': video.get('description'),
     75             'duration': parse_duration(feed.get('duration')),
     76             'thumbnails': thumbnails,
     77             'timestamp': parse_iso8601(video.get(self._TIMESTAMP_KEY)),
     78             'subtitles': self._extract_mlb_subtitles(feed, language),
     79         }
     80 
     81 
     82 class MLBIE(MLBBaseIE):
     83     _VALID_URL = r'''(?x)
     84                     https?://
     85                         (?:[\da-z_-]+\.)*mlb\.com/
     86                         (?:
     87                             (?:
     88                                 (?:[^/]+/)*video/[^/]+/c-|
     89                                 (?:
     90                                     shared/video/embed/(?:embed|m-internal-embed)\.html|
     91                                     (?:[^/]+/)+(?:play|index)\.jsp|
     92                                 )\?.*?\bcontent_id=
     93                             )
     94                             (?P<id>\d+)
     95                         )
     96                     '''
     97     _TESTS = [
     98         {
     99             'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
    100             'md5': '632358dacfceec06bad823b83d21df2d',
    101             'info_dict': {
    102                 'id': '34698933',
    103                 'ext': 'mp4',
    104                 'title': "Ackley's spectacular catch",
    105                 'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
    106                 'duration': 66,
    107                 'timestamp': 1405995000,
    108                 'upload_date': '20140722',
    109                 'thumbnail': r're:^https?://.*\.jpg$',
    110             },
    111         },
    112         {
    113             'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
    114             'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f',
    115             'info_dict': {
    116                 'id': '34496663',
    117                 'ext': 'mp4',
    118                 'title': 'Stanton prepares for Derby',
    119                 'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
    120                 'duration': 46,
    121                 'timestamp': 1405120200,
    122                 'upload_date': '20140711',
    123                 'thumbnail': r're:^https?://.*\.jpg$',
    124             },
    125         },
    126         {
    127             'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
    128             'md5': '99bb9176531adc600b90880fb8be9328',
    129             'info_dict': {
    130                 'id': '34578115',
    131                 'ext': 'mp4',
    132                 'title': 'Cespedes repeats as Derby champ',
    133                 'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
    134                 'duration': 488,
    135                 'timestamp': 1405414336,
    136                 'upload_date': '20140715',
    137                 'thumbnail': r're:^https?://.*\.jpg$',
    138             },
    139         },
    140         {
    141             'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
    142             'md5': 'da8b57a12b060e7663ee1eebd6f330ec',
    143             'info_dict': {
    144                 'id': '34577915',
    145                 'ext': 'mp4',
    146                 'title': 'Bautista on Home Run Derby',
    147                 'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
    148                 'duration': 52,
    149                 'timestamp': 1405405122,
    150                 'upload_date': '20140715',
    151                 'thumbnail': r're:^https?://.*\.jpg$',
    152             },
    153         },
    154         {
    155             'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
    156             'only_matching': True,
    157         },
    158         {
    159             'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
    160             'only_matching': True,
    161         },
    162         {
    163             'url': 'http://mlb.mlb.com/shared/video/embed/embed.html?content_id=36599553',
    164             'only_matching': True,
    165         },
    166         {
    167             'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
    168             'only_matching': True,
    169         },
    170         {
    171             'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
    172             'only_matching': True,
    173         },
    174         {
    175             # From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
    176             'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
    177             'only_matching': True,
    178         },
    179     ]
    180     _TIMESTAMP_KEY = 'date'
    181 
    182     @staticmethod
    183     def _get_feed(video):
    184         return video
    185 
    186     @staticmethod
    187     def _extract_mlb_subtitles(feed, language):
    188         subtitles = {}
    189         for keyword in (feed.get('keywordsAll') or []):
    190             keyword_type = keyword.get('type')
    191             if keyword_type and keyword_type.startswith('closed_captions_location_'):
    192                 cc_location = keyword.get('value')
    193                 if cc_location:
    194                     subtitles.setdefault(language, []).append({
    195                         'url': cc_location,
    196                     })
    197         return subtitles
    198 
    199     def _download_video_data(self, display_id):
    200         return self._download_json(
    201             'http://content.mlb.com/mlb/item/id/v1/%s/details/web-v1.json' % display_id,
    202             display_id)
    203 
    204 
    205 class MLBVideoIE(MLBBaseIE):
    206     _VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)'
    207     _TEST = {
    208         'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933',
    209         'md5': '632358dacfceec06bad823b83d21df2d',
    210         'info_dict': {
    211             'id': 'c04a8863-f569-42e6-9f87-992393657614',
    212             'ext': 'mp4',
    213             'title': "Ackley's spectacular catch",
    214             'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
    215             'duration': 66,
    216             'timestamp': 1405995000,
    217             'upload_date': '20140722',
    218             'thumbnail': r're:^https?://.+',
    219         },
    220     }
    221     _TIMESTAMP_KEY = 'timestamp'
    222 
    223     @classmethod
    224     def suitable(cls, url):
    225         return False if MLBIE.suitable(url) else super(MLBVideoIE, cls).suitable(url)
    226 
    227     @staticmethod
    228     def _get_feed(video):
    229         return video['feeds'][0]
    230 
    231     @staticmethod
    232     def _extract_mlb_subtitles(feed, language):
    233         subtitles = {}
    234         for cc_location in (feed.get('closedCaptions') or []):
    235             subtitles.setdefault(language, []).append({
    236                 'url': cc_location,
    237             })
    238 
    239     def _download_video_data(self, display_id):
    240         # https://www.mlb.com/data-service/en/videos/[SLUG]
    241         return self._download_json(
    242             'https://fastball-gateway.mlb.com/graphql',
    243             display_id, query={
    244                 'query': '''{
    245   mediaPlayback(ids: "%s") {
    246     description
    247     feeds(types: CMS) {
    248       closedCaptions
    249       duration
    250       image {
    251         cuts {
    252           width
    253           height
    254           src
    255         }
    256       }
    257       playbacks {
    258         name
    259         url
    260       }
    261     }
    262     id
    263     timestamp
    264     title
    265   }
    266 }''' % display_id,
    267             })['data']['mediaPlayback'][0]