youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

discovery.py (4912B)


      1 from __future__ import unicode_literals
      2 
      3 import random
      4 import re
      5 import string
      6 
      7 from .discoverygo import DiscoveryGoBaseIE
      8 from ..compat import compat_urllib_parse_unquote
      9 from ..utils import ExtractorError
     10 from ..compat import compat_HTTPError
     11 
     12 
     13 class DiscoveryIE(DiscoveryGoBaseIE):
     14     _VALID_URL = r'''(?x)https?://
     15         (?P<site>
     16             go\.discovery|
     17             www\.
     18                 (?:
     19                     investigationdiscovery|
     20                     discoverylife|
     21                     animalplanet|
     22                     ahctv|
     23                     destinationamerica|
     24                     sciencechannel|
     25                     tlc
     26                 )|
     27             watch\.
     28                 (?:
     29                     hgtv|
     30                     foodnetwork|
     31                     travelchannel|
     32                     diynetwork|
     33                     cookingchanneltv|
     34                     motortrend
     35                 )
     36         )\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
     37     _TESTS = [{
     38         'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
     39         'info_dict': {
     40             'id': '5a2f35ce6b66d17a5026e29e',
     41             'ext': 'mp4',
     42             'title': 'Riding with Matthew Perry',
     43             'description': 'md5:a34333153e79bc4526019a5129e7f878',
     44             'duration': 84,
     45         },
     46         'params': {
     47             'skip_download': True,  # requires ffmpeg
     48         }
     49     }, {
     50         'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
     51         'only_matching': True,
     52     }, {
     53         'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
     54         'only_matching': True,
     55     }, {
     56         # using `show_slug` is important to get the correct video data
     57         'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
     58         'only_matching': True,
     59     }]
     60     _GEO_COUNTRIES = ['US']
     61     _GEO_BYPASS = False
     62     _API_BASE_URL = 'https://api.discovery.com/v1/'
     63 
     64     def _real_extract(self, url):
     65         site, show_slug, display_id = re.match(self._VALID_URL, url).groups()
     66 
     67         access_token = None
     68         cookies = self._get_cookies(url)
     69 
     70         # prefer Affiliate Auth Token over Anonymous Auth Token
     71         auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
     72         if auth_storage_cookie and auth_storage_cookie.value:
     73             auth_storage = self._parse_json(compat_urllib_parse_unquote(
     74                 compat_urllib_parse_unquote(auth_storage_cookie.value)),
     75                 display_id, fatal=False) or {}
     76             access_token = auth_storage.get('a') or auth_storage.get('access_token')
     77 
     78         if not access_token:
     79             access_token = self._download_json(
     80                 'https://%s.com/anonymous' % site, display_id,
     81                 'Downloading token JSON metadata', query={
     82                     'authRel': 'authorization',
     83                     'client_id': '3020a40c2356a645b4b4',
     84                     'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
     85                     'redirectUri': 'https://www.discovery.com/',
     86                 })['access_token']
     87 
     88         headers = self.geo_verification_headers()
     89         headers['Authorization'] = 'Bearer ' + access_token
     90 
     91         try:
     92             video = self._download_json(
     93                 self._API_BASE_URL + 'content/videos',
     94                 display_id, 'Downloading content JSON metadata',
     95                 headers=headers, query={
     96                     'embed': 'show.name',
     97                     'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
     98                     'slug': display_id,
     99                     'show_slug': show_slug,
    100                 })[0]
    101             video_id = video['id']
    102             stream = self._download_json(
    103                 self._API_BASE_URL + 'streaming/video/' + video_id,
    104                 display_id, 'Downloading streaming JSON metadata', headers=headers)
    105         except ExtractorError as e:
    106             if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
    107                 e_description = self._parse_json(
    108                     e.cause.read().decode(), display_id)['description']
    109                 if 'resource not available for country' in e_description:
    110                     self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
    111                 if 'Authorized Networks' in e_description:
    112                     raise ExtractorError(
    113                         'This video is only available via cable service provider subscription that'
    114                         ' is not currently supported. You may want to use --cookies.', expected=True)
    115                 raise ExtractorError(e_description)
    116             raise
    117 
    118         return self._extract_video_info(video, stream, display_id)