box.py (3795B)
1 # coding: utf-8 2 from __future__ import unicode_literals 3 4 import json 5 import re 6 7 from .common import InfoExtractor 8 from ..utils import ( 9 determine_ext, 10 parse_iso8601, 11 # try_get, 12 update_url_query, 13 ) 14 15 16 class BoxIE(InfoExtractor): 17 _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)' 18 _TEST = { 19 'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538', 20 'md5': '1f81b2fd3960f38a40a3b8823e5fcd43', 21 'info_dict': { 22 'id': '510727257538', 23 'ext': 'mp4', 24 'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4', 25 'uploader': 'MLS Video', 26 'timestamp': 1566320259, 27 'upload_date': '20190820', 28 'uploader_id': '235196876', 29 } 30 } 31 32 def _real_extract(self, url): 33 shared_name, file_id = re.match(self._VALID_URL, url).groups() 34 webpage = self._download_webpage(url, file_id) 35 request_token = self._parse_json(self._search_regex( 36 r'Box\.config\s*=\s*({.+?});', webpage, 37 'Box config'), file_id)['requestToken'] 38 access_token = self._download_json( 39 'https://app.box.com/app-api/enduserapp/elements/tokens', file_id, 40 'Downloading token JSON metadata', 41 data=json.dumps({'fileIDs': [file_id]}).encode(), headers={ 42 'Content-Type': 'application/json', 43 'X-Request-Token': request_token, 44 'X-Box-EndUser-API': 'sharedName=' + shared_name, 45 })[file_id]['read'] 46 shared_link = 'https://app.box.com/s/' + shared_name 47 f = self._download_json( 48 'https://api.box.com/2.0/files/' + file_id, file_id, 49 'Downloading file JSON metadata', headers={ 50 'Authorization': 'Bearer ' + access_token, 51 'BoxApi': 'shared_link=' + shared_link, 52 'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats 53 }, query={ 54 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size' 55 }) 56 title = f['name'] 57 58 query = { 59 'access_token': access_token, 60 'shared_link': shared_link 61 } 62 63 formats = [] 64 65 # for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []): 66 # entry_url_template = try_get( 67 # entry, lambda x: x['content']['url_template']) 68 # if not entry_url_template: 69 # continue 70 # representation = entry.get('representation') 71 # if representation == 'dash': 72 # TODO: append query to every fragment URL 73 # formats.extend(self._extract_mpd_formats( 74 # entry_url_template.replace('{+asset_path}', 'manifest.mpd'), 75 # file_id, query=query)) 76 77 authenticated_download_url = f.get('authenticated_download_url') 78 if authenticated_download_url and f.get('is_download_available'): 79 formats.append({ 80 'ext': f.get('extension') or determine_ext(title), 81 'filesize': f.get('size'), 82 'format_id': 'download', 83 'url': update_url_query(authenticated_download_url, query), 84 }) 85 86 self._sort_formats(formats) 87 88 creator = f.get('created_by') or {} 89 90 return { 91 'id': file_id, 92 'title': title, 93 'formats': formats, 94 'description': f.get('description') or None, 95 'uploader': creator.get('name'), 96 'timestamp': parse_iso8601(f.get('created_at')), 97 'uploader_id': creator.get('id'), 98 }