fragment.py (10957B)
from __future__ import division, unicode_literals

import os
import time
import json

from .common import FileDownloader
from .http import HttpFD
from ..utils import (
    error_to_compat_str,
    encodeFilename,
    sanitize_open,
    sanitized_Request,
)


class HttpQuietDownloader(HttpFD):
    """HttpFD variant whose screen output is suppressed."""

    def to_screen(self, *args, **kargs):
        # Intentionally a no-op: per-fragment downloads must not print.
        pass


class FragmentFD(FileDownloader):
    """
    A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).

    Available options:

    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH
                        and hlsnative only)
    skip_unavailable_fragments:
                        Skip unavailable fragments (DASH and hlsnative only)
    keep_fragments:     Keep downloaded fragments on disk after downloading is
                        finished

    For each incomplete fragment download youtube-dl keeps on disk a special
    bookkeeping file with download state and metadata (in future such files will
    be used for any incomplete download handled by youtube-dl). This file is
    used to properly handle resuming, check download file consistency and detect
    potential errors. The file has a .ytdl extension and represents a standard
    JSON file of the following format:

    extractor:
        Dictionary of extractor related data. TBD.

    downloader:
        Dictionary of downloader related data. May contain following data:
            current_fragment:
                Dictionary with current (being downloaded) fragment data:
                index:  0-based index of current fragment among all fragments
            fragment_count:
                Total count of fragments

    This feature is experimental and file format may change in future.
    """

    def report_retry_fragment(self, err, frag_index, count, retries):
        """Print a message announcing that fragment frag_index is retried."""
        self.to_screen(
            '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
            % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))

    def report_skip_fragment(self, frag_index):
        """Print a message announcing that fragment frag_index is skipped."""
        self.to_screen('[download] Skipping fragment %d...' % frag_index)

    def _prepare_url(self, info_dict, url):
        """
        Return url wrapped in a request carrying info_dict['http_headers'],
        or the bare url when no headers are set.
        """
        headers = info_dict.get('http_headers')
        return sanitized_Request(url, None, headers) if headers else url

    def _prepare_and_start_frag_download(self, ctx):
        """Run _prepare_frag_download followed by _start_frag_download."""
        self._prepare_frag_download(ctx)
        self._start_frag_download(ctx)

    @staticmethod
    def __do_ytdl_file(ctx):
        """
        Tell whether the .ytdl bookkeeping file should be used: only for
        non-live downloads whose temporary file name is not '-'.
        """
        return not ctx['live'] and ctx['tmpfilename'] != '-'

    def _read_ytdl_file(self, ctx):
        """
        Load the current fragment index from the .ytdl file into
        ctx['fragment_index'].

        If the file content cannot be read or does not have the expected
        layout, ctx['ytdl_corrupt'] is set to True instead.
        """
        assert 'ytdl_corrupt' not in ctx
        stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
        try:
            ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
        except Exception:
            # Any failure here (I/O, invalid JSON, missing keys) is reported
            # to the caller through the 'ytdl_corrupt' flag.
            ctx['ytdl_corrupt'] = True
        finally:
            stream.close()

    def _write_ytdl_file(self, ctx):
        """
        Write the .ytdl file with ctx['fragment_index'] and, when it is set,
        ctx['fragment_count'].
        """
        downloader = {
            'current_fragment': {
                'index': ctx['fragment_index'],
            },
        }
        if ctx.get('fragment_count') is not None:
            downloader['fragment_count'] = ctx['fragment_count']
        frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
        try:
            frag_index_stream.write(json.dumps({'downloader': downloader}))
        finally:
            # Close even when serialisation or the write fails so that the
            # file handle is not leaked.
            frag_index_stream.close()

    def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
        """
        Download one fragment with ctx['dl'] and read it back from disk.

        The fragment is stored as '<tmpfilename>-Frag<fragment_index>'. The
        explicit headers take precedence over info_dict['http_headers'].

        Returns a (success, content) tuple; content is None when the
        download failed. On success the sanitized fragment file name is
        stored in ctx['fragment_filename_sanitized'].
        """
        fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
        fragment_info_dict = {
            'url': frag_url,
            'http_headers': headers or info_dict.get('http_headers'),
        }
        success = ctx['dl'].download(fragment_filename, fragment_info_dict)
        if not success:
            return False, None
        if fragment_info_dict.get('filetime'):
            ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
        down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
        ctx['fragment_filename_sanitized'] = frag_sanitized
        try:
            frag_content = down.read()
        finally:
            # Do not leak the handle if reading the fragment fails.
            down.close()
        return True, frag_content

    def _append_fragment(self, ctx, frag_content):
        """
        Append frag_content to ctx['dest_stream'], then update the .ytdl
        file and remove the fragment file unless 'keep_fragments' is set.
        """
        try:
            ctx['dest_stream'].write(frag_content)
            ctx['dest_stream'].flush()
        finally:
            # Bookkeeping and cleanup run even when the write fails.
            if self.__do_ytdl_file(ctx):
                self._write_ytdl_file(ctx)
            if not self.params.get('keep_fragments', False):
                os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
            del ctx['fragment_filename_sanitized']

    def _prepare_frag_download(self, ctx):
        """
        Prepare ctx for a fragmented download.

        Reads ctx['filename'], ctx['total_frags'] (non-live only) and the
        optional ctx['live'] / ctx['ad_frags']. Sets ctx['live'] (default
        False), 'tmpfilename', 'fragment_index', 'dl', 'dest_stream' and
        'complete_frags_downloaded_bytes'.
        """
        if 'live' not in ctx:
            ctx['live'] = False
        if not ctx['live']:
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
                total_frags_str += ' (not including %d ad)' % ad_frags
        else:
            total_frags_str = 'unknown (live)'
        self.to_screen(
            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
        self.report_destination(ctx['filename'])
        dl = HttpQuietDownloader(
            self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit'),
                'retries': self.params.get('retries', 0),
                'nopart': self.params.get('nopart', False),
                'test': self.params.get('test', False),
            }
        )
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
        resume_len = 0

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            open_mode = 'ab'
            resume_len = os.path.getsize(encodeFilename(tmpfilename))

        # Should be initialized before ytdl file check
        ctx.update({
            'tmpfilename': tmpfilename,
            'fragment_index': 0,
        })

        if self.__do_ytdl_file(ctx):
            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
                self._read_ytdl_file(ctx)
                is_corrupt = ctx.get('ytdl_corrupt') is True
                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
                if is_corrupt or is_inconsistent:
                    message = (
                        '.ytdl file is corrupt' if is_corrupt else
                        'Inconsistent state of incomplete fragment download')
                    self.report_warning(
                        '%s. Restarting from the beginning...' % message)
                    ctx['fragment_index'] = resume_len = 0
                    # Restarting from fragment 0: truncate any stale partial
                    # data instead of appending the new fragments after it.
                    open_mode = 'wb'
                    if 'ytdl_corrupt' in ctx:
                        del ctx['ytdl_corrupt']
                    self._write_ytdl_file(ctx)
            else:
                # NOTE(review): if a partial file exists here without a .ytdl
                # file, fragments are appended to it starting from index 0 --
                # confirm this is intended.
                self._write_ytdl_file(ctx)
                assert ctx['fragment_index'] == 0

        dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)

        ctx.update({
            'dl': dl,
            'dest_stream': dest_stream,
            'tmpfilename': tmpfilename,
            # Total complete fragments downloaded so far in bytes
            'complete_frags_downloaded_bytes': resume_len,
        })

    def _start_frag_download(self, ctx):
        """
        Record the start time in ctx['started'] and register a progress
        hook on ctx['dl'] that aggregates per-fragment progress into overall
        progress reported through _hook_progress.

        Returns the start timestamp.
        """
        resume_len = ctx['complete_frags_downloaded_bytes']
        total_frags = ctx['total_frags']
        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'status': 'downloading',
            'downloaded_bytes': resume_len,
            'fragment_index': ctx['fragment_index'],
            'fragment_count': total_frags,
            'filename': ctx['filename'],
            'tmpfilename': ctx['tmpfilename'],
        }

        start = time.time()
        ctx.update({
            'started': start,
            # Amount of fragment's bytes downloaded by the time of the previous
            # frag progress hook invocation
            'prev_frag_downloaded_bytes': 0,
        })

        def frag_progress_hook(s):
            # Only 'downloading' and 'finished' statuses carry progress.
            if s['status'] not in ('downloading', 'finished'):
                return

            time_now = time.time()
            state['elapsed'] = time_now - start
            frag_total_bytes = s.get('total_bytes') or 0
            if not ctx['live']:
                # Extrapolate the total size from the average size of the
                # fragments seen so far (including the current one).
                estimated_size = (
                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
                    / (state['fragment_index'] + 1) * total_frags)
                state['total_bytes_estimate'] = estimated_size

            if s['status'] == 'finished':
                state['fragment_index'] += 1
                ctx['fragment_index'] = state['fragment_index']
                # Add only the bytes not yet accounted for by earlier
                # 'downloading' invocations for this fragment.
                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
                ctx['prev_frag_downloaded_bytes'] = 0
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
                if not ctx['live']:
                    state['eta'] = self.calc_eta(
                        start, time_now, estimated_size - resume_len,
                        state['downloaded_bytes'] - resume_len)
                # Fall back to the last known speed when the hook has none.
                state['speed'] = s.get('speed') or ctx.get('speed')
                ctx['speed'] = state['speed']
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state)

        ctx['dl'].add_progress_hook(frag_progress_hook)

        return start

    def _finish_frag_download(self, ctx):
        """
        Close the destination stream, remove the .ytdl file, rename the
        temporary file to its final name and report the 'finished' status.
        """
        ctx['dest_stream'].close()
        if self.__do_ytdl_file(ctx):
            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
            if os.path.isfile(ytdl_filename):
                os.remove(ytdl_filename)
        elapsed = time.time() - ctx['started']

        if ctx['tmpfilename'] == '-':
            # Nothing to rename or stat in this case; use the byte counter.
            downloaded_bytes = ctx['complete_frags_downloaded_bytes']
        else:
            self.try_rename(ctx['tmpfilename'], ctx['filename'])
            if self.params.get('updatetime', True):
                filetime = ctx.get('fragment_filetime')
                if filetime:
                    try:
                        os.utime(ctx['filename'], (time.time(), filetime))
                    except Exception:
                        # Best effort: failing to set the modification time
                        # must not fail the download.
                        pass
            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))

        self._hook_progress({
            'downloaded_bytes': downloaded_bytes,
            'total_bytes': downloaded_bytes,
            'filename': ctx['filename'],
            'status': 'finished',
            'elapsed': elapsed,
        })