[Youtube] Construct dash formats with `range` query
author: pukkandan <pukkandan.ytdlp@gmail.com>
Tue, 28 Feb 2023 17:33:44 +0000 (23:03 +0530)
committer: dirkf <fieldhouse@gmx.net>
Fri, 3 Mar 2023 15:02:15 +0000 (15:02 +0000)
See yt-dlp/yt_dlp#6369

youtube_dl/extractor/youtube.py

index 4246d84f9e5e8b34d12a6d4c2b24e196605e9e9c..89711c84e442bfeaeb0527df8a452c2966bb2b67 100644 (file)
@@ -1694,8 +1694,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if n_response is None:
                 # give up if descrambling failed
                 break
-            fmt['url'] = update_url(
-                parsed_fmt_url, query_update={'n': [n_response]})
+            for fmt_dct in traverse_obj(fmt, (None, (None, ('fragments', Ellipsis))), expected_type=dict):
+                fmt_dct['url'] = update_url(
+                    fmt_dct['url'], query_update={'n': [n_response]})
 
     # from yt-dlp, with tweaks
     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
@@ -2047,10 +2048,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if no_video:
                 dct['abr'] = tbr
             if no_audio or no_video:
-                dct['downloader_options'] = {
-                    # Youtube throttles chunks >~10M
-                    'http_chunk_size': 10485760,
-                }
+                CHUNK_SIZE = 10 << 20
+                # avoid Youtube throttling
+                dct.update({
+                    'protocol': 'http_dash_segments',
+                    'fragments': [{
+                        'url': update_url_query(dct['url'], {
+                            'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, dct['filesize']))
+                        })
+                    } for range_start in range(0, dct['filesize'], CHUNK_SIZE)]
+                } if dct['filesize'] else {
+                    'downloader_options': {'http_chunk_size': CHUNK_SIZE}  # No longer useful?
+                })
+
                 if dct.get('ext'):
                     dct['container'] = dct['ext'] + '_dash'
             formats.append(dct)