[extractor/common] improve Akamai HTTP format extraction
author: Remita Amine <remitamine@gmail.com>
Wed, 2 Dec 2020 20:37:14 +0000 (21:37 +0100)
committer: Remita Amine <remitamine@gmail.com>
Wed, 2 Dec 2020 20:49:09 +0000 (21:49 +0100)
- Allow m3u8 manifest without an additional audio format
- Fix extraction for qualities starting with a number
Solution provided by @nixxo based on: https://stackoverflow.com/a/5984688

youtube_dl/extractor/common.py

index 16aff885c6b0935d62415668ce38fda452489692..4463e06b37f3f3f4adc70fe37076197150888614 100644 (file)
@@ -2623,7 +2623,7 @@ class InfoExtractor(object):
             REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
             qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
             qualities_length = len(qualities)
-            if len(formats) in (qualities_length + 1, qualities_length * 2 + 1):
+            if len(formats) in (qualities_length, qualities_length + 1, qualities_length * 2, qualities_length * 2 + 1):
                 i = 0
                 http_formats = []
                 for f in formats:
@@ -2632,7 +2632,7 @@ class InfoExtractor(object):
                             http_f = f.copy()
                             del http_f['manifest_url']
                             http_url = re.sub(
-                                REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url'])
+                                REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url'])
                             http_f.update({
                                 'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
                                 'url': http_url,