[voicerepublic] Detect list of available formats from the web page
author Duncan <duncan@vtllf.org>
Sun, 10 May 2015 04:03:09 +0000 (16:03 +1200)
committer Duncan <duncan@vtllf.org>
Sun, 10 May 2015 04:03:09 +0000 (16:03 +1200)
youtube_dl/extractor/voicerepublic.py

index d3e35a815b6844df4f5e193ea1f83262f0b0aa6a..d150b5b5e275bcc402c3d50aae0603cc28aa81b4 100644 (file)
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..compat import compat_urllib_request
 from ..utils import ExtractorError
@@ -32,12 +34,15 @@ class VoiceRepublicIE(InfoExtractor):
         if '<a>Queued for processing, please stand by...</a>' in webpage:
             raise ExtractorError('Audio is still queued for processing')
 
+        ext_matches = re.finditer(r'data-\w+=\'/vrmedia/\d+-clean\.(\w+)\'', webpage)
+        exts = [match.group(1) for match in ext_matches]
+
         formats = [{
             'url': 'https://voicerepublic.com/vrmedia/{}-clean.{}'.format(video_id, ext),
             'ext': ext,
             'format_id': ext,
             'vcodec': 'none',
-        } for ext in ['m4a', 'mp3', 'ogg']]
+        } for ext in exts]
         self._sort_formats(formats)
 
         return {