[channel9] Add low quality formats and modernize

author Sergey M․ <dstftw@gmail.com>

Wed, 14 Oct 2015 19:52:25 +0000 (01:52 +0600)

committer Sergey M․ <dstftw@gmail.com>

Wed, 14 Oct 2015 19:52:25 +0000 (01:52 +0600)
author Sergey M․ <dstftw@gmail.com>
Wed, 14 Oct 2015 19:52:25 +0000 (01:52 +0600)
committer Sergey M․ <dstftw@gmail.com>
Wed, 14 Oct 2015 19:52:25 +0000 (01:52 +0600)
diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py

index 1ce004932bb9de3225c0a964149acb994aeabc1e..3a88181d84dad91d930b03460ae2a3e33eaa6f8a 100644 (file)
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -3,7 +3,11 @@ from __future__ import unicode_literals
  import re
  
  from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    parse_filesize,
+    qualities,
+)
  
  
  class Channel9IE(InfoExtractor):
@@ -52,23 +56,6 @@ class Channel9IE(InfoExtractor):
  
      _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
  
-    # Sorted by quality
-    _known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']
-
-    def _restore_bytes(self, formatted_size):
-        if not formatted_size:
-            return 0
-        m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size)
-        if not m:
-            return 0
-        units = m.group('units')
-        try:
-            exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper())
-        except ValueError:
-            return 0
-        size = float(m.group('size'))
-        return int(size * (1024 ** exponent))
-
      def _formats_from_html(self, html):
          FORMAT_REGEX = r'''
              (?x)
@@ -78,16 +65,20 @@ class Channel9IE(InfoExtractor):
              <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
              </div>)?                                                # File size part may be missing
          '''
-        # Extract known formats
+        quality = qualities((
+            'MP3', 'MP4',
+            'Low Quality WMV', 'Low Quality MP4',
+            'Mid Quality WMV', 'Mid Quality MP4',
+            'High Quality WMV', 'High Quality MP4'))
          formats = [{
              'url': x.group('url'),
              'format_id': x.group('quality'),
              'format_note': x.group('note'),
              'format': '%s (%s)' % (x.group('quality'), x.group('note')),
-            'filesize': self._restore_bytes(x.group('filesize')),  # File size is approximate
-            'preference': self._known_formats.index(x.group('quality')),
+            'filesize_approx': parse_filesize(x.group('filesize')),
+            'quality': quality(x.group('quality')),
              'vcodec': 'none' if x.group('note') == 'Audio only' else None,
-        } for x in list(re.finditer(FORMAT_REGEX, html)) if x.group('quality') in self._known_formats]
+        } for x in list(re.finditer(FORMAT_REGEX, html))]
  
          self._sort_formats(formats)
author	Sergey M․ <dstftw@gmail.com>
	Wed, 14 Oct 2015 19:52:25 +0000 (01:52 +0600)
committer	Sergey M․ <dstftw@gmail.com>
	Wed, 14 Oct 2015 19:52:25 +0000 (01:52 +0600)