youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit be64b5b098e3563d563bcf091f6f74edf22d7764
parent c3e74731c2acd2878a38c2a2daa2ed0c11352762
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Tue, 25 Nov 2014 09:54:54 +0100

[xminus] Simplify and extend (#4302)

Diffstat:
M test/test_utils.py | 10 ++++++++++
M youtube_dl/extractor/xminus.py | 54 ++++++++++++++++++++++++++++++++++--------------------
M youtube_dl/utils.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 95 insertions(+), 20 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py @@ -47,6 +47,7 @@ from youtube_dl.utils import ( js_to_json, intlist_to_bytes, args_to_str, + parse_filesize, ) @@ -367,5 +368,14 @@ class TestUtil(unittest.TestCase): 'foo ba/r -baz \'2 be\' \'\'' ) + def test_parse_filesize(self): + self.assertEqual(parse_filesize(None), None) + self.assertEqual(parse_filesize(''), None) + self.assertEqual(parse_filesize('91 B'), 91) + self.assertEqual(parse_filesize('foobar'), None) + self.assertEqual(parse_filesize('2 MiB'), 2097152) + self.assertEqual(parse_filesize('5 GB'), 5000000000) + self.assertEqual(parse_filesize('1.2Tb'), 1200000000000) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/xminus.py b/youtube_dl/extractor/xminus.py @@ -2,7 +2,14 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..compat import ( + compat_chr, + compat_ord, +) +from ..utils import ( + int_or_none, + parse_filesize, +) class XMinusIE(InfoExtractor): @@ -15,39 +22,46 @@ class XMinusIE(InfoExtractor): 'ext': 'mp3', 'title': 'Леонид Агутин-Песенка шофера', 'duration': 156, + 'tbr': 320, + 'filesize_approx': 5900000, + 'view_count': int, } } def _real_extract(self, url): video_id = self._match_id(url) - - # TODO more code goes here, for example ... 
webpage = self._download_webpage(url, video_id) + artist = self._html_search_regex( - r'minus_track.artist="(.+?)"', webpage, 'artist') + r'minus_track\.artist="(.+?)"', webpage, 'artist') title = artist + '-' + self._html_search_regex( - r'minus_track.title="(.+?)"', webpage, 'title') + r'minus_track\.title="(.+?)"', webpage, 'title') duration = int_or_none(self._html_search_regex( - r'minus_track.dur_sec=\'([0-9]+?)\'', webpage, 'duration')) + r'minus_track\.dur_sec=\'([0-9]*?)\'', + webpage, 'duration', fatal=False)) + filesize_approx = parse_filesize(self._html_search_regex( + r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])', + webpage, 'approximate filesize', fatal=False)) + tbr = int_or_none(self._html_search_regex( + r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps', + webpage, 'bitrate', fatal=False)) + view_count = int_or_none(self._html_search_regex( + r'<div class="quality.*?► ([0-9]+)', + webpage, 'view count', fatal=False)) + enc_token = self._html_search_regex( r'data-mt="(.*?)"', webpage, 'enc_token') - token = self._decode_token(enc_token) - url = 'http://x-minus.org/dwlf/{}/{}.mp3'.format(video_id, token) + token = ''.join( + c if pos == 3 else compat_chr(compat_ord(c) - 1) + for pos, c in enumerate(reversed(enc_token))) + video_url = 'http://x-minus.org/dwlf/%s/%s.mp3' % (video_id, token) return { 'id': video_id, 'title': title, - 'url': url, + 'url': video_url, 'duration': duration, + 'filesize_approx': filesize_approx, + 'tbr': tbr, + 'view_count': view_count, } - - def _decode_token(self, enc_token): - token = '' - pos = 0 - for c in reversed(enc_token): - if pos != 3: - token += chr(ord(c) - 1) - else: - token += c - pos += 1 - return token diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py @@ -1046,6 +1046,57 @@ def format_bytes(bytes): return '%.2f%s' % (converted, suffix) +def parse_filesize(s): + if s is None: + return None + + # The lower-case forms are of course incorrect and inofficial, + # but we support those 
too + _UNIT_TABLE = { + 'B': 1, + 'b': 1, + 'KiB': 1024, + 'KB': 1000, + 'kB': 1024, + 'Kb': 1000, + 'MiB': 1024 ** 2, + 'MB': 1000 ** 2, + 'mB': 1024 ** 2, + 'Mb': 1000 ** 2, + 'GiB': 1024 ** 3, + 'GB': 1000 ** 3, + 'gB': 1024 ** 3, + 'Gb': 1000 ** 3, + 'TiB': 1024 ** 4, + 'TB': 1000 ** 4, + 'tB': 1024 ** 4, + 'Tb': 1000 ** 4, + 'PiB': 1024 ** 5, + 'PB': 1000 ** 5, + 'pB': 1024 ** 5, + 'Pb': 1000 ** 5, + 'EiB': 1024 ** 6, + 'EB': 1000 ** 6, + 'eB': 1024 ** 6, + 'Eb': 1000 ** 6, + 'ZiB': 1024 ** 7, + 'ZB': 1000 ** 7, + 'zB': 1024 ** 7, + 'Zb': 1000 ** 7, + 'YiB': 1024 ** 8, + 'YB': 1000 ** 8, + 'yB': 1024 ** 8, + 'Yb': 1000 ** 8, + } + + units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE) + m = re.match(r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)' % units_re, s) + if not m: + return None + + return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')]) + + def get_term_width(): columns = compat_getenv('COLUMNS', None) if columns: