youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 825abb81759d76e53127644a45e1d6cb7ff4f654
parent 8944ec0109b1e9c847f178755123d5453400dd50
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Thu, 24 Jul 2014 10:41:12 +0200

[jsinterp] Implement splice and general improvement

I still get 403s on YouTube though.

Diffstat:
M youtube_dl/jsinterp.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 58 insertions(+), 28 deletions(-)

diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 
+import json
 import re
 
 from .utils import (
@@ -40,8 +41,9 @@ class JSInterpreter(object):
             assign = lambda v: v
             expr = stmt[len('return '):]
         else:
-            raise ExtractorError(
-                'Cannot determine left side of statement in %r' % stmt)
+            # Try interpreting it as an expression
+            expr = stmt
+            assign = lambda v: v
 
         v = self.interpret_expression(expr, local_vars, allow_recursion)
         return assign(v)
@@ -53,35 +55,62 @@ class JSInterpreter(object):
         if expr.isalpha():
             return local_vars[expr]
 
-        m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
+        try:
+            return json.loads(expr)
+        except ValueError:
+            pass
+
+        m = re.match(
+            r'^(?P<var>[a-z]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
+            expr)
         if m:
+            variable = m.group('var')
             member = m.group('member')
-            variable = m.group('in')
+            arg_str = m.group('args')
 
-            if variable not in local_vars:
+            if variable in local_vars:
+                obj = local_vars[variable]
+            else:
                 if variable not in self._objects:
                     self._objects[variable] = self.extract_object(variable)
                 obj = self._objects[variable]
-                key, args = member.split('(', 1)
-                args = args.strip(')')
-                argvals = [int(v) if v.isdigit() else local_vars[v]
-                           for v in args.split(',')]
-                return obj[key](argvals)
-
-            val = local_vars[variable]
-            if member == 'split("")':
-                return list(val)
-            if member == 'join("")':
-                return ''.join(val)
-            if member == 'length':
-                return len(val)
-            if member == 'reverse()':
-                return val[::-1]
-            slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
-            if slice_m:
-                idx = self.interpret_expression(
-                    slice_m.group('idx'), local_vars, allow_recursion - 1)
-                return val[idx:]
+
+            if arg_str is None:
+                # Member access
+                if member == 'length':
+                    return len(obj)
+                return obj[member]
+
+            assert expr.endswith(')')
+            # Function call
+            if arg_str == '':
+                argvals = tuple()
+            else:
+                argvals = tuple([
+                    self.interpret_expression(v, local_vars, allow_recursion)
+                    for v in arg_str.split(',')])
+
+            if member == 'split':
+                assert argvals == ('',)
+                return list(obj)
+            if member == 'join':
+                assert len(argvals) == 1
+                return argvals[0].join(obj)
+            if member == 'reverse':
+                assert len(argvals) == 0
+                return obj[::-1]
+            if member == 'slice':
+                assert len(argvals) == 1
+                return obj[argvals[0]:]
+            if member == 'splice':
+                assert isinstance(obj, list)
+                index, howMany = argvals
+                res = []
+                for i in range(index, min(index + howMany, len(obj))):
+                    res.append(obj.pop(i))
+                return res
+
+            return obj[member](argvals)
 
         m = re.match(
             r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
@@ -100,13 +129,14 @@ class JSInterpreter(object):
             return a % b
 
         m = re.match(
-            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+            r'^(?P<func>[.a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
         if m:
             fname = m.group('func')
+            argvals = tuple([
+                int(v) if v.isdigit() else local_vars[v]
+                for v in m.group('args').split(',')])
             if fname not in self._functions:
                 self._functions[fname] = self.extract_function(fname)
-            argvals = [int(v) if v.isdigit() else local_vars[v]
-                       for v in m.group('args').split(',')]
             return self._functions[fname](argvals)
 
         raise ExtractorError('Unsupported JS expression %r' % expr)