import math
import re
-from youtube_dl.jsinterp import JSInterpreter
-undefined = JSInterpreter.undefined
+from youtube_dl.compat import compat_re_Pattern
+
+from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
class TestJSInterpreter(unittest.TestCase):
jsi = JSInterpreter('''
function x() { return undefined; }
''')
- self.assertIs(jsi.call_function('x'), undefined)
+ self.assertIs(jsi.call_function('x'), JS_Undefined)
jsi = JSInterpreter('''
function x() { let v; return v; }
''')
- self.assertIs(jsi.call_function('x'), undefined)
+ self.assertIs(jsi.call_function('x'), JS_Undefined)
jsi = JSInterpreter('''
function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; }
jsi = JSInterpreter('''
function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; }
''')
- self.assertEqual(jsi.call_function('x'), [False, False, undefined, undefined])
+ self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined])
jsi = JSInterpreter('function x(){return undefined ?? 42; }')
self.assertEqual(jsi.call_function('x'), 42)
jsi = JSInterpreter('''
function x() { let a; return a?.qq; }
''')
- self.assertIs(jsi.call_function('x'), undefined)
+ self.assertIs(jsi.call_function('x'), JS_Undefined)
jsi = JSInterpreter('''
function x() { let a = {m1: 42, m2: 0 }; return a?.qq; }
''')
- self.assertIs(jsi.call_function('x'), undefined)
+ self.assertIs(jsi.call_function('x'), JS_Undefined)
def test_regex(self):
jsi = JSInterpreter('''
jsi = JSInterpreter('''
function x() { let a=/,,[/,913,/](,)}/; return a; }
''')
- # Pythons disagree on the type of a pattern
- self.assertTrue(isinstance(jsi.call_function('x'), type(re.compile(''))))
+ self.assertIsInstance(jsi.call_function('x'), compat_re_Pattern)
jsi = JSInterpreter('''
function x() { let a=/,,[/,913,/](,)}/i; return a; }
''')
- self.assertEqual(jsi.call_function('x').flags & re.I, re.I)
+ self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
if __name__ == '__main__':
compat_str,
)
-_NAME_RE = r'[a-zA-Z_$][\w$]*'
-
-_UNDEFINED = object()
-
def _js_bit_op(op):
def wrapped(a, b):
def zeroise(x):
- return 0 if x in (None, _UNDEFINED) else x
+ return 0 if x in (None, JS_Undefined) else x
return op(zeroise(a), zeroise(b))
return wrapped
def _js_arith_op(op):
def wrapped(a, b):
- if _UNDEFINED in (a, b):
+ if JS_Undefined in (a, b):
return float('nan')
return op(a or 0, b or 0)
def _js_div(a, b):
- if _UNDEFINED in (a, b) or not (a and b):
+ if JS_Undefined in (a, b) or not (a and b):
return float('nan')
return float('inf') if not b else operator.truediv(a or 0, b)
def _js_mod(a, b):
- if _UNDEFINED in (a, b) or not b:
+ if JS_Undefined in (a, b) or not b:
return float('nan')
return (a or 0) % b
def _js_exp(a, b):
if not b:
- # even 0 ** 0 !!
- return 1
- if _UNDEFINED in (a, b):
+ return 1 # even 0 ** 0 !!
+ elif JS_Undefined in (a, b):
return float('nan')
return (a or 0) ** b
def _js_eq_op(op):
def wrapped(a, b):
- if set((a, b)) <= set((None, _UNDEFINED)):
+ if set((a, b)) <= set((None, JS_Undefined)):
return op(a, a)
return op(a, b)
def _js_comp_op(op):
def wrapped(a, b):
- if _UNDEFINED in (a, b):
+ if JS_Undefined in (a, b):
return False
return op(a or 0, b or 0)
return wrapped
+def _js_ternary(cndn, if_true=True, if_false=False):
+ """Simulate JS's ternary operator (cndn?if_true:if_false)"""
+ if cndn in (False, None, 0, '', JS_Undefined):
+ return if_false
+ try:
+ if math.isnan(cndn): # NB: NaN cannot be checked by membership
+ return if_false
+ except TypeError:
+ pass
+ return if_true
+
+
# (op, definition) in order of binding priority, tightest first
# avoid dict to maintain order
# definition None => Defined in JSInterpreter._operator
-_DOT_OPERATORS = (
- ('.', None),
- # TODO: ('?.', None),
-)
-
_OPERATORS = (
('>>', _js_bit_op(operator.rshift)),
('<<', _js_bit_op(operator.lshift)),
_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))
+_NAME_RE = r'[a-zA-Z_$][\w$]*'
_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
_QUOTES = '\'"/'
-def _ternary(cndn, if_true=True, if_false=False):
- """Simulate JS's ternary operator (cndn?if_true:if_false)"""
- if cndn in (False, None, 0, '', _UNDEFINED):
- return if_false
- try:
- if math.isnan(cndn): # NB: NaN cannot be checked by membership
- return if_false
- except TypeError:
- pass
- return if_true
+class JS_Undefined(object):
+ pass
class JS_Break(ExtractorError):
try:
return super(LocalNameSpace, self).__getitem__(key)
except KeyError:
- return _UNDEFINED
+ return JS_Undefined
def __setitem__(self, key, value):
for scope in self.maps:
def __delitem__(self, key):
raise NotImplementedError('Deleting is not supported')
- # except
- def pop(self, key, *args):
- try:
- off = self.__getitem__(key)
- super(LocalNameSpace, self).__delitem__(key)
- return off
- except KeyError:
- if len(args) > 0:
- return args[0]
- raise
-
- def __contains__(self, key):
- try:
- super(LocalNameSpace, self).__getitem__(key)
- return True
- except KeyError:
- return False
-
def __repr__(self):
return 'LocalNameSpace%s' % (self.maps, )
class JSInterpreter(object):
__named_object_counter = 0
- undefined = _UNDEFINED
-
- RE_FLAGS = {
+ _RE_FLAGS = {
# special knowledge: Python's re flags are bitmask values, current max 128
# invent new bitmask values well above that for literal parsing
# TODO: new pattern class to execute matches with these flags
if not expr:
return flags, expr
for idx, ch in enumerate(expr):
- if ch not in cls.RE_FLAGS:
+ if ch not in cls._RE_FLAGS:
break
- flags |= cls.RE_FLAGS[ch]
- return flags, expr[idx:] if idx > 0 else expr
+ flags |= cls._RE_FLAGS[ch]
+ return flags, expr[idx + 1:]
@classmethod
def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
if not escaping and char in _QUOTES and in_quote in (char, None):
if in_quote or after_op or char != '/':
in_quote = None if in_quote and not in_regex_char_group else char
- if in_quote is None and char == '/' and delim != '/':
- # regexp flags
- n_idx = idx + 1
- while n_idx < len(expr) and expr[n_idx] in cls.RE_FLAGS:
- n_idx += 1
- skip_re = n_idx - idx - 1
- if skip_re > 0:
- continue
elif in_quote == '/' and char in '[]':
in_regex_char_group = char == '['
escaping = not escaping and in_quote and char == '\\'
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
if op in ('||', '&&'):
- if (op == '&&') ^ _ternary(left_val):
+ if (op == '&&') ^ _js_ternary(left_val):
return left_val # short circuiting
elif op == '??':
- if left_val not in (None, self.undefined):
+ if left_val not in (None, JS_Undefined):
return left_val
elif op == '?':
- right_expr = _ternary(left_val, *self._separate(right_expr, ':', 1))
+ right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1))
right_val = self.interpret_expression(right_expr, local_vars, allow_recursion)
opfunc = op and next((v for k, v in self._all_operators() if k == op), None)
return obj[int(idx)] if isinstance(obj, list) else obj[idx]
except Exception as e:
if allow_undefined:
- return self.undefined
+ return JS_Undefined
raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e)
def _dump(self, obj, namespace):
if expr[0] in _QUOTES:
inner, outer = self._separate(expr, expr[0], 1)
if expr[0] == '/':
- flags, _ = self._regex_flags(outer)
- inner, outer = inner.replace('"', r'\"'), ''
- inner = re.compile(js_to_json(inner + expr[0]), flags=flags) # , strict=True))
+ flags, outer = self._regex_flags(outer)
+ inner = re.compile(inner[1:], flags=flags) # , strict=True))
else:
inner = json.loads(js_to_json(inner + expr[0])) # , strict=True))
if not outer:
if expr.startswith('{'):
inner, outer = self._separate_at_paren(expr, '}')
- # try for object expression
+ # try for object expression (Map)
sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
return dict(
(?P<try>try|finally)\s*|
(?P<catch>catch\s*(?P<err>\(\s*{_NAME_RE}\s*\)))|
(?P<switch>switch)\s*\(|
- (?P<for>for)\s*\(|'''.format(**globals()), expr)
+ (?P<for>for)\s*\(|
+ '''.format(**globals()), expr)
md = m.groupdict() if m else {}
if md.get('try'):
if expr[m.end()] == '{':
start, cndn, increment = self._separate(constructor, ';')
self.interpret_expression(start, local_vars, allow_recursion)
while True:
- if not _ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
+ if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
break
try:
ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
local_vars[m.group('out')] = self._operator(
m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
return local_vars[m.group('out')], should_return
- elif left_val in (None, self.undefined):
+ elif left_val in (None, JS_Undefined):
raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
raise JS_Continue()
elif expr == 'undefined':
- return self.undefined, should_return
+ return JS_Undefined, should_return
elif md.get('return'):
return local_vars[m.group('name')], should_return
'Math': float,
}
obj = local_vars.get(variable)
- if obj in (self.undefined, None):
- obj = types.get(variable, self.undefined)
- if obj is self.undefined:
+ if obj in (JS_Undefined, None):
+ obj = types.get(variable, JS_Undefined)
+ if obj is JS_Undefined:
try:
if variable not in self._objects:
self._objects[variable] = self.extract_object(variable)
if not nullish:
raise
- if nullish and obj is self.undefined:
- return self.undefined
+ if nullish and obj is JS_Undefined:
+ return JS_Undefined
# Member access
if arg_str is None: