[jsinterp] Improve try/catch/finally support
author dirkf <fieldhouse@gmx.net>
Fri, 26 Aug 2022 07:17:54 +0000 (08:17 +0100)
committer dirkf <fieldhouse@gmx.net>
Fri, 26 Aug 2022 07:51:17 +0000 (08:51 +0100)
test/test_jsinterp.py
youtube_dl/jsinterp.py

index 0a97bdbc411fb61b8fcf4f9858357c7d9f14adba..fb4882d0039bc879bf89cdcc7d65edbd383ff7e3 100644 (file)
@@ -74,6 +74,9 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function f(){return 0 ?? 42;}')
         self.assertEqual(jsi.call_function('f'), 0)
 
+        jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}')
+        self.assertFalse(jsi.call_function('f'))
+
     def test_array_access(self):
         jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
         self.assertEqual(jsi.call_function('f'), [5, 2, 7])
@@ -198,7 +201,6 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x'), 5)
 
-    @unittest.expectedFailure
     def test_finally(self):
         jsi = JSInterpreter('''
         function x() { try{throw 10} finally {return 42} }
@@ -212,7 +214,7 @@ class TestJSInterpreter(unittest.TestCase):
     def test_nested_try(self):
         jsi = JSInterpreter('''
         function x() {try {
-            try{throw 10} finally {throw 42} 
+            try{throw 10} finally {throw 42}
             } catch(e){return 5} }
         ''')
         self.assertEqual(jsi.call_function('x'), 5)
@@ -229,6 +231,14 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x'), 0)
 
+    def test_for_loop_try(self):
+        jsi = JSInterpreter('''
+        function x() {
+            for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
+            return 42 }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 42)
+
     def test_literal_list(self):
         jsi = JSInterpreter('''
         function x() { return [1, 2, "asdf", [5, 6, 7]][3] }
index a8456ec1c784c3e7d446d5a607c8255696f4b99c..08726e4785b575ebc07191afe36a39af01fbe87b 100644 (file)
@@ -5,7 +5,6 @@ import json
 import math
 import operator
 import re
-from collections import Counter
 
 from .utils import (
     error_to_compat_str,
@@ -15,6 +14,7 @@ from .utils import (
     unified_timestamp,
 )
 from .compat import (
+    compat_basestring,
     compat_collections_chain_map as ChainMap,
     compat_itertools_zip_longest as zip_longest,
     compat_str,
@@ -76,6 +76,10 @@ def _js_comp_op(op):
     def wrapped(a, b):
         if JS_Undefined in (a, b):
             return False
+        if isinstance(a, compat_basestring):
+            b = compat_str(b or 0)
+        elif isinstance(b, compat_basestring):
+            a = compat_str(a or 0)
         return op(a or 0, b or 0)
 
     return wrapped
@@ -195,7 +199,6 @@ class JSInterpreter(object):
         'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
     }
 
-    _EXC_NAME = '__youtube_dl_exception__'
     _OBJ_NAME = '__youtube_dl_jsinterp_obj'
 
     OP_CHARS = None
@@ -242,9 +245,8 @@ class JSInterpreter(object):
     def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
         if not expr:
             return
-        # collections.Counter() is ~10% slower
+        # collections.Counter() is ~10% slower in both 2.7 and 3.9
         counters = {k: 0 for k in _MATCHING_PARENS.values()}
-        # counters = Counter()
         start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
         in_quote, escaping, skipping = None, False, 0
         after_op, in_regex_char_group, skip_re = True, False, 0
@@ -291,7 +293,9 @@ class JSInterpreter(object):
         yield expr[start:]
 
     @classmethod
-    def _separate_at_paren(cls, expr, delim):
+    def _separate_at_paren(cls, expr, delim=None):
+        if delim is None:
+            delim = expr and _MATCHING_PARENS[expr[0]]
         separated = list(cls._separate(expr, delim, 1))
 
         if len(separated) < 2:
@@ -376,7 +380,7 @@ class JSInterpreter(object):
         if expr.startswith('new '):
             obj = expr[4:]
             if obj.startswith('Date('):
-                left, right = self._separate_at_paren(obj[4:], ')')
+                left, right = self._separate_at_paren(obj[4:])
                 expr = unified_timestamp(
                     self.interpret_expression(left, local_vars, allow_recursion), False)
                 if not expr:
@@ -390,7 +394,7 @@ class JSInterpreter(object):
             return None, should_return
 
         if expr.startswith('{'):
-            inner, outer = self._separate_at_paren(expr, '}')
+            inner, outer = self._separate_at_paren(expr)
             # try for object expression (Map)
             sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
             if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
@@ -406,7 +410,7 @@ class JSInterpreter(object):
                 expr = self._dump(inner, local_vars) + outer
 
         if expr.startswith('('):
-            inner, outer = self._separate_at_paren(expr, ')')
+            inner, outer = self._separate_at_paren(expr)
             inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
             if not outer or should_abort:
                 return inner, should_abort or should_return
@@ -414,57 +418,63 @@ class JSInterpreter(object):
                 expr = self._dump(inner, local_vars) + outer
 
         if expr.startswith('['):
-            inner, outer = self._separate_at_paren(expr, ']')
+            inner, outer = self._separate_at_paren(expr)
             name = self._named_object(local_vars, [
                 self.interpret_expression(item, local_vars, allow_recursion)
                 for item in self._separate(inner)])
             expr = name + outer
 
         m = re.match(r'''(?x)
-            (?P<try>try|finally)\s*|
-            (?P<catch>catch\s*(?P<err>\(\s*{_NAME_RE}\s*\)))|
-            (?P<switch>switch)\s*\(|
-            (?P<for>for)\s*\(|
-            '''.format(**globals()), expr)
+                (?P<try>try)\s*\{|
+                (?P<switch>switch)\s*\(|
+                (?P<for>for)\s*\(
+                ''', expr)
         md = m.groupdict() if m else {}
         if md.get('try'):
-            if expr[m.end()] == '{':
-                try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
-            else:
-                try_expr, expr = expr[m.end() - 1:], ''
+            try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+            err = None
             try:
                 ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
                 if should_abort:
                     return ret, True
-            except JS_Throw as e:
-                local_vars[self._EXC_NAME] = e.error
             except Exception as e:
                 # XXX: This works for now, but makes debugging future issues very hard
-                local_vars[self._EXC_NAME] = e
-            ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
-            return ret, should_abort or should_return
-
-        elif md.get('catch'):
-
-            catch_expr, expr = self._separate_at_paren(expr[m.end():], '}')
-            if self._EXC_NAME in local_vars:
-                catch_vars = local_vars.new_child({m.group('err'): local_vars.pop(self._EXC_NAME)})
-                ret, should_abort = self.interpret_statement(catch_expr, catch_vars, allow_recursion)
+                err = e
+
+            pending = (None, False)
+            m = re.match(r'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr)
+            if m:
+                sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+                if err:
+                    catch_vars = {}
+                    if m.group('err'):
+                        catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
+                    catch_vars = local_vars.new_child(m=catch_vars)
+                    err = None
+                    pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
+
+            m = re.match(r'finally\s*\{', expr)
+            if m:
+                sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+                ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
                 if should_abort:
                     return ret, True
 
-            ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
+            ret, should_abort = pending
+            if should_abort:
+                return ret, True
 
-            return ret, should_abort or should_return
+            if err:
+                raise err
 
         elif md.get('for'):
-            constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
+            constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
             if remaining.startswith('{'):
-                body, expr = self._separate_at_paren(remaining, '}')
+                body, expr = self._separate_at_paren(remaining)
             else:
                 switch_m = re.match(r'switch\s*\(', remaining)  # FIXME
                 if switch_m:
-                    switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:], ')')
+                    switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
                     body, expr = self._separate_at_paren(remaining, '}')
                     body = 'switch(%s){%s}' % (switch_val, body)
                 else:
@@ -483,11 +493,9 @@ class JSInterpreter(object):
                 except JS_Continue:
                     pass
                 self.interpret_expression(increment, local_vars, allow_recursion)
-            ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
-            return ret, should_abort or should_return
 
         elif md.get('switch'):
-            switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
+            switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
             switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
             body, expr = self._separate_at_paren(remaining, '}')
             items = body.replace('default:', 'case default:').split('case ')[1:]
@@ -510,6 +518,8 @@ class JSInterpreter(object):
                         break
                 if matched:
                     break
+
+        if md:
             ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
             return ret, should_abort or should_return
 
@@ -618,7 +628,7 @@ class JSInterpreter(object):
                 member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion)
             arg_str = expr[m.end():]
             if arg_str.startswith('('):
-                arg_str, remaining = self._separate_at_paren(arg_str, ')')
+                arg_str, remaining = self._separate_at_paren(arg_str)
             else:
                 arg_str, remaining = None, arg_str
 
@@ -795,7 +805,7 @@ class JSInterpreter(object):
                 \((?P<args>[^)]*)\)\s*
                 (?P<code>{.+})''' % {'name': re.escape(funcname)},
             self.code)
-        code, _ = self._separate_at_paren(func_m.group('code'), '}')  # refine the match
+        code, _ = self._separate_at_paren(func_m.group('code'))  # refine the match
         if func_m is None:
             raise self.Exception('Could not find JS function "{funcname}"'.format(**locals()))
         return self.build_arglist(func_m.group('args')), code
@@ -810,7 +820,7 @@ class JSInterpreter(object):
             if mobj is None:
                 break
             start, body_start = mobj.span()
-            body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
+            body, remaining = self._separate_at_paren(code[body_start - 1:])
             name = self._named_object(
                 local_vars,
                 self.extract_function_from_code(