[Videa] Align with yt-dlp
authordirkf <fieldhouse@gmx.net>
Thu, 29 Feb 2024 14:12:37 +0000 (14:12 +0000)
committerdirkf <fieldhouse@gmx.net>
Fri, 8 Mar 2024 13:14:52 +0000 (13:14 +0000)
youtube_dl/extractor/videa.py

index 4589e78a1a8aadbc86e93674104828d786f31591..0689764a5da9a72d83fb4916006173a872d9050e 100644 (file)
@@ -6,22 +6,31 @@ import re
 import string
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_b64decode,
+    compat_ord,
+    compat_struct_pack,
+)
 from ..utils import (
     ExtractorError,
     int_or_none,
     mimetype2ext,
     parse_codecs,
+    parse_qs,
     update_url_query,
     urljoin,
     xpath_element,
     xpath_text,
 )
-from ..compat import (
-    compat_b64decode,
-    compat_ord,
-    compat_struct_pack,
-    compat_urlparse,
-)
+
+
+def compat_random_choices(population, *args, **kwargs):
+    # weights=None, *, cum_weights=None, k=1
+    # limited implementation needed here
+    weights = args[0] if args else kwargs.get('weights')
+    assert all(w is None for w in (weights, kwargs.get('cum_weights')))
+    k = kwargs.get('k', 1)
+    return ''.join(random.choice(population) for _ in range(k))
 
 
 class VideaIE(InfoExtractor):
@@ -35,6 +44,7 @@ class VideaIE(InfoExtractor):
                         )
                         (?P<id>[^?#&]+)
                     '''
+    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1']
     _TESTS = [{
         'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
         'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
@@ -80,11 +90,14 @@ class VideaIE(InfoExtractor):
     }]
     _STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
 
-    @staticmethod
-    def _extract_urls(webpage):
-        return [url for _, url in re.findall(
-            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
-            webpage)]
+    @classmethod
+    def _extract_urls(cls, webpage):
+        def yield_urls():
+            for pattern in cls._EMBED_REGEX:
+                for m in re.finditer(pattern, webpage):
+                    yield m.group('url')
+
+        return list(yield_urls())
 
     @staticmethod
     def rc4(cipher_text, key):
@@ -130,8 +143,8 @@ class VideaIE(InfoExtractor):
         for i in range(0, 32):
             result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
 
-        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
-        random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
+        query = parse_qs(player_url)
+        random_seed = ''.join(compat_random_choices(string.ascii_letters + string.digits, k=8))
         query['_s'] = random_seed
         query['_t'] = result[:16]