[aparat] Fix extraction
author Remita Amine <remitamine@gmail.com>
Mon, 28 Dec 2020 17:19:30 +0000 (18:19 +0100)
committer Remita Amine <remitamine@gmail.com>
Mon, 28 Dec 2020 17:19:30 +0000 (18:19 +0100)
closes #22285
closes #22611
closes #23348
closes #24354
closes #24591
closes #24904
closes #25418
closes #26070
closes #26350
closes #26738
closes #27563

youtube_dl/extractor/aparat.py

index 883dcee7aa4cae953fff16dbca8cbc5fbf07e64e..a9527e78550297e416baa9afa11ee01c767daf3c 100644 (file)
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
+    get_element_by_id,
     int_or_none,
     merge_dicts,
     mimetype2ext,
@@ -39,23 +40,15 @@ class AparatIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id, fatal=False)
 
         if not webpage:
-            # Note: There is an easier-to-parse configuration at
-            # http://www.aparat.com/video/video/config/videohash/%video_id
-            # but the URL in there does not work
             webpage = self._download_webpage(
                 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
                 video_id)
 
-        options = self._parse_json(
-            self._search_regex(
-                r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
-                webpage, 'options', group='value'),
-            video_id)
-
-        player = options['plugins']['sabaPlayerPlugin']
+        options = self._parse_json(self._search_regex(
+            r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id)
 
         formats = []
-        for sources in player['multiSRC']:
+        for sources in (options.get('multiSRC') or []):
             for item in sources:
                 if not isinstance(item, dict):
                     continue
@@ -85,11 +78,12 @@ class AparatIE(InfoExtractor):
         info = self._search_json_ld(webpage, video_id, default={})
 
         if not info.get('title'):
-            info['title'] = player['title']
+            info['title'] = get_element_by_id('videoTitle', webpage) or \
+                self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True)
 
         return merge_dicts(info, {
             'id': video_id,
             'thumbnail': url_or_none(options.get('poster')),
-            'duration': int_or_none(player.get('duration')),
+            'duration': int_or_none(options.get('duration')),
             'formats': formats,
         })