[extractor/common] keep support for non-standard JSON-LD VideoObject author values
author Remita Amine <remitamine@gmail.com>
Sun, 4 Apr 2021 18:16:17 +0000 (19:16 +0100)
committer Remita Amine <remitamine@gmail.com>
Sun, 4 Apr 2021 18:16:17 +0000 (19:16 +0100)
youtube_dl/extractor/common.py

index 8ef22779abdfdee0a2027e11f0041b4489d9955c..78ff5b6d08cea63c2772025d8ea5020340545c4b 100644 (file)
@@ -70,7 +70,6 @@ from ..utils import (
     str_or_none,
     str_to_int,
     strip_or_none,
-    try_get,
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
@@ -1276,6 +1275,7 @@ class InfoExtractor(object):
 
         def extract_video_object(e):
             assert e['@type'] == 'VideoObject'
+            author = e.get('author')
             info.update({
                 'url': url_or_none(e.get('contentUrl')),
                 'title': unescapeHTML(e.get('name')),
@@ -1283,7 +1283,11 @@ class InfoExtractor(object):
                 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
                 'duration': parse_duration(e.get('duration')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
-                'uploader': try_get(e, lambda x: x['author']['name'], compat_str),
+                # author can be an instance of the 'Organization' or 'Person' types;
+                # both types have a 'name' property (inherited from the 'Thing' type). [1]
+                # However, some websites use the plain 'Text' type instead.
+                # 1. https://schema.org/VideoObject
+                'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
                 'filesize': float_or_none(e.get('contentSize')),
                 'tbr': int_or_none(e.get('bitrate')),
                 'width': int_or_none(e.get('width')),