From 0de168f7ed2da440f6a1bcb614abd26ff73bb840 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Sergey=20M=E2=80=A4?=
Date: Sat, 9 Jul 2016 03:29:07 +0700
Subject: [PATCH] [extractor/generic] Detect schema.org/VideoObject embeds

---
 youtube_dl/extractor/generic.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 31527d1c6..62da9bbc0 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1313,6 +1313,23 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
         },
+        {
+            # TODO: find another test
+            # http://schema.org/VideoObject
+            # 'url': 'https://flipagram.com/f/nyvTSJMKId',
+            # 'md5': '888dcf08b7ea671381f00fab74692755',
+            # 'info_dict': {
+            #     'id': 'nyvTSJMKId',
+            #     'ext': 'mp4',
+            #     'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
+            #     'description': '#love for cats.',
+            #     'timestamp': 1461244995,
+            #     'upload_date': '20160421',
+            # },
+            # 'params': {
+            #     'force_generic_extractor': True,
+            # },
+        }
     ]
 
     def report_following_redirect(self, new_url):
@@ -2157,6 +2174,19 @@ class GenericIE(InfoExtractor):
         if embed_url:
             return self.url_result(embed_url)
 
+        # Looking for http://schema.org/VideoObject
+        json_ld = self._search_json_ld(
+            webpage, video_id, default=None, expected_type='VideoObject')
+        if json_ld and json_ld.get('url'):
+            info_dict.update({
+                'title': video_title or info_dict['title'],
+                'description': video_description,
+                'thumbnail': video_thumbnail,
+                'age_limit': age_limit
+            })
+            info_dict.update(json_ld)
+            return info_dict
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
-- 
2.22.2