From 0646e34c7d511a02d8d93e840bceaa3521c2204e Mon Sep 17 00:00:00 2001
From: =?utf8?q?Sergey=20M=E2=80=A4?=
Date: Tue, 27 Jun 2017 22:25:34 +0700
Subject: [PATCH] [facebook] Add support for plugin video embeds and multiple embeds (closes #13493)

---
 youtube_dl/extractor/buzzfeed.py |  7 ++++---
 youtube_dl/extractor/facebook.py | 20 ++++++++++----------
 youtube_dl/extractor/generic.py  |  6 +++---
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/extractor/buzzfeed.py b/youtube_dl/extractor/buzzfeed.py
index 75fa92d7c..ec411091e 100644
--- a/youtube_dl/extractor/buzzfeed.py
+++ b/youtube_dl/extractor/buzzfeed.py
@@ -84,9 +84,10 @@ class BuzzFeedIE(InfoExtractor):
                 continue
             entries.append(self.url_result(video['url']))
 
-        facebook_url = FacebookIE._extract_url(webpage)
-        if facebook_url:
-            entries.append(self.url_result(facebook_url))
+        facebook_urls = FacebookIE._extract_urls(webpage)
+        entries.extend([
+            self.url_result(facebook_url)
+            for facebook_url in facebook_urls])
 
         return {
             '_type': 'playlist',
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index b69c1ede0..4b3f6cc86 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -203,19 +203,19 @@ class FacebookIE(InfoExtractor):
     }]
 
     @staticmethod
-    def _extract_url(webpage):
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
-        if mobj is not None:
-            return mobj.group('url')
-
+    def _extract_urls(webpage):
+        urls = []
+        for mobj in re.finditer(
+                r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
+                webpage):
+            urls.append(mobj.group('url'))
         # Facebook API embed
         # see https://developers.facebook.com/docs/plugins/embedded-video-player
-        mobj = re.search(r'''(?x)<div[^>]+
+        for mobj in re.finditer(r'''(?x)<div[^>]+
                 class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
-                data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage)
-        if mobj is not None:
-            return mobj.group('url')
+                data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage):
+            urls.append(mobj.group('url'))
+        return urls
 
     def _login(self):
         (useremail, password) = self._get_login_info()
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 8ef1a2980..760a7f9c2 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2222,9 +2222,9 @@ class GenericIE(InfoExtractor):
             return self.url_result(mobj.group('url'))
 
         # Look for embedded Facebook player
-        facebook_url = FacebookIE._extract_url(webpage)
-        if facebook_url is not None:
-            return self.url_result(facebook_url, 'Facebook')
+        facebook_urls = FacebookIE._extract_urls(webpage)
+        if facebook_urls:
+            return self.playlist_from_matches(facebook_urls, video_id, video_title)
 
         # Look for embedded VK player
         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
-- 
2.22.2