[facebook] Improve Facebook embedded detection
author: Yen Chi Hsuan <yan12125@gmail.com>
Sat, 2 Jul 2016 13:33:23 +0000 (21:33 +0800)
committer: Yen Chi Hsuan <yan12125@gmail.com>
Sat, 2 Jul 2016 13:58:07 +0000 (21:58 +0800)
Related to #9938.

Another example comes from 9834872bf63b4e03b66c5e3b8f306556e735d8c5.

youtube_dl/extractor/facebook.py
youtube_dl/extractor/generic.py

index 9b87b37ae54da724c360e85429de804f29413bc6..6eaa22d894f4d8c2e8415cef17006f590cba7644 100644 (file)
@@ -129,6 +129,21 @@ class FacebookIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
+        if mobj is not None:
+            return mobj.group('url')
+
+        # Facebook API embed
+        # see https://developers.facebook.com/docs/plugins/embedded-video-player
+        mobj = re.search(r'''(?x)<div[^>]+
+                class=(?P<q1>[\'"])[^\'"]*\bfb-video\b[^\'"]*(?P=q1)[^>]+
+                data-href=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)''', webpage)
+        if mobj is not None:
+            return mobj.group('url')
+
     def _login(self):
         (useremail, password) = self._get_login_info()
         if useremail is None:
index 9315b9e21ff11ccaf6717638502ba0b303e966ac..7212e0edd04a00138c6f09c78b0720cad6168b05 100644 (file)
@@ -66,6 +66,7 @@ from .theplatform import ThePlatformIE
 from .vessel import VesselIE
 from .kaltura import KalturaIE
 from .eagleplatform import EaglePlatformIE
+from .facebook import FacebookIE
 
 
 class GenericIE(InfoExtractor):
@@ -1260,6 +1261,24 @@ class GenericIE(InfoExtractor):
                 'uploader': 'TheAtlantic',
             },
             'add_ie': ['BrightcoveLegacy'],
+        },
+        # Facebook <iframe> embed
+        {
+            'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
+            'info_dict': {
+                'id': '599637780109885',
+                'ext': 'mp4',
+                'title': 'Facebook video #599637780109885',
+            },
+        },
+        # Facebook API embed
+        {
+            'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
+            'info_dict': {
+                'id': '10153467542406923',
+                'ext': 'mp4',
+                'title': 'Facebook video #10153467542406923',
+            },
         }
     ]
 
@@ -1759,10 +1778,9 @@ class GenericIE(InfoExtractor):
             return self.url_result(mobj.group('url'))
 
         # Look for embedded Facebook player
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
-        if mobj is not None:
-            return self.url_result(mobj.group('url'), 'Facebook')
+        facebook_url = FacebookIE._extract_url(webpage)
+        if facebook_url is not None:
+            return self.url_result(facebook_url, 'Facebook')
 
         # Look for embedded VK player
         mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)