youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 9834872bf63b4e03b66c5e3b8f306556e735d8c5
parent 94a23d2a1ed94af8bb80898194f03c38a5dcdb1d
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Tue, 21 Jan 2014 18:10:14 +0100

[facebook] Add support for embeds

Example URL: http://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html

Diffstat:
M youtube_dl/extractor/facebook.py | 9 +++++++--
M youtube_dl/extractor/generic.py | 6 ++++++
2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
@@ -17,7 +17,12 @@ from ..utils import (
 class FacebookIE(InfoExtractor):
     """Information Extractor for Facebook"""
 
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:[^#?]*#!/)?(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
+    _VALID_URL = r'''(?x)
+        (?:https?://)?(?:\w+\.)?facebook\.com/
+        (?:[^#?]*\#!/)?
+        (?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
+        (?:v|video_id)=(?P<id>[0-9]+)
+        (?:.*)'''
     _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
     _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
     _NETRC_MACHINE = 'facebook'
@@ -90,7 +95,7 @@ class FacebookIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
-        video_id = mobj.group('ID')
+        video_id = mobj.group('id')
 
         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
         webpage = self._download_webpage(url, video_id)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
@@ -319,6 +319,12 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'Novamov')
 
+        # Look for embedded Facebook player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https://www.facebook.com/video/embed.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Facebook')
+
         # Start with something easy: JW Player in SWFObject
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if mobj is None: