+from __future__ import unicode_literals
+
from .common import FileDownloader
from .hls import HlsFD
from .http import HttpFD
def get_suitable_downloader(info_dict):
"""Get the downloader class that can handle the info dict."""
url = info_dict['url']
+ protocol = info_dict.get('protocol')
if url.startswith('rtmp'):
return RtmpFD
- if determine_ext(url) == u'm3u8':
+ if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
return HlsFD
if url.startswith('mms') or url.startswith('rtsp'):
return MplayerFD
from .hark import HarkIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
+from .huffpost import HuffPostIE
from .hypem import HypemIE
from .ign import IGNIE, OneUPIE
from .imdb import (
* player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual
download, lower-case.
- "http", "https", "rtsp", "rtmp" or so.
+ "http", "https", "rtsp", "rtmp", "m3u8" or so.
* preference Order number of this format. If this field is
present and not None, the formats get sorted
by this field.
# Look for embedded Facebook player
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https://www.facebook.com/video/embed.+?)\1', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Facebook')
+ # Look for embedded Huffington Post player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'HuffPost')
+
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ unified_strdate,
+)
+
+
class HuffPostIE(InfoExtractor):
    """Information extractor for Huffington Post Live segments.

    Matches both the regular segment pages and the embeddable player URLs
    and reads the segment metadata from the site's segments JSON API.
    """

    IE_DESC = 'Huffington Post'
    _VALID_URL = r'''(?x)
        https?://(?:embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    # NOTE(review): the 'TODO' values below are unfilled placeholders and
    # must be replaced with the real md5/title/description before this test
    # can pass.
    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': 'TODO',
        'info_dict': {
            'title': 'TODO',
            'description': 'TODO',
            'duration': 1549,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the segment addressed by *url*.

        The title and the ``sources.live`` mapping are required and raise
        ``KeyError`` when absent.  Duration, upload date, description and
        thumbnails are optional: they are filled in only when the API
        response provides them, so missing metadata does not abort the
        extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']

        # Optional metadata: only hand real values to the parsing helpers.
        running_time = data.get('running_time')
        duration = (
            parse_duration(running_time) if running_time is not None else None)

        started_at = (data.get('schedule') or {}).get('started_at')
        upload_date = unified_strdate(started_at) if started_at else None

        # Keep only images whose file name carries a "<width>x<height>"
        # marker; entries without a URL are skipped instead of crashing
        # re.match().
        thumbnails = []
        for thumb_url in (data.get('images') or {}).values():
            if not thumb_url:
                continue
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', thumb_url)
            if not m:
                continue
            thumbnails.append({
                'url': thumb_url,
                'resolution': m.group(1),
            })

        # One format per entry of the "live" sources mapping; keys that
        # start with "audio/" are flagged as having no video stream.
        formats = [{
            'format': key,
            'format_id': key.replace('/', '.'),
            'ext': 'mp4',
            'url': source_url,
            'vcodec': 'none' if key.startswith('audio/') else None,
        } for key, source_url in data['sources']['live'].items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            # presumably the API exposes the text under 'description' --
            # TODO confirm; .get() keeps this safe if it does not.
            'description': data.get('description'),
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }