youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit e5d39886ec8e4e40b2b7257d16cc5d8505cc1f69
parent 751c89a27d68c54375e96789cc90d4c8a3ce3dbc
Author: Sergey M․ <dstftw@gmail.com>
Date:   Mon, 17 Apr 2017 00:23:16 +0700

[limelight] Improve embeds extraction (closes #12761)
* Move extraction code to extractor
* Add extraction for LimelightEmbeddedPlayerFlash embeds
* Extract multiple videos

Diffstat:
M youtube_dl/extractor/generic.py   |  6 ++++++
M youtube_dl/extractor/limelight.py | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+), 0 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py @@ -85,6 +85,7 @@ from .ustream import UstreamIE from .openload import OpenloadIE from .videopress import VideoPressIE from .rutube import RutubeIE +from .limelight import LimelightBaseIE class GenericIE(InfoExtractor): @@ -2483,6 +2484,11 @@ class GenericIE(InfoExtractor): return self.url_result(piksel_url, PikselIE.ie_key()) # Look for Limelight embeds + limelight_urls = LimelightBaseIE._extract_urls(webpage, url) + if limelight_urls: + return self.playlist_result( + limelight_urls, video_id, video_title, video_description) + mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage) if mobj: lm = { diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py @@ -9,6 +9,7 @@ from ..utils import ( determine_ext, float_or_none, int_or_none, + smuggle_url, unsmuggle_url, ExtractorError, ) @@ -18,6 +19,42 @@ class LimelightBaseIE(InfoExtractor): _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s' _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json' + @classmethod + def _extract_urls(cls, webpage, source_url): + lm = { + 'Media': 'media', + 'Channel': 'channel', + 'ChannelList': 'channel_list', + } + entries = [] + for kind, video_id in re.findall( + r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', + webpage): + print('video_id', video_id) + entries.append(cls.url_result( + smuggle_url( + 'limelight:%s:%s' % (lm[kind], video_id), + {'source_url': source_url}), + 'Limelight%s' % kind, video_id)) + for mobj in re.finditer( + # As per [1] class attribute should be exactly equal to + # LimelightEmbeddedPlayerFlash but numerous examples seen + # that don't exactly match it (e.g. [2]). + # 1. 
http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage + # 2. http://www.sedona.com/FacilitatorTraining2017 + r'''(?sx) + <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*? + <param[^>]+ + name=(["\'])flashVars\2[^>]+ + value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32}) + ''', webpage): + entries.append(cls.url_result( + smuggle_url( + 'limelight:media:%s' % mobj.group('id'), + {'source_url': source_url}), + 'LimelightMedia', mobj.group('id'))) + return entries + def _call_playlist_service(self, item_id, method, fatal=True, referer=None): headers = {} if referer: