youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit e186a9ec0394400a6996f98d197d0d14937a60c9
parent 69677f3ee216b99ada7ac97ddd4d506245541b27
Author: Sergey M․ <dstftw@gmail.com>
Date:   Sat, 31 Dec 2016 22:04:29 +0700

[videa] Add support for videa embeds

Diffstat:
Myoutube_dl/extractor/generic.py | 15+++++++++++++++
Myoutube_dl/extractor/videa.py | 8++++++++
2 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py @@ -76,6 +76,7 @@ from .soundcloud import SoundcloudIE from .vbox7 import Vbox7IE from .dbtv import DBTVIE from .piksel import PikselIE +from .videa import VideaIE class GenericIE(InfoExtractor): @@ -1422,6 +1423,15 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 3, }, + { + # Videa embeds + 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html', + 'info_dict': { + 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style', + 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum', + }, + 'playlist_mincount': 2, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2358,6 +2368,11 @@ class GenericIE(InfoExtractor): if dbtv_urls: return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key()) + # Look for Videa embeds + videa_urls = VideaIE._extract_urls(webpage) + if videa_urls: + return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -43,6 +45,12 @@ class VideaIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [url for _, url in re.findall( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1', + webpage)] + def _real_extract(self, url): video_id = self._match_id(url)