youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 4e415288d73f3ec15a0b2854de79c7359d1ae6fe
parent fada438acf7220fbff6450800833585d0b0a1843
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Fri, 11 Jul 2014 13:21:32 +0200

[criterion] Simplify and modernize

Diffstat:
Myoutube_dl/extractor/criterion.py | 51+++++++++++++++++++++++++++------------------------
1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py @@ -1,40 +1,43 @@ # -*- coding: utf-8 -*- +from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import determine_ext + class CriterionIE(InfoExtractor): - _VALID_URL = r'https?://www\.criterion\.com/films/(\d*)-.+' + _VALID_URL = r'https?://www\.criterion\.com/films/(?P<id>[0-9]+)-.+' _TEST = { - u'url': u'http://www.criterion.com/films/184-le-samourai', - u'file': u'184.mp4', - u'md5': u'bc51beba55685509883a9a7830919ec3', - u'info_dict': { - u"title": u"Le Samouraï", - u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f', + 'url': 'http://www.criterion.com/films/184-le-samourai', + 'md5': 'bc51beba55685509883a9a7830919ec3', + 'info_dict': { + 'id': '184', + 'ext': 'mp4', + 'title': 'Le Samouraï', + 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;', - webpage, 'video url') - title = self._html_search_regex(r'<meta content="(.+?)" property="og:title" />', - webpage, 'video title') - description = self._html_search_regex(r'<meta name="description" content="(.+?)" />', - webpage, 'video description') - thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', - webpage, 'thumbnail url') + final_url = self._search_regex( + r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') + title = self._og_search_title(webpage) + description = self._html_search_regex( + r'<meta name="description" content="(.+?)" />', + webpage, 'video description') + thumbnail = self._search_regex( + r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', + webpage, 'thumbnail url') - return {'id': video_id, - 'url' : final_url, - 'title': title, - 'ext': determine_ext(final_url), - 'description': description, - 'thumbnail': thumbnail, - } + return { + 'id': video_id, + 'url': final_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + }