From 6d6536acb27483087a8a4ff73f501eaca313ec6a Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 4 Mar 2016 10:25:16 +0100 Subject: [PATCH] [fivemin] improve extraction - skip m3u8 formats(404 error) - skip unavailable test - download embed page only when it's needed - update _VALID_URL regex(joystiq.com redirect to engadget.com) --- youtube_dl/extractor/fivemin.py | 51 +++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py index 2955965d9..67d50a386 100644 --- a/youtube_dl/extractor/fivemin.py +++ b/youtube_dl/extractor/fivemin.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import ( compat_urllib_parse, @@ -16,12 +18,7 @@ from ..utils import ( class FiveMinIE(InfoExtractor): IE_NAME = '5min' - _VALID_URL = r'''(?x) - (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=| - https?://(?:(?:massively|www)\.)?joystiq\.com/video/| - 5min:) - (?P<id>\d+) - ''' + _VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))' _TESTS = [ { @@ -45,6 +42,7 @@ class FiveMinIE(InfoExtractor): 'title': 'How to Make a Next-Level Fruit Salad', 'duration': 184, }, + 'skip': 'no longer available', }, ] _ERRORS = { @@ -91,20 +89,33 @@ class FiveMinIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + sid = mobj.group('sid') + + if mobj.group('query'): + qs = compat_parse_qs(mobj.group('query')) + if not qs.get('playList'): + raise ExtractorError('Invalid URL', expected=True) + video_id = qs['playList'][0] + if qs.get('sid'): + sid = qs['sid'][0] + embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id - embed_page = self._download_webpage(embed_url, video_id, - 'Downloading embed page') - sid = self._search_regex(r'sid=(\d+)', 
embed_page, 'sid') - query = compat_urllib_parse.urlencode({ - 'func': 'GetResults', - 'playlist': video_id, - 'sid': sid, - 'isPlayerSeed': 'true', - 'url': embed_url, - }) + if not sid: + embed_page = self._download_webpage(embed_url, video_id, + 'Downloading embed page') + sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid') + response = self._download_json( - 'https://syn.5min.com/handlers/SenseHandler.ashx?' + query, + 'https://syn.5min.com/handlers/SenseHandler.ashx?' + + compat_urllib_parse.urlencode({ + 'func': 'GetResults', + 'playlist': video_id, + 'sid': sid, + 'isPlayerSeed': 'true', + 'url': embed_url, + }), video_id) if not response['success']: raise ExtractorError( @@ -118,9 +129,7 @@ class FiveMinIE(InfoExtractor): parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs( compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0]) for rendition in info['Renditions']: - if rendition['RenditionType'] == 'm3u8': - formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls')) - elif rendition['RenditionType'] == 'aac': + if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8': continue else: rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType']))) -- 2.22.2