youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit b0f001a6cbd220c8b10c0ce359f17072d6347a8f
parent dd67702a3ea007369109ee8e4b67043064e1f759
Author: remitamine <remitamine@gmail.com>
Date:   Mon, 21 Sep 2015 15:52:36 +0100

[canalc2] fix info extraction

Diffstat:
M youtube_dl/extractor/canalc2.py | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py
@@ -8,34 +8,40 @@ from .common import InfoExtractor
 
 class Canalc2IE(InfoExtractor):
     IE_NAME = 'canalc2.tv'
-    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
+    _VALID_URL = r'https?://(www\.)?canalc2\.tv/video/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+        'url': 'http://www.canalc2.tv/video/12163',
         'md5': '060158428b650f896c542dfbb3d6487f',
         'info_dict': {
             'id': '12163',
             'ext': 'mp4',
             'title': 'Terrasses du Numérique'
+        },
+        'params': {
+            'skip_download': True,  # Requires rtmpdump
         }
     }
 
     def _real_extract(self, url):
-        video_id = re.match(self._VALID_URL, url).group('id')
-        # We need to set the voir field for getting the file name
-        url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        file_name = self._search_regex(
-            r"so\.addVariable\('file','(.*?)'\);",
-            webpage, 'file name')
-        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
+        video_url = self._search_regex(
+            r'jwplayer\("Player"\).setup\({[^}]*file: "([^"]+)"',
+            webpage, 'video_url')
+        formats = [{'url': video_url}]
+        if video_url.startswith('rtmp://'):
+            rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<play_path>mp4:.+)$', video_url)
+            formats[0].update({
+                'app': rtmp.group('app'),
+                'play_path': rtmp.group('play_path'),
+            })
 
         title = self._html_search_regex(
-            r'class="evenement8">(.*?)</a>', webpage, 'title')
+            r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.*?)</h3>', webpage, 'title')
 
         return {
             'id': video_id,
-            'ext': 'mp4',
-            'url': video_url,
+            'formats': formats,
             'title': title,
         }