youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 9271bc835546a8bd11c645018e9daabd54522855
parent 968ed2a7779fc7337fdcaa12da2b12e288e7eeb6
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Thu,  3 Apr 2014 16:21:21 +0200

[cnet] Add new extractor (Fixes #2679)

Diffstat:
M youtube_dl/extractor/__init__.py | 1 +
A youtube_dl/extractor/cnet.py | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M youtube_dl/utils.py | 8 ++++----
3 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -40,6 +40,7 @@ from .clipfish import ClipfishIE
 from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
 from .cmt import CMTIE
+from .cnet import CNETIE
 from .cnn import (
     CNNIE,
     CNNBlogsIE,
diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+)
+
+
+class CNETIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
+    _TEST = {
+        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
+        'md5': '041233212a0d06b179c87cbcca1577b8',
+        'info_dict': {
+            'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
+            'ext': 'mp4',
+            'title': 'Hands-on with Microsoft Windows 8.1 Update',
+            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
+            'thumbnail': 're:^http://.*/flmswindows8.jpg$',
+            'uploader_id': 'sarah.mitroff@cbsinteractive.com',
+            'uploader': 'Sarah Mitroff',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, display_id)
+        data_json = self._html_search_regex(
+            r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
+            webpage, 'data json')
+        data = json.loads(data_json)
+        vdata = data['video']
+
+        video_id = vdata['id']
+        title = vdata['headline']
+        description = vdata.get('dek')
+        thumbnail = vdata.get('image', {}).get('path')
+        author = vdata.get('author')
+        if author:
+            uploader = '%s %s' % (author['firstName'], author['lastName'])
+            uploader_id = author.get('email')
+        else:
+            uploader = None
+            uploader_id = None
+
+        formats = [{
+            'format_id': '%s-%s-%s' % (
+                f['type'], f['format'],
+                int_or_none(f.get('bitrate'), 1000, default='')),
+            'url': f['uri'],
+            'tbr': int_or_none(f.get('bitrate'), 1000),
+        } for f in vdata['files']['data']]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'thumbnail': thumbnail,
+        }
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -1176,12 +1176,12 @@ class HEADRequest(compat_urllib_request.Request):
         return "HEAD"
 
 
-def int_or_none(v, scale=1):
-    return v if v is None else (int(v) // scale)
+def int_or_none(v, scale=1, default=None):
+    return default if v is None else (int(v) // scale)
 
 
-def float_or_none(v, scale=1):
-    return v if v is None else (float(v) / scale)
+def float_or_none(v, scale=1, default=None):
+    return default if v is None else (float(v) / scale)
 
 
 def parse_duration(s):