youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 9933857f679392a2e3b2a8d53f89b61ca8e13a00
parent 65939effb55087f584ecd5d4b304eadbdef875d1
Author: Sergey M․ <dstftw@gmail.com>
Date:   Sat, 11 Apr 2015 20:27:53 +0600

Merge branch 'fstirlitz-crooksandliars'

Diffstat:
M youtube_dl/extractor/__init__.py | 1 +
A youtube_dl/extractor/crooksandliars.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M youtube_dl/extractor/generic.py | 32 ++++++++++++++++++++++++++++++++
3 files changed, 93 insertions(+), 0 deletions(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
@@ -90,6 +90,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
+from .crooksandliars import CrooksAndLiarsIE
 from .crunchyroll import (
     CrunchyrollIE,
     CrunchyrollShowPlaylistIE
diff --git a/youtube_dl/extractor/crooksandliars.py b/youtube_dl/extractor/crooksandliars.py
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    qualities,
+)
+
+
+class CrooksAndLiarsIE(InfoExtractor):
+    _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)'
+    _TESTS = [{
+        'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi',
+        'info_dict': {
+            'id': '8RUoRhRi',
+            'ext': 'mp4',
+            'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!',
+            'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+            'thumbnail': 're:^https?://.*\.jpg',
+            'timestamp': 1428207000,
+            'upload_date': '20150405',
+            'uploader': 'Heather',
+            'duration': 236,
+        }
+    }, {
+        'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
+
+        manifest = self._parse_json(
+            self._search_regex(
+                r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
+            video_id)
+
+        quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))
+
+        formats = [{
+            'url': item['url'],
+            'format_id': item['type'],
+            'quality': quality(item['type']),
+        } for item in manifest['flavors'] if item['mime'].startswith('video/')]
+        self._sort_formats(formats)
+
+        return {
+            'url': url,
+            'id': video_id,
+            'title': manifest['title'],
+            'description': manifest.get('description'),
+            'thumbnail': self._proto_relative_url(manifest.get('poster')),
+            'timestamp': int_or_none(manifest.get('created')),
+            'uploader': manifest.get('author'),
+            'duration': int_or_none(manifest.get('duration')),
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
@@ -642,6 +642,32 @@ class GenericIE(InfoExtractor):
                 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
             }
         },
+        # Crooks and Liars embed
+        {
+            'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
+            'info_dict': {
+                'id': '8RUoRhRi',
+                'ext': 'mp4',
+                'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
+                'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+                'timestamp': 1428207000,
+                'upload_date': '20150405',
+                'uploader': 'Heather',
+            },
+        },
+        # Crooks and Liars external embed
+        {
+            'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
+            'info_dict': {
+                'id': 'MTE3MjUtMzQ2MzA',
+                'ext': 'mp4',
+                'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
+                'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
+                'timestamp': 1265032391,
+                'upload_date': '20100201',
+                'uploader': 'Heather',
+            },
+        },
         # NBC Sports vplayer embed
         {
             'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
@@ -1275,6 +1301,12 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
 
+        # Look for Crooks and Liars embeds
+        mobj = re.search(
+            r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'))
+
         # Look for NBC Sports VPlayer embeds
         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
         if nbc_sports_url: