youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 3798eadccdb2d681ee071403c5185ca014fb842e
parent 2537186d43f631ef81536801d7124db1a8e56ad9
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Tue,  7 Jan 2014 10:04:48 +0100

More unicode literals

Diffstat:
M youtube_dl/extractor/academicearth.py | 3 ++-
M youtube_dl/extractor/appletrailers.py | 62 ++++++++++++++++++++++++++++++++------------------------------
M youtube_dl/extractor/archiveorg.py | 20 +++++++++++---------
M youtube_dl/extractor/arte.py | 46 ++++++++++++++++++++++++----------------------
M youtube_dl/extractor/auengine.py | 17 ++++++++++-------
M youtube_dl/extractor/bambuser.py | 30 ++++++++++++++++--------------
M youtube_dl/extractor/bandcamp.py | 60 +++++++++++++++++++++++++++++++-----------------------------
M youtube_dl/extractor/cnn.py | 20 +++++++++++---------
8 files changed, 137 insertions(+), 121 deletions(-)

diff --git a/youtube_dl/extractor/academicearth.py b/youtube_dl/extractor/academicearth.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import re from .common import InfoExtractor @@ -5,7 +6,7 @@ from .common import InfoExtractor class AcademicEarthCourseIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)' - IE_NAME = u'AcademicEarth:Course' + IE_NAME = 'AcademicEarth:Course' def _real_extract(self, url): m = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json @@ -11,46 +13,46 @@ from ..utils import ( class AppleTrailersIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' _TEST = { - u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/", - u"playlist": [ + "url": "http://trailers.apple.com/trailers/wb/manofsteel/", + "playlist": [ { - u"file": u"manofsteel-trailer4.mov", - u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8", - u"info_dict": { - u"duration": 111, - u"title": u"Trailer 4", - u"upload_date": u"20130523", - u"uploader_id": u"wb", + "file": "manofsteel-trailer4.mov", + "md5": "d97a8e575432dbcb81b7c3acb741f8a8", + "info_dict": { + "duration": 111, + "title": "Trailer 4", + "upload_date": "20130523", + "uploader_id": "wb", }, }, { - u"file": u"manofsteel-trailer3.mov", - u"md5": u"b8017b7131b721fb4e8d6f49e1df908c", - u"info_dict": { - u"duration": 182, - u"title": u"Trailer 3", - u"upload_date": u"20130417", - u"uploader_id": u"wb", + "file": "manofsteel-trailer3.mov", + "md5": "b8017b7131b721fb4e8d6f49e1df908c", + "info_dict": { + "duration": 182, + "title": "Trailer 3", + "upload_date": "20130417", + "uploader_id": "wb", }, }, { - u"file": u"manofsteel-trailer.mov", - u"md5": u"d0f1e1150989b9924679b441f3404d48", - u"info_dict": { - u"duration": 148, - u"title": 
u"Trailer", - u"upload_date": u"20121212", - u"uploader_id": u"wb", + "file": "manofsteel-trailer.mov", + "md5": "d0f1e1150989b9924679b441f3404d48", + "info_dict": { + "duration": 148, + "title": "Trailer", + "upload_date": "20121212", + "uploader_id": "wb", }, }, { - u"file": u"manofsteel-teaser.mov", - u"md5": u"5fe08795b943eb2e757fa95cb6def1cb", - u"info_dict": { - u"duration": 93, - u"title": u"Teaser", - u"upload_date": u"20120721", - u"uploader_id": u"wb", + "file": "manofsteel-teaser.mov", + "md5": "5fe08795b943eb2e757fa95cb6def1cb", + "info_dict": { + "duration": 93, + "title": "Teaser", + "upload_date": "20120721", + "uploader_id": "wb", }, } ] diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import json import re @@ -13,14 +15,14 @@ class ArchiveOrgIE(InfoExtractor): IE_DESC = 'archive.org videos' _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$' _TEST = { - u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect", - u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv', - u'md5': u'8af1d4cf447933ed3c7f4871162602db', - u'info_dict': { - u"title": u"1968 Demo - FJCC Conference Presentation Reel #1", - u"description": u"Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. 
See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>", - u"upload_date": u"19681210", - u"uploader": u"SRI International" + "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect", + 'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv', + 'md5': '8af1d4cf447933ed3c7f4871162602db', + 'info_dict': { + "title": "1968 Demo - FJCC Conference Presentation Reel #1", + "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. 
Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>", + "upload_date": "19681210", + "uploader": "SRI International" } } @@ -29,7 +31,7 @@ class ArchiveOrgIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - json_url = url + (u'?' if u'?' in url else '&') + u'output=json' + json_url = url + ('?' if '?' in url else '&') + 'output=json' json_data = self._download_webpage(json_url, video_id) data = json.loads(json_data) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py @@ -1,4 +1,6 @@ # encoding: utf-8 +from __future__ import unicode_literals + import re import json @@ -22,7 +24,7 @@ class ArteTvIE(InfoExtractor): _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)' _LIVE_URL = r'index-[0-9]+\.html$' - IE_NAME = u'arte.tv' + IE_NAME = 'arte.tv' @classmethod def suitable(cls, url): @@ -37,7 +39,7 @@ class ArteTvIE(InfoExtractor): # r'src="(.*?/videothek_js.*?\.js)', # 0, # [ - # (1, 'url', u'Invalid URL: %s' % url) + # (1, 'url', 'Invalid URL: %s' % url) # ] # ) # http_host = url.split('/')[2] @@ -49,12 +51,12 @@ class ArteTvIE(InfoExtractor): # '(rtmp://.*?)\'', # re.DOTALL, # [ - # (1, 'path', u'could not extract video path: %s' % url), - # (2, 'player', u'could not extract video player: %s' % url), - # (3, 'url', u'could not extract video url: %s' % url) + # (1, 'path', 'could not extract video path: %s' % url), + # (2, 'player', 'could not extract video player: %s' % url), + # (3, 'url', 'could not extract video url: %s' % url) # ] # ) - # video_url = u'%s/%s' % (info.get('url'), info.get('path')) + # video_url = '%s/%s' % (info.get('url'), info.get('path')) def _real_extract(self, url): mobj = re.match(self._VIDEOS_URL, url) @@ -107,9 +109,9 @@ class 
ArteTvIE(InfoExtractor): def _extract_liveweb(self, url, name, lang): """Extract form http://liveweb.arte.tv/""" webpage = self._download_webpage(url, name) - video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id') + video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id') config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, - video_id, u'Downloading information') + video_id, 'Downloading information') event_doc = config_doc.find('event') url_node = event_doc.find('video').find('urlHd') if url_node is None: @@ -124,7 +126,7 @@ class ArteTvIE(InfoExtractor): class ArteTVPlus7IE(InfoExtractor): - IE_NAME = u'arte.tv:+7' + IE_NAME = 'arte.tv:+7' _VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' @classmethod @@ -207,7 +209,7 @@ class ArteTVPlus7IE(InfoExtractor): if bitrate is not None: quality += '-%d' % bitrate if format_info.get('versionCode') is not None: - format_id = u'%s-%s' % (quality, format_info['versionCode']) + format_id = '%s-%s' % (quality, format_info['versionCode']) else: format_id = quality info = { @@ -216,7 +218,7 @@ class ArteTVPlus7IE(InfoExtractor): 'width': format_info.get('width'), 'height': height, } - if format_info['mediaType'] == u'rtmp': + if format_info['mediaType'] == 'rtmp': info['url'] = format_info['streamer'] info['play_path'] = 'mp4:' + format_info['url'] info['ext'] = 'flv' @@ -231,27 +233,27 @@ class ArteTVPlus7IE(InfoExtractor): # It also uses the arte_vp_url url from the webpage to extract the information class ArteTVCreativeIE(ArteTVPlus7IE): - IE_NAME = u'arte.tv:creative' + IE_NAME = 'arte.tv:creative' _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)' _TEST = { - u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', - u'file': u'050489-002.mp4', - u'info_dict': { - u'title': u'Agentur Amateur / Agence 
Amateur #2 : Corporate Design', + 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', + 'file': '050489-002.mp4', + 'info_dict': { + 'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design', }, } class ArteTVFutureIE(ArteTVPlus7IE): - IE_NAME = u'arte.tv:future' + IE_NAME = 'arte.tv:future' _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)' _TEST = { - u'url': u'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', - u'file': u'050940-003.mp4', - u'info_dict': { - u'title': u'Les champignons au secours de la planète', + 'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', + 'file': '050940-003.mp4', + 'info_dict': { + 'title': 'Les champignons au secours de la planète', }, } @@ -263,7 +265,7 @@ class ArteTVFutureIE(ArteTVPlus7IE): class ArteTVDDCIE(ArteTVPlus7IE): - IE_NAME = u'arte.tv:ddc' + IE_NAME = 'arte.tv:ddc' _VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)' def _real_extract(self, url): diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -7,13 +9,14 @@ from ..utils import ( ExtractorError, ) + class AUEngineIE(InfoExtractor): _TEST = { - u'url': u'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', - u'file': u'lfvlytY6.mp4', - u'md5': u'48972bdbcf1a3a2f5533e62425b41d4f', - u'info_dict': { - u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]" + 'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', + 'file': 'lfvlytY6.mp4', + 'md5': '48972bdbcf1a3a2f5533e62425b41d4f', + 'info_dict': { + 'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]' } } _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?' 
@@ -23,7 +26,7 @@ class AUEngineIE(InfoExtractor): video_id = mobj.group(1) webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', - webpage, u'title') + webpage, 'title') title = title.strip() links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) links = map(compat_urllib_parse.unquote, links) @@ -37,7 +40,7 @@ class AUEngineIE(InfoExtractor): video_url = link if not video_url: raise ExtractorError(u'Could not find video URL') - ext = u'.' + determine_ext(video_url) + ext = '.' + determine_ext(video_url) if ext == title[-len(ext):]: title = title[:-len(ext)] diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json import itertools @@ -9,26 +11,26 @@ from ..utils import ( class BambuserIE(InfoExtractor): - IE_NAME = u'bambuser' + IE_NAME = 'bambuser' _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)' _API_KEY = '005f64509e19a868399060af746a00aa' _TEST = { - u'url': u'http://bambuser.com/v/4050584', + 'url': 'http://bambuser.com/v/4050584', # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388 - #u'md5': u'fba8f7693e48fd4e8641b3fd5539a641', - u'info_dict': { - u'id': u'4050584', - u'ext': u'flv', - u'title': u'Education engineering days - lightning talks', - u'duration': 3741, - u'uploader': u'pixelversity', - u'uploader_id': u'344706', + #u'md5': 'fba8f7693e48fd4e8641b3fd5539a641', + 'info_dict': { + 'id': '4050584', + 'ext': 'flv', + 'title': 'Education engineering days - lightning talks', + 'duration': 3741, + 'uploader': 'pixelversity', + 'uploader_id': '344706', }, - u'params': { + 'params': { # It doesn't respect the 'Range' header, it would download the whole video # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59 - u'skip_download': True, + 'skip_download': True, }, } @@ -53,7 +55,7 @@ class 
BambuserIE(InfoExtractor): class BambuserChannelIE(InfoExtractor): - IE_NAME = u'bambuser:channel' + IE_NAME = 'bambuser:channel' _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)' # The maximum number we can get with each request _STEP = 50 @@ -72,7 +74,7 @@ class BambuserChannelIE(InfoExtractor): # Without setting this header, we wouldn't get any result req.add_header('Referer', 'http://bambuser.com/channel/%s' % user) info_json = self._download_webpage(req, user, - u'Downloading page %d' % i) + 'Downloading page %d' % i) results = json.loads(info_json)['result'] if len(results) == 0: break diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import json import re @@ -12,14 +14,14 @@ from ..utils import ( class BandcampIE(InfoExtractor): _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)' _TESTS = [{ - u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', - u'file': u'1812978515.mp3', - u'md5': u'c557841d5e50261777a6585648adf439', - u'info_dict': { - u"title": u"youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", - u"duration": 10, + 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', + 'file': '1812978515.mp3', + 'md5': 'c557841d5e50261777a6585648adf439', + 'info_dict': { + "title": "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", + "duration": 10, }, - u'skip': u'There is a limit of 200 free downloads / month for the test song' + '_skip': 'There is a limit of 200 free downloads / month for the test song' }] def _real_extract(self, url): @@ -58,7 +60,7 @@ class BandcampIE(InfoExtractor): 'duration': duration, } else: - raise ExtractorError(u'No free songs found') + raise ExtractorError('No free songs found') download_link = m_download.group(1) video_id = re.search( @@ -72,9 +74,9 @@ class BandcampIE(InfoExtractor): download_webpage, re.MULTILINE).group(1) info = 
json.loads(info)[0] # We pick mp3-320 for now, until format selection can be easily implemented. - mp3_info = info[u'downloads'][u'mp3-320'] + mp3_info = info['downloads']['mp3-320'] # If we try to use this url it says the link has expired - initial_url = mp3_info[u'url'] + initial_url = mp3_info['url'] re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$' m_url = re.match(re_url, initial_url) #We build the url we will use to get the final track url @@ -87,41 +89,41 @@ class BandcampIE(InfoExtractor): return { 'id': video_id, - 'title': info[u'title'], + 'title': info['title'], 'ext': 'mp3', 'vcodec': 'none', 'url': final_url, - 'thumbnail': info[u'thumb_url'], - 'uploader': info[u'artist'], + 'thumbnail': info['thumb_url'], + 'uploader': info['artist'], } class BandcampAlbumIE(InfoExtractor): - IE_NAME = u'Bandcamp:album' + IE_NAME = 'Bandcamp:album' _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)' _TEST = { - u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', - u'playlist': [ + 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', + 'playlist': [ { - u'file': u'1353101989.mp3', - u'md5': u'39bc1eded3476e927c724321ddf116cf', - u'info_dict': { - u'title': u'Intro', + 'file': '1353101989.mp3', + 'md5': '39bc1eded3476e927c724321ddf116cf', + 'info_dict': { + 'title': 'Intro', } }, { - u'file': u'38097443.mp3', - u'md5': u'1a2c32e2691474643e912cc6cd4bffaa', - u'info_dict': { - u'title': u'Kero One - Keep It Alive (Blazo remix)', + 'file': '38097443.mp3', + 'md5': '1a2c32e2691474643e912cc6cd4bffaa', + 'info_dict': { + 'title': 'Kero One - Keep It Alive (Blazo remix)', } }, ], - u'params': { - u'playlistend': 2 + 'params': { + 'playlistend': 2 }, - u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' + 'skip': 'Bancamp imposes download limits. 
See test_playlists:test_bandcamp_album for the playlist test' } def _real_extract(self, url): @@ -130,11 +132,11 @@ class BandcampAlbumIE(InfoExtractor): webpage = self._download_webpage(url, title) tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) if not tracks_paths: - raise ExtractorError(u'The page doesn\'t contain any tracks') + raise ExtractorError('The page doesn\'t contain any tracks') entries = [ self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) for t_path in tracks_paths] - title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title') + title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title') return { '_type': 'playlist', 'title': title, diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -12,14 +14,14 @@ class CNNIE(InfoExtractor): (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))''' _TESTS = [{ - u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', - u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4', - u'md5': u'3e6121ea48df7e2259fe73a0628605c4', - u'info_dict': { - u'title': u'Nadal wins 8th French Open title', - u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', - u'duration': 135, - u'upload_date': u'20130609', + 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', + 'file': 'sports_2013_06_09_nadal-1-on-1.cnn.mp4', + 'md5': '3e6121ea48df7e2259fe73a0628605c4', + 'info_dict': { + 'title': 'Nadal wins 8th French Open title', + 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', + 'duration': 135, + 'upload_date': '20130609', }, }, { @@ -36,7 +38,7 @@ class CNNIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) path = mobj.group('path') page_title = mobj.group('title') - info_url = 
u'http://cnn.com/video/data/3.0/%s/index.xml' % path + info_url = 'http://cnn.com/video/data/3.0/%s/index.xml' % path info = self._download_xml(info_url, page_title) formats = []