+from __future__ import unicode_literals
import re
from .common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
- IE_NAME = u'AcademicEarth:Course'
+ IE_NAME = 'AcademicEarth:Course'
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
+from __future__ import unicode_literals
+
import re
import json
class AppleTrailersIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TEST = {
- u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
- u"playlist": [
+ "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
+ "playlist": [
{
- u"file": u"manofsteel-trailer4.mov",
- u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8",
- u"info_dict": {
- u"duration": 111,
- u"title": u"Trailer 4",
- u"upload_date": u"20130523",
- u"uploader_id": u"wb",
+ "file": "manofsteel-trailer4.mov",
+ "md5": "d97a8e575432dbcb81b7c3acb741f8a8",
+ "info_dict": {
+ "duration": 111,
+ "title": "Trailer 4",
+ "upload_date": "20130523",
+ "uploader_id": "wb",
},
},
{
- u"file": u"manofsteel-trailer3.mov",
- u"md5": u"b8017b7131b721fb4e8d6f49e1df908c",
- u"info_dict": {
- u"duration": 182,
- u"title": u"Trailer 3",
- u"upload_date": u"20130417",
- u"uploader_id": u"wb",
+ "file": "manofsteel-trailer3.mov",
+ "md5": "b8017b7131b721fb4e8d6f49e1df908c",
+ "info_dict": {
+ "duration": 182,
+ "title": "Trailer 3",
+ "upload_date": "20130417",
+ "uploader_id": "wb",
},
},
{
- u"file": u"manofsteel-trailer.mov",
- u"md5": u"d0f1e1150989b9924679b441f3404d48",
- u"info_dict": {
- u"duration": 148,
- u"title": u"Trailer",
- u"upload_date": u"20121212",
- u"uploader_id": u"wb",
+ "file": "manofsteel-trailer.mov",
+ "md5": "d0f1e1150989b9924679b441f3404d48",
+ "info_dict": {
+ "duration": 148,
+ "title": "Trailer",
+ "upload_date": "20121212",
+ "uploader_id": "wb",
},
},
{
- u"file": u"manofsteel-teaser.mov",
- u"md5": u"5fe08795b943eb2e757fa95cb6def1cb",
- u"info_dict": {
- u"duration": 93,
- u"title": u"Teaser",
- u"upload_date": u"20120721",
- u"uploader_id": u"wb",
+ "file": "manofsteel-teaser.mov",
+ "md5": "5fe08795b943eb2e757fa95cb6def1cb",
+ "info_dict": {
+ "duration": 93,
+ "title": "Teaser",
+ "upload_date": "20120721",
+ "uploader_id": "wb",
},
}
]
+from __future__ import unicode_literals
+
import json
import re
IE_DESC = 'archive.org videos'
_VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
_TEST = {
- u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
- u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
- u'md5': u'8af1d4cf447933ed3c7f4871162602db',
- u'info_dict': {
- u"title": u"1968 Demo - FJCC Conference Presentation Reel #1",
- u"description": u"Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
- u"upload_date": u"19681210",
- u"uploader": u"SRI International"
+ "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
+ 'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
+ 'md5': '8af1d4cf447933ed3c7f4871162602db',
+ 'info_dict': {
+ "title": "1968 Demo - FJCC Conference Presentation Reel #1",
+ "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
+ "upload_date": "19681210",
+ "uploader": "SRI International"
}
}
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- json_url = url + (u'?' if u'?' in url else '&') + u'output=json'
+ json_url = url + ('?' if '?' in url else '&') + 'output=json'
json_data = self._download_webpage(json_url, video_id)
data = json.loads(json_data)
# encoding: utf-8
+from __future__ import unicode_literals
+
import re
import json
_LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
_LIVE_URL = r'index-[0-9]+\.html$'
- IE_NAME = u'arte.tv'
+ IE_NAME = 'arte.tv'
@classmethod
def suitable(cls, url):
# r'src="(.*?/videothek_js.*?\.js)',
# 0,
# [
- # (1, 'url', u'Invalid URL: %s' % url)
+ # (1, 'url', 'Invalid URL: %s' % url)
# ]
# )
# http_host = url.split('/')[2]
# '(rtmp://.*?)\'',
# re.DOTALL,
# [
- # (1, 'path', u'could not extract video path: %s' % url),
- # (2, 'player', u'could not extract video player: %s' % url),
- # (3, 'url', u'could not extract video url: %s' % url)
+ # (1, 'path', 'could not extract video path: %s' % url),
+ # (2, 'player', 'could not extract video player: %s' % url),
+ # (3, 'url', 'could not extract video url: %s' % url)
# ]
# )
- # video_url = u'%s/%s' % (info.get('url'), info.get('path'))
+ # video_url = '%s/%s' % (info.get('url'), info.get('path'))
def _real_extract(self, url):
mobj = re.match(self._VIDEOS_URL, url)
def _extract_liveweb(self, url, name, lang):
"""Extract from http://liveweb.arte.tv/"""
webpage = self._download_webpage(url, name)
- video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
+ video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id')
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
- video_id, u'Downloading information')
+ video_id, 'Downloading information')
event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd')
if url_node is None:
class ArteTVPlus7IE(InfoExtractor):
- IE_NAME = u'arte.tv:+7'
+ IE_NAME = 'arte.tv:+7'
_VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
@classmethod
if bitrate is not None:
quality += '-%d' % bitrate
if format_info.get('versionCode') is not None:
- format_id = u'%s-%s' % (quality, format_info['versionCode'])
+ format_id = '%s-%s' % (quality, format_info['versionCode'])
else:
format_id = quality
info = {
'width': format_info.get('width'),
'height': height,
}
- if format_info['mediaType'] == u'rtmp':
+ if format_info['mediaType'] == 'rtmp':
info['url'] = format_info['streamer']
info['play_path'] = 'mp4:' + format_info['url']
info['ext'] = 'flv'
# It also uses the arte_vp_url url from the webpage to extract the information
class ArteTVCreativeIE(ArteTVPlus7IE):
- IE_NAME = u'arte.tv:creative'
+ IE_NAME = 'arte.tv:creative'
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
_TEST = {
- u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
- u'file': u'050489-002.mp4',
- u'info_dict': {
- u'title': u'Agentur Amateur / Agence Amateur #2 : Corporate Design',
+ 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
+ 'file': '050489-002.mp4',
+ 'info_dict': {
+ 'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
},
}
class ArteTVFutureIE(ArteTVPlus7IE):
- IE_NAME = u'arte.tv:future'
+ IE_NAME = 'arte.tv:future'
_VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)'
_TEST = {
- u'url': u'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
- u'file': u'050940-003.mp4',
- u'info_dict': {
- u'title': u'Les champignons au secours de la planète',
+ 'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
+ 'file': '050940-003.mp4',
+ 'info_dict': {
+ 'title': 'Les champignons au secours de la planète',
},
}
class ArteTVDDCIE(ArteTVPlus7IE):
- IE_NAME = u'arte.tv:ddc'
+ IE_NAME = 'arte.tv:ddc'
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
def _real_extract(self, url):
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
ExtractorError,
)
+
class AUEngineIE(InfoExtractor):
_TEST = {
- u'url': u'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
- u'file': u'lfvlytY6.mp4',
- u'md5': u'48972bdbcf1a3a2f5533e62425b41d4f',
- u'info_dict': {
- u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]"
+ 'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
+ 'file': 'lfvlytY6.mp4',
+ 'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
+ 'info_dict': {
+ 'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
}
}
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
- webpage, u'title')
+ webpage, 'title')
title = title.strip()
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
links = map(compat_urllib_parse.unquote, links)
video_url = link
if not video_url:
# video_url is empty at this point, so there is nothing to download.
raise ExtractorError('Could not find video URL')
- ext = u'.' + determine_ext(video_url)
+ ext = '.' + determine_ext(video_url)
if ext == title[-len(ext):]:
title = title[:-len(ext)]
+from __future__ import unicode_literals
+
import re
import json
import itertools
class BambuserIE(InfoExtractor):
- IE_NAME = u'bambuser'
+ IE_NAME = 'bambuser'
_VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
_API_KEY = '005f64509e19a868399060af746a00aa'
_TEST = {
- u'url': u'http://bambuser.com/v/4050584',
+ 'url': 'http://bambuser.com/v/4050584',
# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
- #u'md5': u'fba8f7693e48fd4e8641b3fd5539a641',
- u'info_dict': {
- u'id': u'4050584',
- u'ext': u'flv',
- u'title': u'Education engineering days - lightning talks',
- u'duration': 3741,
- u'uploader': u'pixelversity',
- u'uploader_id': u'344706',
+ #'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
+ 'info_dict': {
+ 'id': '4050584',
+ 'ext': 'flv',
+ 'title': 'Education engineering days - lightning talks',
+ 'duration': 3741,
+ 'uploader': 'pixelversity',
+ 'uploader_id': '344706',
},
- u'params': {
+ 'params': {
# The server does not respect the 'Range' header, so the whole video would be downloaded;
# this caused the Travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
- u'skip_download': True,
+ 'skip_download': True,
},
}
class BambuserChannelIE(InfoExtractor):
- IE_NAME = u'bambuser:channel'
+ IE_NAME = 'bambuser:channel'
_VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
# The maximum number we can get with each request
_STEP = 50
# Without setting this header, we wouldn't get any result
req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
info_json = self._download_webpage(req, user,
- u'Downloading page %d' % i)
+ 'Downloading page %d' % i)
results = json.loads(info_json)['result']
if len(results) == 0:
break
+from __future__ import unicode_literals
+
import json
import re
class BandcampIE(InfoExtractor):
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
_TESTS = [{
- u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
- u'file': u'1812978515.mp3',
- u'md5': u'c557841d5e50261777a6585648adf439',
- u'info_dict': {
- u"title": u"youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
- u"duration": 10,
+ 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
+ 'file': '1812978515.mp3',
+ 'md5': 'c557841d5e50261777a6585648adf439',
+ 'info_dict': {
+ "title": "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
+ "duration": 10,
},
- u'skip': u'There is a limit of 200 free downloads / month for the test song'
+ '_skip': 'There is a limit of 200 free downloads / month for the test song'
}]
def _real_extract(self, url):
'duration': duration,
}
else:
- raise ExtractorError(u'No free songs found')
+ raise ExtractorError('No free songs found')
download_link = m_download.group(1)
video_id = re.search(
download_webpage, re.MULTILINE).group(1)
info = json.loads(info)[0]
# We pick mp3-320 for now, until format selection can be easily implemented.
- mp3_info = info[u'downloads'][u'mp3-320']
+ mp3_info = info['downloads']['mp3-320']
# If we try to use this url it says the link has expired
- initial_url = mp3_info[u'url']
+ initial_url = mp3_info['url']
re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
m_url = re.match(re_url, initial_url)
# Build the URL that is used to request the final track URL
return {
'id': video_id,
- 'title': info[u'title'],
+ 'title': info['title'],
'ext': 'mp3',
'vcodec': 'none',
'url': final_url,
- 'thumbnail': info[u'thumb_url'],
- 'uploader': info[u'artist'],
+ 'thumbnail': info['thumb_url'],
+ 'uploader': info['artist'],
}
class BandcampAlbumIE(InfoExtractor):
- IE_NAME = u'Bandcamp:album'
+ IE_NAME = 'Bandcamp:album'
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
_TEST = {
- u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
- u'playlist': [
+ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
+ 'playlist': [
{
- u'file': u'1353101989.mp3',
- u'md5': u'39bc1eded3476e927c724321ddf116cf',
- u'info_dict': {
- u'title': u'Intro',
+ 'file': '1353101989.mp3',
+ 'md5': '39bc1eded3476e927c724321ddf116cf',
+ 'info_dict': {
+ 'title': 'Intro',
}
},
{
- u'file': u'38097443.mp3',
- u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
- u'info_dict': {
- u'title': u'Kero One - Keep It Alive (Blazo remix)',
+ 'file': '38097443.mp3',
+ 'md5': '1a2c32e2691474643e912cc6cd4bffaa',
+ 'info_dict': {
+ 'title': 'Kero One - Keep It Alive (Blazo remix)',
}
},
],
- u'params': {
- u'playlistend': 2
+ 'params': {
+ 'playlistend': 2
},
- u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
+ 'skip': 'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
}
def _real_extract(self, url):
webpage = self._download_webpage(url, title)
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
if not tracks_paths:
- raise ExtractorError(u'The page doesn\'t contain any tracks')
+ raise ExtractorError('The page doesn\'t contain any tracks')
entries = [
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
for t_path in tracks_paths]
- title = self._search_regex(r'album_title : "(.*?)"', webpage, u'title')
+ title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title')
return {
'_type': 'playlist',
'title': title,
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
_TESTS = [{
- u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
- u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
- u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
- u'info_dict': {
- u'title': u'Nadal wins 8th French Open title',
- u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
- u'duration': 135,
- u'upload_date': u'20130609',
+ 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
+ 'file': 'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
+ 'md5': '3e6121ea48df7e2259fe73a0628605c4',
+ 'info_dict': {
+ 'title': 'Nadal wins 8th French Open title',
+ 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
+ 'duration': 135,
+ 'upload_date': '20130609',
},
},
{
mobj = re.match(self._VALID_URL, url)
path = mobj.group('path')
page_title = mobj.group('title')
- info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
+ info_url = 'http://cnn.com/video/data/3.0/%s/index.xml' % path
info = self._download_xml(info_url, page_title)
formats = []