From: Philipp Hagemeister Date: Wed, 28 Aug 2013 02:25:38 +0000 (+0200) Subject: [addanime] improve X-Git-Url: http://git.oshgnacknak.de/?a=commitdiff_plain;h=2eabb80254f48d63a464247140d8e04a73418ded;p=youtube-dl [addanime] improve --- 2eabb80254f48d63a464247140d8e04a73418ded diff --cc youtube_dl/extractor/__init__.py index 2f86f2aca,28dcb2cc4..c76b99a81 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@@ -1,4 -1,5 +1,5 @@@ - -from .AddAnime import AddAnimeIE +from .appletrailers import AppleTrailersIE ++from .addanime import AddAnimeIE from .archiveorg import ArchiveOrgIE from .ard import ARDIE from .arte import ArteTvIE diff --cc youtube_dl/extractor/appletrailers.py index 7d126e2d2,000000000..b3bdb2955 mode 100644,000000..100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@@ -1,167 -1,0 +1,167 @@@ +import re +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, +) + + +class AppleTrailersIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P[^/]+)/(?P[^/]+)' + _TEST = { + u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/", + u"playlist": [ + { + u"file": u"manofsteel-trailer4.mov", + u"md5": u"11874af099d480cc09e103b189805d5f", + u"info_dict": { + u"duration": 111, + u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg", + u"title": u"Trailer 4", + u"upload_date": u"20130523", + u"uploader_id": u"wb", + }, + }, + { + u"file": u"manofsteel-trailer3.mov", + u"md5": u"07a0a262aae5afe68120eed61137ab34", + u"info_dict": { + u"duration": 182, + u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg", + u"title": u"Trailer 3", + u"upload_date": u"20130417", + u"uploader_id": u"wb", + }, + }, + { + u"file": u"manofsteel-trailer.mov", + u"md5": u"e401fde0813008e3307e54b6f384cff1", + u"info_dict": { + u"duration": 148, + u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg", + u"title": u"Trailer", + u"upload_date": u"20121212", + u"uploader_id": u"wb", + }, + }, + { + u"file": u"manofsteel-teaser.mov", + u"md5": u"76b392f2ae9e7c98b22913c10a639c97", + u"info_dict": { + u"duration": 93, + u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg", + u"title": u"Teaser", + u"upload_date": u"20120721", + u"uploader_id": u"wb", + }, + } + ] + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + movie = mobj.group('movie') + uploader_id = mobj.group('company') + + playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc' + playlist_snippet = self._download_webpage(playlist_url, movie) + playlist_cleaned = re.sub(r'(?s)', u'', playlist_snippet) + playlist_html = u'' + playlist_cleaned + u'' + + size_cache = {} + + doc = xml.etree.ElementTree.fromstring(playlist_html) + playlist = [] + for li in doc.findall('./div/ul/li'): + title = li.find('.//h3').text + video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() + thumbnail = li.find('.//img').attrib['src'] + + date_el = li.find('.//p') + upload_date = None + m = re.search(r':\s?(?P[0-9]{2})/(?P[0-9]{2})/(?P[0-9]{2})', date_el.text) + if m: + upload_date = u'20' + m.group('year') + m.group('month') + m.group('day') + runtime_el = date_el.find('./br') + m = re.search(r':\s?(?P[0-9]+):(?P[0-9]{1,2})', runtime_el.tail) + duration = None + if m: + duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) + + formats = [] - for formats_el in li.findall('.//li/a'): ++ for formats_el in li.findall('.//a'): + if formats_el.attrib['class'] != 'OverlayPanel': + continue + target = formats_el.attrib['target'] + + format_code = formats_el.text + if 'Automatic' in format_code: + continue + + size_q = formats_el.attrib['href'] + size_id = size_q.rpartition('#videos-')[2] + if size_id not in size_cache: + size_url = url + size_q + sizepage_html = self._download_webpage( + size_url, movie, + note=u'Downloading size info %s' % size_id, + errnote=u'Error while downloading size info %s' % size_id, + ) + _doc = xml.etree.ElementTree.fromstring(sizepage_html) + size_cache[size_id] = _doc + + sizepage_doc = size_cache[size_id] + links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a') + for vid_a in links: + href = vid_a.get('href') + if not href.endswith(target): + continue + detail_q = href.partition('#')[0] + detail_url = url + '/' + detail_q + + m = re.match(r'includes/(?P[^/]+)/', detail_q) + detail_id = m.group('detail_id') + + detail_html = self._download_webpage( + detail_url, movie, + note=u'Downloading detail %s %s' % (detail_id, size_id), + errnote=u'Error while downloading detail %s %s' % (detail_id, size_id) + ) + detail_doc = xml.etree.ElementTree.fromstring(detail_html) + movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a') + assert movie_link_el.get('class') == 'movieLink' + movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h') + ext = determine_ext(movie_link) + assert ext == 'mov' + + formats.append({ + 'format': format_code, + 'ext': ext, + 'url': movie_link, + }) + + info = { + '_type': 'video', + 'id': video_id, + 'title': title, + 'formats': formats, + 'title': title, + 'duration': duration, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'user_agent': 'QuickTime compatible (youtube-dl)', + } + # TODO: Remove when #980 has been merged + info['url'] = formats[-1]['url'] + info['ext'] = formats[-1]['ext'] + + playlist.append(info) + + return { + '_type': 'playlist', + 'id': movie, + 'entries': playlist, + } diff --cc youtube_dl/extractor/common.py index 52c4483c9,da50abfc1..12169b2bb --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@@ -129,7 -122,7 +129,7 @@@ class InfoExtractor(object) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: if errnote is None: errnote = u'Unable to download webpage' -- raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2]) ++ raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err) def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None): """ Returns a tuple (page content as string, URL handle) """ diff --cc youtube_dl/utils.py index 52cfb8a6d,59eeaf4a8..f78b5fe78 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@@ -60,6 -60,6 +60,11 @@@ try except ImportError: # Python 2 import httplib as compat_http_client ++try: ++ from http.error import HTTPError as compat_HTTPError ++except ImportError: # Python 2 ++ from urllib2 import HTTPError as compat_HTTPError ++ try: from subprocess import DEVNULL compat_subprocess_get_DEVNULL = lambda: DEVNULL @@@ -489,7 -489,7 +494,7 @@@ def make_HTTPS_handler(opts) class ExtractorError(Exception): """Error during info extraction.""" -- def __init__(self, msg, tb=None, expected=False): ++ def __init__(self, msg, tb=None, expected=False, cause=None): """ tb, if given, is the original traceback (so that it can be printed out). If expected is set, this is a normal error message and most likely not a bug in youtube-dl. """ @@@ -502,6 -502,6 +507,7 @@@ self.traceback = tb self.exc_info = sys.exc_info() # preserve original exception ++ self.cause = cause def format_traceback(self): if self.traceback is None: