From: Jaime Marquínez Ferrándiz Date: Tue, 26 Nov 2013 17:48:52 +0000 (+0100) Subject: Use the new '_download_xml' helper in more extractors X-Git-Url: http://git.oshgnacknak.de/?a=commitdiff_plain;h=e26f8712289c727a43d74a4669aee4924b9f75f2;p=youtube-dl Use the new '_download_xml' helper in more extractors --- diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py index 691d5a844..2b019daa9 100644 --- a/youtube_dl/extractor/anitube.py +++ b/youtube_dl/extractor/anitube.py @@ -1,5 +1,4 @@ import re -import xml.etree.ElementTree from .common import InfoExtractor @@ -28,9 +27,8 @@ class AnitubeIE(InfoExtractor): key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, u'key') - webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, + config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) - config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8')) video_title = config_xml.find('title').text diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 44d0b5d70..8b62ee774 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -1,7 +1,6 @@ # encoding: utf-8 import re import json -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -78,8 +77,7 @@ class ArteTvIE(InfoExtractor): """Extract from videos.arte.tv""" ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') - ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata') - ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml) + ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata') config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) config_xml_url = config_node.attrib['ref'] config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration') @@ -109,9 +107,8 @@ class ArteTvIE(InfoExtractor): """Extract form http://liveweb.arte.tv/""" webpage = self._download_webpage(url, name) video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id') - config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, + config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, video_id, u'Downloading information') - config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) event_doc = config_doc.find('event') url_node = event_doc.find('video').find('urlHd') if url_node is None: diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index bfa2a8b40..7cdcd8399 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -1,6 +1,5 @@ # encoding: utf-8 import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import unified_strdate @@ -31,11 +30,10 @@ class CanalplusIE(InfoExtractor): webpage = self._download_webpage(url, mobj.group('path')) video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id') info_url = self._VIDEO_INFO_TEMPLATE % video_id - info_page = self._download_webpage(info_url,video_id, + doc = self._download_xml(info_url,video_id, u'Downloading video info') self.report_extraction(video_id) - doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8')) video_info = [video for video in doc if video.find('ID').text == video_id][0] infos = video_info.find('INFOS') media = video_info.find('MEDIA') diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py index 95449da3c..5f0b5602f 100644 --- a/youtube_dl/extractor/clipfish.py +++ b/youtube_dl/extractor/clipfish.py @@ -1,6 +1,5 @@ import re import time -import xml.etree.ElementTree from .common import InfoExtractor @@ -25,9 +24,8 @@ class ClipfishIE(InfoExtractor): info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' % (video_id, int(time.time()))) - info_xml = self._download_webpage( + doc = self._download_xml( info_url, video_id, note=u'Downloading info page') - doc = xml.etree.ElementTree.fromstring(info_xml) title = doc.find('title').text video_url = doc.find('filename').text thumbnail = doc.find('imageurl').text diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 34adf6dda..a034bb2fb 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -1,5 +1,4 @@ import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import determine_ext @@ -33,8 +32,7 @@ class CNNIE(InfoExtractor): path = mobj.group('path') page_title = mobj.group('title') info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path - info_xml = self._download_webpage(info_url, page_title) - info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')) + info = self._download_xml(info_url, page_title) formats = [] for f in info.findall('files/file'): diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 725849d2e..23647f99e 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -1,5 +1,4 @@ import re -import xml.etree.ElementTree from .common import InfoExtractor from .mtv import MTVIE, _media_xml_tag @@ -158,13 +157,12 @@ class ComedyCentralShowsIE(InfoExtractor): uri = mMovieParams[0][1] indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri}) - indexXml = self._download_webpage(indexUrl, epTitle, + idoc = self._download_xml(indexUrl, epTitle, u'Downloading show index', u'unable to download episode index') results = [] - idoc = xml.etree.ElementTree.fromstring(indexXml) itemEls = idoc.findall('.//item') for partNum,itemEl in enumerate(itemEls): mediaId = itemEl.findall('./guid')[0].text @@ -175,10 +173,9 @@ class ComedyCentralShowsIE(InfoExtractor): configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + compat_urllib_parse.urlencode({'uri': mediaId})) - configXml = self._download_webpage(configUrl, epTitle, + cdoc = self._download_xml(configUrl, epTitle, u'Downloading configuration for %s' % shortMediaId) - cdoc = xml.etree.ElementTree.fromstring(configXml) turls = [] for rendition in cdoc.findall('.//rendition'): finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index a804e83bd..3d1dcb793 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -1,6 +1,5 @@ # encoding: utf-8 import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -32,14 +31,12 @@ class DaumIE(InfoExtractor): full_id = self._search_regex(r'(.*?)', - xml_config, u'JSON information') + info_json = config.find('format.json').text info = json.loads(info_json)['versions'][0] video_url = 'http://video720.jeuxvideo.com/' + info['file'] diff --git a/youtube_dl/extractor/justintv.py b/youtube_dl/extractor/justintv.py index f60017992..e9bde0c18 100644 --- a/youtube_dl/extractor/justintv.py +++ b/youtube_dl/extractor/justintv.py @@ -1,7 +1,6 @@ import json import os import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -94,10 +93,9 @@ class JustinTVIE(InfoExtractor): archive_id = m.group(1) api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id - chapter_info_xml = self._download_webpage(api, chapter_id, + doc = self._download_xml(api, chapter_id, note=u'Downloading chapter information', errnote=u'Chapter information download failed') - doc = xml.etree.ElementTree.fromstring(chapter_info_xml) for a in doc.findall('.//archive'): if archive_id == a.find('./id').text: break diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 5f548437c..9bc35b115 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -1,6 +1,5 @@ import re import json -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -80,8 +79,7 @@ class LivestreamOriginalIE(InfoExtractor): user = mobj.group('user') api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) - api_response = self._download_webpage(api_url, video_id) - info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8')) + info = self._download_xml(api_url, video_id) item = info.find('channel').find('item') ns = {'media': 'http://search.yahoo.com/mrss'} thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url'] diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 04afd6c4c..42aee58be 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -109,9 +109,8 @@ class MTVIE(InfoExtractor): def _get_videos_info(self, uri): video_id = self._id_from_uri(uri) data = compat_urllib_parse.urlencode({'uri': uri}) - infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id, + idoc = self._download_xml(self._FEED_URL +'?' + data, video_id, u'Downloading info') - idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8')) return [self._get_video_info(item) for item in idoc.findall('.//item')] def _real_extract(self, url): diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 107665d15..0067bf134 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -1,5 +1,4 @@ import os.path -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -33,8 +32,7 @@ class MySpassIE(InfoExtractor): # get metadata metadata_url = META_DATA_URL_TEMPLATE % video_id - metadata_text = self._download_webpage(metadata_url, video_id) - metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8')) + metadata = self._download_xml(metadata_url, video_id) # extract values from metadata url_flv_el = metadata.find('url_flv') diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 9df236d69..d290397c7 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -1,6 +1,5 @@ # encoding: utf-8 import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -38,14 +37,12 @@ class NaverIE(InfoExtractor): 'protocol': 'p2p', 'inKey': key, }) - info_xml = self._download_webpage( + info = self._download_xml( 'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query, video_id, u'Downloading video info') - urls_xml = self._download_webpage( + urls = self._download_xml( 'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls, video_id, u'Downloading video formats info') - info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')) - urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8')) formats = [] for format_el in urls.findall('EncodingOptions/EncodingOption'): diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 3bc9dae6d..e8bbfff7b 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -1,5 +1,4 @@ import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import find_xpath_attr, compat_str @@ -21,8 +20,8 @@ class NBCNewsIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) - info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video') + all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) + info = all_info.find('video') return {'id': video_id, 'title': info.find('headline').text, diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 458fe4063..2edd806a3 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -1,6 +1,5 @@ import re import json -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -26,9 +25,8 @@ class NHLBaseInfoExtractor(InfoExtractor): 'path': initial_video_url.replace('.mp4', '_sd.mp4'), }) path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data - path_response = self._download_webpage(path_url, video_id, + path_doc = self._download_xml(path_url, video_id, u'Downloading final video url') - path_doc = xml.etree.ElementTree.fromstring(path_response) video_url = path_doc.find('path').text join = compat_urlparse.urljoin diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 729607ea3..46774317c 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -2,7 +2,6 @@ import re import socket -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -81,7 +80,7 @@ class NiconicoIE(InfoExtractor): # the cookies in order to be able to download the info webpage self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id) - video_info_webpage = self._download_webpage( + video_info = self._download_xml( 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, note=u'Downloading video info page') @@ -92,7 +91,6 @@ class NiconicoIE(InfoExtractor): video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] # Start extracting information - video_info = xml.etree.ElementTree.fromstring(video_info_webpage) video_title = video_info.find('.//title').text video_extension = video_info.find('.//movie_type').text video_format = video_extension.upper() @@ -107,13 +105,11 @@ class NiconicoIE(InfoExtractor): video_uploader = video_uploader_id url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id try: - user_info_webpage = self._download_webpage( + user_info = self._download_xml( url, video_id, note=u'Downloading user information') + video_uploader = user_info.find('.//nickname').text except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err)) - else: - user_info = xml.etree.ElementTree.fromstring(user_info_webpage) - video_uploader = user_info.find('.//nickname').text return { 'id': video_id, diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py index 14b1c656c..74a87fe56 100644 --- a/youtube_dl/extractor/sina.py +++ b/youtube_dl/extractor/sina.py @@ -1,7 +1,6 @@ # coding: utf-8 import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -35,12 +34,11 @@ class SinaIE(InfoExtractor): def _extract_video(self, video_id): data = compat_urllib_parse.urlencode({'vid': video_id}) - url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data, + url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data, video_id, u'Downloading video url') image_page = self._download_webpage( 'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data, video_id, u'Downloading thumbnail info') - url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8')) return {'id': video_id, 'url': url_doc.find('./durl/url').text, diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 19ce585cf..695520524 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -1,5 +1,4 @@ import re -import xml.etree.ElementTree from .common import InfoExtractor @@ -33,12 +32,10 @@ class SpiegelIE(InfoExtractor): r'
(.*?)
', webpage, u'title') xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml' - xml_code = self._download_webpage( + idoc = self._download_xml( xml_url, video_id, note=u'Downloading XML', errnote=u'Failed to download XML') - idoc = xml.etree.ElementTree.fromstring(xml_code) - formats = [ { 'format_id': n.tag.rpartition('type')[2], diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 165d9f88b..2bf26d056 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -1,5 +1,4 @@ import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -32,8 +31,7 @@ class TeamcocoIE(InfoExtractor): self.report_extraction(video_id) data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id - data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage') - data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8')) + data = self._download_xml(data_url, video_id, 'Downloading data webpage') qualities = ['500k', '480p', '1000k', '720p', '1080p'] diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 2f728d3dc..1e9598ef6 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -1,6 +1,5 @@ # coding: utf-8 import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -40,11 +39,9 @@ class TouTvIE(InfoExtractor): r'"idMedia":\s*"([^"]+)"', webpage, u'media ID') streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId - streams_webpage = self._download_webpage( + streams_doc = self._download_xml( streams_url, video_id, note=u'Downloading stream list') - streams_doc = xml.etree.ElementTree.fromstring( - streams_webpage.encode('utf-8')) video_url = next(n.text for n in streams_doc.findall('.//choice/url') if u'//ad.doubleclick' not in n.text) diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py index 0bf028f61..1c49e580d 100644 --- a/youtube_dl/extractor/trilulilu.py +++ b/youtube_dl/extractor/trilulilu.py @@ -1,6 +1,5 @@ import json import re -import xml.etree.ElementTree from .common import InfoExtractor @@ -36,12 +35,10 @@ class TriluliluIE(InfoExtractor): format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/' u'video-formats2' % log) - format_str = self._download_webpage( + format_doc = self._download_xml( format_url, video_id, note=u'Downloading formats', errnote=u'Error while downloading formats') - - format_doc = xml.etree.ElementTree.fromstring(format_str) video_url_template = ( u'http://fs%(server)s.trilulilu.ro/stream.php?type=video' diff --git a/youtube_dl/extractor/videofyme.py b/youtube_dl/extractor/videofyme.py index 94f64ffa5..912802d9a 100644 --- a/youtube_dl/extractor/videofyme.py +++ b/youtube_dl/extractor/videofyme.py @@ -1,5 +1,4 @@ import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -27,9 +26,8 @@ class VideofyMeIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id, + config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id, video_id) - config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) video = config.find('video') sources = video.find('sources') url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4c43d5739..a76a9071a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -11,7 +11,6 @@ import socket import string import struct import traceback -import xml.etree.ElementTree import zlib from .common import InfoExtractor, SearchInfoExtractor @@ -1144,8 +1143,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'asrs': 1, }) list_url = caption_url + '&' + list_params - list_page = self._download_webpage(list_url, video_id) - caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8')) + caption_list = self._download_xml(list_url, video_id) original_lang_node = caption_list.find('track') if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' : self._downloader.report_warning(u'Video doesn\'t have automatic captions')