youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 8f1ea7cbb6cb365e4ffd75bdc2d901afcbfdf72f
parent a204c854083bd5aed79e41191f613275960eb600
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Tue, 26 Aug 2014 15:49:15 +0200

[empflix] Revert to XML parser

Don't rely on the XML being broken (if they fix it, our code wouldn't work anymore).
Instead, use the transform function we already have :)

This partially reverts commit c7bee2a7254d31b7c478c0ac33bf23bdeba1c53c.

Diffstat:
M youtube_dl/extractor/empflix.py | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import fix_xml_ampersands
 
 
 class EmpflixIE(InfoExtractor):
@@ -35,20 +36,17 @@ class EmpflixIE(InfoExtractor):
             r'flashvars\.config = escape\("([^"]+)"',
             webpage, 'flashvars.config')
 
-        # XML is malformed
-        cfg_xml = self._download_webpage(
-            cfg_url, video_id, note='Downloading metadata')
+        cfg_xml = self._download_xml(
+            cfg_url, video_id, note='Downloading metadata',
+            transform_source=fix_xml_ampersands)
 
         formats = [
             {
-                'url': item[1],
-                'format_id': item[0],
-            } for item in re.findall(
-                r'<item>\s*<res>([^>]+)</res>\s*<videoLink>([^<]+)</videoLink>\s*</item>', cfg_xml)
+                'url': item.find('videoLink').text,
+                'format_id': item.find('res').text,
+            } for item in cfg_xml.findall('./quality/item')
         ]
-
-        thumbnail = self._html_search_regex(
-            r'<startThumb>([^<]+)</startThumb>', cfg_xml, 'thumbnail', fatal=False)
+        thumbnail = cfg_xml.find('./startThumb').text
 
         return {
             'id': video_id,