youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 1b0427e6c433c0b6db5e210db6e3173e19e702ed
parent 2aa64b89b3ac8f387d4c0c27ce7de64bc0ff68de
Author: Yen Chi Hsuan <yan12125@gmail.com>
Date:   Tue, 19 May 2015 00:45:01 +0800

[utils] Support TTML without default namespace

In a strict sense such TTML is invalid, but Yahoo uses it.

Diffstat:
M test/test_utils.py | 15 +++++++++++++++
M youtube_dl/utils.py | 9 ++++++---
2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py @@ -621,6 +621,21 @@ Line ''' self.assertEqual(dfxp2srt(dfxp_data), srt_data) + dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?> + <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> + <body> + <div xml:lang="en"> + <p begin="0" end="1">The first line</p> + </div> + </body> + </tt>''' + srt_data = '''1 +00:00:00,000 --> 00:00:01,000 +The first line + +''' + self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py @@ -1848,9 +1848,9 @@ def dfxp2srt(dfxp_data): out = str_or_empty(node.text) for child in node: - if child.tag == _x('ttml:br'): + if child.tag in (_x('ttml:br'), 'br'): out += '\n' + str_or_empty(child.tail) - elif child.tag == _x('ttml:span'): + elif child.tag in (_x('ttml:span'), 'span'): out += str_or_empty(parse_node(child)) else: out += str_or_empty(xml.etree.ElementTree.tostring(child)) @@ -1859,7 +1859,10 @@ def dfxp2srt(dfxp_data): dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) out = [] - paras = dfxp.findall(_x('.//ttml:p')) + paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') + + if not paras: + raise ValueError('Invalid dfxp/TTML subtitle') for para, index in zip(paras, itertools.count(1)): begin_time = parse_dfxp_time_expr(para.attrib['begin'])