youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit d631d5f9f27f93767226192e4288990413fa9dbd
parent 4f29fa99069760dc47ef9ca5dbf607a567d2982f
Author: Yen Chi Hsuan <yan12125@gmail.com>
Date:   Sat, 19 Dec 2015 18:21:42 +0800

[utils] Fix TTML conversion

Tolerate invalid timestamps (closes #7909)

Diffstat:
M test/test_utils.py | 7 +++++--
M youtube_dl/utils.py | 11 ++++++++---
2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
@@ -661,8 +661,8 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
             {'like_count': 190, 'dislike_count': 10}))
 
     def test_parse_dfxp_time_expr(self):
-        self.assertEqual(parse_dfxp_time_expr(None), 0.0)
-        self.assertEqual(parse_dfxp_time_expr(''), 0.0)
+        self.assertEqual(parse_dfxp_time_expr(None), None)
+        self.assertEqual(parse_dfxp_time_expr(''), None)
         self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
         self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
         self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
@@ -676,6 +676,9 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
                     <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
                     <p begin="1" end="2">第二行<br/>♪♪</p>
                     <p begin="2" dur="1"><span>Third<br/>Line</span></p>
+                    <p begin="3" end="-1">Lines with invalid timestamps are ignored</p>
+                    <p begin="-1" end="-1">Ignore, two</p>
+                    <p begin="3" dur="-1">Ignored, three</p>
                 </div>
             </body>
             </tt>'''
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -1976,7 +1976,7 @@ def match_filter_func(filter_str):
 
 def parse_dfxp_time_expr(time_expr):
     if not time_expr:
-        return 0.0
+        return
 
     mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
     if mobj:
@@ -2020,10 +2020,15 @@ def dfxp2srt(dfxp_data):
         raise ValueError('Invalid dfxp/TTML subtitle')
 
     for para, index in zip(paras, itertools.count(1)):
-        begin_time = parse_dfxp_time_expr(para.attrib['begin'])
+        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
+        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
+        if begin_time is None:
+            continue
         if not end_time:
-            end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
+            if not dur:
+                continue
+            end_time = begin_time + dur
         out.append('%d\n%s --> %s\n%s\n\n' % (
             index,
             srt_subtitles_timecode(begin_time),