youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit e8df5cee12378acd708b6686130a73c5edc06f0e
parent ab07963b5cc79812c6fb7e4f9e363533d8123830
Author: Philipp Hagemeister <phihag@phihag.de>
Date:   Thu,  4 Dec 2014 17:35:40 +0100

[minhateca] Fix duration parsing

Diffstat:
M test/test_utils.py | 3 +++
M youtube_dl/extractor/minhateca.py | 5 +++--
M youtube_dl/utils.py | 19 +++++++++++++++----
3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py @@ -220,6 +220,9 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('0s'), 0) self.assertEqual(parse_duration('01:02:03.05'), 3723.05) self.assertEqual(parse_duration('T30M38S'), 1838) + self.assertEqual(parse_duration('5 s'), 5) + self.assertEqual(parse_duration('3 min'), 180) + self.assertEqual(parse_duration('2.5 hours'), 9000) def test_fix_xml_ampersands(self): self.assertEqual( diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py @@ -8,6 +8,7 @@ from ..compat import ( ) from ..utils import ( int_or_none, + parse_duration, parse_filesize, ) @@ -52,8 +53,8 @@ class MinhatecaIE(InfoExtractor): filesize_approx = parse_filesize(self._html_search_regex( r'<p class="fileSize">(.*?)</p>', webpage, 'file size approximation', fatal=False)) - duration = int_or_none(self._html_search_regex( - r'(?s)<p class="fileLeng[ht][th]">.*?([0-9]+)\s*s', + duration = parse_duration(self._html_search_regex( + r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<', webpage, 'duration', fatal=False)) view_count = int_or_none(self._html_search_regex( r'<p class="downloadsCounter">([0-9]+)</p>', diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py @@ -1206,18 +1206,29 @@ def parse_duration(s): m = re.match( r'''(?ix)T? + (?: + (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*| + (?P<only_hours>[0-9.]+)\s*(?:hours?)| + (?: (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)? (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s* )? - (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s) + (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)? 
+ )$''', s) if not m: return None - res = int(m.group('secs')) + res = 0 + if m.group('only_mins'): + return float_or_none(m.group('only_mins'), invscale=60) + if m.group('only_hours'): + return float_or_none(m.group('only_hours'), invscale=60 * 60) + if m.group('secs'): + res += int(m.group('secs')) if m.group('mins'): res += int(m.group('mins')) * 60 - if m.group('hours'): - res += int(m.group('hours')) * 60 * 60 + if m.group('hours'): + res += int(m.group('hours')) * 60 * 60 if m.group('ms'): res += float(m.group('ms')) return res