youtube-dl

Another place where youtube-dl lives on
git clone git://git.oshgnacknak.de/youtube-dl.git
Log | Files | Refs | README | LICENSE

commit 02dc0a36b72b7312996d59b9ec96768f925cb4a4
parent 639e3b5c9985aacf7c0dc018c211a78161bbafd2
Author: Sergey M․ <dstftw@gmail.com>
Date:   Wed,  2 Nov 2016 02:14:01 +0700

[utils] Introduce base_url

Diffstat:
M test/test_utils.py             | 8 ++++++++
M youtube_dl/extractor/common.py | 5 +++--
M youtube_dl/utils.py            | 4 ++++
3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/test/test_utils.py b/test/test_utils.py
@@ -69,6 +69,7 @@ from youtube_dl.utils import (
     uppercase_escape,
     lowercase_escape,
     url_basename,
+    base_url,
     urlencode_postdata,
     urshift,
     update_url_query,
@@ -437,6 +438,13 @@ class TestUtil(unittest.TestCase):
             url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
             'trailer.mp4')
 
+    def test_base_url(self):
+        self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/')
+        self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/')
+        self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
+        self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
+        self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
+
     def test_parse_age_limit(self):
         self.assertEqual(parse_age_limit(None), None)
         self.assertEqual(parse_age_limit(False), None)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
@@ -30,6 +30,7 @@ from ..downloader.f4m import remove_encrypted_media
 from ..utils import (
     NO_DEFAULT,
     age_restricted,
+    base_url,
     bug_reports_message,
     clean_html,
     compiled_regex_type,
@@ -1539,7 +1540,7 @@ class InfoExtractor(object):
         if res is False:
             return []
         mpd, urlh = res
-        mpd_base_url = re.match(r'https?://[^?#&]+/', urlh.geturl()).group()
+        mpd_base_url = base_url(urlh.geturl())
 
         return self._parse_mpd_formats(
             compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
@@ -1797,7 +1798,7 @@ class InfoExtractor(object):
         if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
             return []
 
-        ism_base_url = re.match(r'https?://.+/', ism_url).group()
+        ism_base_url = base_url(ism_url)
         duration = int(ism_doc.attrib['Duration'])
         timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
 
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
@@ -1691,6 +1691,10 @@ def url_basename(url):
     return path.strip('/').split('/')[-1]
 
 
+def base_url(url):
+    return re.match(r'https?://[^?#&]+/', url).group()
+
+
 class HEADRequest(compat_urllib_request.Request):
     def get_method(self):
         return 'HEAD'