Allow non-ASCII characters in simplified titles (Closes #220)
author Philipp Hagemeister <phihag@phihag.de>
Mon, 21 Nov 2011 20:50:39 +0000 (21:50 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Mon, 21 Nov 2011 20:50:39 +0000 (21:50 +0100)
test/test_div.py
youtube_dl/__init__.py

index 4525c8be6abc6e884c5d1ba4f1d5614d3c7b6375..4d4819b3c74fca250eba89f4ad3b88971bbcb39a 100644 (file)
@@ -16,13 +16,14 @@ def test_simplify_title():
        assert u'/' not in youtube_dl._simplify_title(u'abc/de')
        assert u'abc' in youtube_dl._simplify_title(u'abc/de')
        assert u'de' in youtube_dl._simplify_title(u'abc/de')
+       assert u'/' not in youtube_dl._simplify_title(u'abc/de///')
 
        assert u'\\' not in youtube_dl._simplify_title(u'abc\\de')
        assert u'abc' in youtube_dl._simplify_title(u'abc\\de')
        assert u'de' in youtube_dl._simplify_title(u'abc\\de')
 
-       # TODO: Fix #220
-       #assert youtube_dl._simplify_title(u'ä') == u'ä'
+       assert youtube_dl._simplify_title(u'ä') == u'ä'
+       assert youtube_dl._simplify_title(u'кириллица') == u'кириллица'
 
        # Strip underlines
        assert youtube_dl._simplify_title(u'\'a_') == u'a'
index d4eadc9059d36a2f396bc207cfb13826eb4f15b4..36520c5940b2c5d7f7575f387639d5f7e4c8f2bc 100755 (executable)
@@ -278,7 +278,8 @@ def timeconvert(timestr):
        return timestamp
 
 def _simplify_title(title):
-       return re.sub(ur'[^\w\d_\-]+', u'_', title).strip(u'_')
+       expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
+       return expr.sub(u'_', title).strip(u'_')
 
 class DownloadError(Exception):
        """Download Error exception.
@@ -2937,6 +2938,7 @@ class BlipTVIE(InfoExtractor):
                        if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
                                basename = url.split('/')[-1]
                                title,ext = os.path.splitext(basename)
+                               title = title.decode('UTF-8')
                                ext = ext.replace('.', '')
                                self.report_direct_download(title)
                                info = {
@@ -3089,9 +3091,9 @@ class ComedyCentralIE(InfoExtractor):
 
                if mobj.group('shortname'):
                        if mobj.group('shortname') in ('tds', 'thedailyshow'):
-                               url = 'http://www.thedailyshow.com/full-episodes/'
+                               url = u'http://www.thedailyshow.com/full-episodes/'
                        else:
-                               url = 'http://www.colbertnation.com/full-episodes/'
+                               url = u'http://www.colbertnation.com/full-episodes/'
                        mobj = re.match(self._VALID_URL, url)
                        assert mobj is not None
 
@@ -3177,7 +3179,7 @@ class ComedyCentralIE(InfoExtractor):
 
                        self._downloader.increment_downloads()
 
-                       effTitle = showId + '-' + epTitle
+                       effTitle = showId + u'-' + epTitle
                        info = {
                                'id': shortMediaId,
                                'url': video_url,