[utils] Transliterate "þ" as "th" (#20897)
authorJakub Wilk <jwilk@jwilk.net>
Fri, 10 May 2019 18:42:32 +0000 (20:42 +0200)
committerSergey M <dstftw@gmail.com>
Fri, 10 May 2019 18:42:31 +0000 (01:42 +0700)
Despite visual similarity "þ" is unrelated to "p".
It is normally transliterated as "th":

    $ echo þ-Þ | iconv -t ASCII//TRANSLIT
    th-TH

test/test_utils.py
youtube_dl/utils.py

index ca6d832a4e74418cd8b643fec645f88322d7ed87..9ef0e422b8afc0eeefc2e1a9073a7793a54420a9 100644 (file)
@@ -183,7 +183,7 @@ class TestUtil(unittest.TestCase):
 
         self.assertEqual(sanitize_filename(
             'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
-            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
+            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')
 
     def test_sanitize_ids(self):
         self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
index 71713f63aff9b451863b109428df0b475892b802..99ee5494293423645e990624348a4fc0258f86e0 100644 (file)
@@ -125,8 +125,8 @@ KNOWN_EXTENSIONS = (
 
 # needed for sanitizing filenames in restricted mode
 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
-                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
-                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
+                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
+                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
 
 DATE_FORMATS = (
     '%d %B %Y',