Switch back to underscore for invalid characters, and make restricted filenames ASCII-only

author Philipp Hagemeister <phihag@phihag.de>

Tue, 27 Nov 2012 11:46:09 +0000 (12:46 +0100)

committer Philipp Hagemeister <phihag@phihag.de>

Tue, 27 Nov 2012 11:46:09 +0000 (12:46 +0100)
author Philipp Hagemeister <phihag@phihag.de>
Tue, 27 Nov 2012 11:46:09 +0000 (12:46 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Tue, 27 Nov 2012 11:46:09 +0000 (12:46 +0100)
diff --git a/README.md b/README.md

index 14acddbd00cc9df81c5b444fc982f4f3f6ed64c7..5cf082a7ccb7b9659f51eb9514389fb497cbd4c2 100644 (file)
--- a/README.md
+++ b/README.md
@@ -47,8 +47,8 @@ which means you can modify it, redistribute it or use it however you like.
                               %(extractor)s for the provider (youtube, metacafe,
                               etc), %(id)s for the video id and %% for a literal
                               percent. Use - to output to stdout.
-    --restrict-filenames     Avoid some characters such as "&" and spaces in
-                             filenames
+    --restrict-filenames     Restrict filenames to only ASCII characters, and
+                             avoid "&" and spaces in filenames
      -a, --batch-file FILE    file containing URLs to download ('-' for stdin)
      -w, --no-overwrites      do not overwrite files
      -c, --continue           resume partially downloaded files
diff --git a/test/test_utils.py b/test/test_utils.py

index 0a435ddc547ef28a5aab9444ce466c4ccd75beaa..0b57e016c22a6a7b2ef53b307cb5c53c8d4bcfef 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -22,10 +22,10 @@ class TestUtil(unittest.TestCase):
  
                 self.assertEqual(sanitize_filename(u'123'), u'123')
  
-               self.assertEqual(u'abc-de', sanitize_filename(u'abc/de'))
+               self.assertEqual(u'abc_de', sanitize_filename(u'abc/de'))
                 self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))
  
-               self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de'))
+               self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de'))
                 self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|'))
                 self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
                 self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
@@ -45,14 +45,17 @@ class TestUtil(unittest.TestCase):
  
                 self.assertEqual(sanitize_filename(u'123', restricted=True), u'123')
  
-               self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True))
+               self.assertEqual(u'abc_de', sanitize_filename(u'abc/de', restricted=True))
                 self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
  
-               self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
+               self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
                 self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))
                 self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))
                 self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True))
  
+               self.assertEqual(sanitize_filename(u'aäb', restricted=True), u'a_b')
+               self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename
+
                 forbidden = u'"\0\\/&: \'\t\n'
                 for fc in forbidden:
                         print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True)))
diff --git a/youtube-dl.1 b/youtube-dl.1

index 64120a8d24a3f397108d9433400d8b6e0f2e3a4d..ae303b6727a38007e227ea76660805e1c49f6c4e 100644 (file)
--- a/youtube-dl.1
+++ b/youtube-dl.1
@@ -59,8 +59,8 @@ redistribute it or use it however you like.
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
---restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
+--restrict-filenames\ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames
  -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
  -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
  -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py

index cbf1dd1a722570d5709b6a0dd741d33337201e07..c3e0f78e5f12bb1176a0f1105096e776391a1fff 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -274,7 +274,7 @@ def parseOpts():
                         dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
         filesystem.add_option('--restrict-filenames',
                         action='store_true', dest='restrictfilenames',
-                       help='Avoid some characters such as "&" and spaces in filenames', default=False)
+                       help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
         filesystem.add_option('-a', '--batch-file',
                         dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
         filesystem.add_option('-w', '--no-overwrites',
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 1f60d34ae23ad2f36b54072e0b133d3cd2a22149..3339f56ec114fab13b39afeef2fcef60506c8ec5 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -207,15 +207,20 @@ def sanitize_filename(s, restricted=False):
                 elif char == ':':
                         return '_-' if restricted else ' -'
                 elif char in '\\/|*<>':
-                       return '-'
+                       return '_'
                 if restricted and (char in '&\'' or char.isspace()):
                         return '_'
+               if restricted and ord(char) > 127:
+                       return '_'
                 return char
  
         result = u''.join(map(replace_insane, s))
-       while '--' in result:
-               result = result.replace('--', '-')
-       return result.strip('-')
+       while '__' in result:
+               result = result.replace('__', '_')
+       result = result.strip('_')
+       if not result:
+               result = '_'
+       return result
  
  def orderedSet(iterable):
         """ Remove all duplicates from the input iterable """
author	Philipp Hagemeister <phihag@phihag.de>
	Tue, 27 Nov 2012 11:46:09 +0000 (12:46 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Tue, 27 Nov 2012 11:46:09 +0000 (12:46 +0100)
README.md		patch \| blob \| history
test/test_utils.py		patch \| blob \| history
youtube-dl.1		patch \| blob \| history
youtube_dl/__init__.py		patch \| blob \| history
youtube_dl/utils.py		patch \| blob \| history