Added '--xattrs' option which writes metadata to the file's extended attributes using...
author epitron <chris@ill-logic.com>
Thu, 2 Jan 2014 12:47:28 +0000 (07:47 -0500)
committer epitron <chris@ill-logic.com>
Thu, 2 Jan 2014 12:47:28 +0000 (07:47 -0500)
Works on Linux, OSX, and Windows.

README.md
youtube_dl/PostProcessor.py
youtube_dl/__init__.py
youtube_dl/utils.py

index caed9484672d8844890d43d98708f83044e6b3d2..5fa0103dfeae44a44fdf9cecdce3ab53eae12fc9 100644 (file)
--- a/README.md
+++ b/README.md
@@ -189,7 +189,9 @@ which means you can modify it, redistribute it or use it however you like.
                                processed files are overwritten by default
     --embed-subs               embed subtitles in the video (only for mp4
                                videos)
-    --add-metadata             add metadata to the files
+    --add-metadata             write metadata to the video file
+    --xattrs                   write metadata to the video file's xattrs (using
+                               dublin core and xdg standards)
 
 # CONFIGURATION
 
index 69aedf87a44c72060e2af135cd95f6f820e9ab0c..da95f1a87dbe63fc648e061aa5671bb3dba1d740 100644 (file)
@@ -62,6 +62,7 @@ class FFmpegPostProcessorError(PostProcessingError):
 class AudioConversionError(PostProcessingError):
     pass
 
+
 class FFmpegPostProcessor(PostProcessor):
     def __init__(self,downloader=None):
         PostProcessor.__init__(self, downloader)
@@ -107,6 +108,7 @@ class FFmpegPostProcessor(PostProcessor):
             return u'./' + fn
         return fn
 
+
 class FFmpegExtractAudioPP(FFmpegPostProcessor):
     def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
         FFmpegPostProcessor.__init__(self, downloader)
@@ -232,6 +234,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         information['filepath'] = new_path
         return self._nopostoverwrites,information
 
+
 class FFmpegVideoConvertor(FFmpegPostProcessor):
     def __init__(self, downloader=None,preferedformat=None):
         super(FFmpegVideoConvertor, self).__init__(downloader)
@@ -509,3 +512,120 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
         os.remove(encodeFilename(filename))
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
         return True, info
+
+
+class XAttrMetadataPP(PostProcessor):
+    """Post-processor that copies video metadata into the file's extended attributes.
+
+    The values are taken from the info dict handed to run(); _XATTR_MAPPING
+    lists which info entries are written and under which attribute names.
+    """
+
+    #
+    # More info about extended attributes for media:
+    #   http://freedesktop.org/wiki/CommonExtendedAttributes/
+    #   http://www.freedesktop.org/wiki/PhreedomDraft/
+    #   http://dublincore.org/documents/usageguide/elements.shtml
+    #
+    # TODO:
+    #  * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
+    #  * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
+    #
+
+    # (xattr name, info dict key) pairs written by run(), in this order.
+    _XATTR_MAPPING = (
+        ('user.xdg.referrer.url', 'webpage_url'),
+        # ('user.xdg.comment', 'description'),
+        ('user.dublincore.title', 'title'),
+        ('user.dublincore.date', 'upload_date'),
+        ('user.dublincore.description', 'description'),
+        ('user.dublincore.contributor', 'uploader'),
+        ('user.dublincore.format', 'format'),
+    )
+
+    def run(self, info):
+        """ Set extended attributes on downloaded file (if xattr support is found).
+
+        Reads info['filepath'] and the info keys named in _XATTR_MAPPING;
+        entries that are missing or empty are skipped.
+
+        Returns (True, info) when every attribute was written and
+        (False, info) after reporting an error through the downloader.
+        """
+        import errno
+
+        from .utils import hyphenate_date
+
+        write_xattr = self._get_xattr_writer()
+        if write_xattr is None:
+            # Without a backend there is nothing to call below, so stop here
+            # instead of failing later on an undefined writer.
+            self._downloader.report_error(self._missing_tool_message())
+            return False, info
+
+        # Write the metadata to the file's xattrs
+        self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs...')
+
+        filename = info['filepath']
+
+        try:
+            for xattrname, infoname in self._XATTR_MAPPING:
+                value = info.get(infoname)
+                if not value:
+                    continue
+
+                if infoname == "upload_date":
+                    value = hyphenate_date(value)
+
+                # Every backend is handed UTF-8 bytes: the python xattr
+                # bindings expect a bytes value on Python 3, and the NTFS
+                # stream is opened in binary mode.
+                if not isinstance(value, bytes):
+                    value = value.encode('utf-8')
+
+                write_xattr(filename, xattrname, value)
+
+        except EnvironmentError as err:
+            # EnvironmentError rather than OSError: on Python 2 an IOError
+            # (e.g. from open()) is not an OSError; pyxattr also documents
+            # its failures as EnvironmentError.
+            unsupported = (errno.EOPNOTSUPP, getattr(errno, 'ENOTSUP', errno.EOPNOTSUPP))
+            if err.errno in unsupported:
+                self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")
+            else:
+                self._downloader.report_error("Unable to write extended attributes: %s" % err)
+            return False, info
+
+        return True, info
+
+    def _get_xattr_writer(self):
+        """Pick the best available xattr backend.
+
+        Returns a function write_xattr(path, key, value) that raises
+        OSError/IOError on failure, or None when no backend is available.
+        """
+        try:
+            # try the pyxattr module...
+            import xattr
+        except ImportError:
+            pass
+        else:
+            def write_xattr(path, key, value):
+                return xattr.setxattr(path, key, value)
+            return write_xattr
+
+        if os.name != 'posix':
+            # Write xattrs to NTFS Alternate Data Streams: http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
+            def write_xattr(path, key, value):
+                import errno
+                # Keys come from _XATTR_MAPPING; a colon would change which
+                # stream is addressed. The path itself may contain a colon
+                # (drive letter), so it is deliberately not checked.
+                assert ':' not in key
+                if not os.path.exists(path):
+                    raise OSError(errno.ENOENT, 'No such file', path)
+
+                with open(path + ":" + key, "wb") as stream:
+                    stream.write(value)
+            return write_xattr
+
+        setfattr_path = self._find_executable("setfattr")
+        if setfattr_path:
+            def write_xattr(path, key, value):
+                self._run_xattr_tool([setfattr_path, '-n', key, '-v', value, path])
+            return write_xattr
+
+        xattr_path = self._find_executable("xattr")
+        if xattr_path:
+            def write_xattr(path, key, value):
+                self._run_xattr_tool([xattr_path, '-w', key, value, path])
+            return write_xattr
+
+        # On Unix, and can't find pyxattr, setfattr, or xattr.
+        return None
+
+    @staticmethod
+    def _find_executable(name):
+        """Return the first executable file called `name` on PATH, or None."""
+        for directory in os.environ.get("PATH", "").split(os.pathsep):
+            if not directory:
+                continue
+            candidate = os.path.join(directory, name)
+            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
+                return candidate
+        return None
+
+    @staticmethod
+    def _run_xattr_tool(cmd):
+        """Run an xattr command line tool; raise OSError if it exits non-zero."""
+        import errno
+        potential_errors = (
+            # setfattr: /tmp/blah: Operation not supported
+            ("Operation not supported", errno.EOPNOTSUPP),
+            # setfattr: ~/blah: No such file or directory
+            # xattr: No such file: ~/blah
+            ("No such file", errno.ENOENT),
+            ("Permission denied", errno.EACCES),
+        )
+
+        # Popen + communicate instead of check_output, which is missing on
+        # Python 2.6.
+        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        output = proc.communicate()[0]
+        if proc.returncode == 0:
+            return
+
+        errorstr = output.decode('utf-8', 'replace')
+        for potential_errorstr, potential_errno in potential_errors:
+            if potential_errorstr in errorstr:
+                raise OSError(potential_errno, potential_errorstr)
+
+        # Unrecognised failure: still surface it as an OSError so that run()
+        # reports it instead of letting an unexpected exception escape.
+        raise OSError(errno.EIO, '%s exited with status %d' % (os.path.basename(cmd[0]), proc.returncode))
+
+    @staticmethod
+    def _missing_tool_message():
+        """Return the installation hint shown when no xattr backend was found."""
+        if sys.platform.startswith('linux'):
+            return "Couldn't find a tool to set the xattrs. Install either the python 'pyxattr' or 'xattr' modules, or the GNU 'attr' package (which contains the 'setfattr' tool)."
+        if sys.platform == 'darwin':
+            return "Couldn't find a tool to set the xattrs. Install either the python 'xattr' module, or the 'xattr' binary."
+        return "Couldn't find a tool to set the xattrs. Install either the python 'pyxattr' or 'xattr' modules, or a 'setfattr' or 'xattr' binary."
+
index 63437301b6fb43f360856646184f7161e2d76c3b..03f98f5040fc4dbdc95a783404d8b1a4465d2495 100644 (file)
@@ -38,6 +38,7 @@ __authors__  = (
     'Takuya Tsuchida',
     'Sergey M.',
     'Michael Orlitzky',
+    'Chris Gahan',
 )
 
 __license__ = 'Public Domain'
@@ -78,6 +79,7 @@ from .PostProcessor import (
     FFmpegVideoConvertor,
     FFmpegExtractAudioPP,
     FFmpegEmbedSubtitlePP,
+    XAttrMetadataPP,
 )
 
 
@@ -412,7 +414,9 @@ def parseOpts(overrideArguments=None):
     postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
             help='embed subtitles in the video (only for mp4 videos)')
     postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
-            help='add metadata to the files')
+            help='write metadata to the video file')
+    postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
+            help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
 
 
     parser.add_option_group(general)
@@ -709,6 +713,8 @@ def _real_main(argv=None):
             ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
         if opts.embedsubtitles:
             ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
+        if opts.xattrs:
+            ydl.add_post_processor(XAttrMetadataPP())
 
         # Update version
         if opts.update_self:
index 2e48f187e665dad81caa663efdb9d0c33f088936..20ebea38cc52da906b034609d2866e2681344532 100644 (file)
@@ -809,6 +809,15 @@ def date_from_str(date_str):
         return today + delta
     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
     
+def hyphenate_date(date_str):
+    """Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.
+
+    A string that does not consist of exactly eight digits is returned
+    unchanged.
+    """
+    parsed = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
+    if parsed is None:
+        return date_str
+    year, month, day = parsed.groups()
+    return '-'.join((year, month, day))
+
 class DateRange(object):
     """Represents a time interval between two dates"""
     def __init__(self, start=None, end=None):