Rework upload date mechanism after detecting problems in several tests
author Ricardo Garcia <sarbalap+freshmeat@gmail.com>
Fri, 19 Nov 2010 18:31:26 +0000 (19:31 +0100)
committer Ricardo Garcia <sarbalap+freshmeat@gmail.com>
Fri, 19 Nov 2010 18:31:26 +0000 (19:31 +0100)
youtube-dl

index e164d5c8caa5a287eadcf02d1cc6fdbcc55dca80..3d20a9d6d5fb279e9ca9ce366609521bdb024608 100755 (executable)
@@ -5,7 +5,6 @@
 # Author: Benjamin Johnson
 # License: Public domain code
 import cookielib
-import datetime
 import htmlentitydefs
 import httplib
 import locale
@@ -37,6 +36,21 @@ std_headers = {
 
 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
 
+month_name_to_number = {
+       'January':      '01',
+       'February':     '02',
+       'March':        '03',
+       'April':        '04',
+       'May':          '05',
+       'June':         '06',
+       'July':         '07',
+       'August':       '08',
+       'September':    '09',
+       'October':      '10',
+       'November':     '11',
+       'December':     '12',
+}
+
 def preferredencoding():
        """Get preferred encoding.
 
@@ -899,13 +913,18 @@ class YoutubeIE(InfoExtractor):
                upload_date = u'NA'
                mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
                if mobj is not None:
-                       upload_date = mobj.group(1).split()
-                       format_expressions = ['%d %B %Y', '%B %d, %Y']
-                       for expression in format_expressions:
-                               try:
-                                       upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
-                               except:
-                                       pass
+                       try:
+                               if ',' in mobj.group(1):
+                                       # Month Day, Year
+                                       m, d, y = mobj.group(1).replace(',', '').split()
+                               else:
+                                       # Day Month Year, we'll suppose
+                                       d, m, y = mobj.group(1).split()
+                               m = month_name_to_number[m]
+                               d = '%02d' % (long(d))
+                               upload_date = '%s%s%s' % (y, m, d)
+                       except:
+                               upload_date = u'NA'
 
                # description
                video_description = 'No description available.'
@@ -961,7 +980,7 @@ class YoutubeIE(InfoExtractor):
                                        'id':           video_id.decode('utf-8'),
                                        'url':          video_real_url.decode('utf-8'),
                                        'uploader':     video_uploader.decode('utf-8'),
-                                       'uploaddate':   upload_date,
+                                       'upload_date':  upload_date,
                                        'title':        video_title,
                                        'stitle':       simple_title,
                                        'ext':          video_extension.decode('utf-8'),
@@ -1108,7 +1127,7 @@ class MetacafeIE(InfoExtractor):
                                'id':           video_id.decode('utf-8'),
                                'url':          video_url.decode('utf-8'),
                                'uploader':     video_uploader.decode('utf-8'),
-                               'uploaddate':   u'NA',
+                               'upload_date':  u'NA',
                                'title':        video_title,
                                'stitle':       simple_title,
                                'ext':          video_extension.decode('utf-8'),
@@ -1197,7 +1216,7 @@ class DailymotionIE(InfoExtractor):
                                'id':           video_id.decode('utf-8'),
                                'url':          video_url.decode('utf-8'),
                                'uploader':     video_uploader.decode('utf-8'),
-                               'uploaddate':   u'NA',
+                               'upload_date':  u'NA',
                                'title':        video_title,
                                'stitle':       simple_title,
                                'ext':          video_extension.decode('utf-8'),
@@ -1307,7 +1326,7 @@ class GoogleIE(InfoExtractor):
                                'id':           video_id.decode('utf-8'),
                                'url':          video_url.decode('utf-8'),
                                'uploader':     u'NA',
-                               'uploaddate':   u'NA',
+                               'upload_date':  u'NA',
                                'title':        video_title,
                                'stitle':       simple_title,
                                'ext':          video_extension.decode('utf-8'),
@@ -1389,7 +1408,7 @@ class PhotobucketIE(InfoExtractor):
                                'id':           video_id.decode('utf-8'),
                                'url':          video_url.decode('utf-8'),
                                'uploader':     video_uploader,
-                               'uploaddate':   u'NA',
+                               'upload_date':  u'NA',
                                'title':        video_title,
                                'stitle':       simple_title,
                                'ext':          video_extension.decode('utf-8'),
@@ -1544,7 +1563,7 @@ class YahooIE(InfoExtractor):
                                'id':           video_id.decode('utf-8'),
                                'url':          video_url,
                                'uploader':     video_uploader,
-                               'uploaddate':   u'NA',
+                               'upload_date':  u'NA',
                                'title':        video_title,
                                'stitle':       simple_title,
                                'ext':          video_extension.decode('utf-8'),
@@ -1647,7 +1666,7 @@ class GenericIE(InfoExtractor):
                                'id':           video_id.decode('utf-8'),
                                'url':          video_url.decode('utf-8'),
                                'uploader':     video_uploader,
-                               'uploaddate':   u'NA',
+                               'upload_date':  u'NA',
                                'title':        video_title,
                                'stitle':       simple_title,
                                'ext':          video_extension.decode('utf-8'),