# Author: Benjamin Johnson
# License: Public domain code
import cookielib
+import datetime
import htmlentitydefs
import httplib
import locale
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
-month_name_to_number = {
- 'January': '01',
- 'February': '02',
- 'March': '03',
- 'April': '04',
- 'May': '05',
- 'June': '06',
- 'July': '07',
- 'August': '08',
- 'September': '09',
- 'October': '10',
- 'November': '11',
- 'December': '12',
-}
-
def preferredencoding():
"""Get preferred encoding.
upload_date = u'NA'
mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
if mobj is not None:
- try:
- if ',' in mobj.group(1):
- # Month Day, Year
- m, d, y = mobj.group(1).replace(',', '').split()
- else:
- # Day Month Year, we'll suppose
- d, m, y = mobj.group(1).split()
- m = month_name_to_number[m]
- d = '%02d' % (long(d))
- upload_date = '%s%s%s' % (y, m, d)
- except:
- upload_date = u'NA'
+ upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())  # replace '/', ',' and '-' with spaces, then collapse whitespace runs to single spaces
+ format_expressions = ['%d %B %Y', '%B %d %Y']  # candidate layouts: day-month-year, then month-day-year
+ for expression in format_expressions:  # every layout is attempted; there is no break after a successful parse
+ try:
+ upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')  # on success upload_date becomes a YYYYMMDD string
+ except:  # NOTE(review): bare except swallows every error here, not only a strptime format mismatch
+ pass  # a failed attempt leaves upload_date untouched; if no layout matches it stays the normalized text, not u'NA'
# description
video_description = 'No description available.'