video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
# upload date
- upload_date = u'NA'
+ upload_date = None
mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
if mobj is not None:
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'),
- 'upload_date': u'NA',
+ 'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
}]
return
video_title = unescapeHTML(mobj.group('title').decode('utf-8'))
- video_uploader = u'NA'
+ video_uploader = None
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
if mobj is None:
# looking for official user
else:
video_uploader = mobj.group(1)
- video_upload_date = u'NA'
+ video_upload_date = None
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
if mobj is not None:
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
return [{
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
}]
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
'uploader': video_uploader,
- 'upload_date': u'NA',
+ 'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
}]
'id': video_id.decode('utf-8'),
'url': video_url,
'uploader': video_uploader,
- 'upload_date': u'NA',
+ 'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
'thumbnail': video_thumbnail.decode('utf-8'),
else: video_description = ''
# Extract upload date
- video_upload_date = u'NA'
+ video_upload_date = None
mobj = re.search(r'<span id="clip-date" style="display:none">[^:]*: (.*?)( \([^\(]*\))?</span>', webpage)
if mobj is not None:
video_upload_date = mobj.group(1)
'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'),
'uploader': video_uploader,
- 'upload_date': u'NA',
+ 'upload_date': None,
'title': video_title,
'ext': video_extension.decode('utf-8'),
}]
return [{
'id': file_id.decode('utf-8'),
'url': file_url.decode('utf-8'),
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
'title': file_title,
'ext': file_extension.decode('utf-8'),
}]
video_thumbnail = video_info['thumbnail']
# upload date
- upload_date = u'NA'
+ upload_date = None
if 'upload_date' in video_info:
upload_time = video_info['upload_date']
timetuple = email.utils.parsedate_tz(upload_time)
info = {
'id': title,
'url': url,
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
'title': title,
'ext': ext,
'urlhandle': urlh
return [{
'id': video_id,
'url': video_url,
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
'title': video_title,
'ext': u'flv',
}]
'id': videoId,
'url': videoUrl,
'uploader': showName,
- 'upload_date': u'NA',
+ 'upload_date': None,
'title': showName,
'ext': 'flv',
'thumbnail': imgUrl,
info = {
'id': video_id,
'internal_id': internal_video_id,
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
}
self.report_extraction(video_id)
info = {
'id': video_id,
'url': video_url,
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
'title': video_title,
'ext': 'flv',
'thumbnail': video_thumbnail,
description = mobj.group(1)
# upload date
- upload_date = u'NA'
+ upload_date = None
mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
if mobj:
try:
info = {
'id': video_id,
'url': video_url,
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
'title': video_title,
'ext': extension, # Extension is always(?) mp4, but seems to be flv
'thumbnail': None,
'id': file_id.decode('utf-8'),
'url': file_url.decode('utf-8'),
'uploader': uploader.decode('utf-8'),
- 'upload_date': u'NA',
+ 'upload_date': None,
'title': json_data['name'],
'ext': file_url.split('.')[-1].decode('utf-8'),
'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
video = mobj.group('video')
info = {
'id': course + '_' + video,
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
}
self.report_extraction(info['id'])
info = {
'id': course,
'type': 'playlist',
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
}
self.report_download_webpage(info['id'])
info = {
'id': 'Stanford OpenClassroom',
'type': 'playlist',
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
}
self.report_download_webpage(info['id'])
'id': video_id,
'url': video_url,
'uploader': performer,
- 'upload_date': u'NA',
+ 'upload_date': None,
'title': video_title,
'ext': ext,
'format': format,
info = {
'id': '%s_part%02d' % (video_id, index),
'url': download_url,
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
'title': video_title,
'ext': ext,
}
return [{
'id': video_id,
'url': video_url,
- 'uploader': u'NA',
- 'upload_date': u'NA',
+ 'uploader': None,
+ 'upload_date': None,
'title': video_title,
'ext': 'flv',
'thumbnail': video_thumbnail,
return
# Extract upload date
- upload_date = u'NA'
+ upload_date = None
pattern = 'title="Timestamp">(.*?)</a>'
mobj = re.search(pattern, webpage)
if mobj:
self.report_date(upload_date)
# Extract uploader
- uploader = u'NA'
+ uploader = None
pattern = r'rel\="author".*?>(.*?)</a>'
mobj = re.search(pattern, webpage)
if mobj: