Rework upload date mechanism after detecting problems in several tests

This commit is contained in:
Ricardo Garcia 2010-11-19 19:31:26 +01:00
parent 05df0c1d4a
commit 138b11f36e
1 changed file with 34 additions and 15 deletions

View File

@ -5,7 +5,6 @@
# Author: Benjamin Johnson # Author: Benjamin Johnson
# License: Public domain code # License: Public domain code
import cookielib import cookielib
import datetime
import htmlentitydefs import htmlentitydefs
import httplib import httplib
import locale import locale
@ -37,6 +36,21 @@ std_headers = {
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
month_name_to_number = {
'January': '01',
'February': '02',
'March': '03',
'April': '04',
'May': '05',
'June': '06',
'July': '07',
'August': '08',
'September': '09',
'October': '10',
'November': '11',
'December': '12',
}
def preferredencoding(): def preferredencoding():
"""Get preferred encoding. """Get preferred encoding.
@ -899,13 +913,18 @@ class YoutubeIE(InfoExtractor):
upload_date = u'NA' upload_date = u'NA'
mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL) mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
if mobj is not None: if mobj is not None:
upload_date = mobj.group(1).split() try:
format_expressions = ['%d %B %Y', '%B %d, %Y'] if ',' in mobj.group(1):
for expression in format_expressions: # Month Day, Year
try: m, d, y = mobj.group(1).replace(',', '').split()
upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') else:
except: # Day Month Year, we'll suppose
pass d, m, y = mobj.group(1).split()
m = month_name_to_number[m]
d = '%02d' % (long(d))
upload_date = '%s%s%s' % (y, m, d)
except:
upload_date = u'NA'
# description # description
video_description = 'No description available.' video_description = 'No description available.'
@ -961,7 +980,7 @@ class YoutubeIE(InfoExtractor):
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_real_url.decode('utf-8'), 'url': video_real_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'),
'uploaddate': upload_date, 'upload_date': upload_date,
'title': video_title, 'title': video_title,
'stitle': simple_title, 'stitle': simple_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
@ -1108,7 +1127,7 @@ class MetacafeIE(InfoExtractor):
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'), 'url': video_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'),
'uploaddate': u'NA', 'upload_date': u'NA',
'title': video_title, 'title': video_title,
'stitle': simple_title, 'stitle': simple_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
@ -1197,7 +1216,7 @@ class DailymotionIE(InfoExtractor):
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'), 'url': video_url.decode('utf-8'),
'uploader': video_uploader.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'),
'uploaddate': u'NA', 'upload_date': u'NA',
'title': video_title, 'title': video_title,
'stitle': simple_title, 'stitle': simple_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
@ -1307,7 +1326,7 @@ class GoogleIE(InfoExtractor):
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'), 'url': video_url.decode('utf-8'),
'uploader': u'NA', 'uploader': u'NA',
'uploaddate': u'NA', 'upload_date': u'NA',
'title': video_title, 'title': video_title,
'stitle': simple_title, 'stitle': simple_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
@ -1389,7 +1408,7 @@ class PhotobucketIE(InfoExtractor):
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'), 'url': video_url.decode('utf-8'),
'uploader': video_uploader, 'uploader': video_uploader,
'uploaddate': u'NA', 'upload_date': u'NA',
'title': video_title, 'title': video_title,
'stitle': simple_title, 'stitle': simple_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
@ -1544,7 +1563,7 @@ class YahooIE(InfoExtractor):
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_url, 'url': video_url,
'uploader': video_uploader, 'uploader': video_uploader,
'uploaddate': u'NA', 'upload_date': u'NA',
'title': video_title, 'title': video_title,
'stitle': simple_title, 'stitle': simple_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),
@ -1647,7 +1666,7 @@ class GenericIE(InfoExtractor):
'id': video_id.decode('utf-8'), 'id': video_id.decode('utf-8'),
'url': video_url.decode('utf-8'), 'url': video_url.decode('utf-8'),
'uploader': video_uploader, 'uploader': video_uploader,
'uploaddate': u'NA', 'upload_date': u'NA',
'title': video_title, 'title': video_title,
'stitle': simple_title, 'stitle': simple_title,
'ext': video_extension.decode('utf-8'), 'ext': video_extension.decode('utf-8'),