FIX: Fixed some import errors for cbz files that contain no metadata, FIX: Fixed some grouping errors on the import results page, FIX: Added an exception check to the metadata process so that errors are captured gracefully

This commit is contained in:
evilhero 2017-03-23 11:54:26 -04:00
parent 4359de96a8
commit 3c36d33eb7
2 changed files with 253 additions and 220 deletions

View File

@ -1370,93 +1370,114 @@ def filesafe(comic):
return comicname_filesafe
def IssueDetails(filelocation, IssueID=None):
def IssueDetails(filelocation, IssueID=None, justinfo=False):
import zipfile, logger
from xml.dom.minidom import parseString
dstlocation = os.path.join(mylar.CACHE_DIR, 'temp.zip')
issuedetails = []
if filelocation.endswith('.cbz'):
logger.fdebug('CBZ file detected. Checking for .xml within file')
shutil.copy(filelocation, dstlocation)
else:
logger.fdebug('filename is not a cbz : ' + filelocation)
return
cover = "notfound"
issuetag = None
pic_extensions = ('.jpg','.png','.webp')
modtime = os.path.getmtime(dstlocation)
low_infile = 999999
try:
with zipfile.ZipFile(dstlocation, 'r') as inzipfile:
for infile in sorted(inzipfile.namelist()):
tmp_infile = re.sub("[^0-9]","", infile).strip()
if tmp_infile == '':
pass
elif int(tmp_infile) < int(low_infile):
low_infile = tmp_infile
low_infile_name = infile
if infile == 'ComicInfo.xml':
logger.fdebug('Extracting ComicInfo.xml to display.')
dst = os.path.join(mylar.CACHE_DIR, 'ComicInfo.xml')
data = inzipfile.read(infile)
#print str(data)
issuetag = 'xml'
#looks for the first page and assumes it's the cover. (Alternate covers handled later on)
elif any(['000.' in infile, '00.' in infile]) and infile.endswith(pic_extensions) and cover == "notfound":
logger.fdebug('Extracting primary image ' + infile + ' as coverfile for display.')
if justinfo is False:
dstlocation = os.path.join(mylar.CACHE_DIR, 'temp.zip')
if filelocation.endswith('.cbz'):
logger.fdebug('CBZ file detected. Checking for .xml within file')
shutil.copy(filelocation, dstlocation)
else:
logger.fdebug('filename is not a cbz : ' + filelocation)
return
cover = "notfound"
pic_extensions = ('.jpg','.png','.webp')
modtime = os.path.getmtime(dstlocation)
low_infile = 999999
try:
with zipfile.ZipFile(dstlocation, 'r') as inzipfile:
for infile in sorted(inzipfile.namelist()):
tmp_infile = re.sub("[^0-9]","", infile).strip()
if tmp_infile == '':
pass
elif int(tmp_infile) < int(low_infile):
low_infile = tmp_infile
low_infile_name = infile
if infile == 'ComicInfo.xml':
logger.fdebug('Extracting ComicInfo.xml to display.')
dst = os.path.join(mylar.CACHE_DIR, 'ComicInfo.xml')
data = inzipfile.read(infile)
#print str(data)
issuetag = 'xml'
#looks for the first page and assumes it's the cover. (Alternate covers handled later on)
elif any(['000.' in infile, '00.' in infile]) and infile.endswith(pic_extensions) and cover == "notfound":
logger.fdebug('Extracting primary image ' + infile + ' as coverfile for display.')
local_file = open(os.path.join(mylar.CACHE_DIR, 'temp.jpg'), "wb")
local_file.write(inzipfile.read(infile))
local_file.close
cover = "found"
elif any(['00a' in infile, '00b' in infile, '00c' in infile, '00d' in infile, '00e' in infile]) and infile.endswith(pic_extensions) and cover == "notfound":
logger.fdebug('Found Alternate cover - ' + infile + ' . Extracting.')
altlist = ('00a', '00b', '00c', '00d', '00e')
for alt in altlist:
if alt in infile:
local_file = open(os.path.join(mylar.CACHE_DIR, 'temp.jpg'), "wb")
local_file.write(inzipfile.read(infile))
local_file.close
cover = "found"
break
elif any(['001.jpg' in infile, '001.png' in infile, '001.webp' in infile, '01.jpg' in infile, '01.png' in infile, '01.webp' in infile]) and cover == "notfound":
logger.fdebug('Extracting primary image ' + infile + ' as coverfile for display.')
local_file = open(os.path.join(mylar.CACHE_DIR, 'temp.jpg'), "wb")
local_file.write(inzipfile.read(infile))
local_file.close
cover = "found"
if cover != "found":
logger.fdebug('Invalid naming sequence for jpgs discovered. Attempting to find the lowest sequence and will use as cover (it might not work). Currently : ' + str(low_infile))
local_file = open(os.path.join(mylar.CACHE_DIR, 'temp.jpg'), "wb")
local_file.write(inzipfile.read(infile))
local_file.write(inzipfile.read(low_infile_name))
local_file.close
cover = "found"
elif any(['00a' in infile, '00b' in infile, '00c' in infile, '00d' in infile, '00e' in infile]) and infile.endswith(pic_extensions) and cover == "notfound":
logger.fdebug('Found Alternate cover - ' + infile + ' . Extracting.')
altlist = ('00a', '00b', '00c', '00d', '00e')
for alt in altlist:
if alt in infile:
local_file = open(os.path.join(mylar.CACHE_DIR, 'temp.jpg'), "wb")
local_file.write(inzipfile.read(infile))
local_file.close
cover = "found"
break
cover = "found"
elif any(['001.jpg' in infile, '001.png' in infile, '001.webp' in infile, '01.jpg' in infile, '01.png' in infile, '01.webp' in infile]) and cover == "notfound":
logger.fdebug('Extracting primary image ' + infile + ' as coverfile for display.')
local_file = open(os.path.join(mylar.CACHE_DIR, 'temp.jpg'), "wb")
local_file.write(inzipfile.read(infile))
local_file.close
cover = "found"
except:
logger.info('ERROR. Unable to properly retrieve the cover for displaying. It\'s probably best to re-tag this file.')
return
if cover != "found":
logger.fdebug('Invalid naming sequence for jpgs discovered. Attempting to find the lowest sequence and will use as cover (it might not work). Currently : ' + str(low_infile))
local_file = open(os.path.join(mylar.CACHE_DIR, 'temp.jpg'), "wb")
local_file.write(inzipfile.read(low_infile_name))
local_file.close
cover = "found"
ComicImage = os.path.join('cache', 'temp.jpg?' +str(modtime))
IssueImage = replacetheslash(ComicImage)
except:
logger.info('ERROR. Unable to properly retrieve the cover for displaying. It\'s probably best to re-tag this file.')
return
ComicImage = os.path.join('cache', 'temp.jpg?' +str(modtime))
IssueImage = replacetheslash(ComicImage)
else:
IssueImage = "None"
try:
with zipfile.ZipFile(filelocation, 'r') as inzipfile:
for infile in sorted(inzipfile.namelist()):
if infile == 'ComicInfo.xml':
logger.fdebug('Found ComicInfo.xml - now retrieving information.')
data = inzipfile.read(infile)
issuetag = 'xml'
break
except:
logger.info('ERROR. Unable to properly retrieve the cover for displaying. It\'s probably best to re-tag this file.')
return
if issuetag is None:
import subprocess
from subprocess import CalledProcessError, check_output
unzip_cmd = "/usr/bin/unzip"
data = None
try:
#unzip -z will extract the zip comment field.
data = subprocess.check_output([unzip_cmd, '-z', dstlocation])
# return data is encoded in bytes, not unicode. Need to figure out how to run check_output returning utf-8
issuetag = 'comment'
except CalledProcessError as e:
dz = zipfile.ZipFile(filelocation, 'r')
data = dz.comment
except:
logger.warn('Unable to extract comment field from zipfile.')
return
else:
if data:
issuetag = 'comment'
else:
logger.warn('No metadata available in zipfile comment field.')
return
logger.info('Tag returned as being: ' + str(issuetag))
#logger.info('data:' + str(data))
@ -1549,28 +1570,30 @@ def IssueDetails(filelocation, IssueID=None):
except:
pagecount = 0
i = 0
#not used atm.
#to validate a front cover if it's tagged as one within the zip (some do this)
#i = 0
#try:
# pageinfo = result.getElementsByTagName('Page')[0].attributes
# if pageinfo: pageinfo_test == True
#except:
# pageinfo_test = False
try:
pageinfo = result.getElementsByTagName('Page')[0].attributes
if pageinfo: pageinfo_test == True
except:
pageinfo_test = False
#if pageinfo_test:
# while (i < int(pagecount)):
# pageinfo = result.getElementsByTagName('Page')[i].attributes
# attrib = pageinfo.getNamedItem('Image')
# #logger.fdebug('Frontcover validated as being image #: ' + str(attrib.value))
# att = pageinfo.getNamedItem('Type')
# #logger.fdebug('pageinfo: ' + str(pageinfo))
# if att.value == 'FrontCover':
# #logger.fdebug('FrontCover detected. Extracting.')
# break
# i+=1
if pageinfo_test:
while (i < int(pagecount)):
pageinfo = result.getElementsByTagName('Page')[i].attributes
attrib = pageinfo.getNamedItem('Image')
#logger.fdebug('Frontcover validated as being image #: ' + str(attrib.value))
att = pageinfo.getNamedItem('Type')
logger.fdebug('pageinfo: ' + str(pageinfo))
if att.value == 'FrontCover':
#logger.fdebug('FrontCover detected. Extracting.')
break
i+=1
elif issuetag == 'comment':
logger.info('CBL Tagging.')
stripline = 'Archive: ' + dstlocation
stripline = 'Archive: ' + filelocation
data = re.sub(stripline, '', data.encode("utf-8")).strip()
if data is None or data == '':
return

View File

@ -271,146 +271,156 @@ def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None,
if i['ComicLocation'].endswith('.cbz'):
logger.fdebug('[IMPORT-CBZ] Metatagging checking enabled.')
logger.info('[IMPORT-CBZ} Attempting to read tags present in filename: ' + i['ComicLocation'])
issueinfo = helpers.IssueDetails(i['ComicLocation'])
logger.info('issueinfo: ' + str(issueinfo))
if issueinfo is None:
logger.fdebug('[IMPORT-CBZ] No valid metadata contained within filename. Dropping down to parsing the filename itself.')
try:
issueinfo = helpers.IssueDetails(i['ComicLocation'], justinfo=True)
except:
logger.fdebug('[IMPORT-CBZ] Unable to retrieve metadata - possibly doesn\'t exist. Ignoring meta-retrieval')
pass
else:
issuenotes_id = None
logger.info('[IMPORT-CBZ] Successfully retrieved some tags. Lets see what I can figure out.')
comicname = issueinfo[0]['series']
if comicname is not None:
logger.fdebug('[IMPORT-CBZ] Series Name: ' + comicname)
as_d = filechecker.FileChecker()
as_dyninfo = as_d.dynamic_replace(comicname)
logger.fdebug('Dynamic-ComicName: ' + as_dyninfo['mod_seriesname'])
else:
logger.fdebug('[IMPORT-CBZ] No series name found within metadata. This is bunk - dropping down to file parsing for usable information.')
issueinfo = None
issue_number = None
logger.info('issueinfo: ' + str(issueinfo))
if issueinfo is not None:
try:
issueyear = issueinfo[0]['year']
except:
issueyear = None
#if the issue number is a non-numeric unicode string, this will screw up along with impID
issue_number = issueinfo[0]['issue_number']
if issue_number is not None:
logger.fdebug('[IMPORT-CBZ] Issue Number: ' + issue_number)
else:
issue_number = i['parsed']['issue_number']
if 'annual' in comicname.lower() or 'annual' in comfilename.lower():
if issue_number is None or issue_number == 'None':
logger.info('Annual detected with no issue number present within metadata. Assuming year as issue.')
try:
issue_number = 'Annual ' + str(issueyear)
except:
issue_number = 'Annual ' + i['parsed']['issue_year']
else:
logger.info('Annual detected with issue number present within metadata.')
if 'annual' not in issue_number.lower():
issue_number = 'Annual ' + issue_number
mod_series = re.sub('annual', '', comicname, flags=re.I).strip()
else:
mod_series = comicname
logger.fdebug('issue number SHOULD Be: ' + issue_number)
try:
issuetitle = issueinfo[0]['title']
except:
issuetitle = None
try:
issueyear = issueinfo[0]['year']
except:
issueyear = None
try:
issuevolume = str(issueinfo[0]['volume'])
if all([issuevolume is not None, issuevolume != 'None']) and not issuevolume.lower().startswith('v'):
issuevolume = 'v' + str(issuevolume)
logger.fdebug('[TRY]issue volume is: ' + str(issuevolume))
except:
logger.fdebug('[EXCEPT]issue volume is: ' + str(issuevolume))
issuevolume = None
if any([comicname is None, comicname == 'None', issue_number is None, issue_number == 'None']):
logger.fdebug('[IMPORT-CBZ] Improperly tagged file as the metatagging is invalid. Ignoring meta and just parsing the filename.')
issueinfo = None
if issueinfo is None:
logger.fdebug('[IMPORT-CBZ] No valid metadata contained within filename. Dropping down to parsing the filename itself.')
pass
else:
# if used by ComicTagger, Notes field will have the IssueID.
issuenotes = issueinfo[0]['notes']
logger.fdebug('[IMPORT-CBZ] Notes: ' + issuenotes)
if issuenotes is not None and issuenotes != 'None':
if 'Issue ID' in issuenotes:
st_find = issuenotes.find('Issue ID')
tmp_issuenotes_id = re.sub("[^0-9]", " ", issuenotes[st_find:]).strip()
if tmp_issuenotes_id.isdigit():
issuenotes_id = tmp_issuenotes_id
logger.fdebug('[IMPORT-CBZ] Successfully retrieved CV IssueID for ' + comicname + ' #' + issue_number + ' [' + str(issuenotes_id) + ']')
elif 'CVDB' in issuenotes:
st_find = issuenotes.find('CVDB')
tmp_issuenotes_id = re.sub("[^0-9]", " ", issuenotes[st_find:]).strip()
if tmp_issuenotes_id.isdigit():
issuenotes_id = tmp_issuenotes_id
logger.fdebug('[IMPORT-CBZ] Successfully retrieved CV IssueID for ' + comicname + ' #' + issue_number + ' [' + str(issuenotes_id) + ']')
else:
logger.fdebug('[IMPORT-CBZ] Unable to retrieve IssueID from meta-tagging. If there is other metadata present I will use that.')
issuenotes_id = None
logger.info('[IMPORT-CBZ] Successfully retrieved some tags. Lets see what I can figure out.')
comicname = issueinfo[0]['series']
if comicname is not None:
logger.fdebug('[IMPORT-CBZ] Series Name: ' + comicname)
as_d = filechecker.FileChecker()
as_dyninfo = as_d.dynamic_replace(comicname)
logger.fdebug('Dynamic-ComicName: ' + as_dyninfo['mod_seriesname'])
else:
logger.fdebug('[IMPORT-CBZ] No series name found within metadata. This is bunk - dropping down to file parsing for usable information.')
issueinfo = None
issue_number = None
logger.fdebug('[IMPORT-CBZ] Adding ' + comicname + ' to the import-queue!')
#impid = comicname + '-' + str(issueyear) + '-' + str(issue_number) #com_NAME + "-" + str(result_comyear) + "-" + str(comiss)
impid = str(random.randint(1000000,99999999))
logger.fdebug('[IMPORT-CBZ] impid: ' + str(impid))
#make sure we only add in those issueid's which don't already have a comicid attached via the cvinfo scan above (this is for reverse-lookup of issueids)
issuepopulated = False
if cvinfo_CID is None:
if issuenotes_id is None:
logger.info('[IMPORT-CBZ] No ComicID detected where it should be. Bypassing this metadata entry and going the parsing route [' + comfilename + ']')
else:
#we need to store the impid here as well so we can look it up.
issueid_list.append({'issueid': issuenotes_id,
'importinfo': {'impid': impid,
'comicid': None,
'comicname': comicname,
'dynamicname': as_dyninfo['mod_seriesname'],
'comicyear': issueyear,
'issuenumber': issue_number,
'volume': issuevolume,
'comfilename': comfilename,
'comlocation': comlocation.decode(mylar.SYS_ENCODING)}
})
mylar.IMPORT_CID_COUNT +=1
issuepopulated = True
if issueinfo is not None:
try:
issueyear = issueinfo[0]['year']
except:
issueyear = None
if issuepopulated == False:
if cvscanned_loc == os.path.dirname(comlocation):
cv_cid = cvinfo_CID
logger.fdebug('[IMPORT-CBZ] CVINFO_COMICID attached : ' + str(cv_cid))
#if the issue number is a non-numeric unicode string, this will screw up along with impID
issue_number = issueinfo[0]['issue_number']
if issue_number is not None:
logger.fdebug('[IMPORT-CBZ] Issue Number: ' + issue_number)
else:
cv_cid = None
import_by_comicids.append({
"impid": impid,
"comicid": cv_cid,
"watchmatch": None,
"displayname": mod_series,
"comicname": comicname,
"dynamicname": as_dyninfo['mod_seriesname'],
"comicyear": issueyear,
"issuenumber": issue_number,
"volume": issuevolume,
"issueid": issuenotes_id,
"comfilename": comfilename,
"comlocation": comlocation.decode(mylar.SYS_ENCODING)
})
issue_number = i['parsed']['issue_number']
mylar.IMPORT_CID_COUNT +=1
else:
pass
#logger.fdebug(i['ComicFilename'] + ' is not in a metatagged format (cbz). Bypassing reading of the metatags')
if 'annual' in comicname.lower() or 'annual' in comfilename.lower():
if issue_number is None or issue_number == 'None':
logger.info('Annual detected with no issue number present within metadata. Assuming year as issue.')
try:
issue_number = 'Annual ' + str(issueyear)
except:
issue_number = 'Annual ' + i['parsed']['issue_year']
else:
logger.info('Annual detected with issue number present within metadata.')
if 'annual' not in issue_number.lower():
issue_number = 'Annual ' + issue_number
mod_series = re.sub('annual', '', comicname, flags=re.I).strip()
else:
mod_series = comicname
logger.fdebug('issue number SHOULD Be: ' + issue_number)
try:
issuetitle = issueinfo[0]['title']
except:
issuetitle = None
try:
issueyear = issueinfo[0]['year']
except:
issueyear = None
try:
issuevolume = str(issueinfo[0]['volume'])
if all([issuevolume is not None, issuevolume != 'None', not issuevolume.lower().startswith('v')]):
issuevolume = 'v' + str(issuevolume)
if any([issuevolume is None, issuevolume == 'None']):
logger.info('EXCEPT] issue volume is NONE')
issuevolume = None
else:
logger.fdebug('[TRY]issue volume is: ' + str(issuevolume))
except:
logger.fdebug('[EXCEPT]issue volume is: ' + str(issuevolume))
issuevolume = None
if any([comicname is None, comicname == 'None', issue_number is None, issue_number == 'None']):
logger.fdebug('[IMPORT-CBZ] Improperly tagged file as the metatagging is invalid. Ignoring meta and just parsing the filename.')
issueinfo = None
pass
else:
# if used by ComicTagger, Notes field will have the IssueID.
issuenotes = issueinfo[0]['notes']
logger.fdebug('[IMPORT-CBZ] Notes: ' + issuenotes)
if issuenotes is not None and issuenotes != 'None':
if 'Issue ID' in issuenotes:
st_find = issuenotes.find('Issue ID')
tmp_issuenotes_id = re.sub("[^0-9]", " ", issuenotes[st_find:]).strip()
if tmp_issuenotes_id.isdigit():
issuenotes_id = tmp_issuenotes_id
logger.fdebug('[IMPORT-CBZ] Successfully retrieved CV IssueID for ' + comicname + ' #' + issue_number + ' [' + str(issuenotes_id) + ']')
elif 'CVDB' in issuenotes:
st_find = issuenotes.find('CVDB')
tmp_issuenotes_id = re.sub("[^0-9]", " ", issuenotes[st_find:]).strip()
if tmp_issuenotes_id.isdigit():
issuenotes_id = tmp_issuenotes_id
logger.fdebug('[IMPORT-CBZ] Successfully retrieved CV IssueID for ' + comicname + ' #' + issue_number + ' [' + str(issuenotes_id) + ']')
else:
logger.fdebug('[IMPORT-CBZ] Unable to retrieve IssueID from meta-tagging. If there is other metadata present I will use that.')
logger.fdebug('[IMPORT-CBZ] Adding ' + comicname + ' to the import-queue!')
#impid = comicname + '-' + str(issueyear) + '-' + str(issue_number) #com_NAME + "-" + str(result_comyear) + "-" + str(comiss)
impid = str(random.randint(1000000,99999999))
logger.fdebug('[IMPORT-CBZ] impid: ' + str(impid))
#make sure we only add in those issueid's which don't already have a comicid attached via the cvinfo scan above (this is for reverse-lookup of issueids)
issuepopulated = False
if cvinfo_CID is None:
if issuenotes_id is None:
logger.info('[IMPORT-CBZ] No ComicID detected where it should be. Bypassing this metadata entry and going the parsing route [' + comfilename + ']')
else:
#we need to store the impid here as well so we can look it up.
issueid_list.append({'issueid': issuenotes_id,
'importinfo': {'impid': impid,
'comicid': None,
'comicname': comicname,
'dynamicname': as_dyninfo['mod_seriesname'],
'comicyear': issueyear,
'issuenumber': issue_number,
'volume': issuevolume,
'comfilename': comfilename,
'comlocation': comlocation.decode(mylar.SYS_ENCODING)}
})
mylar.IMPORT_CID_COUNT +=1
issuepopulated = True
if issuepopulated == False:
if cvscanned_loc == os.path.dirname(comlocation):
cv_cid = cvinfo_CID
logger.fdebug('[IMPORT-CBZ] CVINFO_COMICID attached : ' + str(cv_cid))
else:
cv_cid = None
import_by_comicids.append({
"impid": impid,
"comicid": cv_cid,
"watchmatch": None,
"displayname": mod_series,
"comicname": comicname,
"dynamicname": as_dyninfo['mod_seriesname'],
"comicyear": issueyear,
"issuenumber": issue_number,
"volume": issuevolume,
"issueid": issuenotes_id,
"comfilename": comfilename,
"comlocation": comlocation.decode(mylar.SYS_ENCODING)
})
mylar.IMPORT_CID_COUNT +=1
else:
pass
#logger.fdebug(i['ComicFilename'] + ' is not in a metatagged format (cbz). Bypassing reading of the metatags')
if issueinfo is None:
if i['parsedinfo']['issue_number'] is None: