From 001c30a310cf6ce44d98212c7664d70f22ad7886 Mon Sep 17 00:00:00 2001 From: evilhero Date: Tue, 5 Aug 2014 15:37:36 -0400 Subject: [PATCH] FIX:(#778) Fix for scanning filenames in which a series contains numerics other than a year - also some added handling for filenames in the format of --- mylar/filechecker.py | 84 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 69 insertions(+), 15 deletions(-) diff --git a/mylar/filechecker.py b/mylar/filechecker.py index 1882949f..5ec9ed6e 100755 --- a/mylar/filechecker.py +++ b/mylar/filechecker.py @@ -135,6 +135,9 @@ def listFiles(dir,watchcomic,Publisher,AlternateSearch=None,manual=None,sarc=Non bracketsinseries = 'False' for i in watchcomic.split(): + if i.isdigit(): + numberinseries = 'True' + if ('20' in i or '19' in i): if i.isdigit(): numberinseries = 'True' @@ -186,11 +189,12 @@ def listFiles(dir,watchcomic,Publisher,AlternateSearch=None,manual=None,sarc=Non #if the series has digits this f's it up. if numberinseries == 'True' or decimalinseries == 'True': #we need to remove the series from the subname and then search the remainder. - watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\.]', '', watchcomic) #remove spec chars for watchcomic match. + watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\.\-]', '', watchcomic) #remove spec chars for watchcomic match. logger.fdebug('[FILECHECKER] watch-cleaned: ' + watchname) subthis = re.sub('.cbr', '', subname) subthis = re.sub('.cbz', '', subthis) - subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\.]', '', subthis) + subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\.\-]', '', subthis) + subthis = re.sub('\s+',' ', subthis) logger.fdebug('[FILECHECKER] sub-cleaned: ' + subthis) #we need to make sure the file is part of the correct series or else will match falsely if watchname not in subthis: @@ -199,8 +203,21 @@ def listFiles(dir,watchcomic,Publisher,AlternateSearch=None,manual=None,sarc=Non subthis = subthis[len(watchname):] #remove watchcomic #we need to now check the remainder of the string for digits assuming it's a possible year logger.fdebug('[FILECHECKER] new subname: ' + str(subthis)) - subname = re.sub('(.*)[\s+|_+](19\d{2}|20\d{2})(.*)', '\\1 (\\2) \\3', subthis) - subname = watchcomic + subname + if subthis.startswith('('): + # if it startswith a bracket, then it's probably a year - let's check. + for i in subthis.split(): + tmpi = re.sub('[\(\)]','',i).strip() + if tmpi.isdigit(): + if (tmpi.startswith('19') or tmpi.startswith('20')) and len(tmpi) == 4: + logger.fdebug('[FILECHECKER] year detected: ' + str(tmpi)) + subname = re.sub('(19\d{2}|20\d{2})(.*)', '\\2 (\\1)', subthis) + subname = re.sub('\(\)', '', subname).strip() + subname = watchcomic + ' ' + subname + logger.fdebug('[FILECHECKER] new subname reversed: ' + str(subname)) + break + else: + subname = re.sub('(.*)[\s+|_+](19\d{2}|20\d{2})(.*)', '\\1 \\2 (\\3)', subthis) + subnm = re.findall('[^()]+', subname) else: subit = re.sub('(.*)[\s+|_+](19\d{2}|20\d{2})(.*)', '\\1 \\3 (\\2)', subname).replace('( )', '') @@ -219,21 +236,39 @@ def listFiles(dir,watchcomic,Publisher,AlternateSearch=None,manual=None,sarc=Non else: if numberinseries == 'True' or decimalinseries == 'True': #we need to remove the series from the subname and then search the remainder. - watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\.]', '', watchcomic) #remove spec chars for watchcomic match. + watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\.\-]', '', watchcomic) #remove spec chars for watchcomic match. logger.fdebug('[FILECHECKER] watch-cleaned: ' + watchname) subthis = re.sub('.cbr', '', subname) subthis = re.sub('.cbz', '', subthis) - subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\.]', '', subthis) + subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\.\-]', '', subthis) + subthis = re.sub('\s+',' ', subthis) logger.fdebug('[FILECHECKER] sub-cleaned: ' + subthis) #we need to make sure the file is part of the correct series or else will match falsely if watchname not in subthis: logger.fdebug('[FILECHECKER] this is a false match. Ignoring this result.') continue - subthis = subthis[len(watchname):] #remove watchcomic + subthis = subthis[len(watchname):].strip() #remove watchcomic #we need to now check the remainder of the string for digits assuming it's a possible year logger.fdebug('[FILECHECKER] new subname: ' + str(subthis)) - subname = re.sub('(.*)[\s+|_+](19\d{2}|20\d{2})(.*)', '\\1 (\\2) \\3', subthis) - subname = watchname + subname + if subthis.startswith('('): + # if it startswith a bracket, then it's probably a year and the format is incorrect to continue - let's check. + for i in subthis.split(): + tmpi = re.sub('[\(\)]','',i).strip() + if tmpi.isdigit(): + if (tmpi.startswith('19') or tmpi.startswith('20')) and len(tmpi) == 4: + logger.fdebug('[FILECHECKER] Year detected: ' + str(tmpi)) + subname = re.sub('(19\d{2}|20\d{2})(.*)', '\\2 (\\1)', subthis) + subname = re.sub('\(\)', '', subname).strip() + logger.fdebug('[FILECHECKER] Flipping the issue with the year: ' + str(subname)) + break + else: + subname = re.sub('(19\d{2}|20\d{2})(.*)', '\\2 (\\1)', subthis) + subname = re.sub('\(\)', '', subname).strip() + + subname = watchname + ' ' + subname + subname = re.sub('\s+', ' ', subname).strip() + + logger.fdebug('[FILECHECKER] New subname reversed: ' + str(subname)) subnm = re.findall('[^()]+', subname) @@ -274,13 +309,32 @@ def listFiles(dir,watchcomic,Publisher,AlternateSearch=None,manual=None,sarc=Non #If the Year comes before the Issue # the subname is passed with no Issue number. #This logic checks for numbers before the extension in the format of 1 01 001 #and adds to the subname. (Cases where comic name is $Series_$Year_$Issue) - if len(subnm) > 1: - if (re.search('(19\d{2}|20\d{2})',subnm[1]) is not None): - logger.fdebug('[FILECHECKER] subnm0: ' + str(subnm[0])) - logger.fdebug('[FILECHECKER] subnm1: ' + str(subnm[1])) + +# if len(subnm) > 1: +# if (re.search('(19\d{2}|20\d{2})',subnm[1]) is not None): +# logger.info('subnm[1]: ' + str(subnm[1])) +# for i in subnm: +# tmpi = i.strip() +# if tmpi.isdigit(): +# if (tmpi.startswith('19') or tmpi.startswith('20')) and len(tmpi) == 4: +# logger.info('[FILECHECKER] year detected: ' + str(tmpi)) +# #strip out all the brackets in the subnm[2] if it exists so we're left with just the issue # in most cases +# subremoved = re.findall('[^()]+', subnm[2]).strip() +# if len(subremoved) > 5: +# logger.info('[FILECHECKER] something is wrong with the parsing - better report the issue on github.') +# break +# subname = re.sub('(.*)[\s+|_+](19\d{2}|20\d{2})(.*)', '\\1 ' + str(subremoved) + ' (\\2)', subname) +# subname = re.sub('\(\)', '', subname).strip() +# logger.info('[FILECHECKER] THE new subname reversed: ' + str(subname)) +# break +# else: +# subname = re.sub('(.*)[\s+|_+](19\d{2}|20\d{2})(.*)', '\\1 \\2 (\\3)', subname) + +# subnm = re.findall('[^()]+', subname) # we need to regenerate this here. +# logger.fdebug('[FILECHECKER] subnm0: ' + str(subnm[0])) +# logger.fdebug('[FILECHECKER] subnm1: ' + str(subnm[1])) # logger.fdebug('subnm2: ' + str(subnm[2])) - subname = str(subnm[0]).lstrip() + ' (' + str(subnm[1]).strip() + ') ' - subnm = re.findall('[^()]+', subname) # we need to regenerate this here. +# subname = str(subnm[0]).lstrip() + ' (' + str(subnm[1]).strip() + ') ' subname = subnm[0]