mylar/mylar/filechecker.py

731 lines
37 KiB
Python
Executable File

#!/usr/bin/env python
# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mylar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.
import os
import os.path
import pprint
import subprocess
import re
import logger
import mylar
import sys
def file2comicmatch(watchmatch):
    """Placeholder hook that receives a ``watchmatch`` value and does nothing.

    The argument is accepted but never inspected; the function always
    returns ``None``.
    """
    # print ("match: " + str(watchmatch))
    return None
def listFiles(dir,watchcomic,Publisher,AlternateSearch=None,manual=None,sarc=None):
# Scan the directory `dir` (os.listdir, so not recursive) for .cbr/.cbz files whose
# cleaned-up filename contains the watched series title, and work out the issue
# number of each matching file.
#
# Parameters:
#   dir             - directory to scan.
#   watchcomic      - series title to look for; it is ascii-encoded and stripped of
#                     punctuation before being compared.
#   Publisher       - publisher name; if it appears in a bracketed part of the
#                     filename that part is removed before matching.
#   AlternateSearch - optional '##'-separated list of alternate series titles.
#   manual          - optional dict; when given, the keys 'SeriesYear', 'Total',
#                     'ComicVersion' and 'LatestDate' are read and extra
#                     version / year / word-for-word title checks are applied.
#   sarc            - when truthy together with mylar.READ2FILENAME, a leading
#                     numeric reading-order tag in the filename is skipped.
#
# Returns the dict `watchmatch`: 'comiccount' holds the tally `comiccnt` and
# 'comiclist' (presumably only set once a file has matched - confirm nesting) holds
# one dict per accepted file with the keys 'ComicFilename', 'ComicLocation',
# 'ComicSize', 'JusttheDigits' and, for manual runs, 'ComicYear'.
#
# NOTE(review): this copy of the file has lost its indentation, so the nesting of the
# statements below has to be inferred; notes that depend on nesting are hedged.
# use AlternateSearch to check for filenames that follow that naming pattern
# ie. Star Trek TNG Doctor Who Assimilation won't get hits as the
# checker looks for Star Trek TNG Doctor Who Assimilation2 (according to CV)
# we need to convert to ascii, as watchcomic is utf-8 and special chars f'it up
u_watchcomic = watchcomic.encode('ascii', 'ignore').strip()
logger.fdebug('[FILECHECKER] comic: ' + watchcomic)
basedir = dir
logger.fdebug('[FILECHECKER] Looking in: ' + dir)
watchmatch = {}
comiclist = []
comiccnt = 0
# Punctuation that gets blanked out of filenames before matching.
# NOTE(review): entries such as '\/' or '\$' are two-character strings (backslash +
# character) in Python, so the literal `nono in subname` test further down only hits
# when the filename itself contains that backslash - confirm this is intended.
not_these = ['#',
',',
'\/',
':',
'\;',
'.',
'-',
'!',
'\$',
'\%',
'\+',
'\'',
'\?',
'\@']
# Alphanumeric suffixes that are accepted as part of an issue number.
issue_exceptions = ['AU',
'.INH',
'.NOW',
'AI',
'A',
'B',
'C']
# Only files ending in one of these extensions are examined.
extensions = ('.cbr', '.cbz')
# Examine every entry of the directory in turn.
for item in os.listdir(basedir):
if item == 'cover.jpg' or item == 'cvinfo': continue
if not item.endswith(extensions):
logger.fdebug('[FILECHECKER] filename not a valid cbr/cbz - ignoring: ' + item)
continue
#print item
#subname = os.path.join(basedir, item)
# subname is the working copy of the filename that gets progressively cleaned.
subname = item
#versioning - remove it
# Strip a volume token (v2, v2013, vol...) from the name and remember it in volrem.
subsplit = subname.replace('_', ' ').split()
volrem = None
for subit in subsplit:
if subit[0].lower() == 'v':
# NOTE(review): vfull is assigned here and below but not read anywhere else in this function.
vfull = 0
if subit[1:].isdigit():
#if in format v1, v2009 etc...
if len(subit) > 3:
# if it's greater than 3 in length, then the format is Vyyyy
vfull = 1 # add on 1 character length to account for extra space
# NOTE(review): subit is used as a regex pattern without re.escape.
subname = re.sub(subit, '', subname)
volrem = subit
elif subit.lower()[:3] == 'vol':
#if in format vol.2013 etc
#because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely
logger.fdebug('[FILECHECKER] volume indicator detected as version #:' + str(subit))
# NOTE(review): same unescaped-pattern caveat; a '.' inside subit matches any character.
subname = re.sub(subit, '', subname)
volrem = subit
#check if a year is present in series title (ie. spider-man 2099)
# numberinseries is a string flag ('True'/'False'), not a bool.
numberinseries = 'False'
for i in watchcomic.split():
if ('20' in i or '19' in i):
if i.isdigit():
numberinseries = 'True'
else:
# NOTE(review): str.find returns -1 (truthy) when the text is absent and 0 (falsy)
# when it sits at the start, so `if find20:` / `if find19:` do not test "was found".
find20 = i.find('20')
if find20:
stf = i[find20:4].strip()
find19 = i.find('19')
if find19:
stf = i[find19:4].strip()
logger.fdebug('[FILECHECKER] stf is : ' + str(stf))
if stf.isdigit():
numberinseries = 'True'
logger.fdebug('[FILECHECKER] numberinseries: ' + numberinseries)
#remove the brackets..
# subnm is the list of non-empty pieces of subname that contain no '(' or ')'.
subnm = re.findall('[^()]+', subname)
logger.fdebug('[FILECHECKER] subnm len : ' + str(len(subnm)))
# A single piece means the brackets did not split the name, so look for a bare
# 19xx/20xx year and wrap it in brackets before splitting again.
if len(subnm) == 1:
logger.fdebug('[FILECHECKER] ' + str(len(subnm)) + ': detected invalid filename - attempting to detect year to continue')
#if the series has digits this f's it up.
if numberinseries == 'True':
#we need to remove the series from the subname and then search the remainder.
watchname = re.sub('[-\:\;\!\'\/\?\+\=\_\%\.]', '', watchcomic) #remove spec chars for watchcomic match.
logger.fdebug('[FILECHECKER] watch-cleaned: ' + str(watchname))
# NOTE(review): the '.' in '.cbr' / '.cbz' is an unescaped regex wildcard.
subthis = re.sub('.cbr', '', subname)
subthis = re.sub('.cbz', '', subthis)
subthis = re.sub('[-\:\;\!\'\/\?\+\=\_\%\.]', '', subthis)
logger.fdebug('[FILECHECKER] sub-cleaned: ' + str(subthis))
# Drops the first len(watchname) characters; assumes the filename starts with the title - TODO confirm.
subthis = subthis[len(watchname):] #remove watchcomic
#we need to now check the remainder of the string for digits assuming it's a possible year
logger.fdebug('[FILECHECKER] new subname: ' + str(subthis))
# Wrap a whitespace-preceded 19xx/20xx in brackets, then put the title back in front.
subname = re.sub('(.*)\s+(19\d{2}|20\d{2})(.*)', '\\1 (\\2) \\3', subthis)
subname = watchcomic + subname
subnm = re.findall('[^()]+', subname)
else:
# No digits in the series title: bracket the year first, then strip extension and punctuation.
subit = re.sub('(.*)\s+(19\d{2}|20\d{2})(.*)', '\\1 (\\2) \\3', subname)
subthis2 = re.sub('.cbr', '', subit)
subthis1 = re.sub('.cbz', '', subthis2)
subname = re.sub('[-\:\;\!\'\/\?\+\=\_\%\.]', '', subthis1)
subnm = re.findall('[^()]+', subname)
# NOTE(review): Publisher must be a string here; None would raise AttributeError on .lower().
if Publisher.lower() in subname.lower():
#if the Publisher is given within the title or filename even (for some reason, some people
#have this to distinguish different titles), let's remove it entirely.
lenm = len(subnm)
cnt = 0
pub_removed = None
while (cnt < lenm):
if subnm[cnt] is None: break
if subnm[cnt] == ' ':
pass
else:
logger.fdebug(str(cnt) + ". Bracket Word: " + str(subnm[cnt]))
# cnt >= 1 means the first piece (index 0) is never treated as the publisher.
if Publisher.lower() in subnm[cnt].lower() and cnt >= 1:
logger.fdebug('Publisher detected within title : ' + str(subnm[cnt]))
logger.fdebug('cnt is : ' + str(cnt) + ' --- Publisher is: ' + Publisher)
pub_removed = subnm[cnt]
#-strip publisher if exists here-
logger.fdebug('removing publisher from title')
# NOTE(review): pub_removed is used as a regex pattern without re.escape.
subname_pubremoved = re.sub(pub_removed, '', subname)
logger.fdebug('pubremoved : ' + str(subname_pubremoved))
subname_pubremoved = re.sub('\(\)', '', subname_pubremoved) #remove empty brackets
subname_pubremoved = re.sub('\s+', ' ', subname_pubremoved) #remove spaces > 1
logger.fdebug('blank brackets removed: ' + str(subname_pubremoved))
subnm = re.findall('[^()]+', subname_pubremoved)
break
cnt+=1
# From here on subname is only the first piece of subnm.
subname = subnm[0]
if len(subnm):
# if it still has no year (brackets), check setting and either assume no year needed.
# NOTE(review): the assignment below is a no-op.
subname = subname
logger.fdebug('[FILECHECKER] subname no brackets: ' + str(subname))
subname = re.sub('\_', ' ', subname)
# nonocount tallies replaced characters; charpos stays 0 until a special character
# is found, after which it is the list of positions returned by indices().
nonocount = 0
charpos = 0
detneg = "no"
leavehyphen = False
# Replace each special character found in subname with a space. should_restart is
# set again when a hyphen is rewritten in the series-title branch below.
should_restart = True
while should_restart:
should_restart = False
for nono in not_these:
if nono in subname:
subcnt = subname.count(nono)
charpos = indices(subname,nono) # will return a list of char positions in subname
#print "charpos: " + str(charpos)
if nono == '-':
# Hyphens need care: one directly followed by a digit is taken as a negative issue number.
# NOTE(review): `i` is rebound by the for-loop below, so the while condition is only
# re-evaluated with whatever value `i` has after the for-loop (and the i+=1) - confirm nesting.
i=0
while (i < len(charpos)):
for i,j in enumerate(charpos):
# sublimit is the single character after the hyphen (empty if the hyphen is last).
if j+2 > len(subname):
sublimit = subname[j+1:]
else:
sublimit = subname[j+1:j+2]
if sublimit.isdigit():
logger.fdebug('[FILECHECKER] possible negative issue detected.')
nonocount = nonocount + subcnt - 1
detneg = "yes"
# NOTE(review): `i` indexes charpos but is compared against len(watchcomic) here.
elif '-' in watchcomic and i < len(watchcomic):
logger.fdebug('[FILECHECKER] - appears in series title.')
logger.fdebug('[FILECHECKER] up to - :' + subname[:j+1].replace('-', ' '))
logger.fdebug('[FILECHECKER] after - :' + subname[j+1:])
subname = subname[:j+1].replace('-', ' ') + subname[j+1:]
logger.fdebug('[FILECHECKER] new subname is : ' + str(subname))
should_restart = True
leavehyphen = True
i+=1
if detneg == "no" or leavehyphen == False:
subname = re.sub(str(nono), ' ', subname)
nonocount = nonocount + subcnt
#logger.fdebug('[FILECHECKER] (str(nono) + " detected " + str(subcnt) + " times.")
# segment '.' having a . by itself will denote the entire string which we don't want
elif nono == '.':
# Count dots that sit between two digits (decimal issue numbers).
x = 0
fndit = 0
dcspace = 0
while x < subcnt:
# NOTE(review): fndit is never advanced past the hit, so each pass finds the same first '.'.
fndit = subname.find(nono, fndit)
if subname[fndit-1:fndit].isdigit() and subname[fndit+1:fndit+2].isdigit():
logger.fdebug('[FILECHECKER] decimal issue detected.')
dcspace+=1
x+=1
# With exactly one decimal hit the dots are left in place; otherwise every '.' becomes a space.
if dcspace == 1:
nonocount = nonocount + subcnt + dcspace
else:
subname = re.sub('\.', ' ', subname)
nonocount = nonocount + subcnt - 1 #(remove the extension from the length)
else:
#this is new - if it's a symbol seperated by a space on each side it drags in an extra char.
x = 0
fndit = 0
blspc = 0
while x < subcnt:
# NOTE(review): as above, fndit is not advanced, so the same occurrence is re-tested.
fndit = subname.find(nono, fndit)
#print ("space before check: " + str(subname[fndit-1:fndit]))
#print ("space after check: " + str(subname[fndit+1:fndit+2]))
if subname[fndit-1:fndit] == ' ' and subname[fndit+1:fndit+2] == ' ':
logger.fdebug('[FILECHECKER] blankspace detected before and after ' + str(nono))
blspc+=1
x+=1
# nono is used as a regex pattern here (str(nono)), unlike the literal `in` test above.
subname = re.sub(str(nono), ' ', subname)
nonocount = nonocount + subcnt + blspc
#subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',' ', subname)
# modwatchcomic is the ascii series title with punctuation replaced by spaces.
modwatchcomic = re.sub('[\_\#\,\/\:\;\.\!\$\%\'\?\@\-]', ' ', u_watchcomic)
#if leavehyphen == False:
# logger.fdebug('[FILECHECKER] ('removing hyphen for comparisons')
# modwatchcomic = re.sub('-', ' ', modwatchcomic)
# subname = re.sub('-', ' ', subname)
# Normalise both sides the same way: '&' -> ' and ', drop the word 'the', collapse whitespace.
# detectand / detectthe are only consumed by commented-out code further down.
detectand = False
detectthe = False
modwatchcomic = re.sub('\&', ' and ', modwatchcomic)
if ' the ' in modwatchcomic.lower():
# Removing 'the' also lower-cases the whole title, because re.sub runs on .lower().
modwatchcomic = re.sub("\\bthe\\b", "", modwatchcomic.lower())
logger.fdebug('[FILECHECKER] new modwatchcomic: ' + str(modwatchcomic))
detectthe = True
modwatchcomic = re.sub('\s+', ' ', str(modwatchcomic)).strip()
if '&' in subname:
subname = re.sub('\&', ' and ', subname)
detectand = True
if ' the ' in subname.lower():
subname = re.sub("\\bthe\\b", "", subname.lower())
detectthe = True
subname = re.sub('\s+', ' ', str(subname)).strip()
# AS_Alt collects the cleaned alternate titles to try in addition to modwatchcomic.
AS_Alt = []
if AlternateSearch is not None:
chkthealt = AlternateSearch.split('##')
# NOTE(review): chkthealt is a list, so comparing it with 0 is never true.
if chkthealt == 0:
AS_Alternate = AlternateSearch
for calt in chkthealt:
AS_Alternate = re.sub('##','',calt)
#same = encode.
u_altsearchcomic = AS_Alternate.encode('ascii', 'ignore').strip()
altsearchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]', ' ', u_altsearchcomic)
altsearchcomic = re.sub('\&', ' and ', altsearchcomic)
altsearchcomic = re.sub('\s+', ' ', str(altsearchcomic)).strip()
AS_Alt.append(altsearchcomic)
else:
#create random characters so it will never match.
altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf"
AS_Alt.append(altsearchcomic)
#if '_' in subname:
# subname = subname.replace('_', ' ')
logger.fdebug('[FILECHECKER] watchcomic:' + str(modwatchcomic) + ' ..comparing to found file: ' + str(subname))
# A file is a candidate when the cleaned series title, or any alternate title, is a
# case-insensitive substring of the cleaned filename.
if modwatchcomic.lower() in subname.lower() or any(x.lower() in subname.lower() for x in AS_Alt):#altsearchcomic.lower() in subname.lower():
comicpath = os.path.join(basedir, item)
logger.fdebug('[FILECHECKER] ' + modwatchcomic + ' - watchlist match on : ' + comicpath)
comicsize = os.path.getsize(comicpath)
#print ("Comicsize:" + str(comicsize))
# Counted as soon as the title matches; decremented again below if no valid issue number is found.
comiccnt+=1
stann = 0
# cchk is the title that actually matched (series title, or the first matching alternate).
if 'annual' in subname.lower():
logger.fdebug('[FILECHECKER] Annual detected - proceeding')
# NOTE(review): this jtd_len looks like it is overwritten by the volrem if/else below - confirm nesting.
jtd_len = subname.lower().find('annual')
cchk = modwatchcomic
else:
if modwatchcomic.lower() in subname.lower():
cchk = modwatchcomic
else:
cchk_ls = [x for x in AS_Alt if x.lower() in subname.lower()]
cchk = cchk_ls[0]
#print "something: " + str(cchk)
logger.fdebug('[FILECHECKER] we should remove ' + str(nonocount) + ' characters')
findtitlepos = subname.find('-')
# charpos is still the int 0 when no special character was found for this file.
if charpos != 0:
logger.fdebug('[FILECHECKER] detected ' + str(len(charpos)) + ' special characters')
i=0
while (i < len(charpos)):
for i,j in enumerate(charpos):
#print i,j
#print subname
#print "digitchk: " + str(subname[j:])
if j >= len(subname):
logger.fdebug('[FILECHECKER] end reached. ignoring remainder.')
break
# NOTE(review): subname[j:] is the whole tail from j, so this equals '-' only when '-'
# is the final character; the '#' comparison below has the same shape.
elif subname[j:] == '-':
if i <= len(subname) and subname[i+1].isdigit():
logger.fdebug('[FILECHECKER] negative issue detected.')
#detneg = "yes"
elif j > findtitlepos:
if subname[j:] == '#':
if subname[i+1].isdigit():
logger.fdebug('[FILECHECKER] # detected denoting issue#, ignoring.')
else:
nonocount-=1
elif '-' in watchcomic and i < len(watchcomic):
logger.fdebug('[FILECHECKER] - appears in series title, ignoring.')
else:
logger.fdebug('[FILECHECKER] special character appears outside of title - ignoring @ position: ' + str(charpos[i]))
nonocount-=1
i+=1
#remove versioning here
# jtd_len is the number of leading characters of subname to cut off to get past the
# title; both branches currently use len(cchk).
if volrem != None:
jtd_len = len(cchk)# + len(volrem)# + nonocount + 1 #1 is to account for space btwn comic and vol #
else:
jtd_len = len(cchk)# + nonocount
# For story-arc files a leading all-digit token (reading order) is skipped as well.
if sarc and mylar.READ2FILENAME:
removest = subname.find(' ') # the - gets removed above so we test for the first blank space...
if subname[:removest].isdigit():
jtd_len += removest + 1 # +1 to account for space in place of -
logger.fdebug('[FILECHECKER] adjusted jtd_len to : ' + str(removest) + ' because of story-arc reading order tags')
logger.fdebug('[FILECHECKER] nonocount [' + str(nonocount) + '] cchk [' + cchk + '] length [' + str(len(cchk)) + ']')
#if detectand:
# jtd_len = jtd_len - 2 # char substitution diff between & and 'and' = 2 chars
#if detectthe:
# jtd_len = jtd_len - 3 # char subsitiution diff between 'the' and '' = 3 chars
#justthedigits = item[jtd_len:]
logger.fdebug('[FILECHECKER] final jtd_len to prune [' + str(jtd_len) + ']')
logger.fdebug('[FILECHECKER] before title removed from FILENAME [' + str(item) + ']')
logger.fdebug('[FILECHECKER] after title removed from FILENAME [' + str(item[jtd_len:]) + ']')
logger.fdebug('[FILECHECKER] creating just the digits using SUBNAME, pruning first [' + str(jtd_len) + '] chars from [' + subname + ']')
# Everything after the first jtd_len characters; assumes the title sits at the start of subname - TODO confirm.
justthedigits_1 = subname[jtd_len:].strip()
logger.fdebug('[FILECHECKER] after title removed from SUBNAME [' + justthedigits_1 + ']')
#remove the title if it appears
#findtitle = justthedigits.find('-')
#if findtitle > 0 and detneg == "no":
# justthedigits = justthedigits[:findtitle]
# logger.fdebug('[FILECHECKER] ("removed title from name - is now : " + str(justthedigits))
# The first space-delimited token after the title is the candidate issue number.
justthedigits = justthedigits_1.split(' ', 1)[0]
# digitsvalid is a string flag ("true"/"false").
digitsvalid = "false"
# Accept the token if its non-digit tail is one of the issue_exceptions suffixes.
for jdc in list(justthedigits):
#logger.fdebug('[FILECHECKER] ('jdc:' + str(jdc))
if not jdc.isdigit():
#logger.fdebug('[FILECHECKER] ('alpha')
jdc_start = justthedigits.find(jdc)
alpha_isschk = justthedigits[jdc_start:]
#logger.fdebug('[FILECHECKER] ('alpha_isschk:' + str(alpha_isschk))
for issexcept in issue_exceptions:
if issexcept.lower() in alpha_isschk.lower() and len(alpha_isschk) <= len(issexcept):
logger.fdebug('[FILECHECKER] ALPHANUMERIC EXCEPTION : [' + justthedigits + ']')
digitsvalid = "true"
break
if digitsvalid == "true": break
# Look at the text after the first token: it may be the decimal part, an annual number
# or an alphanumeric suffix that was separated by a space when punctuation was blanked.
try:
# Raises IndexError when there is no second token.
tmpthedigits = justthedigits_1.split(' ', 1)[1]
logger.fdebug('[FILECHECKER] If the series has a decimal, this should be a number [' + tmpthedigits + ']')
if 'cbr' in tmpthedigits.lower() or 'cbz' in tmpthedigits.lower():
tmpthedigits = tmpthedigits[:-3].strip()
logger.fdebug('[FILECHECKER] Removed extension - now we should just have a number [' + tmpthedigits + ']')
poss_alpha = tmpthedigits
if poss_alpha.isdigit():
digitsvalid = "true"
if justthedigits.lower() == 'annual':
logger.fdebug('[FILECHECKER] ANNUAL DETECTED [' + poss_alpha + ']')
justthedigits += ' ' + poss_alpha
else:
justthedigits += '.' + poss_alpha
logger.fdebug('[FILECHECKER] DECIMAL ISSUE DETECTED [' + justthedigits + ']')
else:
for issexcept in issue_exceptions:
decimalexcept = False
if '.' in issexcept:
decimalexcept = True
issexcept = issexcept[1:] #remove the '.' from comparison...
if issexcept.lower() in poss_alpha.lower() and len(poss_alpha) <= len(issexcept):
if decimalexcept:
issexcept = '.' + issexcept
justthedigits += issexcept #poss_alpha
logger.fdebug('[FILECHECKER] ALPHANUMERIC EXCEPTION. COMBINING : [' + justthedigits + ']')
digitsvalid = "true"
break
# NOTE(review): bare except - it covers the expected IndexError above but also hides any other error.
except:
tmpthedigits = None
# justthedigits = justthedigits.split(' ', 1)[0]
#if the issue has an alphanumeric (issue_exceptions, join it and push it through)
logger.fdebug('[FILECHECKER] JUSTTHEDIGITS [' + justthedigits + ']' )
# Final validation of the issue number when nothing above has accepted it yet:
# plain digits, digits.digits, or a negative float are allowed.
if digitsvalid == "true":
pass
else:
if justthedigits.isdigit():
digitsvalid = "true"
else:
if '.' in justthedigits:
tmpdec = justthedigits.find('.')
b4dec = justthedigits[:tmpdec]
a4dec = justthedigits[tmpdec+1:]
if a4dec.isdigit() and b4dec.isdigit():
logger.fdebug('[FILECHECKER] DECIMAL ISSUE DETECTED')
digitsvalid = "true"
else:
try:
x = float(justthedigits)
#validity check
if x < 0:
logger.info("I've encountered a negative issue #: " + str(justthedigits) + ". Trying to accomodate.")
digitsvalid = "true"
else: raise ValueError
# Python 2 only exception syntax.
except ValueError, e:
logger.info('Cannot determine issue number from given issue #: ' + str(justthedigits))
# else:
# logger.fdebug('[FILECHECKER] NO DECIMALS DETECTED')
# digitsvalid = "false"
# if justthedigits.lower() == 'annual':
# logger.fdebug('[FILECHECKER] ANNUAL [' + tmpthedigits.split(' ', 1)[1] + ']')
# justthedigits += ' ' + tmpthedigits.split(' ', 1)[1]
# digitsvalid = "true"
# else:
# try:
# if tmpthedigits.isdigit(): #.split(' ', 1)[1] is not None:
# poss_alpha = tmpthedigits#.split(' ', 1)[1]
# if poss_alpha.isdigit():
# digitsvalid = "true"
# justthedigits += '.' + poss_alpha
# logger.fdebug('[FILECHECKER] DECIMAL ISSUE DETECTED [' + justthedigits + ']')
# for issexcept in issue_exceptions:
# if issexcept.lower() in poss_alpha.lower() and len(poss_alpha) <= len(issexcept):
# justthedigits += poss_alpha
# logger.fdebug('[FILECHECKER] ALPHANUMERIC EXCEPTION. COMBINING : [' + justthedigits + ']')
# digitsvalid = "true"
# break
# except:
# pass
logger.fdebug('[FILECHECKER] final justthedigits [' + justthedigits + ']')
# No usable issue number: undo the count and move on to the next file.
if digitsvalid == "false":
logger.fdebug('[FILECHECKER] Issue number not properly detected...ignoring.')
comiccnt -=1 # remove the entry from the list count as it was incorrrectly tallied.
continue
# Extra checks for manual runs: volume/version, publication year and a word-for-word title comparison.
if manual is not None:
#this is needed for Manual Run to determine matches
#without this Batman will match on Batman Incorporated, and Batman and Robin, etc..
# in case it matches on an Alternate Search pattern, set modwatchcomic to the cchk value
modwatchcomic = cchk
logger.fdebug('[FILECHECKER] cchk = ' + cchk.lower())
logger.fdebug('[FILECHECKER] modwatchcomic = ' + modwatchcomic.lower())
logger.fdebug('[FILECHECKER] subname = ' + subname.lower())
comyear = manual['SeriesYear']
issuetotal = manual['Total']
comicvolume = manual['ComicVersion']
logger.fdebug('[FILECHECKER] SeriesYear: ' + str(comyear))
logger.fdebug('[FILECHECKER] IssueTotal: ' + str(issuetotal))
logger.fdebug('[FILECHECKER] Comic Volume: ' + str(comicvolume))
logger.fdebug('[FILECHECKER] volume detected: ' + str(volrem))
# ComVersChk becomes the int 0 for "no volume / volume 1", otherwise it stays the digit string.
if comicvolume:
ComVersChk = re.sub("[^0-9]", "", comicvolume)
if ComVersChk == '' or ComVersChk == '1':
ComVersChk = 0
else:
ComVersChk = 0
# even if it's a V1, we need to pull the date for the given issue ID and get the publication year
# for the issue. Because even if it's a V1, if there are additional Volumes then it's possible that
# it will take the incorrect series. (ie. Detective Comics (1937) & Detective Comics (2011).
# If issue #28 (2013) is found, it exists in both series, and because DC 1937 is a V1, it will bypass
# the year check which will result in the incorrect series being picked (1937)
#set the issue/year threshold here.
# 2013 - (24issues/12) = 2011.
#minyear = int(comyear) - (int(issuetotal) / 12)
# First four characters of LatestDate are used as the upper year bound.
maxyear = manual['LatestDate'][:4] # yyyy-mm-dd
#subnm defined at being of module.
len_sm = len(subnm)
#print ("there are " + str(lenm) + " words.")
cnt = 0
# yearmatch is a three-state string flag: "none" (no year seen), "true" or "false".
yearmatch = "none"
vers4year = "no"
vers4vol = "no"
# Look through the original filename tokens for a v#### (year-style) or short v# (volume-style) marker.
for ct in subsplit:
if ct.lower().startswith('v') and ct[1:].isdigit():
logger.fdebug('[FILECHECKER] possible versioning..checking')
#we hit a versioning # - account for it
if ct[1:].isdigit():
if len(ct[1:]) == 4: #v2013
logger.fdebug('[FILECHECKER] Version detected as ' + str(ct))
vers4year = "yes" #re.sub("[^0-9]", " ", str(ct)) #remove the v
break
else:
if len(ct) < 4:
logger.fdebug('[FILECHECKER] Version detected as ' + str(ct))
vers4vol = str(ct)
break
logger.fdebug('[FILECHECKER] false version detection..ignoring.')
versionmatch = "false"
# NOTE(review): `is not "no"` is an identity test on string objects, not an equality test.
if vers4year is not "no" or vers4vol is not "no":
if comicvolume: #is not "None" and comicvolume is not None:
D_ComicVersion = re.sub("[^0-9]", "", comicvolume)
if D_ComicVersion == '':
D_ComicVersion = 0
else:
D_ComicVersion = 0
# F_ = version digits from the filename, D_ = from manual['ComicVersion'], S_ = the series year.
F_ComicVersion = re.sub("[^0-9]", "", volrem)
S_ComicVersion = str(comyear)
logger.fdebug('[FILECHECKER] FCVersion: ' + str(F_ComicVersion))
logger.fdebug('[FILECHECKER] DCVersion: ' + str(D_ComicVersion))
logger.fdebug('[FILECHECKER] SCVersion: ' + str(S_ComicVersion))
#if annualize == "true" and int(ComicYear) == int(F_ComicVersion):
# logger.fdebug('[FILECHECKER] ("We matched on versions for annuals " + str(volrem))
if int(F_ComicVersion) == int(D_ComicVersion) or int(F_ComicVersion) == int(S_ComicVersion):
logger.fdebug('[FILECHECKER] We matched on versions...' + str(volrem))
versionmatch = "true"
else:
logger.fdebug('[FILECHECKER] Versions wrong. Ignoring possible match.')
#else:
# Walk the bracket pieces looking for a four-character year.
while (cnt < len_sm):
if subnm[cnt] is None: break
if subnm[cnt] == ' ':
pass
else:
logger.fdebug('[FILECHECKER] ' + str(cnt) + ' Bracket Word: ' + str(subnm[cnt]))
#if ComVersChk == 0:
# logger.fdebug('[FILECHECKER] Series version detected as V1 (only series in existance with that title). Bypassing year check')
# yearmatch = "true"
# break
# [:-2] drops the last two characters, so this is true only for four-character
# pieces that start with '19' or '20'.
if subnm[cnt][:-2] == '19' or subnm[cnt][:-2] == '20':
logger.fdebug('[FILECHECKER] year detected: ' + str(subnm[cnt]))
result_comyear = subnm[cnt]
# Only the upper bound (maxyear) is enforced; comyear appears in the log message only.
if int(result_comyear) <= int(maxyear):
logger.fdebug('[FILECHECKER] ' + str(result_comyear) + ' is within the series range of ' + str(comyear) + '-' + str(maxyear))
#still possible for incorrect match if multiple reboots of series end/start in same year
yearmatch = "true"
break
else:
logger.fdebug('[FILECHECKER] ' + str(result_comyear) + ' - not right - year not within series range of ' + str(comyear) + '-' + str(maxyear))
yearmatch = "false"
break
cnt+=1
# Reject the file only when both the version and the year checks failed.
if versionmatch == "false":
if yearmatch == "false":
logger.fdebug('[FILECHECKER] Failed to match on both version and issue year.')
continue
else:
logger.fdebug('[FILECHECKER] Matched on versions, not on year - continuing.')
else:
if yearmatch == "false":
logger.fdebug('[FILECHECKER] Matched on version, but not on year - continuing.')
else:
logger.fdebug('[FILECHECKER] Matched on both version, and issue year - continuing.')
# No year found in the filename at all: accept only when the series is a V1 / has no volume.
if yearmatch == "none":
if ComVersChk == 0:
logger.fdebug('[FILECHECKER] Series version detected as V1 (only series in existance with that title). Bypassing year check.')
yearmatch = "true"
else:
continue
if 'annual' in subname.lower():
subname = re.sub('annual', '', subname.lower())
subname = re.sub('\s+', ' ', subname)
#tmpitem = item[:jtd_len]
# if it's an alphanumeric with a space, rejoin, so we can remove it cleanly just below this.
substring_removal = None
# poss_alpha is a one-element list holding the last space-delimited word of subname.
poss_alpha = subname.split(' ')[-1:]
logger.fdebug('[FILECHECKER] poss_alpha: ' + str(poss_alpha))
logger.fdebug('[FILECHECKER] lenalpha: ' + str(len(''.join(poss_alpha))))
for issexcept in issue_exceptions:
if issexcept.lower()in str(poss_alpha).lower() and len(''.join(poss_alpha)) <= len(issexcept):
#get the last 2 words so that we can remove them cleanly
substring_removal = ' '.join(subname.split(' ')[-2:])
substring_join = ''.join(subname.split(' ')[-2:])
logger.fdebug('[FILECHECKER] substring_removal: ' + str(substring_removal))
logger.fdebug('[FILECHECKER] substring_join: ' + str(substring_join))
break
if substring_removal is not None:
sub_removed = subname.replace('_', ' ').replace(substring_removal, substring_join)
else:
sub_removed = subname.replace('_', ' ')
logger.fdebug('[FILECHECKER] sub_removed: ' + str(sub_removed))
# Compare the filename's title words (everything but the last word) with the watched title's words.
split_sub = sub_removed.rsplit(' ',1)[0].split(' ') #removes last word (assuming it's the issue#)
split_mod = modwatchcomic.replace('_', ' ').split() #batman
logger.fdebug('[FILECHECKER] split_sub: ' + str(split_sub))
logger.fdebug('[FILECHECKER] split_mod: ' + str(split_mod))
x = len(split_sub)-1
scnt = 0
if x > len(split_mod)-1:
logger.fdebug('[FILECHECKER] number of words do not match...aborting.')
else:
# Position-by-position comparison, walking from the last word back to the first.
while ( x > -1 ):
# Debug output written straight to stdout (Python 2 print statement).
print str(split_sub[x]) + ' comparing to ' + str(split_mod[x])
if str(split_sub[x]).lower() == str(split_mod[x]).lower():
scnt+=1
logger.fdebug('[FILECHECKER] word match exact. ' + str(scnt) + '/' + str(len(split_mod)))
x-=1
wordcnt = int(scnt)
logger.fdebug('[FILECHECKER] scnt:' + str(scnt))
totalcnt = int(len(split_mod))
logger.fdebug('[FILECHECKER] split_mod length:' + str(totalcnt))
try:
# NOTE(review): both operands are ints, so under Python 2 without
# `from __future__ import division` this is floor division (0 unless every word matched).
spercent = (wordcnt/totalcnt) * 100
except ZeroDivisionError:
spercent = 0
logger.fdebug('[FILECHECKER] we got ' + str(spercent) + ' percent.')
if int(spercent) >= 80:
logger.fdebug('[FILECHECKER] this should be considered an exact match.Justthedigits:' + justthedigits)
else:
logger.fdebug('[FILECHECKER] failure - not an exact match.')
# NOTE(review): this `continue` (like the year/version ones above) does not decrement comiccnt.
continue
# Record the accepted file; manual runs additionally carry the detected year.
if manual:
print item
print comicpath
print comicsize
# NOTE(review): result_comyear is only assigned where a bracketed year is detected above;
# when the V1 bypass set yearmatch it may be unbound, or left over from an earlier file.
print result_comyear
print justthedigits
comiclist.append({
'ComicFilename': item,
'ComicLocation': comicpath,
'ComicSize': comicsize,
'ComicYear': result_comyear,
'JusttheDigits': justthedigits
})
print('appended.')
else:
comiclist.append({
'ComicFilename': item,
'ComicLocation': comicpath,
'ComicSize': comicsize,
'JusttheDigits': justthedigits
})
watchmatch['comiclist'] = comiclist
else:
pass
#print ("directory found - ignoring")
# Report the tally and hand back the result dict.
logger.fdebug('[FILECHECKER] you have a total of ' + str(comiccnt) + ' ' + watchcomic + ' comics')
watchmatch['comiccount'] = comiccnt
return watchmatch
def validateAndCreateDirectory(dir, create=False):
    """Check that a comic directory exists, optionally creating it.

    Args:
        dir: path of the comic directory to check.
        create: when true and ``dir`` does not exist, try to create it
            (including missing parents) with the mode obtained by parsing
            ``mylar.CHMOD_DIR`` as an octal string.

    Returns:
        True if the directory already exists or was created; False if it is
        missing and was not created (``create`` is false, or ``dir`` is blank).

    Raises:
        SystemExit: if creating the directory fails with an OSError.
    """
    if os.path.exists(dir):
        logger.info('Found comic directory: ' + dir)
        return True

    logger.warn('Could not find comic directory: ' + dir)
    if not create:
        return False

    if not dir.strip():
        logger.warn('Provided directory is blank, aborting')
        return False

    logger.info('Creating comic directory (' + str(mylar.CHMOD_DIR) + ') : ' + dir)
    try:
        permission = int(mylar.CHMOD_DIR, 8)
        # This is probably redundant, but it doesn't hurt to clear the umask here.
        os.umask(0)
        os.makedirs(dir, permission)
    except OSError:
        # Name the directory that actually failed; the previous message pointed
        # at mylar.DATA_DIR, which is not what this function creates.
        raise SystemExit('Could not create comic directory: ' + dir + '. Exiting....')
    return True
def indices(string, char):
    """Return the list of positions in ``string`` whose element equals ``char``.

    Elements are compared one at a time, so a multi-character ``char`` never
    matches a single character of a plain string. The result is in ascending
    order and is empty when nothing matches.
    """
    positions = []
    for offset, element in enumerate(string):
        if element == char:
            positions.append(offset)
    return positions