mylar/mylar/filechecker.py

1176 lines
63 KiB
Python
Executable File

#!/usr/bin/env python
# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mylar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.
import os
import os.path
import zlib
import pprint
import subprocess
import re
#import logger
import mylar
from mylar import logger, helpers
import unicodedata
import sys
import platform
def file2comicmatch(watchmatch):
    """No-op stub: accept ``watchmatch`` and do nothing with it.

    The argument is ignored and ``None`` is returned.
    NOTE(review): presumably a placeholder for mapping a matched file back
    to a watched comic -- confirm against callers before extending it.
    """
    # Debug aid, intentionally left disabled:
    # print("match: " + str(watchmatch))
    return None
def listFiles(dir, watchcomic, Publisher, AlternateSearch=None, manual=None, sarc=None):
# use AlternateSearch to check for filenames that follow that naming pattern
# ie. Star Trek TNG Doctor Who Assimilation won't get hits as the
# checker looks for Star Trek TNG Doctor Who Assimilation2 (according to CV)
# we need to convert to ascii, as watchcomic is utf-8 and special chars f'it up
u_watchcomic = unicodedata.normalize('NFKD', watchcomic).encode('ASCII', 'ignore') #watchcomic.encode('ascii', 'ignore').strip()
logger.fdebug('[FILECHECKER] comic: ' + u_watchcomic)
basedir = dir
logger.fdebug('[FILECHECKER] Looking in: ' + dir)
watchmatch = {}
comiclist = []
comiccnt = 0
not_these = ['#',
',',
'\/',
':',
'\;',
'.',
'-',
'!',
'\$',
'\%',
'\+',
'\'',
'\?',
'\@']
issue_exceptions = ['AU',
'.INH',
'.NOW',
'AI',
'A',
'B',
'C',
'X',
'O']
extensions = ('.cbr', '.cbz', '.cb7')
# #get the entire tree here
dirlist = traverse_directories(basedir)
# for item in os.listdir(basedir):
for fname in dirlist:
moddir = None
# at a later point, we should store the basedir and scan it in for additional info, since some users
# have their structure setup as 'Batman v2 (2011)/Batman #1.cbz' or 'Batman/V2-(2011)/Batman #1.cbz'
if fname['directory'] == '':
basedir = dir
else:
basedir = fname['directory']
#if it's a subdir, strip out the main dir and retain the remainder for the filechecker to find it.
#start at position 1 so the initial slash is removed since it's a sub, and os.path.join will choke.
moddir = basedir.replace(dir, '')[1:].rstrip()
item = fname['filename']
#for mac OS metadata ignoring.
if item.startswith('._'):
logger.info('ignoring os metadata for ' + item)
continue
if item == 'cover.jpg' or item == 'cvinfo': continue
if not item.lower().endswith(extensions):
#logger.fdebug('[FILECHECKER] filename not a valid cbr/cbz - ignoring: ' + item)
continue
#print item
#subname = os.path.join(basedir, item)
subname = item
subname = re.sub('\_', ' ', subname)
#Remove html code for ( )
subname = re.sub(r'%28', '(', subname)
subname = re.sub(r'%29', ')', subname)
#versioning - remove it
subsplit = subname.replace('_', ' ').split()
volrem = None
vers4year = "no"
vers4vol = "no"
digitchk = 0
if sarc and mylar.READ2FILENAME:
logger.fdebug('[SARC] subname: ' + subname)
removest = subname.find('-') # the - gets removed above so we test for the first blank space...
logger.fdebug('[SARC] Checking filename for Reading Order sequence - removest: ' + str(removest))
logger.fdebug('removestdig: ' + subname[:removest -1])
if subname[:removest].isdigit() and removest == 3:
subname = subname[4:]
logger.fdebug('[SARC] Removed Reading Order sequence from subname. Now set to : ' + subname)
for subit in subsplit:
if subit[0].lower() == 'v':
vfull = 0
if subit[1:].isdigit():
#if in format v1, v2009 etc...
if len(subit[1:]) == 4: #v2013
# if it's greater than 3 in length, then the format is Vyyyy
logger.fdebug('[FILECHECKER] Version detected as : ' + str(subit))
vers4year = "yes"
else:
if len(subit) < 4:
logger.fdebug('[FILECHECKER] Version detected as : ' + str(subit))
vers4vol = str(subit)
subname = re.sub(subit, '', subname)
volrem = subit
vers4vol = volrem
break
elif subit.lower()[:3] == 'vol':
tsubit = re.sub('vol', '', subit.lower())
try:
if any([tsubit.isdigit(), len(tsubit) > 5]):
#if in format vol.2013 etc
#because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely
logger.fdebug('[FILECHECKER] volume indicator detected as version #:' + str(subit))
subname = re.sub(subit, '', subname)
volrem = subit
vers4year = "yes"
except:
continue
#check if a year is present in series title (ie. spider-man 2099)
#also check if decimal present in series title (ie. batman beyond 2.0)
#- check if brackets present in series title
numberinseries = 'False'
decimalinseries = 'False'
bracketsinseries = 'False'
for i in watchcomic.split():
if i.isdigit():
numberinseries = 'True'
if ('20' in i or '19' in i):
if i.isdigit():
numberinseries = 'True'
else:
find20 = i.find('20')
if find20:
stf = i[find20:4].strip()
find19 = i.find('19')
if find19:
stf = i[find19:4].strip()
logger.fdebug('[FILECHECKER] stf is : ' + str(stf))
if stf.isdigit():
numberinseries = 'True'
if ('.' in i):
try:
float(i)
decimalinseries = 'True'
std = i
logger.fdebug('[FILECHECKER] std is : ' + str(std))
except:
pass
#logger.fdebug('[FILECHECKER] i : ' + str(i))
if ('(' in i):
bracketsinseries = 'True'
bracket_length_st = watchcomic.find('(')
bracket_length_en = watchcomic.find(')', bracket_length_st)
bracket_length = bracket_length_en - bracket_length_st
bracket_word = watchcomic[bracket_length_st:bracket_length_en +1]
logger.fdebug('[FILECHECKER] bracketinseries: ' + str(bracket_word))
logger.fdebug('[FILECHECKER] numberinseries: ' + str(numberinseries))
logger.fdebug('[FILECHECKER] decimalinseries: ' + str(decimalinseries))
logger.fdebug('[FILECHECKER] bracketinseries: ' + str(bracketsinseries))
#iniitate the alternate list here so we can add in the different flavours based on above
AS_Alt = []
#remove the brackets..
if bracketsinseries == 'True':
logger.fdebug('[FILECHECKER] modifying subname to accomodate brackets within series title.')
#subnm_mod2 = re.findall('[^()]+', subname[bracket_length_en:])
#logger.fdebug('[FILECHECKER] subnm_mod : ' + str(subnm_mod2))
#subnm_mod = re.sub('[\(\)]',' ', subname[:bracket_length_st]) + str(subname[bracket_length_en:])
#logger.fdebug('[FILECHECKER] subnm_mod_st: ' + str(subname[:bracket_length_st]))
#logger.fdebug('[FILECHECKER] subnm_mod_en: ' + str(subname[bracket_length_en:]))
#logger.fdebug('[FILECHECKER] modified subname is now : ' + str(subnm_mod))
if bracket_word in subname:
nobrackets_word = re.sub('[\(\)]', '', bracket_word).strip()
subname = re.sub(nobrackets_word, '', subname).strip()
subnm = re.findall('[^()]+', subname)
logger.fdebug('[FILECHECKER] subnm len : ' + str(len(subnm)))
if len(subnm) == 1:
logger.fdebug('[FILECHECKER] ' + str(len(subnm)) + ': detected invalid filename - attempting to detect year to continue')
#if the series has digits this f's it up.
if numberinseries == 'True' or decimalinseries == 'True':
#we need to remove the series from the subname and then search the remainder.
watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\.\-]', '', watchcomic) #remove spec chars for watchcomic match.
logger.fdebug('[FILECHECKER] watch-cleaned: ' + watchname)
subthis = re.sub('.cbr', '', subname)
subthis = re.sub('.cbz', '', subthis)
subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\.\-]', '', subthis)
subthis = re.sub('\s+', ' ', subthis)
logger.fdebug('[FILECHECKER] sub-cleaned: ' + subthis)
#we need to make sure the file is part of the correct series or else will match falsely
if watchname.lower() not in subthis.lower():
logger.fdebug('[FILECHECKER] ' + watchname + ' this is a false match to ' + subthis + ' - Ignoring this result.')
continue
ogsubthis = subthis
subthis = subthis[len(watchname):] #remove watchcomic
#we need to now check the remainder of the string for digits assuming it's a possible year
logger.fdebug('[FILECHECKER] new subname: ' + subthis)
if subthis.startswith('('):
# if it startswith a bracket, then it's probably a year - let's check.
for i in subthis.split():
tmpi = re.sub('[\(\)]', '', i).strip()
if tmpi.isdigit():
if (tmpi.startswith('19') or tmpi.startswith('20')) and len(tmpi) == 4:
logger.fdebug('[FILECHECKER] year detected: ' + str(tmpi))
subname = re.sub('(19\d{2}|20\d{2})(.*)', '\\2 (\\1)', subthis)
subname = re.sub('\(\)', '', subname).strip()
subname = watchcomic + ' ' + subname
logger.fdebug('[FILECHECKER] new subname reversed: ' + subname)
break
else:
year = None
for i in subthis.split():
if ('20' in i or '19' in i):
if i.isdigit():
year = i[:4]
else:
findyr20 = i.find('20')
if findyr20:
styear = i[findyr20:4].strip()
findyr19 = i.find('19')
if findyr19:
styear = i[findyr19:4].strip()
if styear.isdigit() and len(styear) == 4:
year = styear
logger.fdebug('[FILECHECKER] stf is : ' + str(styear))
if year:
subname = re.sub('(.*)[\s+|_+](19\d{2}|20\d{2})(.*)', '\\1 \\2 (\\3)', subthis)
else:
#unable to find year in filename
logger.fdebug('[FILECHECKER] Unable to detect year within filename. Continuing as is and assuming this is a volume 1 and will work itself out later.')
subname = ogsubthis
subnm = re.findall('[^()]+', subname)
else:
subit = re.sub('(.*)[\s+|_+](19\d{2}|20\d{2})(.*)', '\\1 \\3 (\\2)', subname).replace('( )', '')
subthis2 = re.sub('.cbr', '', subit)
subthis1 = re.sub('.cbz', '', subthis2)
subname = re.sub('[\:\;\!\'\/\?\+\=\_\%]', '', subthis1)
#if '.' appears more than once at this point, then it's being used in place of spaces.
#if '.' only appears once at this point, it's a decimal issue (since decimalinseries is False within this else stmt).
if len(str(subname.count('.'))) == 1:
logger.fdebug('[FILECHECKER] decimal issue detected, not removing decimals')
else:
logger.fdebug('[FILECHECKER] more than one decimal detected, and the series does not have decimals - assuming in place of spaces.')
subname = re.sub('[\.]', '', subname)
subnm = re.findall('[^()]+', subname)
else:
if numberinseries == 'True' or decimalinseries == 'True':
#we need to remove the series from the subname and then search the remainder.
subthis = re.sub('.cbr', '', subname)
subthis = re.sub('.cbz', '', subthis)
if decimalinseries == 'True':
watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\-]', '', watchcomic) #remove spec chars for watchcomic match.
subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\-]', '', subthis)
else:
# in order to get series like Earth 2 scanned in that contain a decimal, I removed the \. from the re.subs below - 28-08-2014
watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\-]', '', watchcomic) #remove spec chars for watchcomic match.
subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\-]', '', subthis)
logger.fdebug('[FILECHECKER] watch-cleaned: ' + watchname)
subthis = re.sub('\s+', ' ', subthis)
logger.fdebug('[FILECHECKER] sub-cleaned: ' + subthis)
#we need to make sure the file is part of the correct series or else will match falsely
if watchname.lower() not in subthis.lower():
logger.fdebug('[FILECHECKER] ' + watchname + ' this is a false match to ' + subthis + ' - Ignoring this result.')
continue
subthis = subthis[len(watchname):].strip() #remove watchcomic
#we need to now check the remainder of the string for digits assuming it's a possible year
logger.fdebug('[FILECHECKER] new subname: ' + subthis)
if subthis.startswith('('):
# if it startswith a bracket, then it's probably a year and the format is incorrect to continue - let's check.
for i in subthis.split():
tmpi = re.sub('[\(\)]', '', i).strip()
if tmpi.isdigit():
if (tmpi.startswith('19') or tmpi.startswith('20')) and len(tmpi) == 4:
logger.fdebug('[FILECHECKER] Year detected: ' + str(tmpi))
subname = re.sub('(19\d{2}|20\d{2})(.*)', '\\2 (\\1)', subthis)
subname = re.sub('\(\)', '', subname).strip()
logger.fdebug('[FILECHECKER] Flipping the issue with the year: ' + subname)
break
else:
numcheck = re.findall('[19\d{2}|20\d{2}]', subthis)
if len(numcheck) == 1:
subname = re.sub('(19\d{2}|20\d{2})(.*)', '\\2 (\\1)', subthis)
else:
subname = re.sub('(19\d{2}|20\d{2})(.*)', '\\1 (\\2)', subthis)
subname = re.sub('\(\)', '', subname).strip()
subname = watchname + ' ' + subname
subname = re.sub('\s+', ' ', subname).strip()
logger.fdebug('[FILECHECKER] New subname reversed: ' + subname)
subnm = re.findall('[^()]+', subname)
subsplit = subname.replace('_', ' ').split()
if sarc is None:
if Publisher.lower() in re.sub('_', ' ', subname.lower()):
#if the Publisher is given within the title or filename even (for some reason, some people
#have this to distinguish different titles), let's remove it entirely.
lenm = len(subnm)
cnt = 0
pub_removed = None
while (cnt < lenm):
submod = re.sub('_', ' ', subnm[cnt])
if submod is None: break
if submod == ' ':
pass
else:
logger.fdebug('[FILECHECKER] ' + str(cnt) + ". Bracket Word: " + submod)
if Publisher.lower() in submod.lower() and cnt >= 1:
logger.fdebug('[FILECHECKER] Publisher detected within title : ' + submod)
logger.fdebug('[FILECHECKER] cnt is : ' + str(cnt) + ' --- Publisher is: ' + Publisher)
#-strip publisher if exists here-
pub_removed = submod
logger.fdebug('[FILECHECKER] removing publisher from title')
subname_pubremoved = re.sub(pub_removed, '', subname)
logger.fdebug('[FILECHECKER] pubremoved : ' + subname_pubremoved)
subname_pubremoved = re.sub('\(\)', '', subname_pubremoved) #remove empty brackets
subname_pubremoved = re.sub('\s+', ' ', subname_pubremoved) #remove spaces > 1
logger.fdebug('[FILECHECKER] blank brackets removed: ' + subname_pubremoved)
subnm = re.findall('[^()]+', subname_pubremoved)
break
cnt+=1
#If the Year comes before the Issue # the subname is passed with no Issue number.
#This logic checks for numbers before the extension in the format of 1 01 001
#and adds to the subname. (Cases where comic name is $Series_$Year_$Issue)
# if len(subnm) > 1:
# if (re.search('(19\d{2}|20\d{2})',subnm[1]) is not None):
# logger.info('subnm[1]: ' + str(subnm[1]))
# for i in subnm:
# tmpi = i.strip()
# if tmpi.isdigit():
# if (tmpi.startswith('19') or tmpi.startswith('20')) and len(tmpi) == 4:
# logger.info('[FILECHECKER] year detected: ' + str(tmpi))
# #strip out all the brackets in the subnm[2] if it exists so we're left with just the issue # in most cases
# subremoved = re.findall('[^()]+', subnm[2]).strip()
# if len(subremoved) > 5:
# logger.info('[FILECHECKER] something is wrong with the parsing - better report the issue on github.')
# break
# subname = re.sub('(.*)[\s+|_+](19\d{2}|20\d{2})(.*)', '\\1 ' + str(subremoved) + ' (\\2)', subname)
# subname = re.sub('\(\)', '', subname).strip()
# logger.info('[FILECHECKER] THE new subname reversed: ' + str(subname))
# break
# else:
# subname = re.sub('(.*)[\s+|_+](19\d{2}|20\d{2})(.*)', '\\1 \\2 (\\3)', subname)
# subnm = re.findall('[^()]+', subname) # we need to regenerate this here.
# logger.fdebug('[FILECHECKER] subnm0: ' + str(subnm[0]))
# logger.fdebug('[FILECHECKER] subnm1: ' + str(subnm[1]))
# logger.fdebug('subnm2: ' + str(subnm[2]))
# subname = str(subnm[0]).lstrip() + ' (' + str(subnm[1]).strip() + ') '
subname = subnm[0]
if len(subnm) == 1:
# if it still has no year (brackets), check setting and either assume no year needed.
subname = subname
logger.fdebug('[FILECHECKER] subname no brackets: ' + subname)
nonocount = 0
charpos = 0
detneg = "no"
leavehyphen = False
should_restart = True
lenwatch = len(watchcomic) # because subname gets replaced dynamically, the length will change and things go wrong.
while should_restart:
should_restart = False
for nono in not_these:
if nono in subname:
subcnt = subname.count(nono)
charpos = indices(subname, nono) # will return a list of char positions in subname
logger.fdebug("[" + str(nono) + "] charpos: " + str(charpos))
if nono == '-':
i=0
while (i < len(charpos)):
for i, j in enumerate(charpos):
if j +2 > len(subname):
sublimit = subname[j +1:]
else:
sublimit = subname[j +1:j +2]
if sublimit.isdigit():
logger.fdebug('[FILECHECKER] possible negative issue detected.')
nonocount = nonocount + subcnt - 1
detneg = "yes"
elif '-' in watchcomic and j < lenwatch:
lenwatch -=1
logger.fdebug('[FILECHECKER] - appears in series title.')
logger.fdebug('[FILECHECKER] up to - :' + subname[:j +1].replace('-', ' '))
logger.fdebug('[FILECHECKER] after - :' + subname[j +1:])
subname = subname[:j +1].replace('-', '') + subname[j +1:]
logger.fdebug('[FILECHECKER] new subname is : ' + subname)
should_restart = True
leavehyphen = True
i+=1
if detneg == "no" and leavehyphen == False:
subname = re.sub(str(nono), ' ', subname)
nonocount = nonocount + subcnt
#logger.fdebug('[FILECHECKER] (str(nono) + " detected " + str(subcnt) + " times.")
# segment '.' having a . by itself will denote the entire string which we don't want
elif nono == '.':
logger.fdebug('[FILECHECKER] Decimal check.')
x = 0
fndit = 0
dcspace = 0
while (x < len(charpos)):
for x, j in enumerate(charpos):
fndit = j
logger.fdebug('fndit: ' + str(fndit))
logger.fdebug('isdigit1: ' + subname[fndit -1:fndit])
logger.fdebug('isdigit2: ' + subname[fndit +1:fndit +2])
if subname[fndit -1:fndit].isdigit() and subname[fndit +1:fndit +2].isdigit():
logger.fdebug('[FILECHECKER] decimal issue detected.')
dcspace+=1
else:
subname = subname[:fndit] + ' ' + subname[fndit +1:]
nonocount+=1
x+=1
nonocount += (subcnt + dcspace)
#if dcspace == 1:
# nonocount = nonocount + subcnt + dcspace
#else:
# subname = re.sub('\.', ' ', subname)
# nonocount = nonocount + subcnt - 1 #(remove the extension from the length)
else:
#this is new - if it's a symbol seperated by a space on each side it drags in an extra char.
x = 0
fndit = 0
blspc = 0
if nono == '#':
fndit = subname.find(nono)
if subname[fndit +1].isdigit():
subname = re.sub('#', '', subname)
continue
while x < subcnt:
fndit = subname.find(nono, fndit)
#print ("space before check: " + str(subname[fndit-1:fndit]))
#print ("space after check: " + str(subname[fndit+1:fndit+2]))
if subname[fndit -1:fndit] == ' ' and subname[fndit +1:fndit +2] == ' ':
logger.fdebug('[FILECHECKER] blankspace detected before and after ' + str(nono))
blspc+=1
x+=1
logger.fdebug('[FILECHECKER] replacing ' + str(nono) + ' with a space')
subname = re.sub(str(nono), '', subname)
nonocount = nonocount + subcnt + blspc
#subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',' ', subname)
if decimalinseries == 'True':
modwatchcomic = re.sub('[\_\#\,\/\:\;\!\$\%\?\@]', ' ', u_watchcomic)
else:
modwatchcomic = re.sub('[\_\#\,\/\:\;\.\!\$\%\?\@]', ' ', u_watchcomic)
if bracketsinseries == 'True':
modwatchcomic = re.sub('[\(\)]', ' ', modwatchcomic)
modwatchcomic = re.sub('[\-\']', '', modwatchcomic) #trying this too - 2014-03-01
#if leavehyphen == False:
# logger.fdebug('[FILECHECKER] ('removing hyphen for comparisons')
# modwatchcomic = re.sub('-', ' ', modwatchcomic)
# subname = re.sub('-', ' ', subname)
detectand = False
detectthe_mod = False
detectthe_sub = False
modwatchcomic = re.sub('\&', ' and ', modwatchcomic)
if ' the ' in modwatchcomic.lower() or modwatchcomic.lower().startswith('the '):
modwatchcomic = re.sub("\\bthe\\b", "", modwatchcomic.lower())
logger.fdebug('[FILECHECKER] new modwatchcomic: ' + modwatchcomic)
detectthe_mod = True
modwatchcomic = re.sub('\s+', ' ', modwatchcomic).strip()
if '&' in subname:
logger.fdebug('[FILECHECKER] detected & in subname')
subname = re.sub('\&', ' and ', subname)
detectand = True
if ' the ' in subname.lower() or subname.lower().startswith('the '):
subname = re.sub("\\bthe\\b", "", subname.lower())
detectthe_sub = True
subname = re.sub('\s+', ' ', subname).strip()
#AS_Alt = []
AS_Tuple = []
if AlternateSearch is not None:
chkthealt = AlternateSearch.split('##')
if chkthealt == 0:
AS_Alternate = AlternateSearch
for calt in chkthealt:
AS_tupled = False
AS_Alternate = re.sub('##', '', calt)
if '!!' in AS_Alternate:
# if it's !! present, it's the comicid associated with the series as an added annual.
# extract the !!, store it and then remove it so things will continue.
as_start = AS_Alternate.find('!!')
logger.fdebug('as_start: ' + str(as_start) + ' --- ' + str(AS_Alternate[as_start:]))
as_end = AS_Alternate.find('##', as_start)
if as_end == -1: as_end = len(AS_Alternate)
logger.fdebug('as_start: ' + str(as_end) + ' --- ' + str(AS_Alternate[as_start:as_end]))
AS_ComicID = AS_Alternate[as_start +2:as_end]
logger.fdebug('[FILECHECKER] Extracted comicid for given annual : ' + str(AS_ComicID))
AS_Alternate = re.sub('!!' + str(AS_ComicID), '', AS_Alternate)
AS_tupled = True
#same = encode.
u_altsearchcomic = AS_Alternate.encode('ascii', 'ignore').strip()
altsearchcomic = re.sub('[\_\#\,\/\:\;\.\!\$\%\+\?\@]', ' ', u_altsearchcomic)
altsearchcomic = re.sub('[\-\']', '', altsearchcomic) #because this is a watchcomic registered, use same algorithim for watchcomic
altsearchcomic = re.sub('\&', ' and ', altsearchcomic)
if detectthe_sub == True:
altsearchcomic = re.sub("\\bthe\\b", "", altsearchcomic.lower())
altsearchcomic = re.sub('\s+', ' ', str(altsearchcomic)).strip()
if AS_tupled:
AS_Tuple.append({"ComicID": AS_ComicID,
"AS_Alternate": altsearchcomic})
AS_Alt.append(altsearchcomic)
else:
#create random characters so it will never match.
altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf"
AS_Alt.append(altsearchcomic)
#if '_' in subname:
# subname = subname.replace('_', ' ')
logger.fdebug('[FILECHECKER] AS_Alt : ' + str(AS_Alt))
logger.fdebug('[FILECHECKER] watchcomic:' + modwatchcomic + ' ..comparing to found file: ' + subname)
if modwatchcomic.lower() in subname.lower() or any(x.lower() in subname.lower() for x in AS_Alt):
#if the alternate search name is almost identical, it won't match up because it will hit the 'normal' first.
#not important for series' matches, but for annuals, etc it is very important.
#loop through the Alternates picking out the ones that match and then do an overall loop.
enable_annual = False
loopchk = [x for x in AS_Alt if x.lower() in subname.lower()]
if len(loopchk) > 0 and loopchk[0] != '':
logger.fdebug('[FILECHECKER] This should be an alternate: ' + str(loopchk))
if 'annual' in subname.lower():
logger.fdebug('[FILECHECKER] Annual detected - proceeding')
enable_annual = True
else:
loopchk = []
if modwatchcomic.lower() in subname.lower() and enable_annual == False:
loopchk.append(modwatchcomic)
if 'annual' in subname.lower():
if 'bi annual' in subname.lower():
logger.fdebug('[FILECHECKER] BiAnnual detected - wouldn\'t Deadpool be proud?')
subname = re.sub('Bi Annual', 'BiAnnual', subname)
jtd_len = subname.lower().find('bi annual')
enable_annual = True
else:
logger.fdebug('[FILECHECKER] Annual detected - proceeding cautiously.')
jtd_len = subname.lower().find('annual')
enable_annual = False
logger.fdebug('[FILECHECKER] Complete matching list of names to this file [' + str(len(loopchk)) + '] : ' + str(loopchk))
for loopit in loopchk:
modwatchcomic = loopit
logger.fdebug('[FILECHECKER] AS_Tuple : ' + str(AS_Tuple))
annual_comicid = None
for ATS in AS_Tuple:
logger.fdebug('[FILECHECKER] ' + str(ATS['AS_Alternate']) + ' comparing to ' + subname[:len(ATS['AS_Alternate'])]) #str(modwatchcomic))
if ATS['AS_Alternate'].lower().strip() == subname[:len(ATS['AS_Alternate'])].lower().strip(): #modwatchcomic
logger.fdebug('[FILECHECKER] Associating ComiciD : ' + str(ATS['ComicID']))
annual_comicid = str(ATS['ComicID'])
modwatchcomic = ATS['AS_Alternate']
break
comicpath = os.path.join(basedir, item)
logger.fdebug('[FILECHECKER] ' + modwatchcomic + ' - watchlist match on : ' + comicpath)
comicsize = os.path.getsize(comicpath)
#print ("Comicsize:" + str(comicsize))
comiccnt+=1
stann = 0
cchk = modwatchcomic
#else:
#if modwatchcomic.lower() in subname.lower():
# cchk = modwatchcomic
#else:
# cchk_ls = [x for x in AS_Alt if x.lower() in subname.lower()]
# cchk = cchk_ls[0]
logger.fdebug('[FILECHECKER] cchk is : ' + str(cchk))
logger.fdebug('[FILECHECKER] we should remove ' + str(nonocount) + ' characters')
findtitlepos = subname.find('-')
if charpos != 0:
logger.fdebug('[FILECHECKER] detected ' + str(len(charpos)) + ' special characters')
for i, j in enumerate(charpos):
logger.fdebug('i,j:' + str(i) + ',' + str(j))
logger.fdebug(str(len(subname)) + ' - subname: ' + subname)
logger.fdebug("digitchk: " + subname[j -1:])
if j >= len(subname):
logger.fdebug('[FILECHECKER] ' + str(j) + ' is >= ' + str(len(subname)) + ' .End reached. ignoring remainder.')
break
elif subname[j:] == '-':
try:
if j <= len(subname) and subname[j +1].isdigit():
logger.fdebug('[FILECHECKER] negative issue detected.')
#detneg = "yes"
except IndexError:
logger.fdebug('[FILECHECKER] There was a problem parsing the information from this filename: ' + comicpath)
elif j > findtitlepos:
if subname[j:] == '#':
if subname[j +1].isdigit():
logger.fdebug('[FILECHECKER] # detected denoting issue#, ignoring.')
else:
nonocount-=1
elif ('-' in watchcomic or '.' in watchcomic) and j < len(watchcomic):
logger.fdebug('[FILECHECKER] - appears in series title, ignoring.')
else:
digitchk = re.sub('#', '', subname[j -1:]).strip()
logger.fdebug('[FILECHECKER] special character appears outside of title - ignoring @ position: ' + str(charpos[i]))
nonocount-=1
#remove versioning here
if volrem != None:
jtd_len = len(cchk)# + len(volrem)# + nonocount + 1 #1 is to account for space btwn comic and vol #
else:
jtd_len = len(cchk)# + nonocount
# if sarc and mylar.READ2FILENAME:
# removest = subname.find(' ') # the - gets removed above so we test for the first blank space...
# if subname[:removest].isdigit():
# jtd_len += removest + 1 # +1 to account for space in place of -
# logger.fdebug('[FILECHECKER] adjusted jtd_len to : ' + str(removest) + ' because of story-arc reading order tags')
logger.fdebug('[FILECHECKER] nonocount [' + str(nonocount) + '] cchk [' + cchk + '] length [' + str(len(cchk)) + ']')
#if detectand:
# jtd_len = jtd_len - 2 # char substitution diff between & and 'and' = 2 chars
#if detectthe_mod == True and detectthe_sub == False:
#jtd_len = jtd_len - 3 # char subsitiution diff between 'the' and '' = 3 chars
#justthedigits = item[jtd_len:]
logger.fdebug('[FILECHECKER] final jtd_len to prune [' + str(jtd_len) + ']')
logger.fdebug('[FILECHECKER] before title removed from FILENAME [' + str(item) + ']')
logger.fdebug('[FILECHECKER] after title removed from FILENAME [' + str(item[jtd_len:]) + ']')
logger.fdebug('[FILECHECKER] creating just the digits using SUBNAME, pruning first [' + str(jtd_len) + '] chars from [' + subname + ']')
justthedigits_1 = re.sub('#', '', subname[jtd_len:]).strip()
if enable_annual:
logger.fdebug('enable annual is on')
if annual_comicid is not None:
logger.fdebug('annual comicid is ' + str(annual_comicid))
if 'biannual' in modwatchcomic.lower():
logger.fdebug('bi annual detected')
justthedigits_1 = 'BiAnnual ' + justthedigits_1
else:
logger.fdebug('annual detected')
justthedigits_1 = 'Annual ' + justthedigits_1
logger.fdebug('[FILECHECKER] after title removed from SUBNAME [' + justthedigits_1 + ']')
titlechk = False
if digitchk:
try:
#do the issue title check here
logger.fdebug('[FILECHECKER] Possible issue title is : ' + str(digitchk))
# see if it can float the digits
try:
st = digitchk.find('.')
logger.fdebug('st:' + str(st))
st_d = digitchk[:st]
logger.fdebug('st_d:' + str(st_d))
st_e = digitchk[st +1:]
logger.fdebug('st_e:' + str(st_e))
#x = int(float(st_d))
#logger.fdebug('x:' + str(x))
#validity check
if helpers.is_number(st_d):
#x2 = int(float(st_e))
if helpers.is_number(st_e):
logger.fdebug('[FILECHECKER] This is a decimal issue.')
else: raise ValueError
else: raise ValueError
except ValueError, e:
if digitchk.startswith('.'):
pass
else:
if len(justthedigits_1) >= len(digitchk) and len(digitchk) > 3:
logger.fdebug('[FILECHECKER] Removing issue title.')
justthedigits_1 = re.sub(digitchk, '', justthedigits_1).strip()
logger.fdebug('[FILECHECKER] After issue title removed [' + justthedigits_1 + ']')
titlechk = True
hyphensplit = digitchk
issue_firstword = digitchk.split()[0]
splitit = subname.split()
splitst = len(splitit)
logger.fdebug('[FILECHECKER] splitit :' + str(splitit))
logger.fdebug('[FILECHECKER] splitst :' + str(len(splitit)))
orignzb = item
except:
#test this out for manual post-processing items like original sin 003.3 - thor and loki 002...
#***************************************************************************************
# need to assign digitchk here for issues that don't have a title and fail the above try.
#***************************************************************************************
try:
logger.fdebug('[FILECHECKER] justthedigits_1 len : ' + str(len(justthedigits_1)))
logger.fdebug('[FILECHECKER] digitchk len : ' + str(len(digitchk)))
if len(justthedigits_1) >= len(digitchk) and len(digitchk) > 3:
logger.fdebug('[FILECHECKER] Removing issue title.')
justthedigits_1 = re.sub(digitchk, '', justthedigits_1).strip()
logger.fdebug('[FILECHECKER] After issue title removed [' + justthedigits_1 + ']')
titlechk = True
hyphensplit = digitchk
issue_firstword = digitchk.split()[0]
splitit = subname.split()
splitst = len(splitit)
logger.info('[FILECHECKER] splitit :' + str(splitit))
logger.info('[FILECHECKER] splitst :' + str(len(splitit)))
orignzb = item
except:
pass #(revert this back if above except doesn't work)
#remove the title if it appears
#findtitle = justthedigits.find('-')
#if findtitle > 0 and detneg == "no":
# justthedigits = justthedigits[:findtitle]
# logger.fdebug('[FILECHECKER] ("removed title from name - is now : " + str(justthedigits))
justthedigits = justthedigits_1.split(' ', 1)[0]
digitsvalid = "false"
if not justthedigits.isdigit() and 'annual' not in justthedigits.lower():
logger.fdebug('[FILECHECKER] Invalid character found in filename after item removal - cannot find issue # with this present. Temporarily removing it from the comparison to be able to proceed.')
try:
justthedigits = justthedigits_1.split(' ', 1)[1]
if justthedigits.isdigit():
digitsvalid = "true"
except:
pass
if digitsvalid == "false":
if 'annual' not in justthedigits.lower():
for jdc in list(justthedigits):
if not jdc.isdigit():
jdc_start = justthedigits.find(jdc)
alpha_isschk = justthedigits[jdc_start:]
for issexcept in issue_exceptions:
if issexcept.lower() in alpha_isschk.lower() and len(alpha_isschk) <= len(issexcept):
logger.fdebug('[FILECHECKER] ALPHANUMERIC EXCEPTION : [' + justthedigits + ']')
digitsvalid = "true"
break
if digitsvalid == "true": break
try:
tmpthedigits = justthedigits_1.split(' ', 1)[1]
logger.fdebug('[FILECHECKER] If the series has a decimal, this should be a number [' + tmpthedigits + ']')
if 'cbr' in tmpthedigits.lower() or 'cbz' in tmpthedigits.lower():
tmpthedigits = tmpthedigits[:-3].strip()
logger.fdebug('[FILECHECKER] Removed extension - now we should just have a number [' + tmpthedigits + ']')
poss_alpha = tmpthedigits
if poss_alpha.isdigit():
digitsvalid = "true"
if (justthedigits.lower() == 'annual' and 'annual' not in watchcomic.lower()) or (annual_comicid is not None):
logger.fdebug('[FILECHECKER] ANNUAL DETECTED [' + poss_alpha + ']')
justthedigits += ' ' + poss_alpha
else:
justthedigits += '.' + poss_alpha
logger.fdebug('[FILECHECKER] DECIMAL ISSUE DETECTED [' + justthedigits + ']')
else:
for issexcept in issue_exceptions:
decimalexcept = False
if '.' in issexcept:
decimalexcept = True
issexcept = issexcept[1:] #remove the '.' from comparison...
if issexcept.lower() in poss_alpha.lower() and len(poss_alpha) <= len(issexcept):
if decimalexcept:
issexcept = '.' + issexcept
justthedigits += issexcept #poss_alpha
logger.fdebug('[FILECHECKER] ALPHANUMERIC EXCEPTION. COMBINING : [' + justthedigits + ']')
digitsvalid = "true"
break
except:
tmpthedigits = None
# justthedigits = justthedigits.split(' ', 1)[0]
#if the issue has an alphanumeric (issue_exceptions, join it and push it through)
logger.fdebug('[FILECHECKER] JUSTTHEDIGITS [' + justthedigits + ']')
if digitsvalid == "true":
pass
else:
if justthedigits.isdigit():
digitsvalid = "true"
else:
if '.' in justthedigits:
tmpdec = justthedigits.find('.')
b4dec = justthedigits[:tmpdec]
a4dec = justthedigits[tmpdec +1:]
if a4dec.isdigit() and b4dec.isdigit():
logger.fdebug('[FILECHECKER] DECIMAL ISSUE DETECTED')
digitsvalid = "true"
else:
try:
x = float(justthedigits)
#validity check
if x < 0:
logger.fdebug("I've encountered a negative issue #: " + str(justthedigits) + ". Trying to accomodate.")
digitsvalid = "true"
else: raise ValueError
except ValueError, e:
logger.fdebug('Probably due to an incorrect match - I cannot determine the issue number from given issue #: ' + str(justthedigits))
logger.fdebug('[FILECHECKER] final justthedigits [' + justthedigits + ']')
if digitsvalid == "false":
logger.fdebug('[FILECHECKER] Issue number not properly detected...ignoring.')
comiccnt -=1 # remove the entry from the list count as it was incorrrectly tallied.
continue
if manual is not None:
#this is needed for Manual Run to determine matches
#without this Batman will match on Batman Incorporated, and Batman and Robin, etc..
# in case it matches on an Alternate Search pattern, set modwatchcomic to the cchk value
modwatchcomic = cchk
logger.fdebug('[FILECHECKER] cchk = ' + cchk.lower())
logger.fdebug('[FILECHECKER] modwatchcomic = ' + modwatchcomic.lower())
logger.fdebug('[FILECHECKER] subname = ' + subname.lower())
comyear = manual['SeriesYear']
issuetotal = manual['Total']
comicvolume = manual['ComicVersion']
logger.fdebug('[FILECHECKER] SeriesYear: ' + str(comyear))
logger.fdebug('[FILECHECKER] IssueTotal: ' + str(issuetotal))
logger.fdebug('[FILECHECKER] Comic Volume: ' + str(comicvolume))
logger.fdebug('[FILECHECKER] volume detected: ' + str(volrem))
if comicvolume:
ComVersChk = re.sub("[^0-9]", "", comicvolume)
if ComVersChk == '' or ComVersChk == '1':
ComVersChk = 0
else:
ComVersChk = 0
# even if it's a V1, we need to pull the date for the given issue ID and get the publication year
# for the issue. Because even if it's a V1, if there are additional Volumes then it's possible that
# it will take the incorrect series. (ie. Detective Comics (1937) & Detective Comics (2011).
# If issue #28 (2013) is found, it exists in both series, and because DC 1937 is a V1, it will bypass
# the year check which will result in the incorrect series being picked (1937)
#set the issue/year threshold here.
# 2013 - (24issues/12) = 2011.
#minyear = int(comyear) - (int(issuetotal) / 12)
maxyear = manual['LatestDate'][:4] # yyyy-mm-dd
#subnm defined at beginning of module.
len_sm = len(subnm)
#print ("there are " + str(lenm) + " words.")
cnt = 0
yearmatch = "none"
#logger.fdebug('[FILECHECKER] subsplit : ' + subsplit)
versionmatch = "false"
if vers4year is not "no" or vers4vol is not "no":
if comicvolume:
D_ComicVersion = re.sub("[^0-9]", "", comicvolume)
if D_ComicVersion == '':
D_ComicVersion = 0
else:
D_ComicVersion = 0
F_ComicVersion = re.sub("[^0-9]", "", volrem)
S_ComicVersion = str(comyear)
logger.fdebug('[FILECHECKER] FCVersion: ' + str(F_ComicVersion))
logger.fdebug('[FILECHECKER] DCVersion: ' + str(D_ComicVersion))
logger.fdebug('[FILECHECKER] SCVersion: ' + str(S_ComicVersion))
#if annualize == "true" and int(ComicYear) == int(F_ComicVersion):
# logger.fdebug('[FILECHECKER] ("We matched on versions for annuals " + str(volrem))
try:
if int(F_ComicVersion) == int(D_ComicVersion) or int(F_ComicVersion) == int(S_ComicVersion):
logger.fdebug('[FILECHECKER] We matched on versions...' + str(volrem))
versionmatch = "true"
yearmatch = "false"
else:
logger.fdebug('[FILECHECKER] Versions wrong. Ignoring possible match.')
except ValueError:
logger.warning('[FILECHECKER] Unable to determine version number. This issue will be skipped.')
result_comyear = None
while (cnt < len_sm):
if subnm[cnt] is None: break
if subnm[cnt] == ' ':
pass
else:
logger.fdebug('[FILECHECKER] ' + str(cnt) + ' Bracket Word: ' + subnm[cnt])
#if ComVersChk == 0:
# logger.fdebug('[FILECHECKER] Series version detected as V1 (only series in existance with that title). Bypassing year check')
# yearmatch = "true"
# break
if (subnm[cnt].startswith('19') or subnm[cnt].startswith('20')) and len(subnm[cnt]) == 4:
logger.fdebug('[FILECHECKER] year detected: ' + subnm[cnt])
result_comyear = subnm[cnt]
##### - checking to see what removing this does for the masses
if int(result_comyear) <= int(maxyear) and int(result_comyear) >= int(comyear):
logger.fdebug('[FILECHECKER] ' + str(result_comyear) + ' is within the series range of ' + str(comyear) + '-' + str(maxyear))
#still possible for incorrect match if multiple reboots of series end/start in same year
yearmatch = "true"
break
else:
logger.fdebug('[FILECHECKER] ' + str(result_comyear) + ' - not right - year not within series range of ' + str(comyear) + '-' + str(maxyear))
yearmatch = "false" #set to true for mass push check.
break
##### - end check
cnt+=1
if versionmatch == "false":
if yearmatch == "false":
logger.fdebug('[FILECHECKER] Failed to match on both version and issue year.')
continue
else:
logger.fdebug('[FILECHECKER] Matched on year, not on version - continuing.')
else:
if yearmatch == "false":
logger.fdebug('[FILECHECKER] Matched on version, but not on year - continuing.')
else:
logger.fdebug('[FILECHECKER] Matched on both version, and issue year - continuing.')
logger.fdebug('[FILECHECKER] yearmatch string is : ' + str(yearmatch))
if yearmatch == "none":
if ComVersChk == 0:
logger.fdebug('[FILECHECKER] Series version detected as V1 (only series in existance with that title). Bypassing year check.')
yearmatch = "true"
else:
continue
if 'annual' in subname.lower():
subname = re.sub('annual', '', subname.lower())
subname = re.sub('\s+', ' ', subname)
#if the sub has an annual, let's remove it from the modwatch as well
modwatchcomic = re.sub('annual', '', modwatchcomic.lower())
isstitle_chk = False
if titlechk:
issuetitle = helpers.get_issue_title(ComicID=manual['ComicID'], IssueNumber=justthedigits)
if issuetitle:
vals = []
watchcomic_split = watchcomic.split()
vals = mylar.search.IssueTitleCheck(issuetitle, watchcomic_split, splitit, splitst, issue_firstword, hyphensplit, orignzb=item)
logger.fdebug('vals: ' + str(vals))
if vals:
if vals[0]['status'] == 'continue':
continue
else:
logger.fdebug('Issue title status returned of : ' + str(vals[0]['status'])) # will either be OK or pass.
splitit = vals[0]['splitit']
splitst = vals[0]['splitst']
isstitle_chk = vals[0]['isstitle_chk']
possibleissue_num = vals[0]['possibleissue_num']
#if the issue title was present and it contained a numeric, it will pull that as the issue incorrectly
if isstitle_chk == True:
justthedigits = possibleissue_num
subname = re.sub(' '.join(vals[0]['isstitle_removal']), '', subname).strip()
else:
logger.fdebug('No issue title.')
#tmpitem = item[:jtd_len]
# if it's an alphanumeric with a space, rejoin, so we can remove it cleanly just below this.
substring_removal = None
poss_alpha = subname.split(' ')[-1:]
logger.fdebug('[FILECHECKER] poss_alpha: ' + str(poss_alpha))
logger.fdebug('[FILECHECKER] lenalpha: ' + str(len(''.join(poss_alpha))))
for issexcept in issue_exceptions:
if issexcept.lower()in str(poss_alpha).lower() and len(''.join(poss_alpha)) <= len(issexcept):
#get the last 2 words so that we can remove them cleanly
substring_removal = ' '.join(subname.split(' ')[-2:])
substring_join = ''.join(subname.split(' ')[-2:])
logger.fdebug('[FILECHECKER] substring_removal: ' + substring_removal)
logger.fdebug('[FILECHECKER] substring_join: ' + substring_join)
break
if substring_removal is not None:
sub_removed = subname.replace('_', ' ').replace(substring_removal, substring_join)
else:
sub_removed = subname.replace('_', ' ')
logger.fdebug('[FILECHECKER] sub_removed: ' + sub_removed)
split_sub = sub_removed.rsplit(' ', 1)[0].split(' ') #removes last word (assuming it's the issue#)
split_mod = modwatchcomic.replace('_', ' ').split() #batman
i = 0
newc = ''
while (i < len(split_mod)):
newc += split_sub[i] + ' '
i+=1
if newc:
split_sub = newc.strip().split()
logger.fdebug('[FILECHECKER] split_sub: ' + str(split_sub))
logger.fdebug('[FILECHECKER] split_mod: ' + str(split_mod))
x = len(split_sub) -1
scnt = 0
if x > len(split_mod) -1:
logger.fdebug('[FILECHECKER] number of words do not match...aborting.')
else:
while (x > -1):
logger.fdebug(str(split_sub[x]) + ' comparing to ' + str(split_mod[x]))
if str(split_sub[x]).lower() == str(split_mod[x]).lower():
scnt+=1
logger.fdebug('[FILECHECKER] word match exact. ' + str(scnt) + '/' + str(len(split_mod)))
x-=1
wordcnt = int(scnt)
logger.fdebug('[FILECHECKER] scnt:' + str(scnt))
totalcnt = int(len(split_mod))
logger.fdebug('[FILECHECKER] split_mod length:' + str(totalcnt))
try:
spercent = (wordcnt /totalcnt) * 100
except ZeroDivisionError:
spercent = 0
logger.fdebug('[FILECHECKER] we got ' + str(spercent) + ' percent.')
if int(spercent) >= 80:
logger.fdebug('[FILECHECKER] this should be considered an exact match.Justthedigits:' + justthedigits)
else:
logger.fdebug('[FILECHECKER] failure - not an exact match.')
continue
if comicsize == 0:
logger.fdebug('[FILECHECKER] Size of given file is 0 bytes. Ignoring.')
continue
if manual:
#print item
#print comicpath
#print comicsize
#print result_comyear
#print justthedigits
comiclist.append({
'ComicFilename': item,
'ComicLocation': comicpath,
'ComicSize': comicsize,
'ComicYear': result_comyear,
'JusttheDigits': justthedigits
})
#print('appended.')
# watchmatch['comiclist'] = comiclist
# break
else:
if moddir is not None:
item = os.path.join(moddir, item)
comiclist.append({
'ComicFilename': item,
'ComicLocation': comicpath,
'ComicSize': comicsize,
'JusttheDigits': justthedigits,
'AnnualComicID': annual_comicid
})
#crcvalue = crc(comicpath)
#logger.fdebug('[FILECHECKER] CRC is calculated as ' + str(crcvalue) + ' for : ' + item)
watchmatch['comiclist'] = comiclist
break
else:
#directory found - ignoring
pass
logger.fdebug('[FILECHECKER] you have a total of ' + str(comiccnt) + ' ' + watchcomic + ' comics')
watchmatch['comiccount'] = comiccnt
return watchmatch
def validateAndCreateDirectory(dir, create=False, module=None):
    """Check that a comic directory exists, optionally creating it.

    Parameters:
        dir    -- path of the directory to check.
        create -- when true, attempt to create the directory if it is missing.
        module -- optional log prefix; '[DIRECTORY-CHECK]' is appended to it.

    Returns True when the path already exists or was created by this call,
    and False when it is missing and was not created (create is false, or
    the supplied path is blank).

    Raises SystemExit when directory creation fails with an OSError.
    """
    if module is None:
        module = ''
    module += '[DIRECTORY-CHECK]'

    if os.path.exists(dir):
        logger.info(module + ' Found comic directory: ' + dir)
        return True

    logger.warn(module + ' Could not find comic directory: ' + dir)
    if not create:
        return False

    if not dir.strip():
        logger.warn(module + ' Provided directory is blank, aborting')
        return False

    logger.info(module + ' Creating comic directory (' + str(mylar.CHMOD_DIR) + ') : ' + dir)
    try:
        # mylar.CHMOD_DIR is parsed as an octal string (e.g. '0777').
        permission = int(mylar.CHMOD_DIR, 8)
        # Clear the umask so the requested mode is applied unmasked, but put
        # the previous value back afterwards: the umask is process-wide and
        # leaving it at 0 would affect every later file/directory creation.
        previous_umask = os.umask(0)
        try:
            os.makedirs(dir.rstrip(), permission)
        finally:
            os.umask(previous_umask)
    except OSError:
        raise SystemExit(module + ' Could not create directory: ' + dir + '. Exiting....')
    return True
def indices(string, char):
    """Return a list of every position in `string` whose element equals `char`."""
    positions = []
    for position, candidate in enumerate(string):
        if candidate == char:
            positions.append(position)
    return positions
def traverse_directories(dir):
    """Walk `dir` recursively and list every file found beneath it.

    Returns a list of dicts with two keys:
        "directory" -- '' for files sitting directly in `dir`, otherwise the
                       directory path as reported by os.walk.
        "filename"  -- the bare file name.

    Files inside any directory whose path contains '.AppleDouble' are left out.
    """
    found = []

    for current_dir, _subdirs, names in os.walk(dir):
        # The location is the same for every file of this directory, so it is
        # worked out once per directory rather than once per file.
        location = '' if current_dir == dir else current_dir

        if '.AppleDouble' in location:
            # Ignoring MAC OS Finder directory of cached files (/.AppleDouble/<name of file(s)>)
            continue

        for name in names:
            found.append({"directory": location,
                          "filename": name})

    logger.fdebug('there are ' + str(len(found)) + ' files.')

    return found
def crc(filename):
    """Return the CRC-32 of a file's contents as an uppercase hex string.

    Parameters:
        filename -- path of the file to checksum; it is opened in binary mode.

    Returns the checksum formatted with "%X" (uppercase hexadecimal, no
    zero padding), e.g. "CBF43926". An empty file yields "0".

    Raises IOError/OSError if the file cannot be opened or read.
    """
    # Read in sizeable chunks and feed them to a running CRC: this gives the
    # same result as hashing the whole file at once, without holding the
    # entire file in memory.
    chunk_size = 1024 * 1024
    checksum = 0
    # The with-block guarantees the handle is closed, even if a read fails.
    with open(filename, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            checksum = zlib.crc32(chunk, checksum)
    # Mask to 32 bits so the value is unsigned on every Python version.
    return "%X" % (checksum & 0xFFFFFFFF)