mylar/mylar/filechecker.py

184 lines
7.8 KiB
Python
Raw Normal View History

#/usr/bin/env python
# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mylar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.
import os
import os.path
import pprint
import subprocess
import re
import logger
import mylar
import sys
def file2comicmatch(watchmatch):
#print ("match: " + str(watchmatch))
pass
def listFiles(dir,watchcomic,AlternateSearch=None):
# use AlternateSearch to check for filenames that follow that naming pattern
# ie. Star Trek TNG Doctor Who Assimilation won't get hits as the
# checker looks for Star Trek TNG Doctor Who Assimilation2 (according to CV)
# we need to convert to ascii, as watchcomic is utf-8 and special chars f'it up
u_watchcomic = watchcomic.encode('ascii', 'ignore').strip()
logger.fdebug("comic: " + watchcomic)
basedir = dir
logger.fdebug("Looking in: " + dir)
watchmatch = {}
comiclist = []
comiccnt = 0
not_these = ['\#',
'\,',
'\/',
'\:',
'\;',
'.',
'\-',
'\!',
'\$',
'\%',
'\+',
'\'',
'\?',
'\@']
for item in os.listdir(basedir):
#print item
#subname = os.path.join(basedir, item)
subname = item
#versioning - remove it
subsplit = subname.split()
volrem = None
for subit in subsplit:
#print ("subit:" + str(subit))
if 'v' in str(subit).lower():
#print ("possible versioning detected.")
vfull = 0
if subit[1:].isdigit():
#if in format v1, v2009 etc...
if len(subit) > 3:
# if it's greater than 3 in length, then the format is Vyyyy
vfull = 1 # add on 1 character length to account for extra space
#print (subit + " - assuming versioning. Removing from initial search pattern.")
subname = re.sub(str(subit), '', subname)
volrem = subit
#print ("removed " + str(volrem) + " from filename wording")
if subit.lower()[:3] == 'vol':
#if in format vol.2013 etc
#because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely
#print ("volume detected as version #:" + str(subit))
subname = re.sub(subit, '', subname)
volrem = subit
subname = re.sub('\_', ' ', subname)
nonocount = 0
for nono in not_these:
if nono in subname:
subcnt = subname.count(nono)
#logger.fdebug(str(nono) + " detected " + str(subcnt) + " times.")
# segment '.' having a . by itself will denote the entire string which we don't want
if nono == '.':
subname = re.sub('\.', ' ', subname)
nonocount = nonocount + subcnt - 1 #(remove the extension from the length)
else:
subname = re.sub(str(nono), ' ', subname)
nonocount = nonocount + subcnt
#subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]',' ', subname)
modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]', ' ', u_watchcomic)
detectand = False
modwatchcomic = re.sub('\&', ' and ', modwatchcomic)
modwatchcomic = re.sub('\s+', ' ', str(modwatchcomic)).strip()
if '&' in subname:
subname = re.sub('\&', ' and ', subname)
detectand = True
subname = re.sub('\s+', ' ', str(subname)).strip()
if AlternateSearch is not None:
#same = encode.
u_altsearchcomic = AlternateSearch.encode('ascii', 'ignore').strip()
altsearchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\?\@]', ' ', u_altsearchcomic)
altseachcomic = re.sub('\&', ' and ', altsearchcomic)
altsearchcomic = re.sub('\s+', ' ', str(altsearchcomic)).strip()
else:
#create random characters so it will never match.
altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf"
#if '_' in subname:
# subname = subname.replace('_', ' ')
#logger.fdebug("watchcomic:" + str(modwatchcomic) + " ..comparing to found file: " + str(subname))
if modwatchcomic.lower() in subname.lower() or altsearchcomic.lower() in subname.lower():
if 'annual' in subname.lower():
#print ("it's an annual - unsure how to proceed")
continue
comicpath = os.path.join(basedir, item)
logger.fdebug( modwatchcomic + " - watchlist match on : " + comicpath)
comicsize = os.path.getsize(comicpath)
#print ("Comicsize:" + str(comicsize))
comiccnt+=1
if modwatchcomic.lower() in subname.lower():
#print ("we should remove " + str(nonocount) + " characters")
#remove versioning here
if volrem != None:
jtd_len = len(modwatchcomic) + len(volrem) + nonocount + 1 #1 is to account for space btwn comic and vol #
else:
jtd_len = len(modwatchcomic) + nonocount
if detectand:
jtd_len = jtd_len - 2 # char substitution diff between & and 'and' = 2 chars
elif altsearchcomic.lower() in subname.lower():
#remove versioning here
if volrem != None:
jtd_len = len(altsearchcomic) + len(volrem) + nonocount + 1
else:
jtd_len = len(altsearchcomic) + nonocount
if detectand:
jtd_len = jtd_len - 2
justthedigits = item[jtd_len:]
comiclist.append({
'ComicFilename': item,
'ComicLocation': comicpath,
'ComicSize': comicsize,
'JusttheDigits': justthedigits
})
watchmatch['comiclist'] = comiclist
else:
pass
#print ("directory found - ignoring")
logger.fdebug("you have a total of " + str(comiccnt) + " " + watchcomic + " comics")
watchmatch['comiccount'] = comiccnt
return watchmatch
def validateAndCreateDirectory(dir, create=False):
if os.path.exists(dir):
logger.info("Found comic directory: " + dir)
return True
else:
logger.warn("Could not find comic directory: " + dir)
if create:
if dir.strip():
logger.info("Creating comic directory ("+str(mylar.CHMOD_DIR)+") : " + dir)
try:
permission = int(mylar.CHMOD_DIR, 8)
os.umask(0) # this is probably redudant, but it doesn't hurt to clear the umask here.
os.makedirs(str(dir), permission )
except OSError:
raise SystemExit('Could not create data directory: ' + mylar.DATA_DIR + '. Exiting....')
return True
else:
logger.warn("Provided directory is blank, aborting")
return False
return False