mylar/mylar/librarysync.py

543 lines
26 KiB
Python
Executable File

# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mylar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.
from __future__ import with_statement
import os
import glob
import re
import shutil
import mylar
from mylar import db, logger, helpers, importer, updater
# You can scan a single directory and append it to the current library by specifying append=True
def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None):
if cron and not mylar.LIBRARYSCAN:
return
if not dir:
dir = mylar.COMIC_DIR
# If we're appending a dir, it's coming from the post processor which is
# already bytestring
if not append:
dir = dir.encode(mylar.SYS_ENCODING)
if not os.path.isdir(dir):
logger.warn('Cannot find directory: %s. Not scanning' % dir.decode(mylar.SYS_ENCODING, 'replace'))
return
logger.info('Scanning comic directory: %s' % dir.decode(mylar.SYS_ENCODING, 'replace'))
basedir = dir
comic_list = []
comiccnt = 0
extensions = ('cbr','cbz')
for r,d,f in os.walk(dir):
#for directory in d[:]:
# if directory.startswith("."):
# d.remove(directory)
for files in f:
if any(files.lower().endswith('.' + x.lower()) for x in extensions):
comic = files
comicpath = os.path.join(r, files)
comicsize = os.path.getsize(comicpath)
print "Comic: " + comic
print "Comic Path: " + comicpath
print "Comic Size: " + str(comicsize)
# We need the unicode path to use for logging, inserting into database
unicode_comic_path = comicpath.decode(mylar.SYS_ENCODING, 'replace')
comiccnt+=1
comic_dict = { 'ComicFilename': comic,
'ComicLocation': comicpath,
'ComicSize': comicsize,
'Unicode_ComicLocation': unicode_comic_path }
comic_list.append(comic_dict)
logger.info("I've found a total of " + str(comiccnt) + " comics....analyzing now")
logger.info("comiclist: " + str(comic_list))
myDB = db.DBConnection()
#let's load in the watchlist to see if we have any matches.
logger.info("loading in the watchlist to see if a series is being watched already...")
watchlist = myDB.action("SELECT * from comics")
ComicName = []
ComicYear = []
ComicPublisher = []
ComicTotal = []
ComicID = []
ComicLocation = []
AltName = []
watchcnt = 0
watch_kchoice = []
watchchoice = {}
import_by_comicids = []
import_comicids = {}
for watch in watchlist:
# let's clean up the name, just in case for comparison purposes...
watchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', watch['ComicName']).encode('utf-8').strip()
#watchcomic = re.sub('\s+', ' ', str(watchcomic)).strip()
if ' the ' in watchcomic.lower():
#drop the 'the' from the watchcomic title for proper comparisons.
watchcomic = watchcomic[-4:]
alt_chk = "no" # alt-checker flag (default to no)
# account for alternate names as well
if watch['AlternateSearch'] is not None and watch['AlternateSearch'] is not 'None':
altcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', watch['AlternateSearch']).encode('utf-8').strip()
#altcomic = re.sub('\s+', ' ', str(altcomic)).strip()
AltName.append(altcomic)
alt_chk = "yes" # alt-checker flag
ComicName.append(watchcomic)
ComicYear.append(watch['ComicYear'])
ComicPublisher.append(watch['ComicPublisher'])
ComicTotal.append(watch['Total'])
ComicID.append(watch['ComicID'])
ComicLocation.append(watch['ComicLocation'])
watchcnt+=1
logger.info("Successfully loaded " + str(watchcnt) + " series from your watchlist.")
ripperlist=['digital-',
'empire',
'dcp']
watchfound = 0
for i in comic_list:
print i['ComicFilename']
comfilename = i['ComicFilename']
comlocation = i['ComicLocation']
#let's clean up the filename for matching purposes
cfilename = re.sub('[\_\#\,\/\:\;\-\!\$\%\&\+\'\?\@]', ' ', comfilename)
#cfilename = re.sub('\s', '_', str(cfilename))
#versioning - remove it
subsplit = cfilename.replace('_', ' ').split()
volno = None
volyr = None
for subit in subsplit:
if subit[0].lower() == 'v':
vfull = 0
if subit[1:].isdigit():
#if in format v1, v2009 etc...
if len(subit) > 3:
# if it's greater than 3 in length, then the format is Vyyyy
vfull = 1 # add on 1 character length to account for extra space
cfilename = re.sub(subit, '', cfilename)
volno = re.sub("[^0-9]", " ", subit)
elif subit.lower()[:3] == 'vol':
#if in format vol.2013 etc
#because the '.' in Vol. gets removed, let's loop thru again after the Vol hit to remove it entirely
logger.fdebug('volume indicator detected as version #:' + str(subit))
cfilename = re.sub(subit, '', cfilename)
volyr = re.sub("[^0-9]", " ", subit)
cm_cn = 0
#we need to track the counter to make sure we are comparing the right array parts
#this takes care of the brackets :)
m = re.findall('[^()]+', cfilename)
lenm = len(m)
print ("there are " + str(lenm) + " words.")
cnt = 0
yearmatch = "false"
foundonwatch = "False"
issue = 999999
while (cnt < lenm):
if m[cnt] is None: break
if m[cnt] == ' ':
pass
else:
logger.fdebug(str(cnt) + ". Bracket Word: " + m[cnt])
if cnt == 0:
comic_andiss = m[cnt]
logger.fdebug("Comic: " + comic_andiss)
# if it's not in the standard format this will bork.
# let's try to accomodate (somehow).
# first remove the extension (if any)
extensions = ('cbr', 'cbz')
if comic_andiss.lower().endswith(extensions):
comic_andiss = comic_andiss[:-4]
print ("removed extension from filename.")
#now we have to break up the string regardless of formatting.
#let's force the spaces.
comic_andiss = re.sub('_', ' ', comic_andiss)
cs = comic_andiss.split()
cs_len = len(cs)
cn = ''
ydetected = 'no'
idetected = 'no'
decimaldetect = 'no'
for i in reversed(xrange(len(cs))):
#start at the end.
print ("word: " + str(cs[i]))
#assume once we find issue - everything prior is the actual title
#idetected = no will ignore everything so it will assume all title
if cs[i][:-2] == '19' or cs[i][:-2] == '20' and idetected == 'no':
print ("year detected: " + str(cs[i]))
ydetected = 'yes'
result_comyear = cs[i]
elif cs[i].isdigit() and idetected == 'no' or '.' in cs[i]:
issue = cs[i]
print ("issue detected : " + str(issue))
idetected = 'yes'
if '.' in cs[i]:
#make sure it's a number on either side of decimal and assume decimal issue.
decst = cs[i].find('.')
dec_st = cs[i][:decst]
dec_en = cs[i][decst+1:]
print ("st: " + str(dec_st))
print ("en: " + str(dec_en))
if dec_st.isdigit() and dec_en.isdigit():
print ("decimal issue detected...adjusting.")
issue = dec_st + "." + dec_en
print ("issue detected: " + str(issue))
idetected = 'yes'
else:
print ("false decimal represent. Chunking to extra word.")
cn = cn + cs[i] + " "
break
elif '\#' in cs[i] or decimaldetect == 'yes':
print ("issue detected: " + str(cs[i]))
idetected = 'yes'
else: cn = cn + cs[i] + " "
if ydetected == 'no':
#assume no year given in filename...
result_comyear = "0000"
print ("cm?: " + str(cn))
if issue is not '999999':
comiss = issue
else:
logger.ERROR("Invalid Issue number (none present) for " + comfilename)
break
cnsplit = cn.split()
cname = ''
findcn = 0
while (findcn < len(cnsplit)):
cname = cname + cs[findcn] + " "
findcn+=1
cname = cname[:len(cname)-1] # drop the end space...
print ("assuming name is : " + cname)
com_NAME = cname
print ("com_NAME : " + com_NAME)
yearmatch = "True"
else:
# we're assuming that the year is in brackets (and it should be damnit)
if m[cnt][:-2] == '19' or m[cnt][:-2] == '20':
print ("year detected: " + str(m[cnt]))
ydetected = 'yes'
result_comyear = m[cnt]
cnt+=1
splitit = []
watchcomic_split = []
logger.fdebug("filename comic and issue: " + comic_andiss)
#changed this from '' to ' '
comic_iss_b4 = re.sub('[\-\:\,]', ' ', comic_andiss)
comic_iss = comic_iss_b4.replace('.',' ')
comic_iss = re.sub('[\s+]', ' ', comic_iss).strip()
logger.fdebug("adjusted comic and issue: " + str(comic_iss))
#remove 'the' from here for proper comparisons.
if ' the ' in comic_iss.lower():
comic_iss = comic_iss[-4:]
splitit = comic_iss.split(None)
logger.fdebug("adjusting from: " + str(comic_iss_b4) + " to: " + str(comic_iss))
#bmm = re.findall('v\d', comic_iss)
#if len(bmm) > 0: splitst = len(splitit) - 2
#else: splitst = len(splitit) - 1
#-----
#here we cycle through the Watchlist looking for a match.
while (cm_cn < watchcnt):
#setup the watchlist
comname = ComicName[cm_cn]
print ("watch_comic:" + comname)
comyear = ComicYear[cm_cn]
compub = ComicPublisher[cm_cn]
comtotal = ComicTotal[cm_cn]
comicid = ComicID[cm_cn]
watch_location = ComicLocation[cm_cn]
# if splitit[(len(splitit)-1)].isdigit():
# #compares - if the last digit and second last digit are #'s seperated by spaces assume decimal
# comic_iss = splitit[(len(splitit)-1)]
# splitst = len(splitit) - 1
# if splitit[(len(splitit)-2)].isdigit():
# # for series that have a digit at the end, it screws up the logistics.
# i = 1
# chg_comic = splitit[0]
# while (i < (len(splitit)-1)):
# chg_comic = chg_comic + " " + splitit[i]
# i+=1
# logger.fdebug("chg_comic:" + str(chg_comic))
# if chg_comic.upper() == comname.upper():
# logger.fdebug("series contains numerics...adjusting..")
# else:
# changeup = "." + splitit[(len(splitit)-1)]
# logger.fdebug("changeup to decimal: " + str(changeup))
# comic_iss = splitit[(len(splitit)-2)] + "." + comic_iss
# splitst = len(splitit) - 2
# else:
# if the nzb name doesn't follow the series-issue-year format even closely..ignore nzb
# logger.fdebug("invalid naming format of filename detected - cannot properly determine issue")
# continue
# make sure that things like - in watchcomic are accounted for when comparing to nzb.
# there shouldn't be an issue in the comic now, so let's just assume it's all gravy.
splitst = len(splitit)
watchcomic_split = helpers.cleanName(comname)
watchcomic_split = re.sub('[\-\:\,\.]', ' ', watchcomic_split).split(None)
logger.fdebug(str(splitit) + " file series word count: " + str(splitst))
logger.fdebug(str(watchcomic_split) + " watchlist word count: " + str(len(watchcomic_split)))
if (splitst) != len(watchcomic_split):
logger.fdebug("incorrect comic lengths...not a match")
# if str(splitit[0]).lower() == "the":
# logger.fdebug("THE word detected...attempting to adjust pattern matching")
# splitit[0] = splitit[4:]
else:
logger.fdebug("length match..proceeding")
n = 0
scount = 0
logger.fdebug("search-length: " + str(splitst))
logger.fdebug("Watchlist-length: " + str(len(watchcomic_split)))
while ( n <= (splitst)-1 ):
logger.fdebug("splitit: " + str(splitit[n]))
if n < (splitst) and n < len(watchcomic_split):
logger.fdebug(str(n) + " Comparing: " + str(watchcomic_split[n]) + " .to. " + str(splitit[n]))
if '+' in watchcomic_split[n]:
watchcomic_split[n] = re.sub('+', '', str(watchcomic_split[n]))
if str(watchcomic_split[n].lower()) in str(splitit[n].lower()) and len(watchcomic_split[n]) >= len(splitit[n]):
logger.fdebug("word matched on : " + str(splitit[n]))
scount+=1
#elif ':' in splitit[n] or '-' in splitit[n]:
# splitrep = splitit[n].replace('-', '')
# print ("non-character keyword...skipped on " + splitit[n])
elif str(splitit[n]).lower().startswith('v'):
logger.fdebug("possible versioning..checking")
#we hit a versioning # - account for it
if splitit[n][1:].isdigit():
comicversion = str(splitit[n])
logger.fdebug("version found: " + str(comicversion))
else:
logger.fdebug("Comic / Issue section")
if splitit[n].isdigit():
logger.fdebug("issue detected")
#comiss = splitit[n]
# comicNAMER = n - 1
# com_NAME = splitit[0]
# cmnam = 1
# while (cmnam <= comicNAMER):
# com_NAME = str(com_NAME) + " " + str(splitit[cmnam])
# cmnam+=1
# logger.fdebug("comic: " + str(com_NAME))
else:
logger.fdebug("non-match for: "+ str(splitit[n]))
pass
n+=1
#set the match threshold to 80% (for now)
# if it's less than 80% consider it a non-match and discard.
#splitit has to splitit-1 because last position is issue.
wordcnt = int(scount)
logger.fdebug("scount:" + str(wordcnt))
totalcnt = int(splitst)
logger.fdebug("splitit-len:" + str(totalcnt))
spercent = (wordcnt/totalcnt) * 100
logger.fdebug("we got " + str(spercent) + " percent.")
if int(spercent) >= 80:
logger.fdebug("it's a go captain... - we matched " + str(spercent) + "%!")
logger.fdebug("this should be a match!")
# if '.' in comic_iss:
# comisschk_find = comic_iss.find('.')
# comisschk_b4dec = comic_iss[:comisschk_find]
# comisschk_decval = comic_iss[comisschk_find+1:]
# logger.fdebug("Found IssueNumber: " + str(comic_iss))
# logger.fdebug("..before decimal: " + str(comisschk_b4dec))
# logger.fdebug("...after decimal: " + str(comisschk_decval))
# #--let's make sure we don't wipe out decimal issues ;)
# if int(comisschk_decval) == 0:
# ciss = comisschk_b4dec
# cintdec = int(comisschk_decval)
# else:
# if len(comisschk_decval) == 1:
# ciss = comisschk_b4dec + "." + comisschk_decval
# cintdec = int(comisschk_decval) * 10
# else:
# ciss = comisschk_b4dec + "." + comisschk_decval.rstrip('0')
# cintdec = int(comisschk_decval.rstrip('0')) * 10
# comintIss = (int(comisschk_b4dec) * 1000) + cintdec
# else:
# comintIss = int(comic_iss) * 1000
logger.fdebug("issue we found for is : " + str(comiss))
#set the year to the series we just found ;)
result_comyear = comyear
#issue comparison now as well
logger.info(u"Found " + comname + " (" + str(comyear) + ") issue: " + str(comiss))
# watchfound+=1
watchmatch = str(comicid)
# watch_kchoice.append({
# "ComicID": str(comicid),
# "ComicName": str(comname),
# "ComicYear": str(comyear),
# "ComicIssue": str(int(comic_iss)),
# "ComicLocation": str(watch_location),
# "OriginalLocation" : str(comlocation),
# "OriginalFilename" : str(comfilename)
# })
foundonwatch = "True"
break
elif int(spercent) < 80:
logger.fdebug("failure - we only got " + str(spercent) + "% right!")
cm_cn+=1
if foundonwatch == "False":
watchmatch = None
#---if it's not a match - send it to the importer.
n = 0
# print ("comic_andiss : " + str(comic_andiss))
# csplit = comic_andiss.split(None)
# while ( n <= (len(csplit)-1) ):
# print ("csplit:" + str(csplit[n]))
# if csplit[n].isdigit():
# logger.fdebug("issue detected")
# comiss = splitit[n]
# logger.fdebug("issue # : " + str(comiss))
# comicNAMER = n - 1
# com_NAME = csplit[0]
# cmnam = 1
# while (cmnam <= comicNAMER):
# com_NAME = str(com_NAME) + " " + str(csplit[cmnam])
# cmnam+=1
# logger.fdebug("comic: " + str(com_NAME))
# n+=1
if volyr is None:
if result_comyear is None:
result_comyear = '0000' #no year in filename basically.
else:
if result_comyear is None:
result_comyear = volyr
print ("adding " + com_NAME + " to the import-queue!")
impid = com_NAME + "-" + str(result_comyear) + "-" + str(comiss)
print ("impid: " + str(impid))
import_by_comicids.append({
"impid": impid,
"watchmatch": watchmatch,
"comicname" : com_NAME,
"comicyear" : result_comyear,
"comfilename" : comfilename,
"comlocation" : comlocation.decode(mylar.SYS_ENCODING)
})
if len(watch_kchoice) > 0:
watchchoice['watchlist'] = watch_kchoice
print ("watchchoice: " + str(watchchoice))
logger.info("I have found " + str(watchfound) + " out of " + str(comiccnt) + " comics for series that are being watched.")
wat = 0
comicids = []
if watchfound > 0:
if mylar.IMP_MOVE:
logger.info("You checked off Move Files...so that's what I'm going to do")
#check to see if Move Files is enabled.
#if not being moved, set the archive bit.
print("Moving files into appropriate directory")
while (wat < watchfound):
watch_the_list = watchchoice['watchlist'][wat]
watch_comlocation = watch_the_list['ComicLocation']
watch_comicid = watch_the_list['ComicID']
watch_comicname = watch_the_list['ComicName']
watch_comicyear = watch_the_list['ComicYear']
watch_comiciss = watch_the_list['ComicIssue']
print ("ComicLocation: " + str(watch_comlocation))
orig_comlocation = watch_the_list['OriginalLocation']
orig_filename = watch_the_list['OriginalFilename']
print ("Orig. Location: " + str(orig_comlocation))
print ("Orig. Filename: " + str(orig_filename))
#before moving check to see if Rename to Mylar structure is enabled.
if mylar.IMP_RENAME:
print("Renaming files according to configuration details : " + str(mylar.FILE_FORMAT))
renameit = helpers.rename_param(watch_comicid, watch_comicname, watch_comicyear, watch_comiciss)
nfilename = renameit['nfilename']
dst_path = os.path.join(watch_comlocation,nfilename)
if str(watch_comicid) not in comicids:
comicids.append(watch_comicid)
else:
print("Renaming files not enabled, keeping original filename(s)")
dst_path = os.path.join(watch_comlocation,orig_filename)
#os.rename(os.path.join(self.nzb_folder, str(ofilename)), os.path.join(self.nzb_folder,str(nfilename + ext)))
#src = os.path.join(, str(nfilename + ext))
print ("I'm going to move " + str(orig_comlocation) + " to .." + str(dst_path))
try:
shutil.move(orig_comlocation, dst_path)
except (OSError, IOError):
logger.info("Failed to move directory - check directories and manually re-run.")
wat+=1
else:
# if move files isn't enabled, let's set all found comics to Archive status :)
while (wat < watchfound):
watch_the_list = watchchoice['watchlist'][wat]
watch_comicid = watch_the_list['ComicID']
watch_issue = watch_the_list['ComicIssue']
print ("ComicID: " + str(watch_comicid))
print ("Issue#: " + str(watch_issue))
issuechk = myDB.action("SELECT * from issues where ComicID=? AND INT_IssueNumber=?", [watch_comicid, watch_issue]).fetchone()
if issuechk is None:
print ("no matching issues for this comic#")
else:
print("...Existing status: " + str(issuechk['Status']))
control = {"IssueID": issuechk['IssueID']}
values = { "Status": "Archived"}
print ("...changing status of " + str(issuechk['Issue_Number']) + " to Archived ")
myDB.upsert("issues", values, control)
if str(watch_comicid) not in comicids:
comicids.append(watch_comicid)
wat+=1
if comicids is None: pass
else:
c_upd = len(comicids)
c = 0
while (c < c_upd ):
print ("Rescanning.. " + str(c))
updater.forceRescan(c)
if not len(import_by_comicids):
return "Completed"
if len(import_by_comicids) > 0:
import_comicids['comic_info'] = import_by_comicids
print ("import comicids: " + str(import_by_comicids))
return import_comicids, len(import_by_comicids)