mylar/mylar/filechecker.py

#!/usr/bin/env python
#  This file is part of Mylar.
#
#  Mylar is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  Mylar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with Mylar.  If not, see <http://www.gnu.org/licenses/>.

import os
import os.path
import pprint
import subprocess
import re
import logger

def file2comicmatch(watchmatch):
    """Placeholder that accepts a match result and does nothing with it.

    The argument is ignored and the function always returns None.
    """
    # Disabled debug output kept for reference:
    #print ("match: " + str(watchmatch))
    return None

def listFiles(dir,watchcomic,AlternateSearch=None):
    """Scan a directory for entries whose names contain a watched comic title.

    Each name returned by os.listdir(dir) is normalised (separator punctuation
    replaced by spaces, 'v<digits>' version tokens dropped, whitespace
    collapsed) and compared case-insensitively against the normalised
    watchcomic title and, when given, the normalised AlternateSearch title.
    Entries whose normalised name contains 'annual' are skipped.

    Parameters:
        dir             -- directory to list (not recursive).
        watchcomic      -- series title to look for; must support
                           .encode('ascii', 'ignore').
        AlternateSearch -- optional second title to accept, for files that
                           follow a different naming pattern than watchcomic
                           (e.g. the series name without a trailing volume
                           number).

    Returns:
        A dict with:
          'comiccount' -- number of matching entries (always present).
          'comiclist'  -- list of dicts, one per match, with the keys
                          'ComicFilename', 'ComicLocation', 'ComicSize' and
                          'JusttheDigits'. Only present when at least one
                          entry matched.

    Raises whatever os.listdir / os.path.getsize raise for an unreadable
    directory or entry.
    """
    # Punctuation that is treated as a word separator when comparing names.
    separators = re.compile(r"[_#,/:;.\-!$%&+'?@]")
    whitespace = re.compile(r'\s+')

    def normalise(text):
        # Separator punctuation -> spaces, then squeeze runs of whitespace.
        return whitespace.sub(' ', separators.sub(' ', text)).strip()

    # Reduce the title to plain ASCII so non-ASCII characters cannot break
    # the comparison against file names.
    u_watchcomic = watchcomic.encode('ascii', 'ignore').strip()
    logger.fdebug("comic: " + watchcomic)
    basedir = dir
    logger.fdebug("Looking in: " + dir)

    # Both search titles are independent of the directory contents, so they
    # are normalised once here rather than once per directory entry.
    modwatchcomic = normalise(u_watchcomic)
    if AlternateSearch is not None:
        u_altsearchcomic = AlternateSearch.encode('ascii', 'ignore').strip()
        altsearchcomic = normalise(u_altsearchcomic)
    else:
        # Placeholder text that is not expected to appear in any file name,
        # so the alternate comparison never succeeds.
        altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf"
    watch_key = modwatchcomic.lower()
    alt_key = altsearchcomic.lower()

    watchmatch = {}
    comiclist = []
    comiccnt = 0
    for item in os.listdir(basedir):
        # On Python 2 os.listdir() yields unicode names for a unicode path and
        # str() of a non-ASCII unicode name raises; fall back to the same
        # ASCII-dropping conversion that is applied to the titles.
        try:
            subname = str(item)
        except UnicodeError:
            subname = item.encode('ascii', 'ignore')
        subname = separators.sub(' ', subname)

        # Versioning - drop whole 'v<digits>' tokens only, so the same
        # characters inside another word are left alone. Re-joining with
        # single spaces also collapses and strips the whitespace.
        subname = ' '.join(
            tok for tok in subname.split()
            if not (tok.startswith('v') and tok[1:].isdigit())
        )

        logger.fdebug("watchcomic:" + str(modwatchcomic) + " ..comparing to found file: " + str(subname))
        lowered = subname.lower()
        matched_watch = watch_key in lowered
        if not (matched_watch or alt_key in lowered):
            continue
        if 'annual' in lowered:
            # Annuals are not handled here; skip them.
            continue

        comicpath = os.path.join(basedir, item)
        logger.fdebug( modwatchcomic + " - watchlist match on : " + comicpath)
        comicsize = os.path.getsize(comicpath)
        comiccnt+=1

        # The primary title takes precedence when both titles match.
        # NOTE(review): the slice is taken from the raw file name using the
        # length of the normalised title, so it presumably expects the title
        # at the very start of the file name - confirm against the callers.
        if matched_watch:
            jtd_len = len(modwatchcomic)
        else:
            jtd_len = len(altsearchcomic)
        justthedigits = item[jtd_len:]

        comiclist.append({
             'ComicFilename':           item,
             'ComicLocation':           comicpath,
             'ComicSize':               comicsize,
             'JusttheDigits':           justthedigits
             })

    # Keep the original contract: the key only exists when something matched.
    if comiclist:
        watchmatch['comiclist'] = comiclist
    logger.fdebug("you have a total of " + str(comiccnt) + " " + watchcomic + " comics")
    watchmatch['comiccount'] = comiccnt
    return watchmatch
attempted fix for : and & in Comic Titles when adding 2012-09-13 15:27:34 +00:00			`#/usr/bin/env python`
			`# This file is part of Mylar.`
			`#`
			`# Mylar is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# Mylar is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with Mylar. If not, see <http://www.gnu.org/licenses/>.`

			`import os`
			`import os.path`
			`import pprint`
			`import subprocess`
Fix: decimal issue should be fixed for both searching and file checking, Fix: blackhole nzbs in-line for post-processing, Imp: Alternate Naming allowed for searching now(Comic Detail/Edit tab) 2012-12-31 16:52:16 +00:00			`import re`
Fix: Recheck files/scanning of files resulted in missing files even if present (issue#127) 2013-01-05 03:00:13 +00:00			`import logger`
attempted fix for : and & in Comic Titles when adding 2012-09-13 15:27:34 +00:00
			`def file2comicmatch(watchmatch):`
			`#print ("match: " + str(watchmatch))`
			`pass`

FIX: Alternate Search Names (if given) are also used by the filescanner now 2013-01-07 20:35:17 +00:00			`def listFiles(dir,watchcomic,AlternateSearch=None):`
			`# use AlternateSearch to check for filenames that follow that naming pattern`
			`# ie. Star Trek TNG Doctor Who Assimilation won't get hits as the`
			`# checker looks for Star Trek TNG Doctor Who Assimilation2 (according to CV)`
FIX: Fixed some issues with unicode conversions and handling special characters 2013-03-08 01:36:36 +00:00
			`# we need to convert to ascii, as watchcomic is utf-8 and special chars f'it up`
			`u_watchcomic = watchcomic.encode('ascii', 'ignore').strip()`
Fix: Recheck files/scanning of files resulted in missing files even if present (issue#127) 2013-01-05 03:00:13 +00:00			`logger.fdebug("comic: " + watchcomic)`
attempted fix for : and & in Comic Titles when adding 2012-09-13 15:27:34 +00:00			`basedir = dir`
Fix: Recheck files/scanning of files resulted in missing files even if present (issue#127) 2013-01-05 03:00:13 +00:00			`logger.fdebug("Looking in: " + dir)`
attempted fix for : and & in Comic Titles when adding 2012-09-13 15:27:34 +00:00			`watchmatch = {}`
			`comiclist = []`
			`comiccnt = 0`
			`for item in os.listdir(basedir):`
			`#print item`
Fix: decimal issue should be fixed for both searching and file checking, Fix: blackhole nzbs in-line for post-processing, Imp: Alternate Naming allowed for searching now(Comic Detail/Edit tab) 2012-12-31 16:52:16 +00:00			`#subname = os.path.join(basedir, item)`
			`subname = item`
attempted fix for : and & in Comic Titles when adding 2012-09-13 15:27:34 +00:00			`#print subname`
Fix: decimal issue should be fixed for both searching and file checking, Fix: blackhole nzbs in-line for post-processing, Imp: Alternate Naming allowed for searching now(Comic Detail/Edit tab) 2012-12-31 16:52:16 +00:00			`subname = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]',' ', str(subname))`
FIX: Fixed some issues with unicode conversions and handling special characters 2013-03-08 01:36:36 +00:00			`modwatchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', u_watchcomic)`
Fix: Recheck files/scanning of files resulted in missing files even if present (issue#127) 2013-01-05 03:00:13 +00:00			`modwatchcomic = re.sub('\s+', ' ', str(modwatchcomic)).strip()`
FIX: If the actual filenames contained a vX (Version #), filechecker would ignore it 2013-02-20 19:34:55 +00:00			`#versioning - remove it`
			`subsplit = subname.split()`
			`for subit in subsplit:`
			`if 'v' in str(subit):`
			`#print ("possible versioning detected.")`
			`if subit[1:].isdigit():`
			`#print (subit + " - assuming versioning. Removing from initial search pattern.")`
			`subname = re.sub(str(subit), '', subname)`

Fix: Recheck files/scanning of files resulted in missing files even if present (issue#127) 2013-01-05 03:00:13 +00:00			`subname = re.sub('\s+', ' ', str(subname)).strip()`
FIX: Alternate Search Names (if given) are also used by the filescanner now 2013-01-07 20:35:17 +00:00			`if AlternateSearch is not None:`
FIX: Fixed some issues with unicode conversions and handling special characters 2013-03-08 01:36:36 +00:00			`#same = encode.`
			`u_altsearchcomic = AlternateSearch.encode('ascii', 'ignore').strip()`
			`altsearchcomic = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\&\+\'\?\@]', ' ', u_altsearchcomic)`
FIX: Alternate Search Names (if given) are also used by the filescanner now 2013-01-07 20:35:17 +00:00			`altsearchcomic = re.sub('\s+', ' ', str(altsearchcomic)).strip()`
			`else:`
			`#create random characters so it will never match.`
			`altsearchcomic = "127372873872871091383 abdkhjhskjhkjdhakajhf"`
Fix: decimal issue should be fixed for both searching and file checking, Fix: blackhole nzbs in-line for post-processing, Imp: Alternate Naming allowed for searching now(Comic Detail/Edit tab) 2012-12-31 16:52:16 +00:00			`#if '_' in subname:`
			`# subname = subname.replace('_', ' ')`
Fix: Recheck files/scanning of files resulted in missing files even if present (issue#127) 2013-01-05 03:00:13 +00:00			`logger.fdebug("watchcomic:" + str(modwatchcomic) + " ..comparing to found file: " + str(subname))`
FIX: Alternate Search Names (if given) are also used by the filescanner now 2013-01-07 20:35:17 +00:00			`if modwatchcomic.lower() in subname.lower() or altsearchcomic.lower() in subname.lower():`
attempted fix for : and & in Comic Titles when adding 2012-09-13 15:27:34 +00:00			`if 'annual' in subname.lower():`
several bug fixes - hopefully not breaking something else..new search, better adds, force chk works, other bugs fixed. 2012-09-24 05:17:29 +00:00			`#print ("it's an annual - unsure how to proceed")`
tonnes of fixes/updates in this...new post-processing (like sickbeard), custom format for folers/files now, comic detail image fix, status changes (archived status added), have counts reflected on Archived status, file-scanner redone to be more inclusive, 1/2 issue fix 2012-10-30 10:43:01 +00:00			`continue`
attempted fix for : and & in Comic Titles when adding 2012-09-13 15:27:34 +00:00			`comicpath = os.path.join(basedir, item)`
Fix: Recheck files/scanning of files resulted in missing files even if present (issue#127) 2013-01-05 03:00:13 +00:00			`logger.fdebug( modwatchcomic + " - watchlist match on : " + comicpath)`
attempted fix for : and & in Comic Titles when adding 2012-09-13 15:27:34 +00:00			`comicsize = os.path.getsize(comicpath)`
			`#print ("Comicsize:" + str(comicsize))`
			`comiccnt+=1`
FIX:(#162) Sab Download directory option available for Post-Processing, FIX:(#195) Series that had digits in the title would error out occasionally on File Checking, IMP: Minimum/Maximum size restrictions available for Experimental Search ONLY, IMP: some further work on the directory import screens 2013-02-09 03:34:02 +00:00			`if modwatchcomic.lower() in subname.lower():`
			`jtd_len = len(modwatchcomic)`
			`justthedigits = item[jtd_len:]`
			`elif altsearchcomic.lower() in subname.lower():`
			`jtd_len = len(altsearchcomic)`
			`justthedigits = item[jtd_len:]`
attempted fix for : and & in Comic Titles when adding 2012-09-13 15:27:34 +00:00			`comiclist.append({`
			`'ComicFilename': item,`
			`'ComicLocation': comicpath,`
FIX:(#162) Sab Download directory option available for Post-Processing, FIX:(#195) Series that had digits in the title would error out occasionally on File Checking, IMP: Minimum/Maximum size restrictions available for Experimental Search ONLY, IMP: some further work on the directory import screens 2013-02-09 03:34:02 +00:00			`'ComicSize': comicsize,`
			`'JusttheDigits': justthedigits`
attempted fix for : and & in Comic Titles when adding 2012-09-13 15:27:34 +00:00			`})`
			`watchmatch['comiclist'] = comiclist`
			`else:`
			`pass`
			`#print ("directory found - ignoring")`
FIX: Fixed some issues with unicode conversions and handling special characters 2013-03-08 01:36:36 +00:00			`logger.fdebug("you have a total of " + str(comiccnt) + " " + watchcomic + " comics")`
attempted fix for : and & in Comic Titles when adding 2012-09-13 15:27:34 +00:00			`watchmatch['comiccount'] = comiccnt`
			`return watchmatch`