FIX:(#817) If a series contained an issue that was a lone alpha character (i.e. issue X), it would fail everywhere. Adjusted to allow for adding / searching / filechecking. FIX: Unicode-encoded issue numbers that are fractions, for GUI display. FIX: Fraction issue numbers (1/2, 3/4, etc.) would round up for filechecking purposes and would cause problems when the series contained more than one identical issue number.

2014-09-08 04:35:15 -04:00 · 2014-09-08 04:35:15 -04:00 · 4e46e91b8b
parent 2173614836
commit 4e46e91b8b
5 changed files with 141 additions and 50 deletions
--- a/mylar/PostProcessor.py
+++ b/mylar/PostProcessor.py
@ -556,7 +556,10 @@ class PostProcessor(object):
            logger.fdebug(module + ' Zero Suppression set to : ' + str(mylar.ZERO_LEVEL_N))

            if str(len(issueno)) > 1:
-                if int(issueno) < 0:
+                if issueno.isalpha():
+                    self._log('issue detected as an alpha.')
+                    prettycomiss = str(issueno)
+                elif int(issueno) < 0:
                    self._log("issue detected is a negative")
                    prettycomiss = '-' + str(zeroadd) + str(abs(issueno))
                elif int(issueno) < 10:
--- a/mylar/filechecker.py
+++ b/mylar/filechecker.py
@ -66,7 +66,8 @@ def listFiles(dir,watchcomic,Publisher,AlternateSearch=None,manual=None,sarc=Non
                      'AI', 
                      'A',
                      'B',
-                      'C']
+                      'C',
+                      'X']

    extensions = ('.cbr', '.cbz')

@ -256,8 +257,9 @@ def listFiles(dir,watchcomic,Publisher,AlternateSearch=None,manual=None,sarc=Non
                    watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\-]', '', watchcomic)   #remove spec chars for watchcomic match.
                    subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\-]', '', subthis)
                else:
-                    watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\.\-]', '', watchcomic)   #remove spec chars for watchcomic match.
-                    subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\.\-]', '', subthis)
+                    # in order to get series like Earth 2 scanned in that contain a decimal, I removed the \. from the re.subs below - 28-08-2014
+                    watchname = re.sub('[\:\;\!\'\/\?\+\=\_\%\-]', '', watchcomic)   #remove spec chars for watchcomic match.
+                    subthis = re.sub('[\:\;\!\'\/\?\+\=\_\%\-]', '', subthis)
                logger.fdebug('[FILECHECKER] watch-cleaned: ' + watchname)
                subthis = re.sub('\s+',' ', subthis)
                logger.fdebug('[FILECHECKER] sub-cleaned: ' + subthis)
--- a/mylar/helpers.py
+++ b/mylar/helpers.py
@ -171,7 +171,7 @@ def human2bytes(s):
 def replace_all(text, dic):
    for i, j in dic.iteritems():
        text = text.replace(i, j)
-    return text
+    return text.rstrip()
    
 def cleanName(string):

@ -734,7 +734,7 @@ def cleanhtml(raw_html):
 def issuedigits(issnum):
    import db, logger
    #print "issnum : " + str(issnum)
-    if str(issnum).isdigit():
+    if issnum.isdigit():
        int_issnum = int( issnum ) * 1000
    else:
        #count = 0
@ -767,14 +767,11 @@ def issuedigits(issnum):
            else:
                int_issnum = (int(issnum[:-4]) * 1000) + ord('n') + ord('o') + ord('w')
        elif u'\xbd' in issnum:
-            issnum = .5
-            int_issnum = int(issnum) * 1000
+            int_issnum = .5 * 1000
        elif u'\xbc' in issnum:
-            issnum = .25
-            int_issnum = int(issnum) * 1000
+            int_issnum = .25 * 1000
        elif u'\xbe' in issnum:
-            issnum = .75
-            int_issnum = int(issnum) * 1000
+            int_issnum = .75 * 1000
        elif u'\u221e' in issnum:
            #issnum = utf-8 will encode the infinity symbol without any help
            int_issnum = 9999999999 * 1000  # set 9999999999 for integer value of issue
@ -820,6 +817,8 @@ def issuedigits(issnum):
                        try:
                            isschk = float(issno)
                        except ValueError, e:
+                            if len(issnum) == 1 and issnum.isalpha():
+                                break
                            logger.fdebug('invalid numeric for issue - cannot be found. Ignoring.')
                            issno = None
                            tstord = None
@ -827,18 +826,18 @@ def issuedigits(issnum):
                        break
                    x+=1
                if tstord is not None and issno is not None:
-                    logger.fdebug('tstord: ' + str(tstord))
                    a = 0
                    ordtot = 0
-                    while (a < len(tstord)):
-                        try:
-                            ordtot += ord(tstord[a].lower())  #lower-case the letters for simplicty
-                        except ValueError:
-                            break
-                        a+=1
-                    logger.fdebug('issno: ' + str(issno))
-                    int_issnum = (int(issno) * 1000) + ordtot
-                    logger.fdebug('intissnum : ' + str(int_issnum))
+                    if len(issnum) == 1 and issnum.isalpha():
+                        int_issnum = ord(tstord.lower())
+                    else:
+                        while (a < len(tstord)):
+                            try:
+                                ordtot += ord(tstord[a].lower())  #lower-case the letters for simplicty
+                            except ValueError:
+                                break
+                            a+=1
+                        int_issnum = (int(issno) * 1000) + ordtot
                elif invchk == "true":
                    logger.fdebug('this does not have an issue # that I can parse properly.')
                    int_issnum = 999999999999999
@ -1395,6 +1394,14 @@ def IssueDetails(filelocation, IssueID=None):

    return issuedetails

+def get_issue_title(IssueID):
+    """Return the IssueName stored for IssueID in the issues table.
+
+    Returns None (after logging a warning) when no row matches IssueID.
+    """
+    # logger is imported locally, the same way issuedigits() in this file does.
+    # Previously only db was imported here, so the warning path below relied on
+    # a module-level logger that is not visible in this diff.
+    import db, logger
+    myDB = db.DBConnection()
+    issue = myDB.selectone('SELECT * FROM issues WHERE IssueID=?', [IssueID]).fetchone()
+    if issue is None:
+        logger.warn('Unable to locate given IssueID within the db.')
+        return None
+    return issue['IssueName']


 from threading import Thread
--- a/mylar/importer.py
+++ b/mylar/importer.py
@ -1110,14 +1110,12 @@ def updateissuedata(comicid, comicname=None, issued=None, comicIssues=None, call
                elif 'now' in issnum.lower():
                    int_issnum = (int(issnum[:-4]) * 1000) + ord('n') + ord('o') + ord('w')
                elif u'\xbd' in issnum:
-                    issnum = .5
-                    int_issnum = int(issnum) * 1000
+                    int_issnum = .5 * 1000
+                    logger.info('1/2 issue detected :' + issnum + ' === ' + str(int_issnum))
                elif u'\xbc' in issnum:
-                    issnum = .25
-                    int_issnum = int(issnum) * 1000
+                    int_issnum = .25 * 1000
                elif u'\xbe' in issnum:
-                    issnum = .75
-                    int_issnum = int(issnum) * 1000
+                    int_issnum = .75 * 1000
                elif u'\u221e' in issnum:
                    #issnum = utf-8 will encode the infinity symbol without any help
                    int_issnum = 9999999999 * 1000  # set 9999999999 for integer value of issue
@ -1167,10 +1165,13 @@ def updateissuedata(comicid, comicname=None, issued=None, comicIssues=None, call
                            if issnum[x].isalpha():
                                #take first occurance of alpha in string and carry it through
                                tstord = issnum[x:].rstrip()
-                                issno = issnum[:x].rstrip()
+                                issno = issnum[:x+1].rstrip()
                                try:
                                    isschk = float(issno)
                                except ValueError, e:
+                                    if len(issnum) == 1 and issnum.isalpha():
+                                        logger.fdebug('detected lone alpha issue. Attempting to figure this out.')
+                                        break
                                    logger.fdebug('invalid numeric for issue - cannot be found. Ignoring.')
                                    issno = None
                                    tstord = None
@ -1178,15 +1179,15 @@ def updateissuedata(comicid, comicname=None, issued=None, comicIssues=None, call
                                break
                            x+=1
                        if tstord is not None and issno is not None:
-                            logger.fdebug('tstord: ' + str(tstord))
                            a = 0
                            ordtot = 0
-                            while (a < len(tstord)):
-                                ordtot += ord(tstord[a].lower())  #lower-case the letters for simplicty
-                                a+=1
-                            logger.fdebug('issno: ' + str(issno))
-                            int_issnum = (int(issno) * 1000) + ordtot
-                            logger.fdebug('intissnum : ' + str(int_issnum))
+                            if len(issno) == 1 and issnum.isalpha():
+                                int_issnum = ord(tstord.lower())
+                            else:
+                                while (a < len(tstord)):
+                                    ordtot += ord(tstord[a].lower())  #lower-case the letters for simplicty
+                                    a+=1
+                                int_issnum = (int(issno) * 1000) + ordtot
                        elif invchk == "true":
                            logger.fdebug('this does not have an issue # that I can parse properly.')
                            return
--- a/mylar/search.py
+++ b/mylar/search.py
@ -42,6 +42,10 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
    else: ComicYear = str(ComicYear)[:4]
    if Publisher == 'IDW Publishing': Publisher = 'IDW'
    logger.fdebug('Publisher is : ' + str(Publisher))
+
+    issuetitle = helpers.get_issue_title(IssueID)
+    logger.info('Issue Title given as : ' + str(issuetitle))
+
    if mode == 'want_ann':
        logger.info("Annual issue search detected. Appending to issue #")
        #anything for mode other than None indicates an annual.
@ -191,7 +195,7 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
                searchprov = prov_order[prov_count].lower()

            if searchmode == 'rss':
-                findit = NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, RSS="yes", ComicID=ComicID)
+                findit = NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, RSS="yes", ComicID=ComicID, issuetitle=issuetitle)
                if findit == 'yes':
                    logger.fdebug("findit = found!")
                    break
@ -204,13 +208,13 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
                        for calt in chkthealt:
                            AS_Alternate = re.sub('##','',calt)
                            logger.info(u"Alternate Search pattern detected...re-adjusting to : " + str(AS_Alternate) + " " + str(ComicYear))
-                            findit = NZB_SEARCH(AS_Alternate, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, RSS="yes", ComicID=ComicID)
+                            findit = NZB_SEARCH(AS_Alternate, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, RSS="yes", ComicID=ComicID, issuetitle=issuetitle)
                            if findit == 'yes':
                                break
                        if findit == 'yes': break

            else:
-                findit = NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, ComicID=ComicID)
+                findit = NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, ComicID=ComicID, issuetitle=issuetitle)
                if findit == 'yes':
                    logger.fdebug("findit = found!")
                    break
@ -223,14 +227,14 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD
                        for calt in chkthealt:
                            AS_Alternate = re.sub('##','',calt)
                            logger.info(u"Alternate Search pattern detected...re-adjusting to : " + str(AS_Alternate) + " " + str(ComicYear))
-                            findit = NZB_SEARCH(AS_Alternate, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, ComicID=ComicID)
+                            findit = NZB_SEARCH(AS_Alternate, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, searchprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host, ComicVersion=ComicVersion, SARC=SARC, IssueArcID=IssueArcID, ComicID=ComicID, issuetitle=issuetitle)
                            if findit == 'yes':
                                break
                        if findit == 'yes': break

            if searchprov == 'newznab':
                searchprov = newznab_host[0].rstrip()
-            logger.info('Could not find Issue ' + str(IssueNumber) + ' of ' + ComicName + '(' + str(SeriesYear) + ') using ' + str(searchprov))
+            logger.info('Could not find Issue ' + IssueNumber + ' of ' + ComicName + '(' + str(SeriesYear) + ') using ' + str(searchprov))
            prov_count+=1
            #torprtmp+=1  #torprtmp-=1

@ -245,7 +249,7 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueD

    return findit, 'None'

-def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, nzbprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host=None, ComicVersion=None, SARC=None, IssueArcID=None, RSS=None, ComicID=None):
+def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDate, StoreDate, nzbprov, prov_count, IssDateFix, IssueID, UseFuzzy, newznab_host=None, ComicVersion=None, SARC=None, IssueArcID=None, RSS=None, ComicID=None, issuetitle=None):
    
    if nzbprov == 'nzb.su':
        apikey = mylar.NZBSU_APIKEY
@ -276,7 +280,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
            tmpprov = name_newznab + ' (' + nzbprov + ')'
        else:
            tmpprov = nzbprov
-    logger.info(u"Shhh be very quiet...I'm looking for " + ComicName + " issue: " + str(IssueNumber) + " (" + str(ComicYear) + ") using " + str(tmpprov))
+    logger.info(u"Shhh be very quiet...I'm looking for " + ComicName + " issue: " + IssueNumber + " (" + str(ComicYear) + ") using " + str(tmpprov))

    #load in do not download db here for given series
    #myDB = db.DBConnection()
@ -322,7 +326,17 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
   #NEW ---
    intIss = helpers.issuedigits(IssueNumber)
    iss = IssueNumber
-    findcomiciss = iss
+    if u'\xbd' in IssueNumber:
+        findcomiciss = '0.5' 
+    elif u'\xbc' in IssueNumber:
+        findcomiciss = '0.25'
+    elif u'\xbe' in IssueNumber:
+        findcomiciss = '0.75'
+    elif u'\u221e' in IssueNumber:
+        #issnum = utf-8 will encode the infinity symbol without any help
+        findcomiciss = 'infinity'  # set 9999999999 for integer value of issue
+    else:
+        findcomiciss = iss

    #print ("we need : " + str(findcomic[findcount]) + " issue: #" + str(findcomiciss[findcount]))
    cm1 = re.sub("[\/]", " ", findcomic)
@ -835,6 +849,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                    comic_iss = comic_iss_b4.replace('.',' ')
                    #if issue_except: comic_iss = re.sub(issue_except.lower(), '', comic_iss)
                    logger.fdebug("adjusted nzb comic and issue: " + str(comic_iss))
+
                    splitit = comic_iss.split(None)
                    #something happened to dognzb searches or results...added a '.' in place of spaces
                    #screwed up most search results with dognzb. Let's try to adjust.
@ -877,7 +892,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                            if tmpiss[i].isalpha():
                            #take first occurance of alpha in string and carry it through
                                alphas = tmpiss[i:].rstrip()
-                                a_issno = tmpiss[:i].rstrip()
+                                a_issno = tmpiss[:i+1].rstrip()
                                break
                            i+=1
                        logger.fdebug("alphas: " + str(alphas))
@ -1000,9 +1015,61 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                    initialchk = 'ok'
                    if (splitst) != len(watchcomic_split):
                        logger.fdebug("incorrect comic lengths...not a match")
-                        #because the word 'the' can appear anywhere and really mess up matches...
-#                        if str(splitit[0]).lower() == "the" or str(watchcomic_split[0]).lower() == "the":
-#                            if str(splitit[0]).lower() == "the":
+
+                        issuetitle = re.sub('[\-\:\,\?\.]', ' ', str(issuetitle))
+                        issuetitle_words = issuetitle.split(None)
+                        #issue title comparison here:
+                        logger.fdebug('there are ' + str(len(issuetitle_words)) + ' words in the issue title of : ' + str(issuetitle))
+                        # we minus 1 the splitst since the issue # is included in there.
+                        if (splitst - 1) > len(watchcomic_split):  
+                            extra_words = splitst - len(watchcomic_split)
+                            logger.fdebug('there are ' + str(extra_words) + ' left over after we remove the series title.')
+                            wordcount = 1
+                            #remove the series title here so we just have the 'hopefully' issue title
+                            for word in splitit:
+                                #logger.info('word: ' + str(word))
+                                if wordcount > len(watchcomic_split):
+                                    #logger.info('wordcount: ' + str(wordcount))
+                                    #logger.info('watchcomic_split: ' + str(len(watchcomic_split)))
+                                    if wordcount - len(watchcomic_split) == 1:
+                                        search_issue_title = word
+                                    else:
+                                        search_issue_title += ' ' + word
+                                wordcount +=1
+
+                            logger.fdebug('search_issue_title is : ' + str(search_issue_title))
+                            #now we have the nzb issue title (if it exists), let's break it down further.
+                            sit_split = search_issue_title.split(None)
+                            watch_split_count = len(issuetitle_words)
+                            wsplit = 0
+                            isstitle_match = 0   #counter to tally % match
+                            misword = 0 # counter to tally words that probably don't need to be an 'exact' match for
+                            for sit in sit_split:
+                                if sit.lower() == issuetitle_words[wsplit].lower():
+                                    logger.fdebug('word match: ' + str(sit))
+                                    isstitle_match +=1
+                                else:
+                                    if sit.lower() == 'part':
+                                        #logger.fdebug('not worrying about this word : ' + str(sit))
+                                        misword +=1
+                                    if sit.isdigit():
+                                        #logger.fdebug('found digit - possible mini-series/arc subset.')
+                                        if sit in issuetitle:
+                                            logger.fdebug('found matching numeric in issuetitle.')
+                                            isstitle_match +=1
+
+                            logger.fdebug('isstitle_match count : ' + str(isstitle_match))
+                            if isstitle_match > 0:
+                                iss_calc = int( watch_split_count / isstitle_match )
+                                logger.fdebug('iss_calc: ' + str(iss_calc) + ' %')
+                            else:
+                                iss_calc = 0
+                                logger.fdebug('0 words matched on issue title.')
+                            if int(iss_calc) > 80:
+                                logger.fdebug('>80% match on issue name. If this were implemented, this would be considered a match.')
+                        else:
+                            pass
+
                        for tstsplit in splitit:
                            if tstsplit.lower() == 'the':
                                logger.fdebug("THE word detected in found comic...attempting to adjust pattern matching")
@ -1014,6 +1081,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                                splitst = splitst - 1 #remove 'the' from start
                                logger.fdebug("comic is now : " + str(splitit))#str(comic_iss[4:]))
                                #if str(watchcomic_split[0]).lower() == "the":
+
                        for tstsplit in watchcomic_split:
                            if tstsplit.lower() == 'the':
                                logger.fdebug("THE word detected in watchcomic - attempting to adjust match.")
@ -1362,7 +1430,17 @@ def nzbname_create(provider, title=None, info=None):
            #pretty this biatch up.
            BComicName = re.sub('[\:\,\/\?]', '', str(ComicName))
            Bl_ComicName = re.sub('[\&]', 'and', str(BComicName))
-            nzbname = str(re.sub(" ", ".", str(Bl_ComicName))) + "." + str(IssueNumber) + ".(" + str(comyear) + ")"
+            if u'\xbd' in issnum:
+                str_IssueNumber = '0.5'
+            elif u'\xbc' in issnum:
+                str_IssueNumber = '0.25'
+            elif u'\xbe' in issnum:
+                str_IssueNumber = '0.75'
+            elif u'\u221e' in issnum:
+                str_IssueNumber = 'infinity'
+            else:
+                str_IssueNumber = IssueNumber
+            nzbname = str(re.sub(" ", ".", str(Bl_ComicName))) + "." + str(str_IssueNumber) + ".(" + str(comyear) + ")"

            logger.fdebug("nzb name to be used for post-processing is : " + str(nzbname))

@ -1468,7 +1546,7 @@ def searcher(nzbprov, nzbname, comicinfo, link, IssueID, ComicID, tmpprov, direc
                logger.fdebug('[FAILED_DOWNLOAD_CHECKER] This is not in the failed downloads list. Will continue with the download.')

    logger.fdebug('issues match!')
-    logger.info(u"Found " + ComicName + " (" + str(comyear) + ") issue: " + str(IssueNumber) + " using " + str(tmpprov) )
+    logger.info(u"Found " + ComicName + " (" + str(comyear) + ") issue: " + IssueNumber + " using " + str(tmpprov) )

    linkstart = os.path.splitext(link)[0]
    if nzbprov == 'nzb.su' or nzbprov == 'newznab':