Fix: nzb.su should now be working (sorry), Fix: accounted for some problems with searches not being found when titles contained '-' or ':', Fix: SAB priority working again, Fix: adding comics starting in 2013 wouldn't work sometimes

2025-03-10 05:52:48 +00:00 · 2012-12-23 13:16:25 -05:00 · 2012-12-23 13:16:25 -05:00 · bd49656c5c
commit bd49656c5c
parent e5f8170094
2 changed files with 47 additions and 89 deletions
--- a/mylar/parseit.py
+++ b/mylar/parseit.py
@ -23,71 +23,11 @@ import datetime
 from decimal import Decimal
 from HTMLParser import HTMLParseError

-def MysterBinScrape(comsearch, comyear):
-        #comyear is publication year of comic - should result in fewer results, which means better (hopefully)
-        searchterms = str(comsearch) + "+" + str(comyear)
-        # subsetting the results by cbr/cbz will allow for better control.
-        # min/max size should be set or else *.part01's and group collections will be parsed
-        #  and will result in errors all over & no hits.
-        # min is set low enough to filter out cover-only releases and the like
-        # max is set high enough to inlude everything but collections/groups of cbr/cbz which confuse us.
-        # minsize = 9mb  maxsize = 75mb  (for now)
-	input = 'http://www.mysterbin.com/advsearch?q=' + str(searchterms) + '&match=normal&minSize=9&maxSize=75&group=alt.binaries.comics.dcp&maxAge=1269&complete=2'
-	response = urllib2.urlopen ( input )
-	try:
-            soup = BeautifulSoup ( response )
-        except HTMLParseError:
-            logger.info(u"Unable to decipher using Experimental Search. Parser problem.")            
-            return "no results"
-	cnt = len(soup.findAll("input", {"class" : "check4nzb"}))
-        logger.info(u"I found " + str(cnt) + " results doing my search...now I'm going to analyze the results.")
-
-        if cnt == 0: return "no results"
-	resultName = []
-	resultComic = []
-	n = 0
-        mres = {}
-        entries = []
-	while ( n < cnt ):
-	    resultp = soup.findAll("input", {"class" : "check4nzb"})[n]
-	    nzblink = str("http://www.mysterbin.com/nzb?c=" + resultp['value'])
-	    #print ( "nzb-link: " + str(nzblink) )
-
-	    subtxt3 = soup.findAll("div", {"class" : "divc"})[n]
-	    subres = subtxt3.find("span", {"style" : ""})
-	    blah = subres.find('a').contents[2]
-	    blah = re.sub("</?[^\W].{0,10}?>", "", str(blah))
-            #print ("Blah:" + str(blah))
-	    nook=3
-	    totlink = str(blah)
-	    while ('"' not in blah):               
-	        blah = subres.find('a').contents[nook]
-	        if '"</a>' in blah:
-                    findyenc = blah.find('"')
-                    blah = blah[findyenc:]
-                    #break
-                #print ("Blah:" + str(blah))
-	        goo = re.sub("</?[^\W].{0,10}?>", "", str(blah))
-	        #print ("goo:" + str(goo))
-    	    	totlink = totlink + str(goo)
-    	    	#print (nook, blah)
-   	   	nook+=1
-
-            #print ("exit mainloop")
-            #print (str(nzblink))
-            #print (str(totlink))
-            entries.append({
-                'title':   str(totlink),
-                'link':    str(nzblink)
-                })
-            #print (entries[n])
-            mres['entries'] = entries
-    	    n+=1
-    	#print ("FINAL: " + str(totlink))
-        return mres    
-
 def GCDScraper(ComicName, ComicYear, Total, ComicID):
    NOWyr = datetime.date.today().year
+    if datetime.date.today().month == 12:
+        NOWyr = NOWyr + 1
+        logger.fdebug("We're in December, incremented search Year to increase search results: " + str(NOWyr))
    comicnm = ComicName
    comicyr = ComicYear
    comicis = Total
--- a/mylar/search.py
+++ b/mylar/search.py
@ -156,6 +156,8 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, IssueDate, IssueI

            nzbpr-=1

+        if nzbpr >= 0 and findit != 'yes':
+            logger.info(u"More than one search provider given - trying next one.")
        # ----
        if findit == 'yes': return findit
    return findit
@ -323,19 +325,25 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, Is
                    
                    splitit = []   
                    watchcomic_split = []
-                    comic_iss_b4 = re.sub('[\-\:\,]', '', str(comic_andiss))
-                    logger.fdebug("original nzb comic and issue: " + str(comic_iss_b4))
-                    #log2file = log2file + "o.g.comic: " + str(comic_iss_b4) + "\n"
+                    logger.fdebug("original nzb comic and issue: " + str(comic_andiss)) 
+                    #changed this from '' to ' '
+                    comic_iss_b4 = re.sub('[\-\:\,]', ' ', str(comic_andiss))
+
                    comic_iss = comic_iss_b4.replace('.',' ')
                    logger.fdebug("adjusted nzb comic and issue: " + str(comic_iss))
                    splitit = comic_iss.split(None)
                    #something happened to dognzb searches or results...added a '.' in place of spaces
                    #screwed up most search results with dognzb. Let's try to adjust.
-                    watchcomic_split = findcomic[findloop].split(None)
+                    #watchcomic_split = findcomic[findloop].split(None)
                    logger.fdebug("adjusting from: " + str(comic_iss_b4) + " to: " + str(comic_iss))
                    bmm = re.findall('v\d', comic_iss)
                    if len(bmm) > 0: splitst = len(splitit) - 2
                    else: splitst = len(splitit) - 1
+                    # make sure that things like - in watchcomic are accounted for when comparing to nzb.
+                    watchcomic_split = re.sub('[\-\:\,]', ' ', findcomic[findloop]).split(None)
+
+                    logger.fdebug(str(splitit) + " nzb series word count: " + str(splitst))
+                    logger.fdebug(str(watchcomic_split) + " watchlist word count: " + str(len(watchcomic_split)))
                    if (splitst) != len(watchcomic_split):
                        logger.fdebug("incorrect comic lengths...not a match")
                        if str(splitit[0]).lower() == "the":
@ -490,30 +498,43 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, Is

                                    logger.info(u"Sucessfully retrieved nzb file using " + str(nzbprov))
                                    nzbname = str(filenzb)
-                                    logger.fdebug("nzbname used for post-processing:" + str(nzbname))

-                                    # let's build the send-to-SAB string now:
-                                    tmpapi = str(mylar.SAB_HOST) + "/api?mode=addlocalfile&name="
-                                    logger.fdebug("send-to-SAB host string: " + str(tmpapi))
-                                    # if the savefile location has spaces in the path, could cause problems.
-                                    # let's adjust.
-                                    savefileURL = re.sub(" ","%20", str(savefile))
-                                    tmpapi = tmpapi + str(savefileURL)
-                                    logger.fdebug("...attaching nzbfile: " + str(tmpapi))
-                                    # if category is blank, let's adjust
-                                    if mylar.SAB_CATEGORY:
-                                        tmpapi = tmpapi + "&cat=" + str(mylar.SAB_CATEGORY)
-                                        logger.fdebug("...attaching category: " + str(tmpapi))
-                                    if mylar.RENAME_FILES == 1:
-                                        tmpapi = tmpapi + "&script=ComicRN.py"
-                                        logger.fdebug("...attaching rename script: " + str(tmpapi))
-                                    #final build of send-to-SAB    
-                                    tmpapi = tmpapi + "&apikey=" + str(mylar.SAB_APIKEY)
-                                    tmpapi = str(mylar.SAB_HOST) + "/api?mode=addlocalfile&name=" + str(savefile) + "&pp=3&cat=" + str(mylar.SAB_CATEGORY) + "&script=ComicRN.py&apikey=" + str(mylar.SAB_APIKEY)
                                elif nzbprov == 'nzb.su':
                                    logger.fdebug("NZB.SU - linkapi:" + str(linkapi))
                                    nzbname = re.sub(" ", "_", str(entry['title']))

+                                logger.fdebug("nzbname used for post-processing:" + str(nzbname))
+
+                                # let's build the send-to-SAB string now:
+                                tmpapi = str(mylar.SAB_HOST)
+                                logger.fdebug("send-to-SAB host string: " + str(tmpapi))
+                                # nzb.su only works with direct links for some reason...
+                                if nzbprov == 'nzb.su':
+                                    SABtype = "/api?mode=addurl&name="
+                                    savefileURL = str(linkapi)
+                                else:
+                                    SABtype = "/api?mode=addlocalfile&name="
+                                    # if the savefile location has spaces in the path, could cause problems.
+                                    # let's adjust.
+                                    savefileURL = re.sub(" ","%20", str(savefile))
+                                tmpapi = tmpapi + str(SABtype)
+                                logger.fdebug("...selecting API type: " + str(tmpapi))
+                                tmpapi = tmpapi + str(savefileURL)
+                                logger.fdebug("...attaching nzbfile: " + str(tmpapi))
+                                # determine SAB priority
+                                if mylar.SAB_PRIORITY:
+                                    tmpapi = tmpapi + "&priority=" + str(sabpriority)
+                                    logger.fdebug("...setting priority: " + str(tmpapi))
+                                # if category is blank, let's adjust
+                                if mylar.SAB_CATEGORY:
+                                    tmpapi = tmpapi + "&cat=" + str(mylar.SAB_CATEGORY)
+                                    logger.fdebug("...attaching category: " + str(tmpapi))
+                                if mylar.RENAME_FILES == 1:
+                                    tmpapi = tmpapi + "&script=ComicRN.py"
+                                    logger.fdebug("...attaching rename script: " + str(tmpapi))
+                                #final build of send-to-SAB    
+                                tmpapi = tmpapi + "&apikey=" + str(mylar.SAB_APIKEY)
+
                                logger.fdebug("Completed send-to-SAB link: " + str(tmpapi))

                                try:
@ -544,9 +565,6 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, Is
            updater.nzblog(IssueID, nzbname)
            nzbpr == 0
            continue
-        elif foundc == "no" and nzbpr <> 0:
-            if IssDateFix == "no":
-                logger.info(u"More than one search provider given - trying next one.")
        elif foundc == "no" and nzbpr == 0:
            foundcomic.append("no")
            logger.fdebug("couldn't find a matching comic")