Fix: nzb.su should now be working (sorry), Fix: accounted for some problems with searches not being found when titles contained '-' or ':', Fix: SAB priority working again, Fix: adding comics starting in 2013 wouldn't work sometimes

2012-12-23 13:16:25 -05:00 · 2012-12-23 13:16:25 -05:00 · bd49656c5c
parent e5f8170094
commit bd49656c5c
2 changed files with 47 additions and 89 deletions
--- a/mylar/parseit.py
+++ b/mylar/parseit.py
@ -23,71 +23,11 @@ import datetime
 from decimal import Decimal
 from HTMLParser import HTMLParseError
 def MysterBinScrape(comsearch, comyear):
        #comyear is publication year of comic - should result in fewer results, which means better (hopefully)
        searchterms = str(comsearch) + "+" + str(comyear)
        # subsetting the results by cbr/cbz will allow for better control.
        # min/max size should be set or else *.part01's and group collections will be parsed
        #  and will result in errors all over & no hits.
        # min is set low enough to filter out cover-only releases and the like
        # max is set high enough to inlude everything but collections/groups of cbr/cbz which confuse us.
        # minsize = 9mb  maxsize = 75mb  (for now)
 	input = 'http://www.mysterbin.com/advsearch?q=' + str(searchterms) + '&match=normal&minSize=9&maxSize=75&group=alt.binaries.comics.dcp&maxAge=1269&complete=2'
 	response = urllib2.urlopen ( input )
 	try:
            soup = BeautifulSoup ( response )
        except HTMLParseError:
            logger.info(u"Unable to decipher using Experimental Search. Parser problem.")            
            return "no results"
 	cnt = len(soup.findAll("input", {"class" : "check4nzb"}))
        logger.info(u"I found " + str(cnt) + " results doing my search...now I'm going to analyze the results.")
        if cnt == 0: return "no results"
 	resultName = []
 	resultComic = []
 	n = 0
        mres = {}
        entries = []
 	while ( n < cnt ):
 	    resultp = soup.findAll("input", {"class" : "check4nzb"})[n]
 	    nzblink = str("http://www.mysterbin.com/nzb?c=" + resultp['value'])
 	    #print ( "nzb-link: " + str(nzblink) )
 	    subtxt3 = soup.findAll("div", {"class" : "divc"})[n]
 	    subres = subtxt3.find("span", {"style" : ""})
 	    blah = subres.find('a').contents[2]
 	    blah = re.sub("</?[^\W].{0,10}?>", "", str(blah))
            #print ("Blah:" + str(blah))
 	    nook=3
 	    totlink = str(blah)
 	    while ('"' not in blah):               
 	        blah = subres.find('a').contents[nook]
 	        if '"</a>' in blah:
                    findyenc = blah.find('"')
                    blah = blah[findyenc:]
                    #break
                #print ("Blah:" + str(blah))
 	        goo = re.sub("</?[^\W].{0,10}?>", "", str(blah))
 	        #print ("goo:" + str(goo))
    	    	totlink = totlink + str(goo)
    	    	#print (nook, blah)
   	   	nook+=1
            #print ("exit mainloop")
            #print (str(nzblink))
            #print (str(totlink))
            entries.append({
                'title':   str(totlink),
                'link':    str(nzblink)
                })
            #print (entries[n])
            mres['entries'] = entries
    	    n+=1
    	#print ("FINAL: " + str(totlink))
        return mres    
 def GCDScraper(ComicName, ComicYear, Total, ComicID):
    NOWyr = datetime.date.today().year
    if datetime.date.today().month == 12:
        NOWyr = NOWyr + 1
        logger.fdebug("We're in December, incremented search Year to increase search results: " + str(NOWyr))
    comicnm = ComicName
    comicyr = ComicYear
    comicis = Total
--- a/mylar/search.py
+++ b/mylar/search.py
@ -156,6 +156,8 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, IssueDate, IssueI
            nzbpr-=1
        if nzbpr >= 0 and findit != 'yes':
            logger.info(u"More than one search provider given - trying next one.")
        # ----
        if findit == 'yes': return findit
    return findit
@ -323,19 +325,25 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, Is
                    splitit = []   
                    watchcomic_split = []
-                    comic_iss_b4 = re.sub('[\-\:\,]', '', str(comic_andiss))
+                    logger.fdebug("original nzb comic and issue: " + str(comic_andiss)) 
-                    logger.fdebug("original nzb comic and issue: " + str(comic_iss_b4))
+                    #changed this from '' to ' '
-                    #log2file = log2file + "o.g.comic: " + str(comic_iss_b4) + "\n"
+                    comic_iss_b4 = re.sub('[\-\:\,]', ' ', str(comic_andiss))
                    comic_iss = comic_iss_b4.replace('.',' ')
                    logger.fdebug("adjusted nzb comic and issue: " + str(comic_iss))
                    splitit = comic_iss.split(None)
                    #something happened to dognzb searches or results...added a '.' in place of spaces
                    #screwed up most search results with dognzb. Let's try to adjust.
-                    watchcomic_split = findcomic[findloop].split(None)
+                    #watchcomic_split = findcomic[findloop].split(None)
                    logger.fdebug("adjusting from: " + str(comic_iss_b4) + " to: " + str(comic_iss))
                    bmm = re.findall('v\d', comic_iss)
                    if len(bmm) > 0: splitst = len(splitit) - 2
                    else: splitst = len(splitit) - 1
                    # make sure that things like - in watchcomic are accounted for when comparing to nzb.
                    watchcomic_split = re.sub('[\-\:\,]', ' ', findcomic[findloop]).split(None)
                    logger.fdebug(str(splitit) + " nzb series word count: " + str(splitst))
                    logger.fdebug(str(watchcomic_split) + " watchlist word count: " + str(len(watchcomic_split)))
                    if (splitst) != len(watchcomic_split):
                        logger.fdebug("incorrect comic lengths...not a match")
                        if str(splitit[0]).lower() == "the":
@ -490,30 +498,43 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, Is
                                    logger.info(u"Sucessfully retrieved nzb file using " + str(nzbprov))
                                    nzbname = str(filenzb)
                                    logger.fdebug("nzbname used for post-processing:" + str(nzbname))
                                    # let's build the send-to-SAB string now:
                                    tmpapi = str(mylar.SAB_HOST) + "/api?mode=addlocalfile&name="
                                    logger.fdebug("send-to-SAB host string: " + str(tmpapi))
                                    # if the savefile location has spaces in the path, could cause problems.
                                    # let's adjust.
                                    savefileURL = re.sub(" ","%20", str(savefile))
                                    tmpapi = tmpapi + str(savefileURL)
                                    logger.fdebug("...attaching nzbfile: " + str(tmpapi))
                                    # if category is blank, let's adjust
                                    if mylar.SAB_CATEGORY:
                                        tmpapi = tmpapi + "&cat=" + str(mylar.SAB_CATEGORY)
                                        logger.fdebug("...attaching category: " + str(tmpapi))
                                    if mylar.RENAME_FILES == 1:
                                        tmpapi = tmpapi + "&script=ComicRN.py"
                                        logger.fdebug("...attaching rename script: " + str(tmpapi))
                                    #final build of send-to-SAB    
                                    tmpapi = tmpapi + "&apikey=" + str(mylar.SAB_APIKEY)
                                    tmpapi = str(mylar.SAB_HOST) + "/api?mode=addlocalfile&name=" + str(savefile) + "&pp=3&cat=" + str(mylar.SAB_CATEGORY) + "&script=ComicRN.py&apikey=" + str(mylar.SAB_APIKEY)
                                elif nzbprov == 'nzb.su':
                                    logger.fdebug("NZB.SU - linkapi:" + str(linkapi))
                                    nzbname = re.sub(" ", "_", str(entry['title']))
                                logger.fdebug("nzbname used for post-processing:" + str(nzbname))
                                # let's build the send-to-SAB string now:
                                tmpapi = str(mylar.SAB_HOST)
                                logger.fdebug("send-to-SAB host string: " + str(tmpapi))
                                # nzb.su only works with direct links for some reason...
                                if nzbprov == 'nzb.su':
                                    SABtype = "/api?mode=addurl&name="
                                    savefileURL = str(linkapi)
                                else:
                                    SABtype = "/api?mode=addlocalfile&name="
                                    # if the savefile location has spaces in the path, could cause problems.
                                    # let's adjust.
                                    savefileURL = re.sub(" ","%20", str(savefile))
                                tmpapi = tmpapi + str(SABtype)
                                logger.fdebug("...selecting API type: " + str(tmpapi))
                                tmpapi = tmpapi + str(savefileURL)
                                logger.fdebug("...attaching nzbfile: " + str(tmpapi))
                                # determine SAB priority
                                if mylar.SAB_PRIORITY:
                                    tmpapi = tmpapi + "&priority=" + str(sabpriority)
                                    logger.fdebug("...setting priority: " + str(tmpapi))
                                # if category is blank, let's adjust
                                if mylar.SAB_CATEGORY:
                                    tmpapi = tmpapi + "&cat=" + str(mylar.SAB_CATEGORY)
                                    logger.fdebug("...attaching category: " + str(tmpapi))
                                if mylar.RENAME_FILES == 1:
                                    tmpapi = tmpapi + "&script=ComicRN.py"
                                    logger.fdebug("...attaching rename script: " + str(tmpapi))
                                #final build of send-to-SAB    
                                tmpapi = tmpapi + "&apikey=" + str(mylar.SAB_APIKEY)
                                logger.fdebug("Completed send-to-SAB link: " + str(tmpapi))
                                try:
@ -544,9 +565,6 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, Is
            updater.nzblog(IssueID, nzbname)
            nzbpr == 0
            continue
        elif foundc == "no" and nzbpr <> 0:
            if IssDateFix == "no":
                logger.info(u"More than one search provider given - trying next one.")
        elif foundc == "no" and nzbpr == 0:
            foundcomic.append("no")
            logger.fdebug("couldn't find a matching comic")