Fix: nzb.su should now be working (sorry). Fix: accounted for some problems with searches not being found when titles contained '-' or ':'. Fix: SAB priority is working again. Fix: adding comics starting in 2013 sometimes wouldn't work.

This commit is contained in:
evilhero 2012-12-23 13:16:25 -05:00
parent e5f8170094
commit bd49656c5c
2 changed files with 47 additions and 89 deletions

View File

@ -23,71 +23,11 @@ import datetime
from decimal import Decimal
from HTMLParser import HTMLParseError
def MysterBinScrape(comsearch, comyear):
    """Scrape mysterbin.com's advanced search for NZBs matching a comic.

    Builds a query from ``comsearch`` and ``comyear``, fetches the result
    page, and pairs each hit's NZB download link with the title text pieced
    together from the listing's anchor fragments.

    Args:
        comsearch: Search terms for the comic. Inserted into the query URL
            as-is; no URL-encoding is applied here.
        comyear: Publication year of the comic, appended to the search terms
            so that fewer (hopefully better) results come back.

    Returns:
        The string "no results" when the page cannot be parsed or contains no
        hits; otherwise a dict ``{'entries': [{'title': ..., 'link': ...}]}``
        with one entry per hit, in page order.
    """
    searchterms = str(comsearch) + "+" + str(comyear)
    # subsetting the results by cbr/cbz will allow for better control.
    # min/max size should be set or else *.part01's and group collections will
    # be parsed and will result in errors all over & no hits.
    # min is set low enough to filter out cover-only releases and the like.
    # max is set high enough to include everything but collections/groups of
    # cbr/cbz which confuse us.
    # minsize = 9mb maxsize = 75mb (for now)
    search_url = 'http://www.mysterbin.com/advsearch?q=' + str(searchterms) + '&match=normal&minSize=9&maxSize=75&group=alt.binaries.comics.dcp&maxAge=1269&complete=2'
    response = urllib2.urlopen(search_url)
    try:
        soup = BeautifulSoup(response)
    except HTMLParseError:
        logger.info(u"Unable to decipher using Experimental Search. Parser problem.")
        return "no results"

    # Query the document once; each hit is identified by its checkbox input.
    checkboxes = soup.findAll("input", {"class": "check4nzb"})
    cnt = len(checkboxes)
    logger.info(u"I found " + str(cnt) + " results doing my search...now I'm going to analyze the results.")
    if cnt == 0:
        return "no results"

    # The n-th "divc" div is read as the title cell of the n-th checkbox.
    title_divs = soup.findAll("div", {"class": "divc"})
    # Strips short markup tags left behind when a fragment is stringified.
    tag_pattern = re.compile(r"</?[^\W].{0,10}?>")

    entries = []
    for n, resultp in enumerate(checkboxes):
        nzblink = str("http://www.mysterbin.com/nzb?c=" + resultp['value'])
        subres = title_divs[n].find("span", {"style": ""})
        pieces = subres.find('a').contents

        # The title starts at the third child of the anchor and continues
        # through following children until one containing a double quote.
        blah = tag_pattern.sub("", str(pieces[2]))
        totlink = str(blah)
        nook = 3
        # Bounded by len(pieces) so a row without a closing quote yields a
        # partial title instead of an IndexError that aborts the whole search.
        while '"' not in blah and nook < len(pieces):
            blah = pieces[nook]
            if '"</a>' in blah:
                # Keep only the text from the quote onward.
                blah = blah[blah.find('"'):]
            totlink = totlink + str(tag_pattern.sub("", str(blah)))
            nook += 1

        entries.append({
            'title': str(totlink),
            'link': str(nzblink)
        })

    return {'entries': entries}
def GCDScraper(ComicName, ComicYear, Total, ComicID):
NOWyr = datetime.date.today().year
if datetime.date.today().month == 12:
NOWyr = NOWyr + 1
logger.fdebug("We're in December, incremented search Year to increase search results: " + str(NOWyr))
comicnm = ComicName
comicyr = ComicYear
comicis = Total

View File

@ -156,6 +156,8 @@ def search_init(ComicName, IssueNumber, ComicYear, SeriesYear, IssueDate, IssueI
nzbpr-=1
if nzbpr >= 0 and findit != 'yes':
logger.info(u"More than one search provider given - trying next one.")
# ----
if findit == 'yes': return findit
return findit
@ -323,19 +325,25 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, Is
splitit = []
watchcomic_split = []
comic_iss_b4 = re.sub('[\-\:\,]', '', str(comic_andiss))
logger.fdebug("original nzb comic and issue: " + str(comic_iss_b4))
#log2file = log2file + "o.g.comic: " + str(comic_iss_b4) + "\n"
logger.fdebug("original nzb comic and issue: " + str(comic_andiss))
#changed this from '' to ' '
comic_iss_b4 = re.sub('[\-\:\,]', ' ', str(comic_andiss))
comic_iss = comic_iss_b4.replace('.',' ')
logger.fdebug("adjusted nzb comic and issue: " + str(comic_iss))
splitit = comic_iss.split(None)
#something happened to dognzb searches or results...added a '.' in place of spaces
#screwed up most search results with dognzb. Let's try to adjust.
watchcomic_split = findcomic[findloop].split(None)
#watchcomic_split = findcomic[findloop].split(None)
logger.fdebug("adjusting from: " + str(comic_iss_b4) + " to: " + str(comic_iss))
bmm = re.findall('v\d', comic_iss)
if len(bmm) > 0: splitst = len(splitit) - 2
else: splitst = len(splitit) - 1
# make sure that things like - in watchcomic are accounted for when comparing to nzb.
watchcomic_split = re.sub('[\-\:\,]', ' ', findcomic[findloop]).split(None)
logger.fdebug(str(splitit) + " nzb series word count: " + str(splitst))
logger.fdebug(str(watchcomic_split) + " watchlist word count: " + str(len(watchcomic_split)))
if (splitst) != len(watchcomic_split):
logger.fdebug("incorrect comic lengths...not a match")
if str(splitit[0]).lower() == "the":
@ -490,30 +498,43 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, Is
logger.info(u"Sucessfully retrieved nzb file using " + str(nzbprov))
nzbname = str(filenzb)
logger.fdebug("nzbname used for post-processing:" + str(nzbname))
# let's build the send-to-SAB string now:
tmpapi = str(mylar.SAB_HOST) + "/api?mode=addlocalfile&name="
logger.fdebug("send-to-SAB host string: " + str(tmpapi))
# if the savefile location has spaces in the path, could cause problems.
# let's adjust.
savefileURL = re.sub(" ","%20", str(savefile))
tmpapi = tmpapi + str(savefileURL)
logger.fdebug("...attaching nzbfile: " + str(tmpapi))
# if category is blank, let's adjust
if mylar.SAB_CATEGORY:
tmpapi = tmpapi + "&cat=" + str(mylar.SAB_CATEGORY)
logger.fdebug("...attaching category: " + str(tmpapi))
if mylar.RENAME_FILES == 1:
tmpapi = tmpapi + "&script=ComicRN.py"
logger.fdebug("...attaching rename script: " + str(tmpapi))
#final build of send-to-SAB
tmpapi = tmpapi + "&apikey=" + str(mylar.SAB_APIKEY)
tmpapi = str(mylar.SAB_HOST) + "/api?mode=addlocalfile&name=" + str(savefile) + "&pp=3&cat=" + str(mylar.SAB_CATEGORY) + "&script=ComicRN.py&apikey=" + str(mylar.SAB_APIKEY)
elif nzbprov == 'nzb.su':
logger.fdebug("NZB.SU - linkapi:" + str(linkapi))
nzbname = re.sub(" ", "_", str(entry['title']))
logger.fdebug("nzbname used for post-processing:" + str(nzbname))
# let's build the send-to-SAB string now:
tmpapi = str(mylar.SAB_HOST)
logger.fdebug("send-to-SAB host string: " + str(tmpapi))
# nzb.su only works with direct links for some reason...
if nzbprov == 'nzb.su':
SABtype = "/api?mode=addurl&name="
savefileURL = str(linkapi)
else:
SABtype = "/api?mode=addlocalfile&name="
# if the savefile location has spaces in the path, could cause problems.
# let's adjust.
savefileURL = re.sub(" ","%20", str(savefile))
tmpapi = tmpapi + str(SABtype)
logger.fdebug("...selecting API type: " + str(tmpapi))
tmpapi = tmpapi + str(savefileURL)
logger.fdebug("...attaching nzbfile: " + str(tmpapi))
# determine SAB priority
if mylar.SAB_PRIORITY:
tmpapi = tmpapi + "&priority=" + str(sabpriority)
logger.fdebug("...setting priority: " + str(tmpapi))
# if category is blank, let's adjust
if mylar.SAB_CATEGORY:
tmpapi = tmpapi + "&cat=" + str(mylar.SAB_CATEGORY)
logger.fdebug("...attaching category: " + str(tmpapi))
if mylar.RENAME_FILES == 1:
tmpapi = tmpapi + "&script=ComicRN.py"
logger.fdebug("...attaching rename script: " + str(tmpapi))
#final build of send-to-SAB
tmpapi = tmpapi + "&apikey=" + str(mylar.SAB_APIKEY)
logger.fdebug("Completed send-to-SAB link: " + str(tmpapi))
try:
@ -544,9 +565,6 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, nzbprov, nzbpr, Is
updater.nzblog(IssueID, nzbname)
nzbpr == 0
continue
elif foundc == "no" and nzbpr <> 0:
if IssDateFix == "no":
logger.info(u"More than one search provider given - trying next one.")
elif foundc == "no" and nzbpr == 0:
foundcomic.append("no")
logger.fdebug("couldn't find a matching comic")