#!/usr/bin/env python

import os
import sys
import time
import feedparser
import re
import logger
import mylar
import unicodedata
import urllib

# Matches the filler words 'and' / 'the' only when they stand alone: at the
# very start of the name or preceded by a space, and always followed by a
# space.  A trailing filler word, or one embedded in another word
# ('Sandman', 'Panther'), is left untouched.
_FILLER_WORDS = re.compile(r'(?:^|(?<= ))(?:and|the)(?= )', re.IGNORECASE)

# A usable release title must contain at least one digit.
_HAS_DIGIT = re.compile(r'\d')

# Title fragments containing any of these markers are ignored.
_EXCLUDED_MARKERS = ('releases', 'gold line', 'distribution', '0-day', '0 day', '0day', 'o-day')

# RSS search path appended to mylar.EXPURL.  Placeholders, in order:
# query string, max-age parameter, size parameters, value of g[].
_SEARCH_PATH = ("search/rss?q=%s&max=50&minage=0%s&hidespam=1&hidepassword=1"
                "&sort=agedesc%s&complete=0&hidecross=0&hasNFO=0&poster=&g[]=%s")


def _clean_search_name(name):
    """Return *name* stripped of filler words and punctuation for querying.

    Redundant naming makes the search too specific, so standalone
    'and' / 'the' (any case), every '&' and '-', and all commas and colons
    are removed.  Runs of whitespace are collapsed before the commas and
    colons are dropped, then the result is stripped.
    """
    name = _FILLER_WORDS.sub(' ', name)
    name = name.replace('&', ' ').replace('-', ' ')
    name = re.sub(r'\s+', ' ', name)
    return re.sub(r'[,:]', '', name).strip()


def _issue_terms(searchIssue, booktype):
    """Return the issue strings to query, in order.

    A ``None`` element means "query the series name on its own".
    One-shots and TPBs are searched as issue 1 / 01 / 001 and then by name
    only.  Otherwise the issue number is tried as given and zero-padded up
    to three characters (1-character issues get three variants, 2-character
    issues two, anything else one).
    """
    if booktype in ('One-Shot', 'TPB'):
        return ['1', '01', '001', None]
    variants = {1: 3, 2: 2}.get(len(searchIssue), 1)
    return [('0' * pad) + searchIssue for pad in range(variants)]


def _match_title(title, cName, searchYear, alt_year):
    """Return the fragment of *title* accepted as a result, or ``None``.

    *title* is split on double quotes and the fragments are examined in
    order.  A fragment is a candidate when it is at least as long as
    *cName*, contains a digit, contains none of the excluded markers and
    does not start with 'for' (unless the series name itself does).

    The first candidate that contains *searchYear* or *alt_year* wins.  If
    an earlier candidate lacked the year, that earlier fragment is returned
    instead with ' (<searchYear>)' appended.
    """
    series_starts_with_for = cName.lower().startswith('for')
    yearless = None  # most recent candidate fragment that carried no year

    for part in title.split('"'):
        if len(part) < len(cName):
            continue
        lowered = part.lower()
        if any(marker in lowered for marker in _EXCLUDED_MARKERS):
            continue
        if not _HAS_DIGIT.search(part):
            continue
        # Skips noise such as 'For SomeSomayes' when the series is not
        # itself named 'For ...'.
        if lowered.startswith('for') and not series_starts_with_for:
            continue

        if searchYear not in part and alt_year not in part:
            # Remember it; a later fragment of the same title may supply
            # the year.
            yearless = part
            continue

        if yearless is not None:
            return yearless + ' (' + searchYear + ')'
        return part

    return None


def Startit(searchName, searchIssue, searchYear, ComicVersion, IssDateFix, booktype=None):
    """Query the experimental RSS search and return loosely matching entries.

    Parameters:
        searchName:   series name; also used unmodified for the title
                      length / 'for' prefix checks and in log messages.
        searchIssue:  issue number as a string (not read when booktype is
                      'One-Shot' or 'TPB').
        searchYear:   publication year as a string.
        ComicVersion: accepted for interface compatibility; not used here.
        IssDateFix:   "no" to accept only searchYear; "01" or "02" to also
                      accept the previous year; any other value to also
                      accept the following year.
        booktype:     'One-Shot' / 'TPB' switch to the issue-1 plus
                      name-only search sequence.

    Returns:
        ``{'entries': [...]}`` when the feeds returned at least one item
        (the list may be empty if nothing passed the title filter), where
        each entry has 'title', 'link', 'pubdate' and 'length' keys;
        otherwise the string ``"no results"``.

    Raises:
        ValueError: if IssDateFix is not "no" and searchYear is not numeric.

    NOTE: every request is followed by a 5 second sleep, so a call blocks
    for at least 5 seconds per query issued.
    """
    cName = searchName

    # NOTE(review): urllib.quote_plus is the Python 2 spelling, matching
    # the rest of this module.
    query_name = urllib.quote_plus(_clean_search_name(searchName))

    # Releases are not always dated with the expected year, so one
    # neighbouring year may be accepted as well.
    if IssDateFix != "no":
        if IssDateFix in ("01", "02"):
            alt_year = str(int(searchYear) - 1)
        else:
            alt_year = str(int(searchYear) + 1)
    else:
        alt_year = searchYear

    if mylar.CONFIG.USE_MINSIZE is True:
        minsize = str(mylar.CONFIG.MINSIZE)
    else:
        minsize = '10'
    if mylar.CONFIG.USE_MAXSIZE is True:
        maxsize = str(mylar.CONFIG.MAXSIZE)
    else:
        maxsize = '0'
    size_constraints = "&minsize=" + minsize + "&maxsize=" + maxsize

    if mylar.CONFIG.USENET_RETENTION is not None:
        max_age = "&maxage=" + str(mylar.CONFIG.USENET_RETENTION)
    else:
        max_age = "&maxage=0"

    # The quality hint is joined with '+' (an encoded space) so the
    # request URL never contains a raw space.
    if mylar.CONFIG.PREFERRED_QUALITY == 1:
        quality_hint = "+.cbr"
    elif mylar.CONFIG.PREFERRED_QUALITY == 2:
        quality_hint = "+.cbz"
    else:
        quality_hint = ""

    feeds = []
    for issue_term in _issue_terms(searchIssue, booktype):
        if issue_term is None:
            logger.fdebug('Now searching experimental for %s to try and ensure all the bases are covered' % cName)
            query = query_name
        else:
            logger.fdebug('Now searching experimental for issue number: %s to try and ensure all the bases are covered' % issue_term)
            query = query_name + "+" + issue_term
        query += quality_hint

        group_ids = ['85']
        if mylar.CONFIG.ALTEXPERIMENTAL:
            group_ids.append('86')

        for group_id in group_ids:
            feeds.append(feedparser.parse(mylar.EXPURL + _SEARCH_PATH % (query, max_age, size_constraints, group_id)))
            time.sleep(5)

    # NOTE: titles are only filtered heuristically here (see _match_title);
    # no issue-number / year regular expression is applied to them.
    entries = []
    tallycount = 0
    for feed in feeds:
        tallycount += len(feed.entries)
        for item in feed.entries:
            try:
                enclosure = item.enclosures[0]
                title = item.title
                link = enclosure["href"]
                length = enclosure["length"]
                pubdate = item.updated
            except (AttributeError, IndexError, KeyError):
                # An item without a download enclosure, size or date cannot
                # be used; skip it rather than abort the whole search.
                logger.fdebug('Skipping an experimental search result that has no usable enclosure/date information')
                continue

            matched = _match_title(title, cName, searchYear, alt_year)
            if matched is not None:
                entries.append({
                    'title': matched,
                    'link': link,
                    'pubdate': pubdate,
                    'length': length
                })

    # Deliberately keyed on the raw item count rather than on matches.
    if tallycount >= 1:
        return {'entries': entries}

    logger.fdebug("No Results Found")
    return "no results"