mirror of
https://github.com/evilhero/mylar
synced 2025-01-03 05:24:43 +00:00
210 lines
8.5 KiB
Python
Executable file
210 lines
8.5 KiB
Python
Executable file
#!/usr/bin/env python
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import feedparser
|
|
import re
|
|
import logger
|
|
import mylar
|
|
import unicodedata
|
|
import urllib
|
|
|
|
def Startit(searchName, searchIssue, searchYear, ComicVersion, IssDateFix, booktype=None):
|
|
cName = searchName
|
|
|
|
#clean up searchName due to webparse/redudant naming that would return too specific of results.
|
|
commons = ['and', 'the', '&', '-']
|
|
for x in commons:
|
|
cnt = 0
|
|
for m in re.finditer(x, searchName.lower()):
|
|
cnt +=1
|
|
tehstart = m.start()
|
|
tehend = m.end()
|
|
if any([x == 'the', x == 'and']):
|
|
if len(searchName) == tehend:
|
|
tehend =-1
|
|
if all([tehstart == 0, searchName[tehend] == ' ']) or all([tehstart != 0, searchName[tehstart-1] == ' ', searchName[tehend] == ' ']):
|
|
searchName = searchName.replace(x, ' ', cnt)
|
|
else:
|
|
continue
|
|
else:
|
|
searchName = searchName.replace(x, ' ', cnt)
|
|
|
|
searchName = re.sub('\s+', ' ', searchName)
|
|
searchName = re.sub("[\,\:]", "", searchName).strip()
|
|
#logger.fdebug("searchname: %s" % searchName)
|
|
#logger.fdebug("issue: %s" % searchIssue)
|
|
#logger.fdebug("year: %s" % searchYear)
|
|
encodeSearch = urllib.quote_plus(searchName)
|
|
splitSearch = encodeSearch.split(" ")
|
|
|
|
tmpsearchIssue = searchIssue
|
|
|
|
if any([booktype == 'One-Shot', booktype == 'TPB']):
|
|
tmpsearchIssue = '1'
|
|
loop = 4
|
|
elif len(searchIssue) == 1:
|
|
loop = 3
|
|
elif len(searchIssue) == 2:
|
|
loop = 2
|
|
else:
|
|
loop = 1
|
|
|
|
if "-" in searchName:
|
|
searchName = searchName.replace("-", '((\\s)?[-:])?(\\s)?')
|
|
regexName = searchName.replace(" ", '((\\s)?[-:])?(\\s)?')
|
|
|
|
if mylar.CONFIG.USE_MINSIZE is True:
|
|
minsize = str(mylar.CONFIG.MINSIZE)
|
|
else:
|
|
minsize = '10'
|
|
size_constraints = "&minsize=" + minsize
|
|
|
|
if mylar.CONFIG.USE_MAXSIZE is True:
|
|
maxsize = str(mylar.CONFIG.MAXSIZE)
|
|
else:
|
|
maxsize = '0'
|
|
size_constraints += "&maxsize=" + maxsize
|
|
|
|
if mylar.CONFIG.USENET_RETENTION is not None:
|
|
max_age = "&maxage=" + str(mylar.CONFIG.USENET_RETENTION)
|
|
else:
|
|
max_age = "&maxage=0"
|
|
|
|
feeds = []
|
|
i = 1
|
|
while (i <= loop):
|
|
if i == 1:
|
|
searchmethod = tmpsearchIssue
|
|
elif i == 2:
|
|
searchmethod = '0' + tmpsearchIssue
|
|
elif i == 3:
|
|
searchmethod = '00' + tmpsearchIssue
|
|
elif i == 4:
|
|
searchmethod = tmpsearchIssue
|
|
else:
|
|
break
|
|
|
|
if i == 4:
|
|
logger.fdebug('Now searching experimental for %s to try and ensure all the bases are covered' % cName)
|
|
joinSearch = "+".join(splitSearch)
|
|
else:
|
|
logger.fdebug('Now searching experimental for issue number: %s to try and ensure all the bases are covered' % searchmethod)
|
|
joinSearch = "+".join(splitSearch) + "+" +searchmethod
|
|
|
|
|
|
|
|
if mylar.CONFIG.PREFERRED_QUALITY == 1: joinSearch = joinSearch + " .cbr"
|
|
elif mylar.CONFIG.PREFERRED_QUALITY == 2: joinSearch = joinSearch + " .cbz"
|
|
|
|
feeds.append(feedparser.parse(mylar.EXPURL + "search/rss?q=%s&max=50&minage=0%s&hidespam=1&hidepassword=1&sort=agedesc%s&complete=0&hidecross=0&hasNFO=0&poster=&g[]=85" % (joinSearch, max_age, size_constraints)))
|
|
time.sleep(5)
|
|
if mylar.CONFIG.ALTEXPERIMENTAL:
|
|
feeds.append(feedparser.parse(mylar.EXPURL + "search/rss?q=%s&max=50&minage=0%s&hidespam=1&hidepassword=1&sort=agedesc%s&complete=0&hidecross=0&hasNFO=0&poster=&g[]=86" % (joinSearch, max_age, size_constraints)))
|
|
time.sleep(5)
|
|
i+=1
|
|
|
|
entries = []
|
|
mres = {}
|
|
tallycount = 0
|
|
|
|
for feed in feeds:
|
|
totNum = len(feed.entries)
|
|
tallycount += len(feed.entries)
|
|
|
|
#keyPair = {}
|
|
keyPair = []
|
|
regList = []
|
|
countUp = 0
|
|
|
|
while countUp < totNum:
|
|
urlParse = feed.entries[countUp].enclosures[0]
|
|
#keyPair[feed.entries[countUp].title] = feed.entries[countUp].link
|
|
#keyPair[feed.entries[countUp].title] = urlParse["href"]
|
|
keyPair.append({"title": feed.entries[countUp].title,
|
|
"link": urlParse["href"],
|
|
"length": urlParse["length"],
|
|
"pubdate": feed.entries[countUp].updated})
|
|
countUp=countUp +1
|
|
|
|
# thanks to SpammyHagar for spending the time in compiling these regEx's!
|
|
|
|
regExTest=""
|
|
|
|
regEx = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" %(regexName, searchIssue, searchYear)
|
|
regExOne = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" %(regexName, searchIssue, searchYear)
|
|
|
|
#Sometimes comics aren't actually published the same year comicVine says - trying to adjust for these cases
|
|
regExTwo = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" %(regexName, searchIssue, int(searchYear) +1)
|
|
regExThree = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" %(regexName, searchIssue, int(searchYear) -1)
|
|
regExFour = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" %(regexName, searchIssue, int(searchYear) +1)
|
|
regExFive = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" %(regexName, searchIssue, int(searchYear) -1)
|
|
|
|
regexList=[regEx, regExOne, regExTwo, regExThree, regExFour, regExFive]
|
|
|
|
except_list=['releases', 'gold line', 'distribution', '0-day', '0 day', '0day', 'o-day']
|
|
|
|
for entry in keyPair:
|
|
title = entry['title']
|
|
#logger.fdebug("titlesplit: " + str(title.split("\"")))
|
|
splitTitle = title.split("\"")
|
|
noYear = 'False'
|
|
_digits = re.compile('\d')
|
|
|
|
for subs in splitTitle:
|
|
#logger.fdebug('sub:' + subs)
|
|
regExCount = 0
|
|
if len(subs) >= len(cName) and not any(d in subs.lower() for d in except_list) and bool(_digits.search(subs)) is True:
|
|
#Looping through dictionary to run each regEx - length + regex is determined by regexList up top.
|
|
# while regExCount < len(regexList):
|
|
# regExTest = re.findall(regexList[regExCount], subs, flags=re.IGNORECASE)
|
|
# regExCount = regExCount +1
|
|
# if regExTest:
|
|
# logger.fdebug(title)
|
|
# entries.append({
|
|
# 'title': subs,
|
|
# 'link': str(link)
|
|
# })
|
|
# this will still match on crap like 'For SomeSomayes' especially if the series length < 'For SomeSomayes'
|
|
if subs.lower().startswith('for'):
|
|
if cName.lower().startswith('for'):
|
|
pass
|
|
else:
|
|
#this is the crap we ignore. Continue (commented else, as it spams the logs)
|
|
#logger.fdebug('this starts with FOR : ' + str(subs) + '. This is not present in the series - ignoring.')
|
|
continue
|
|
#logger.fdebug('match.')
|
|
if IssDateFix != "no":
|
|
if IssDateFix == "01" or IssDateFix == "02": ComicYearFix = str(int(searchYear) - 1)
|
|
else: ComicYearFix = str(int(searchYear) + 1)
|
|
else:
|
|
ComicYearFix = searchYear
|
|
|
|
if searchYear not in subs and ComicYearFix not in subs:
|
|
noYear = 'True'
|
|
noYearline = subs
|
|
|
|
if (searchYear in subs or ComicYearFix in subs) and noYear == 'True':
|
|
#this would occur on the next check in the line, if year exists and
|
|
#the noYear check in the first check came back valid append it
|
|
subs = noYearline + ' (' + searchYear + ')'
|
|
noYear = 'False'
|
|
|
|
if noYear == 'False':
|
|
|
|
entries.append({
|
|
'title': subs,
|
|
'link': entry['link'],
|
|
'pubdate': entry['pubdate'],
|
|
'length': entry['length']
|
|
})
|
|
break # break out so we don't write more shit.
|
|
|
|
# if len(entries) >= 1:
|
|
if tallycount >= 1:
|
|
mres['entries'] = entries
|
|
return mres
|
|
else:
|
|
logger.fdebug("No Results Found")
|
|
return "no results"
|