mylar/mylar/findcomicfeed.py

#!/usr/bin/env python
import os
import sys
import time
import feedparser
import re
import logger
import mylar
import unicodedata
import urllib


def Startit(searchName, searchIssue, searchYear, ComicVersion, IssDateFix):
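    # Queries the nzbindex.com RSS search (the 'experimental' provider) for the
    # given series name / issue number / year and returns a dict of matching
    # entries, or the string "no results" if nothing usable came back.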
    cName = searchName

    #clean up searchName due to webparse/redundant naming that would return overly specific results.
    commons = ['and', 'the', '&', '-']
    for x in commons:
        cnt = 0
        for m in re.finditer(x, searchName.lower()):
            cnt += 1
            tehstart = m.start()
            tehend = m.end()
            if any([x == 'the', x == 'and']):
                if len(searchName) == tehend:
                    tehend = -1
                if all([tehstart == 0, searchName[tehend] == ' ']) or all([tehstart != 0, searchName[tehstart-1] == ' ', searchName[tehend] == ' ']):
                    searchName = searchName.replace(x, ' ', cnt)
                else:
                    continue
            else:
                searchName = searchName.replace(x, ' ', cnt)
    searchName = re.sub(r'\s+', ' ', searchName)
    searchName = re.sub(r'[,:]', '', searchName).strip()
    #logger.fdebug("searchname: %s" % searchName)
    #logger.fdebug("issue: %s" % searchIssue)
    #logger.fdebug("year: %s" % searchYear)

    # quote_plus already converts spaces to '+', so this split leaves the
    # encoded name as a single element; joinSearch below re-joins on '+'.
    encodeSearch = urllib.quote_plus(searchName)
    splitSearch = encodeSearch.split(" ")
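    # Single- and double-digit issue numbers are also searched zero-padded
    # (e.g. issue '1' is tried as '1', '01' and '001'), so work out how many
    # passes the search loop below needs to make.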
    if len(searchIssue) == 1:
        loop = 3
    elif len(searchIssue) == 2:
        loop = 2
    else:
        loop = 1

    if "-" in searchName:
        searchName = searchName.replace("-", '((\\s)?[-:])?(\\s)?')
    regexName = searchName.replace(" ", '((\\s)?[-:])?(\\s)?')
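    # Spaces (and any remaining hyphens) in the series name are swapped for a
    # pattern that tolerates optional space/hyphen/colon separators, so posting
    # titles like 'Batman-Superman', 'Batman: Superman' and 'Batman Superman'
    # can all match the same series.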
    if mylar.CONFIG.USE_MINSIZE is True:
        minsize = str(mylar.CONFIG.MINSIZE)
    else:
        minsize = '10'
    size_constraints = "&minsize=" + minsize

    if mylar.CONFIG.USE_MAXSIZE is True:
        maxsize = str(mylar.CONFIG.MAXSIZE)
    else:
        maxsize = '0'
    size_constraints += "&maxsize=" + maxsize

    if mylar.CONFIG.USENET_RETENTION is not None:
        max_age = "&maxage=" + str(mylar.CONFIG.USENET_RETENTION)
    else:
        max_age = "&maxage=0"
    feeds = []
    i = 1
    while i <= loop:
        if i == 1:
            searchmethod = searchIssue
        elif i == 2:
            searchmethod = '0' + searchIssue
        elif i == 3:
            searchmethod = '00' + searchIssue
        else:
            break

        joinSearch = "+".join(splitSearch) + "+" + searchmethod
        logger.fdebug('Now searching experimental for issue number: %s to try and ensure all the bases are covered' % searchmethod)

        if mylar.CONFIG.PREFERRED_QUALITY == 1:
            joinSearch = joinSearch + " .cbr"
        elif mylar.CONFIG.PREFERRED_QUALITY == 2:
            joinSearch = joinSearch + " .cbz"

        feeds.append(feedparser.parse("http://beta.nzbindex.com/search/rss?q=%s&max=50&minage=0%s&hidespam=1&hidepassword=1&sort=agedesc%s&complete=0&hidecross=0&hasNFO=0&poster=&g[]=85" % (joinSearch, max_age, size_constraints)))
        time.sleep(5)

        if mylar.CONFIG.ALTEXPERIMENTAL:
            feeds.append(feedparser.parse("http://beta.nzbindex.com/search/rss?q=%s&max=50&minage=0%s&hidespam=1&hidepassword=1&sort=agedesc%s&complete=0&hidecross=0&hasNFO=0&poster=&g[]=86" % (joinSearch, max_age, size_constraints)))
            time.sleep(5)

        i += 1
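    # Flatten every feed entry down to the four fields the matching loop needs:
    # title, NZB link (from the enclosure), file length and publication date.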
    entries = []
    mres = {}
    tallycount = 0

    for feed in feeds:
        totNum = len(feed.entries)
        tallycount += len(feed.entries)

        #keyPair = {}
        keyPair = []
        regList = []
        countUp = 0

        while countUp < totNum:
            urlParse = feed.entries[countUp].enclosures[0]
            #keyPair[feed.entries[countUp].title] = feed.entries[countUp].link
            #keyPair[feed.entries[countUp].title] = urlParse["href"]
            keyPair.append({"title": feed.entries[countUp].title,
                            "link": urlParse["href"],
                            "length": urlParse["length"],
                            "pubdate": feed.entries[countUp].updated})
            countUp = countUp + 1
        # thanks to SpammyHagar for spending the time in compiling these regEx's!
        regExTest = ""
        regEx = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, searchYear)
        regExOne = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, searchYear)

        #Sometimes comics aren't actually published the same year ComicVine says - trying to adjust for these cases
        regExTwo = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) + 1)
        regExThree = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) - 1)
        regExFour = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) + 1)
        regExFive = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" % (regexName, searchIssue, int(searchYear) - 1)
        regexList = [regEx, regExOne, regExTwo, regExThree, regExFour, regExFive]

        except_list = ['releases', 'gold line', 'distribution', '0-day', '0 day', '0day', 'o-day']
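        # Walk each quoted segment of every posting title; a segment is only
        # considered if it is at least as long as the series name, contains a
        # digit, and doesn't look like a spam/0-day pack posting.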
        for entry in keyPair:
            title = entry['title']
            #logger.fdebug("titlesplit: " + str(title.split("\"")))
            splitTitle = title.split("\"")
            noYear = 'False'
            _digits = re.compile(r'\d')

            for subs in splitTitle:
                #logger.fdebug('sub:' + subs)
                regExCount = 0
                if len(subs) >= len(cName) and not any(d in subs.lower() for d in except_list) and bool(_digits.search(subs)) is True:
                    #Looping through dictionary to run each regEx - length + regex is determined by regexList up top.
                    # while regExCount < len(regexList):
                    #     regExTest = re.findall(regexList[regExCount], subs, flags=re.IGNORECASE)
                    #     regExCount = regExCount + 1
                    # if regExTest:
                    #     logger.fdebug(title)
                    #     entries.append({
                    #         'title': subs,
                    #         'link': str(link)
                    #     })

                    # this will still match on crap like 'For SomeSomayes' especially if the series length < 'For SomeSomayes'
                    if subs.lower().startswith('for'):
                        if cName.lower().startswith('for'):
                            pass
                        else:
                            #this is the crap we ignore. Continue (commented else, as it spams the logs)
                            #logger.fdebug('this starts with FOR : ' + str(subs) + '. This is not present in the series - ignoring.')
                            continue
                    #logger.fdebug('match.')

                    if IssDateFix != "no":
                        if IssDateFix == "01" or IssDateFix == "02":
                            ComicYearFix = str(int(searchYear) - 1)
                        else:
                            ComicYearFix = str(int(searchYear) + 1)
                    else:
                        ComicYearFix = searchYear

                    if searchYear not in subs and ComicYearFix not in subs:
                        noYear = 'True'
                        noYearline = subs
                    if (searchYear in subs or ComicYearFix in subs) and noYear == 'True':
                        #this would occur on the next check in the line, if year exists and
                        #the noYear check in the first check came back valid append it
                        subs = noYearline + ' (' + searchYear + ')'
                        noYear = 'False'

                    if noYear == 'False':
                        entries.append({
                            'title': subs,
                            'link': entry['link'],
                            'pubdate': entry['pubdate'],
                            'length': entry['length']
                        })
                        break  # break out so we don't write more shit.
    # if len(entries) >= 1:
    if tallycount >= 1:
        mres['entries'] = entries
        return mres
    else:
        logger.fdebug("No Results Found")
        return "no results"