mylar/mylar/findcomicfeed.py

#!/usr/bin/env python

import os
import sys
import lib.feedparser as feedparser
#import feedparser
import re
import logger
import mylar
import unicodedata
import urllib

def Startit(searchName, searchIssue, searchYear, ComicVersion, IssDateFix):
    #searchName = "Uncanny Avengers"
    #searchIssue = "01"
    #searchYear = "2012"
    if searchName.lower().startswith('the '):
        searchName = searchName[4:]
    cName = searchName
    #clean up searchName due to webparse.
    searchName = searchName.replace("%20", " ")
    if "," in searchName:
        searchName = searchName.replace(",", "")
    logger.fdebug("name:" + str(searchName))
    logger.fdebug("issue:" + str(searchIssue))
    logger.fdebug("year:" + str(searchYear))
    encodeSearch = urllib.quote_plus(searchName)
    splitSearch = encodeSearch.split(" ")

    joinSearch = "+".join(splitSearch) +"+" +searchIssue
    searchIsOne = "0" +searchIssue
    searchIsTwo = "00" +searchIssue

    if mylar.PREFERRED_QUALITY == 1: joinSearch = joinSearch + " .cbr"
    elif mylar.PREFERRED_QUALITY == 2: joinSearch = joinSearch + " .cbz"

    if "-" in searchName:
        searchName = searchName.replace("-", '((\\s)?[-:])?(\\s)?')

    regexName = searchName.replace(" ", '((\\s)?[-:])?(\\s)?')


    #logger.fdebug('searchName:' + searchName)
    #logger.fdebug('regexName:' + regexName)

    if mylar.USE_MINSIZE:
        size_constraints = "minsize=" + str(mylar.MINSIZE)
    else:
        size_constraints = "minsize=10"

    if mylar.USE_MAXSIZE:
        size_constraints = size_constraints + "&maxsize=" + str(mylar.MAXSIZE)

    if mylar.USENET_RETENTION != None:
        max_age = "&age=" + str(mylar.USENET_RETENTION)

    feeds = []
    feeds.append(feedparser.parse("http://nzbindex.nl/rss/alt.binaries.comics.dcp/?sort=agedesc&" + str(size_constraints) + str(max_age) + "&dq=%s&max=50&more=1" %joinSearch))
    if mylar.ALTEXPERIMENTAL:
        feeds.append(feedparser.parse("http://nzbindex.nl/rss/?dq=%s&g[]=41&g[]=510&sort=agedesc&hidespam=0&max=&more=1" %joinSearch))

    entries = []
    mres = {}
    tallycount = 0

    for feed in feeds:
        totNum = len(feed.entries)
        tallycount += len(feed.entries)

        #keyPair = {}
        keyPair = []
        regList = []
        countUp = 0

        logger.fdebug(str(totNum) + " results")

        while countUp < totNum:
     	    urlParse = feed.entries[countUp].enclosures[0]
	    #keyPair[feed.entries[countUp].title] = feed.entries[countUp].link
	    #keyPair[feed.entries[countUp].title] = urlParse["href"]
            keyPair.append({"title":     feed.entries[countUp].title,
                            "link":      urlParse["href"],
                            "length":    urlParse["length"],
                            "pubdate":   feed.entries[countUp].updated})
    	    countUp=countUp +1
        logger.fdebug('keypair: ' + str(keyPair))


        # thanks to SpammyHagar for spending the time in compiling these regEx's!

        regExTest=""

        regEx = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" %(regexName, searchIssue, searchYear)
        regExOne = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" %(regexName, searchIssue, searchYear)

        #Sometimes comics aren't actually published the same year comicVine says - trying to adjust for these cases
        regExTwo = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" %(regexName, searchIssue, int(searchYear) +1)
        regExThree = "(%s\\s*(0)?(0)?%s\\s*\\(%s\\))" %(regexName, searchIssue, int(searchYear) -1)
        regExFour = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" %(regexName, searchIssue, int(searchYear) +1)
        regExFive = "(%s\\s*(0)?(0)?%s\\s*\\(.*?\\)\\s*\\(%s\\))" %(regexName, searchIssue, int(searchYear) -1)

        regexList=[regEx, regExOne, regExTwo, regExThree, regExFour, regExFive]

        except_list=['releases', 'gold line', 'distribution', '0-day', '0 day']

        for entry in keyPair:
            title = entry['title']
            logger.fdebug("titlesplit: " + str(title.split("\"")))
            splitTitle = title.split("\"")
            noYear = 'False'
            _digits = re.compile('\d')

            for subs in splitTitle:
                logger.fdebug('sub:' + subs)
                regExCount = 0
                if len(subs) >= len(cName) and not any(d in subs.lower() for d in except_list) and bool(_digits.search(subs)) is True:
                #Looping through dictionary to run each regEx - length + regex is determined by regexList up top.
#                while regExCount < len(regexList):
#                    regExTest = re.findall(regexList[regExCount], subs, flags=re.IGNORECASE)
#                    regExCount = regExCount +1
#                    if regExTest:
#                        logger.fdebug(title)
#                        entries.append({
#                                  'title':   subs,
#                                  'link':    str(link)
#                                  })
                    # this will still match on crap like 'For SomeSomayes' especially if the series length < 'For SomeSomayes'
                    if subs.lower().startswith('for'):
                        if cName.lower().startswith('for'):
                            pass
                        else:
                            #this is the crap we ignore. Continue (commented else, as it spams the logs)
                            #logger.fdebug('this starts with FOR : ' + str(subs) + '. This is not present in the series - ignoring.')
                            continue
                    logger.fdebug('match.')
                    if IssDateFix != "no":
                        if IssDateFix == "01" or IssDateFix == "02": ComicYearFix = str(int(searchYear) - 1)
                        else: ComicYearFix = str(int(searchYear) + 1)
                    else:
                        ComicYearFix = searchYear

                    if searchYear not in subs and ComicYearFix not in subs:
                        noYear = 'True'
                        noYearline = subs

                    if (searchYear in subs or ComicYearFix in subs) and noYear == 'True':
                        #this would occur on the next check in the line, if year exists and
                        #the noYear check in the first check came back valid append it
                        subs = noYearline + ' (' + searchYear + ')'
                        noYear = 'False'

                    if noYear == 'False':

                        entries.append({
                                  'title':     subs,
                                  'link':      entry['link'],
                                  'pubdate':   entry['pubdate'],
                                  'length':    entry['length']
                                  })
                        break  # break out so we don't write more shit.

#    if len(entries) >= 1:
    if tallycount >= 1:
        mres['entries'] = entries
        return mres
    else:
        logger.fdebug("No Results Found")
        return "no results"