mylar/mylar/rsscheck.py

#!/usr/bin/python

import os, sys
import re
import lib.feedparser as feedparser
import urllib2
import ftpsshup
import datetime

import mylar
from mylar import db, logger, ftpsshup, helpers

def tehMain():
    logger.info('RSS Feed Check was last run at : ' + str(mylar.RSS_LASTRUN))
    firstrun = "no"
    #check the last run of rss to make sure it's not hammering.
    if mylar.RSS_LASTRUN is None or mylar.RSS_LASTRUN == '' or mylar.RSS_LASTRUN == '0':
        logger.info('RSS Feed Check First Ever Run.')
        firstrun = "yes"
        mins = 0
    else:
        c_obj_date = datetime.datetime.strptime(mylar.RSS_LASTRUN, "%Y-%m-%d %H:%M:%S")
        n_date = datetime.datetime.now()
        absdiff = abs(n_date - c_obj_date)
        mins = (absdiff.days * 24 * 60 * 60 + absdiff.seconds) / 60.0  #3600 is for hours.

    if firstrun == "no" and mins < int(mylar.RSS_CHECKINTERVAL):
        logger.fdebug('RSS Check has taken place less than the threshold - not initiating at this time.')
        return

    mylar.RSS_LASTRUN = helpers.now()
    logger.fdebug('Updating RSS Run time to : ' + str(mylar.RSS_LASTRUN))
    mylar.config_write()

    #function for looping through nzbs/torrent feeds
    if mylar.ENABLE_TORRENTS:
        logger.fdebug("[RSS] Initiating Torrent RSS Check.")
        if mylar.ENABLE_KAT:
            logger.fdebug('[RSS] Initiating Torrent RSS Feed Check on KAT.')
            torrents(pickfeed='3')
        if mylar.ENABLE_CBT:
            logger.fdebug('[RSS] Initiating Torrent RSS Feed Check on CBT.')
            torrents(pickfeed='1')
            torrents(pickfeed='4')
    logger.fdebug('RSS] Initiating RSS Feed Check for NZB Providers.')
    nzbs()    
    logger.fdebug('[RSS] RSS Feed Check/Update Complete')
    logger.fdebug('[RSS] Watchlist Check for new Releases')
    #if mylar.ENABLE_TORRENTS:
    #    if mylar.ENABLE_KAT:
    #        search.searchforissue(rsscheck='yes')
    #    if mylar.ENABLE_CBT:    
    mylar.search.searchforissue(rsscheck='yes')
    #nzbcheck here
    #nzbs(rsscheck='yes')
    logger.fdebug('[RSS] Watchlist Check complete.')
    return

def torrents(pickfeed=None,seriesname=None,issue=None):
    if pickfeed is None:
        pickfeed = 1
    #else:
    #    print "pickfeed is " + str(pickfeed)
    passkey = mylar.CBT_PASSKEY 
    srchterm = None

    if seriesname:
        srchterm = re.sub(' ', '%20', seriesname)
    if issue:
        srchterm += ' ' + str(issue)

    if pickfeed == "1":      # comicbt rss feed based on followlist
        feed = "http://comicbt.com/rss.php?action=browse&passkey=" + str(passkey) + "&type=dl"
    elif pickfeed == "2" and srchterm is not None:    # kat.ph search
        feed = "http://kat.ph/usearch/" + str(srchterm) + "%20category%3Acomics%20seeds%3A1/?rss=1"
    elif pickfeed == "3":    # kat.ph rss feed
        feed = "http://kat.ph/usearch/category%3Acomics%20seeds%3A1/?rss=1"
    elif pickfeed == "4":    #comicbt follow link
        feed = "http://comicbt.com/rss.php?action=follow&passkey=" + str(passkey) + "&type=dl"
    elif pickfeed == "5":    # comicbt series link
#       seriespage = "http://comicbt.com/series.php?passkey=" + str(passkey)
        feed = "http://comicbt.com/rss.php?action=series&series=" + str(seriesno) + "&passkey=" + str(passkey)
    else:
        logger.error('invalid pickfeed denoted...')
        return
    logger.fdebug('feed #' + str(pickfeed) + ' chosen: ' + str(feed))
    title = []
    link = []
    description = []
    seriestitle = []

    if pickfeed == "5": # we need to get the series # first
        seriesSearch(seriespage, seriesname)
    feedme = feedparser.parse(feed)
    i = 0

    feeddata = []

    myDB = db.DBConnection()
    torthekat = []
    katinfo = {}

    for entry in feedme['entries']:
        if pickfeed == "3":
            tmpsz = feedme.entries[i].enclosures[0]
            feeddata.append({
                           'Site':     'KAT',
                           'Title':    feedme.entries[i].title,
                           'Link':     tmpsz['url'],
                           'Pubdate':  feedme.entries[i].updated,
                           'Size':     tmpsz['length']
                           })

        elif pickfeed == "2":
            tmpsz = feedme.entries[i].enclosures[0]
            torthekat.append({
                          'title':   feedme.entries[i].title,
                          'link':    tmpsz['url'],
                          'pubdate': feedme.entries[i].updated,
                          'site':    'KAT',
                          'length':  tmpsz['length']
                          })
            #print ("Site: KAT")
            #print ("Title: " + str(feedme.entries[i].title))
            #print ("Link: " + str(tmpsz['url']))
            #print ("pubdate: " + str(feedme.entries[i].updated))
            #print ("size: " + str(tmpsz['length']))

        elif pickfeed == "1" or pickfeed == "4":
#            tmpsz = feedme.entries[i].enclosures[0]
            feeddata.append({
                           'Site':     'comicBT',
                           'Title':    feedme.entries[i].title, 
                           'Link':     feedme.entries[i].link,
                           'Pubdate':  feedme.entries[i].updated
#                          'Size':     tmpsz['length']
                           })
            #print ("Site: ComicBT")
            #print ("Title: " + str(feeddata[i]['Title']))
            #print ("Link: " + str(feeddata[i]['Link']))
            #print ("pubdate: " + str(feeddata[i]['Pubdate']))
        i+=1
    logger.fdebug('there were ' + str(i) + ' results..')
    if not seriesname:
        rssdbupdate(feeddata,i,'torrent')
    else:
        katinfo['entries'] = torthekat
        return katinfo
    return

def nzbs(provider=None):
    nzbprovider = []
    nzbp = 0
    if mylar.NZBSU == 1:
        nzbprovider.append('nzb.su')
        nzbp+=1
    if mylar.DOGNZB == 1:
        nzbprovider.append('dognzb')
        nzbp+=1
    # --------
    #  Xperimental
    if mylar.EXPERIMENTAL == 1:
        nzbprovider.append('experimental')
        nzbp+=1

    newznabs = 0

    newznab_hosts = []

    if mylar.NEWZNAB == 1:

        for newznab_host in mylar.EXTRA_NEWZNABS:
            if newznab_host[4] == '1' or newznab_host[4] == 1:
                newznab_hosts.append(newznab_host)
                nzbprovider.append('newznab')
                newznabs+=1
                logger.fdebug('newznab name:' + str(newznab_host[0]) + ' - enabled: ' + str(newznab_host[4]))

    # --------
    providercount = int(nzbp + newznabs)
    logger.fdebug('there are : ' + str(providercount) + ' RSS search providers you have enabled.')
    nzbpr = providercount - 1
    if nzbpr < 0:
        nzbpr == 0

    feeddata = []
    feedthis = []
    ft = 0
    totNum = 0
    nonexp = "no"
   
    while (nzbpr >= 0 ):
        if nzbprovider[nzbpr] == 'experimental':
            feed = feedparser.parse("http://nzbindex.nl/rss/alt.binaries.comics.dcp/?sort=agedesc&max=50&more=1")

            totNum = len(feed.entries)
            site = 'experimental'
            keyPair = {}
            regList = []
            entries = []
            mres = {}
            countUp = 0

            i = 0
            for entry in feed['entries']:
                tmpsz = feed.entries[i].enclosures[0]
                feeddata.append({
                               'Site':     site,
                               'Title':    feed.entries[i].title,
                               'Link':     feed.entries[i].link,
                               'Pubdate':  feed.entries[i].updated,
                               'Size':     tmpsz['length']
                               })
#                print ("Site:" + str(site))
#                print ("Title:" + str(feed.entries[i].title))
#                print ("Link:" + str(feed.entries[i].link))
#                print ("Pubdate:" + str(feed.entries[i].updated))
#                print ("Size:" + str(tmpsz['length']))
                i+=1
            logger.info(str(i) + ' results from Experimental feed indexed.')
            nzbpr-=1
        else:
            if nzbprovider[nzbpr] == 'newznab':
                for newznab_host in newznab_hosts:
                    if newznab_host[3] is None:
                        newznabuid = '1'
                    else:
                        newznabuid = newznab_host[3]
                    feed = newznab_host[1].rstrip() + '/rss?t=7030&dl=1&i=' + str(newznabuid) + '&r=' + newznab_host[2].rstrip()
                    feedme = feedparser.parse(feed)
                    site = newznab_host[0].rstrip()
                    feedthis.append({"feed":     feedme,
                                     "site":     site})
                    totNum+=len(feedme.entries)
                    ft+=1
                    nonexp = "yes"
                    nzbpr-=1
            elif nzbprovider[nzbpr] == 'nzb.su':
                if mylar.NZBSU_UID is None:
                    mylar.NZBSU_UID = '1'
                feed = 'http://nzb.su/rss?t=7030&dl=1&i=' + mylar.NZBSU_UID + '&r=' + mylar.NZBSU_APIKEY
                feedme = feedparser.parse(feed)
                site = nzbprovider[nzbpr]
                feedthis.append({"feed":   feedme,
                                 "site":   site })
                totNum+=len(feedme.entries)
                ft+=1
                nonexp = "yes"
                nzbpr-=1
            elif nzbprovider[nzbpr] == 'dognzb':
                if mylar.DOGNZB_UID is None:
                    mylar.DOGNZB_UID = '1'
                feed = 'http://dognzb.cr/rss?t=7030&dl=1&i=' + mylar.DOGNZB_UID + '&r=' + mylar.DOGNZB_APIKEY
                feedme = feedparser.parse(feed)
                site = nzbprovider[nzbpr]
                ft+=1
                nonexp = "yes"
                feedthis.append({"feed":   feedme,
                                 "site":   site })
                totNum+=len(feedme.entries)
                nzbpr-=1

    i = 0
    if nonexp == "yes":
        #print str(ft) + " sites checked. There are " + str(totNum) + " entries to be updated."
        #print feedme
        #i = 0

        for ft in feedthis:
            site = ft['site']
            #print str(site) + " now being updated..."
            for entry in ft['feed'].entries:
                #print "entry: " + str(entry)
                tmpsz = entry.enclosures[0]
                feeddata.append({
                           'Site':     site,
                           'Title':    entry.title,
                           'Link':     entry.link,
                           'Pubdate':  entry.updated,
                           'Size':     tmpsz['length']
                           })

#               print ("Site: " + str(feeddata[i]['Site']))
#               print ("Title: " + str(feeddata[i]['Title']))
#               print ("Link: " + str(feeddata[i]['Link']))
#               print ("pubdate: " + str(feeddata[i]['Pubdate']))
#               print ("size: " + str(feeddata[i]['Size']))
                i+=1
            logger.info(str(site) + ' : ' + str(i) + ' entries indexed.')

    rssdbupdate(feeddata,i,'usenet')
    return

def rssdbupdate(feeddata,i,type):
    rsschktime = 15
    myDB = db.DBConnection()

    #let's add the entries into the db so as to save on searches
    #also to build up the ID's ;)
    x = 1
    while x <= i:
        try:
            dataval = feeddata[x]
        except IndexError:
            logger.fdebug('reached the end of populating. Exiting the process.')
            break
        #print "populating : " + str(dataval)
        #remove passkey so it doesn't end up in db
        if type == 'torrent':
            newlink = dataval['Link'][:(dataval['Link'].find('&passkey'))]
            newVal = {"Link":      newlink,
                      "Pubdate":   dataval['Pubdate'],
                      "Site":      dataval['Site']}
        else:
            newlink = dataval['Link']
            newVal = {"Link":      newlink,
                      "Pubdate":   dataval['Pubdate'],
                      "Site":      dataval['Site'],
                      "Size":      dataval['Size']}

        ctrlVal = {"Title":    dataval['Title']}

        myDB.upsert("rssdb", newVal,ctrlVal)

        x+=1

    logger.fdebug('Completed adding new data to RSS DB. Next add in ' + str(mylar.RSS_CHECKINTERVAL) + ' minutes')
    return

def torrentdbsearch(seriesname,issue,comicid=None,nzbprov=None):
    myDB = db.DBConnection()
    seriesname_alt = None
    print "seriesname:" + str(seriesname)
    if comicid is None or comicid == 'None':
        pass
    else:
        logger.fdebug('ComicID: ' + str(comicid))
        snm = myDB.action("SELECT * FROM comics WHERE comicid=?", [comicid]).fetchone()
        if snm is None:
            logger.fdebug('Invalid ComicID of ' + str(comicid) + '. Aborting search.')
            return
        else:
            seriesname = snm['ComicName']
            seriesname_alt = snm['AlternateSearch']


    #remove 'and' and 'the':
    tsearch_rem1 = re.sub("\\band\\b", "%", seriesname.lower())
    tsearch_rem2 = re.sub("\\bthe\\b", "%", tsearch_rem1.lower())    
    tsearch_removed = re.sub('\s+', ' ', tsearch_rem2)
    tsearch_seriesname = re.sub('[\'\!\@\#\$\%\:\-\;\/\\=\?\&\.\s]', '%',tsearch_removed)
    tsearch = tsearch_seriesname + "%"
    logger.fdebug('tsearch : ' + str(tsearch))
    AS_Alt = []
    tresults = []

    if mylar.ENABLE_CBT:
        tresults = myDB.action("SELECT * FROM rssdb WHERE Title like ? AND Site='comicBT'", [tsearch]).fetchall()
    if mylar.ENABLE_KAT:
        tresults += myDB.action("SELECT * FROM rssdb WHERE Title like ? AND Site='KAT'", [tsearch]).fetchall()

    logger.fdebug('seriesname_alt:' + str(seriesname_alt))
    if seriesname_alt is None or seriesname_alt == 'None':
        if tresults is None:
            logger.fdebug('no Alternate name given. Aborting search.')
            return "no results"
    else:
        chkthealt = seriesname_alt.split('##')
        if chkthealt == 0:
            AS_Alternate = seriesname_alt
            AS_Alt.append(seriesname_alt)
        for calt in chkthealt:
            AS_Alter = re.sub('##','',calt)
            u_altsearchcomic = AS_Alter.encode('ascii', 'ignore').strip()
            AS_Altrem = re.sub("\\band\\b", "", u_altsearchcomic.lower())
            AS_Altrem = re.sub("\\bthe\\b", "", AS_Altrem.lower())

            AS_Alternate = re.sub('[\_\#\,\/\:\;\.\-\!\$\%\+\'\&\?\@\s]', '%', AS_Altrem)
            AS_Alt.append(AS_Alternate)
            AS_Alternate += '%'

            if mylar.ENABLE_CBT:
                #print "AS_Alternate:" + str(AS_Alternate)
                tresults += myDB.action("SELECT * FROM rssdb WHERE Title like ? AND Site='comicBT'", [AS_Alternate]).fetchall()
            if mylar.ENABLE_KAT:
                tresults += myDB.action("SELECT * FROM rssdb WHERE Title like ? AND Site='KAT'", [AS_Alternate]).fetchall()

    if tresults is None:
        logger.fdebug('torrent search returned no results for ' + seriesname)
        return "no results"

    extensions = ('cbr', 'cbz')
    tortheinfo = []
    torinfo = {}

    for tor in tresults:
        torsplit = tor['Title'].split('/')
        logger.fdebug('tor-Title: ' + tor['Title'])
        logger.fdebug('there are ' + str(len(torsplit)) + ' sections in this title')
        i=0
        #0 holds the title/issue and format-type.
        while (i < len(torsplit)):
            logger.fdebug('section(' + str(i) + '): ' + str(torsplit[i]))
            i+=1

        seriesname_mod = seriesname
        foundname_mod = torsplit[0]
        seriesname_mod = re.sub("\\band\\b", " ", seriesname_mod.lower())
        foundname_mod = re.sub("\\band\\b", " ", foundname_mod.lower())
        seriesname_mod = re.sub("\\bthe\\b", " ", seriesname_mod.lower())
        foundname_mod = re.sub("\\bthe\\b", " ", foundname_mod.lower())

        seriesname_mod = re.sub('[\&]', ' ', seriesname_mod)
        foundname_mod = re.sub('[\&]', ' ', foundname_mod)

        formatrem_seriesname = re.sub('[\'\!\@\#\$\%\:\;\/\\=\?\.]', '',seriesname_mod)
        formatrem_seriesname = re.sub('\s+', ' ', formatrem_seriesname)
        if formatrem_seriesname[:1] == ' ': formatrem_seriesname = formatrem_seriesname[1:]
        formatrem_torsplit = re.sub('[\'\!\@\#\$\%\:\;\/\\=\?\.]', '',foundname_mod)
        formatrem_torsplit = re.sub('\s+', ' ', formatrem_torsplit)
        logger.fdebug(str(len(formatrem_torsplit)) + ' - formatrem_torsplit : ' + formatrem_torsplit.lower())
        logger.fdebug(str(len(formatrem_seriesname)) + ' - formatrem_seriesname :' + formatrem_seriesname.lower())

        if formatrem_seriesname.lower() in formatrem_torsplit.lower() or any(x.lower() in formatrem_torsplit.lower() for x in AS_Alt):
            logger.fdebug('matched to : ' + tor['Title'])
            logger.fdebug('matched on series title: ' + seriesname)
            titleend = formatrem_torsplit[len(formatrem_seriesname):]
            titleend = re.sub('\-', '', titleend)   #remove the '-' which is unnecessary

            titleend = re.sub('cbr', '', str(titleend)) #remove extensions
            logger.fdebug('titleend: ' + str(titleend))
            sptitle = titleend.split()
            extra = ''
#            for sp in sptitle:
#                if 'v' in sp.lower() and sp[1:].isdigit():
#                    volumeadd = sp
#                elif 'vol' in sp.lower() and sp[3:].isdigit():
#                    volumeadd = sp
#                #if sp.isdigit():
#                    #print("issue # detected : " + str(issue))
#                elif helpers.issuedigits(issue.rstrip()) == helpers.issuedigits(sp.rstrip()):
#                    logger.fdebug("Issue matched for : " + str(issue))
            #the title on CBT has a mix-mash of crap...ignore everything after cbz/cbr to cleanit
            ctitle = tor['Title'].find('cbr')
            if ctitle == 0:
                ctitle = tor['Title'].find('cbz')
            if ctitle == 0:
                logger.fdebug('cannot determine title properly - ignoring for now.')
                continue
            cttitle = tor['Title'][:ctitle]
            #print("change title to : " + str(cttitle))
#           if extra == '':
            tortheinfo.append({
                          'title':   cttitle, #tor['Title'],
                          'link':    tor['Link'],
                          'pubdate': tor['Pubdate'],
                          'site':    tor['Site'],
                          'length':  tor['Size']
                          })
#                    continue
#                        #torsend2client(formatrem_seriesname,tor['Link'])
#                    else:
#                        logger.fdebug("extra info given as :" + str(extra))
#                        logger.fdebug("extra information confirmed as a match")
#                        logger.fdebug("queuing link: " + str(tor['Link']))
#                        tortheinfo.append({
#                                      'title':   cttitle, #tor['Title'],
#                                      'link':    tor['Link'],
#                                      'pubdate': tor['Pubdate'],
#                                      'site':    tor['Site'],
#                                      'length':    tor['Size']
#                                      })
#                        logger.fdebug("entered info.")
#                        continue
                            #torsend2client(formatrem_seriesname,tor['Link'])
                #else:
                #    logger.fdebug("invalid issue#: " + str(sp))
                #    #extra = str(extra) + " " + str(sp) 
#                else:
#                    logger.fdebug("word detected - assuming continuation of title: " + str(sp))
#                    extra = str(extra) + " " + str(sp)

    torinfo['entries'] = tortheinfo

    return torinfo

def nzbdbsearch(seriesname,issue,comicid=None,nzbprov=None):
    myDB = db.DBConnection()
    seriesname_alt = None
    if comicid is None or comicid == 'None':
        pass
    else:
        snm = myDB.action("SELECT * FROM comics WHERE comicid=?", [comicid]).fetchone()
        if snm is None:
            logger.info('Invalid ComicID of ' + str(comicid) + '. Aborting search.')
            return
        else:
            seriesname = snm['ComicName']
            seriesname_alt = snm['AlternateSearch']


    nsearch_seriesname = re.sub('[\'\!\@\#\$\%\:\;\/\\=\?\.\s]', '%',seriesname)
    formatrem_seriesname = re.sub('[\'\!\@\#\$\%\:\;\/\\=\?\.]', '',seriesname)
    nsearch = nsearch_seriesname + "%"
    nresults = myDB.action("SELECT * FROM rssdb WHERE Title like ? AND Site != 'comicBT' AND Site != 'KAT'", [nsearch])
    if nresults is None:
        logger.fdebug('nzb search returned no results for ' + seriesname)
        if seriesname_alt is None:
            logger.fdebug('no nzb Alternate name given. Aborting search.')
            return "no results"
        else:
            chkthealt = seriesname_alt.split('##')
            if chkthealt == 0:
                AS_Alternate = AlternateSearch
            for calt in chkthealt:
                AS_Alternate = re.sub('##','',calt)
                nresults += myDB.action("SELECT * FROM rssdb WHERE Title like ? AND Site != 'comicBT' AND Site != 'KAT'", [AS_Alternate])
            if nresults is None:
                logger.fdebug('nzb alternate name search returned no results.')
                return "no results"

    nzbtheinfo = []
    nzbinfo = {}

    for nzb in nresults:
        # no need to parse here, just compile and throw it back ....
        nzbtheinfo.append({
                         'title':   nzb['Title'],
                         'link':    nzb['Link'],
                         'pubdate': nzb['Pubdate'],
                         'site':    nzb['Site'],
                         'length':    nzb['Size']
                         })
        logger.fdebug("entered info for " + nzb['Title'])

    nzbinfo['entries'] = nzbtheinfo
    return nzbinfo
             
def torsend2client(seriesname, linkit, site):
    logger.info('matched on ' + str(seriesname))
    filename = re.sub('[\'\!\@\#\$\%\:\;\/\\=\?\.]', '',seriesname)
    if site == 'ComicBT':
        logger.info(linkit)
        linkit = str(linkit) + '&passkey=' + str(mylar.CBT_PASSKEY)

    if linkit[-7:] != "torrent":
        filename += ".torrent"

    request = urllib2.Request(linkit)
    request.add_header('User-Agent', str(mylar.USER_AGENT))
    if mylar.TORRENT_LOCAL and mylar.LOCAL_WATCHDIR is not None:
        filepath = os.path.join(mylar.LOCAL_WATCHDIR, filename)
        logger.fdebug('filename for torrent set to : ' + filepath)
    elif mylar.TORRENT_SEEDBOX and mylar.SEEDBOX_WATCHDIR is not None:
        filepath = os.path.join(mylar.CACHE_DIR, filename)
        logger.fdebug('filename for torrent set to : ' + filepath)
    else:
        logger.error('No Local Watch Directory or Seedbox Watch Directory specified. Set it and try again.')
        return "fail"

    try:
        opener = helpers.urlretrieve(urllib2.urlopen(request), filepath)
    except Exception, e:
        logger.warn('Error fetching data from %s: %s' % (site, e))
        return "fail"

    logger.fdebug('torrent file saved as : ' + str(filepath))
    if mylar.TORRENT_LOCAL:
        return "pass"
    #remote_file = urllib2.urlopen(linkit)
    #if linkit[-7:] != "torrent":
    #    filename += ".torrent"

    #local_file = open('%s' % (os.path.join(mylar.CACHE_DIR,filename)), 'w')
    #local_file.write(remote_file.read())
    #local_file.close()
    #remote_file.close()
    elif mylar.TORRENT_SEEDBOX:
        tssh = ftpsshup.putfile(filepath,filename)
        return tssh

if __name__ == '__main__':
    #torrents(sys.argv[1])
    #torrentdbsearch(sys.argv[1], sys.argv[2], sys.argv[3])
    nzbs(sys.argv[1])