mirror of https://github.com/evilhero/mylar
#!/usr/bin/env python
# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mylar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.

import requests
from bs4 import BeautifulSoup, UnicodeDammit
import urlparse
import re
import time
import sys
import datetime
from datetime import timedelta
import lib.cfscrape as cfscrape

import mylar
from mylar import logger, helpers


class wwt(object):
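    """
    Lightweight search client for the WorldWideTorrents (WWT) tracker:
    builds a '<series> <issue>' query, fetches the result listing through
    cfscrape (to get past Cloudflare), and parses each result row into a
    dict that the rest of Mylar can treat like a feed entry.
    """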

    def __init__(self, name, issue):
        self.url = mylar.WWTURL
        # build the search string, e.g. 'Batman White Knight 1'
        self.query = name + ' ' + str(int(issue))
        logger.info('query set to : %s' % self.query)

    def wwt_connect(self):
        resultlist = None
        params = {'c50': 1,
                  'search': self.query,
                  'cat': 132,
                  'incldead': 0,
                  'lang': 0}
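
        # NOTE: these are the tracker's own query-string parameters, taken
        # as-is from the site's search form; cat=132 appears to be the
        # comics category and incldead=0 filters out dead torrents.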

        with cfscrape.create_scraper() as s:
            newurl = self.url + 'torrents-search.php'
            if mylar.WWT_CF_COOKIEVALUE is None:
                # first request: solve the Cloudflare challenge once and cache
                # the clearance cookie for the rest of the session
                cf_cookievalue, cf_user_agent = s.get_tokens(newurl, user_agent=mylar.CV_HEADERS['User-Agent'])
                mylar.WWT_CF_COOKIEVALUE = cf_cookievalue

            r = s.get(newurl, params=params, verify=True, cookies=mylar.WWT_CF_COOKIEVALUE, headers=mylar.CV_HEADERS)

            if r.status_code != 200:
                return
            logger.info('status code: %s' % r.status_code)
            soup = BeautifulSoup(r.content, "html5lib")

            # pagination links live in a centered <p>; if it's absent there
            # were no results at all
            resultpages = soup.find("p", {"align": "center"})
            try:
                pagelist = resultpages.findAll("a")
            except AttributeError:
                logger.info('No results found for %s' % self.query)
                return

            pages = []
            for p in pagelist:
                if p['href'] not in pages:
                    logger.fdebug('page: %s' % p['href'])
                    pages.append(p['href'])
            # +1 accounts for the page we're already on
            logger.fdebug('pages: %s' % (len(pages) + 1))

            # parse the page we already have, then walk any additional pages
            resultlist = self.wwt_data(soup)
            if pages:
                for p in pages:
                    time.sleep(5)  # 5s delay between requests
                    newurl = self.url + str(p)
                    # reuse the cached Cloudflare cookie on paged requests too
                    r = s.get(newurl, params=params, verify=True, cookies=mylar.WWT_CF_COOKIEVALUE, headers=mylar.CV_HEADERS)
                    if r.status_code != 200:
                        continue
                    soup = BeautifulSoup(r.content, "html5lib")
                    resultlist += self.wwt_data(soup)

        logger.fdebug('%s results: %s' % (len(resultlist), resultlist))

        res = {}
        if len(resultlist) >= 1:
            res['entries'] = resultlist
        return res
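
    # On success the caller gets {'entries': [...]} where each entry is a
    # dict with 'title', 'link' (download id), 'pubdate', 'size' and 'site'
    # keys; an empty dict if nothing parsed, and None if the request failed.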

    def wwt_data(self, data):
        resultw = data.find("table", {"class": "w3-table w3-striped w3-bordered w3-card-4"})
        resultp = resultw.findAll("tr")

        results = []
        for res in resultp:
            # skip the header row of the results table
            if res.findNext(text=True) == 'Torrents Name':
                continue
            title = res.find('a')
            torrent = title['title']
            try:
                # the download link carries the torrent id as a query param
                for link in res.find_all('a', href=True):
                    if link['href'].startswith('download.php'):
                        linkurl = urlparse.parse_qs(urlparse.urlparse(link['href']).query)['id']
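                        # parse_qs returns a dict of lists, so a href like
                        # 'download.php?id=12345' yields linkurl == ['12345'];
                        # it is joined back into a string further below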
                        break
                for td in res.findAll('td'):
                    try:
                        seed = td.find("font", {"color": "green"})
                        leech = td.find("font", {"color": "#ff0000"})
                        value = td.findNext(text=True)
                        if any(['MB' in value, 'GB' in value]):
                            # size cell: normalise the 'MB'/'GB' suffix to the
                            # single-letter form human2bytes expects
                            if 'MB' in value:
                                szform = 'MB'
                                sz = 'M'
                            else:
                                szform = 'GB'
                                sz = 'G'
                            size = helpers.human2bytes(str(re.sub(szform, '', value)).strip() + sz)
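                            # e.g. '25.5 MB' becomes human2bytes('25.5M')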
                        elif seed is not None:
                            # seeders/leechers are parsed here but not
                            # currently included in the returned results
                            seeders = value
                        elif leech is not None:
                            leechers = value
                        else:
                            # anything left over in this row is the age column
                            age = value
                    except Exception as e:
                        logger.warn('exception: %s' % e)

                logger.info('age: %s' % age)
                results.append({'title': torrent,
                                'link': ''.join(linkurl),
                                'pubdate': self.string_to_delta(age),
                                'size': size,
                                'site': 'WWT'})
                logger.info('results: %s' % results)
            except Exception as e:
                # a row that doesn't parse cleanly is logged and skipped
                logger.warn('Error: %s' % e)
                continue

        return results

    def string_to_delta(self, relative):
        # uses a simplistic calendar: no leap years, and every month is 30
        # days long.
        # WARNING: 12 months != 1 year
        logger.info('trying to remap date from %s' % relative)
        unit_mapping = [('mic', 'microseconds', 1),
                        ('millis', 'microseconds', 1000),
                        ('sec', 'seconds', 1),
                        ('mins', 'seconds', 60),
                        ('hrs', 'seconds', 3600),
                        ('day', 'days', 1),
                        ('wk', 'days', 7),
                        ('mon', 'days', 30),
                        ('year', 'days', 365)]
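        # e.g. '3 wks ago' -> tokens ['3', 'wks'] -> units['days'] += 3 * 7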
        try:
            tokens = relative.lower().split(' ')
            past = False
            if tokens[-1] == 'ago':
                past = True
                tokens = tokens[:-1]
            elif tokens[0] == 'in':
                tokens = tokens[1:]

            units = dict(days=0, seconds=0, microseconds=0)
            # we should always get value/unit pairs; if not, let this die and
            # surface as the ValueError below
            while len(tokens) > 0:
                value = tokens.pop(0)
                if value == 'and':  # just skip this token
                    continue
                else:
                    value = float(value)

                unit = tokens.pop(0)
                for match, time_unit, time_constant in unit_mapping:
                    if unit.startswith(match):
                        units[time_unit] += value * time_constant
            # WWT ages are always 'X ago', so subtract from now and return an
            # RFC 2822-style timestamp
            val = datetime.datetime.now() - datetime.timedelta(**units)
            return datetime.datetime.strftime(val, '%a, %d %b %Y %H:%M:%S')
        except Exception as e:
            raise ValueError("Don't know how to parse %s: %s" % (relative, e))
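
# A minimal usage sketch (hypothetical; in the real application the
# module-level settings such as mylar.WWTURL and mylar.CV_HEADERS are
# populated by Mylar's config before this class is ever instantiated):
#
#   searcher = wwt('Batman White Knight', 1)
#   feed = searcher.wwt_connect()
#   if feed and 'entries' in feed:
#       for entry in feed['entries']:
#           print '%s (%s bytes)' % (entry['title'], entry['size'])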