mylar/mylar/wwt.py

#!/usr/bin/env python
# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mylar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.
import lib.requests as requests
from bs4 import BeautifulSoup, UnicodeDammit
import urlparse
import re
import time
import sys
import datetime
from datetime import timedelta
import mylar
from mylar import logger, helpers
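

# Provider module for the WorldWideTorrents (WWT) public torrent site: wwt builds
# a '<series name> <issue number>' query, scrapes the site's search results
# (following any additional result pages), and returns the matches as
# {'entries': [...]} for Mylar's search workflow to consume.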
class wwt(object):

    def __init__(self, name, issue):
        self.url = 'https://worldwidetorrents.me/'
        self.query = name + ' ' + str(int(issue))   #'Batman White Knight'
        logger.info('query set to : %s' % self.query)
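
    # wwt_connect() drives the lookup: it submits the query to torrents-search.php
    # (comics category, cat=132), collects any pagination links from the centered
    # <p> element, and feeds each page of results through wwt_data(), waiting 5s
    # between page requests.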
    def wwt_connect(self):
        resultlist = None
        params = {'c50': 1,
                  'search': self.query,
                  'cat': 132,
                  'incldead': 0,
                  'lang': 0}

        with requests.Session() as s:
            newurl = self.url + 'torrents-search.php'
            r = s.get(newurl, params=params, verify=True)
            if not r.status_code == 200:
                return
            logger.info('status code: %s' % r.status_code)
            soup = BeautifulSoup(r.content, "html5lib")

            resultpages = soup.find("p", {"align": "center"})
            try:
                pagelist = resultpages.findAll("a")
            except:
                logger.info('No results found for %s' % self.query)
                return

            pages = []
            for p in pagelist:
                if p['href'] not in pages:
                    logger.fdebug('page: %s' % p['href'])
                    pages.append(p['href'])

            logger.fdebug('pages: %s' % (len(pages) + 1))

            resultlist = self.wwt_data(soup)

            if pages:
                for p in pages:
                    time.sleep(5)   # 5s delay btwn requests
                    newurl = self.url + str(p)
                    r = s.get(newurl, params=params, verify=True)
                    if not r.status_code == 200:
                        continue
                    soup = BeautifulSoup(r.content, "html5lib")
                    resultlist += self.wwt_data(soup)

        logger.fdebug('%s results: %s' % (len(resultlist), resultlist))

        res = {}
        if len(resultlist) >= 1:
            res['entries'] = resultlist
        return res
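
    # wwt_data() walks every row of the results table, pulling the torrent title,
    # the id from the download.php link, the size (MB/GB converted to bytes via
    # helpers.human2bytes), the seeder/leecher counts, and the relative age, which
    # string_to_delta() turns into a publish date.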
    def wwt_data(self, data):
        resultw = data.find("table", {"class": "w3-table w3-striped w3-bordered w3-card-4"})
        resultp = resultw.findAll("tr")

        #final = []
        results = []
        for res in resultp:
            if res.findNext(text=True) == 'Torrents Name':
                continue
            title = res.find('a')
            torrent = title['title']
            try:
                for link in res.find_all('a', href=True):
                    if link['href'].startswith('download.php'):
                        linkurl = urlparse.parse_qs(urlparse.urlparse(link['href']).query)['id']
                        #results = {'torrent': torrent,
                        #           'link': link['href']}
                        break

                for td in res.findAll('td'):
                    try:
                        seed = td.find("font", {"color": "green"})
                        leech = td.find("font", {"color": "#ff0000"})
                        value = td.findNext(text=True)
                        if any(['MB' in value, 'GB' in value]):
                            if 'MB' in value:
                                szform = 'MB'
                                sz = 'M'
                            else:
                                szform = 'GB'
                                sz = 'G'
                            size = helpers.human2bytes(str(re.sub(szform, '', value)).strip() + sz)
                        elif seed is not None:
                            seeders = value
                            #results['seeders'] = seeders
                        elif leech is not None:
                            leechers = value
                            #results['leechers'] = leechers
                        else:
                            age = value
                            #results['age'] = age
                    except Exception as e:
                        logger.warn('exception: %s' % e)

                logger.info('age: %s' % age)
                results.append({'title':   torrent,
                                'link':    ''.join(linkurl),
                                'pubdate': self.string_to_delta(age),
                                'size':    size,
                                'site':    'WWT'})

                logger.info('results: %s' % results)
            except Exception as e:
                logger.warn('Error: %s' % e)
                continue
            #else:
            #    final.append(results)

        return results
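
    # string_to_delta() converts a relative age such as '3 days ago' or '2 wks ago'
    # into an approximate absolute timestamp ('%a, %d %b %Y %H:%M:%S') by mapping
    # each unit token onto days/seconds/microseconds and subtracting the resulting
    # timedelta from now.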
    def string_to_delta(self, relative):
        # simplistic mapping: no leap years, and a month is treated as 30 days long.
        # WARNING: 12 months != 1 year
        logger.info('trying to remap date from %s' % relative)
        unit_mapping = [('mic', 'microseconds', 1),
                        ('millis', 'microseconds', 1000),
                        ('sec', 'seconds', 1),
                        ('mins', 'seconds', 60),
                        ('hrs', 'seconds', 3600),
                        ('day', 'days', 1),
                        ('wk', 'days', 7),
                        ('mon', 'days', 30),
                        ('year', 'days', 365)]
        try:
            tokens = relative.lower().split(' ')
            past = False
            if tokens[-1] == 'ago':
                past = True
                tokens = tokens[:-1]
            elif tokens[0] == 'in':
                tokens = tokens[1:]

            units = dict(days=0, seconds=0, microseconds=0)
            # we should always get pairs; if not, we let this die and throw an exception
            while len(tokens) > 0:
                value = tokens.pop(0)
                if value == 'and':   # just skip this token
                    continue
                else:
                    value = float(value)
                    unit = tokens.pop(0)
                    for match, time_unit, time_constant in unit_mapping:
                        if unit.startswith(match):
                            units[time_unit] += value * time_constant

            #print datetime.timedelta(**units), past
            val = datetime.datetime.now() - datetime.timedelta(**units)
            return datetime.datetime.strftime(val, '%a, %d %b %Y %H:%M:%S')
        except Exception as e:
            raise ValueError("Don't know how to parse %s: %s" % (relative, e))
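

# A minimal usage sketch (hypothetical series/issue; within Mylar this class is
# normally driven by the search/backlog code rather than called directly):
#
#   searcher = wwt('Batman White Knight', 2)
#   feed = searcher.wwt_connect()
#   if feed and 'entries' in feed:
#       for entry in feed['entries']:
#           logger.info('%s | %s bytes | id %s' % (entry['title'], entry['size'], entry['link']))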