mylar/mylar/cv.py

514 lines
22 KiB
Python
Raw Normal View History

# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
2015-05-22 08:32:51 +00:00
# Mylar is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.
import sys
import os
import re
IMP: Cleaned up interface for StoryArcs / Story Arc Details, IMP: Cleaned up interface for Reading list Management, IMP: Added better reading list management - new status (added, downloaded, read), IMP: Added sync option for use with another device for reading list transfer (ie. tablet) Android only, IMP: Autopopulate new weekly pull releases to reading list, IMP: 'Watch' option in weekly pull list now fully functional. Will watch CV for series' that do not have any series data yet as they are new starting series. Will auto-add once available, IMP: Auto-watch check is run after every refresh/recreate of the weeklypull list, IMP: Improved the Add a Series option where it will now look for issues that are 'new' or 'wanted' during add sequence, IMP: Main page interface now has coloured have/total bars to denote series completion, IMP: New scheduler / threading locks in place in an attempt to avoid database locks, FIX: Removed some erroneous locking that was going on when importing a directory was being run, IMP: Stat counter now present when post-processing multiple issues in sequence, FIX: for issue number error when post-processing and issue number was a non-alphanumeric, FIX: for metatagging: when original file was .cbz, would try to convert and fail, FIX: for issues that were negative and were preceeded by a # in the filename (filechecker), FIX: for publisher having non-alphanumeric character in name when attempting to determine publisher, FIX: if annuals enabled, would incorrectly show as being 'already in library' when viewing search results if results constained annuals, FIX:(#944) for incorrect nzbname being used when post-processing was being performed from an nzb client (experimental mainly), IMP: Turned off logging for ComicVine API counter, FIX: Added retry attempts when connecting to ComicVine in order to avoid errors when adding a series, IMP:(#963) Added ability to add snatched to filter when viewing Wanted issues on Wanted tab, FIX: When importing and then selecting a series to import via the select screen, will now flip back to the importresults and add the selected series in the background, IMP:(#952) Main page is now sorted in ascending order by Continuing/Ended status (and subbed by whether is Active/Paused).Custom sorting is still available, FIX: Dupecheck will now automatically assume existing 0-byte files are to be overwritten when performing post-processing, FIX: If publication date for series contained a '?' (usually with brand new series) will force to 'Present' to allow for pull-list comparisons to take place, FIX: Mylar will now disallow search results which have 'covers only' or 'variant' in the filename, IMP: Better nzbname generation/retrieval (will check inside nzb for possible names) to be used when post-processing, IMP: DB Update will now perform update to all active comics in descending order by Latest Date (instead of random order), FIX: Enforce the 5hr limit rule when running DB update (will only update series that haven't been updated in >5 hours), FIX: Annuals will now have/retain the proper status upon doing DB Update, FIX: Have totals will now be updated when doing a recheck files (sometimes wouldn't get updated depending on various states of status'), FIX:(#966) Added urllib2.URLError exeception trap when attempting to check Git for updates, IMP: Removed the individual sqlite calls for weeklypull, and brought them into line with using the db module (which will minimize concurrent access, which seemed to be causing db locks), IMP: Cleaned up some code and shuffled some functions so they are in more appropriate locations
2015-03-27 17:27:59 +00:00
import time
import logger
import string
import urllib2
import lib.feedparser
import mylar
import platform
from mylar.helpers import cvapi_check
from bs4 import BeautifulSoup as Soup
import httplib
2015-05-22 08:32:51 +00:00
def patch_http_response_read(func):
def inner(*args):
try:
return func(*args)
except httplib.IncompleteRead, e:
return e.partial
return inner
httplib.HTTPResponse.read = patch_http_response_read(httplib.HTTPResponse.read)
if platform.python_version() == '2.7.6':
httplib.HTTPConnection._http_vsn = 10
httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'
2015-05-22 08:32:51 +00:00
def pulldetails(comicid, type, issueid=None, offset=1, arclist=None, comicidlist=None):
#import easy to use xml parser called minidom:
from xml.dom.minidom import parseString
2014-06-09 08:03:10 +00:00
if mylar.COMICVINE_API == 'None' or mylar.COMICVINE_API is None or mylar.COMICVINE_API == mylar.DEFAULT_CVAPI:
logger.warn('You have not specified your own ComicVine API key - alot of things will be limited. Get your own @ http://api.comicvine.com.')
comicapi = mylar.DEFAULT_CVAPI
else:
comicapi = mylar.COMICVINE_API
if type == 'comic':
if not comicid.startswith('4050-'): comicid = '4050-' + comicid
2015-05-22 08:32:51 +00:00
PULLURL = mylar.CVURL + 'volume/' + str(comicid) + '/?api_key=' + str(comicapi) + '&format=xml&field_list=name,count_of_issues,issues,start_year,site_detail_url,image,publisher,description,first_issue,deck,aliases'
elif type == 'issue':
if mylar.CV_ONLY:
cv_type = 'issues'
if arclist is None:
searchset = 'filter=volume:' + str(comicid) + '&field_list=cover_date,description,id,image,issue_number,name,date_last_updated,store_date'
else:
searchset = 'filter=id:' + (arclist) + '&field_list=cover_date,id,issue_number,name,date_last_updated,store_date,volume'
else:
cv_type = 'volume/' + str(comicid)
searchset = 'name,count_of_issues,issues,start_year,site_detail_url,image,publisher,description,store_date'
PULLURL = mylar.CVURL + str(cv_type) + '/?api_key=' + str(comicapi) + '&format=xml&' + str(searchset) + '&offset=' + str(offset)
elif type == 'firstissue':
#this is used ONLY for CV_ONLY
PULLURL = mylar.CVURL + 'issues/?api_key=' + str(comicapi) + '&format=xml&filter=id:' + str(issueid) + '&field_list=cover_date'
elif type == 'storyarc':
PULLURL = mylar.CVURL + 'story_arcs/?api_key=' + str(comicapi) + '&format=xml&filter=name:' + str(issueid) + '&field_list=cover_date'
elif type == 'comicyears':
PULLURL = mylar.CVURL + 'volumes/?api_key=' + str(comicapi) + '&format=xml&filter=id:' + str(comicidlist) + '&field_list=name,id,start_year,publisher&offset=' + str(offset)
#logger.info('PULLURL: ' + PULLURL)
#CV API Check here.
if mylar.CVAPI_COUNT == 0 or mylar.CVAPI_COUNT >= mylar.CVAPI_MAX:
cvapi_check()
#download the file:
file = urllib2.urlopen(PULLURL)
#increment CV API counter.
2015-05-22 08:32:51 +00:00
mylar.CVAPI_COUNT += 1
#convert to string:
data = file.read()
#close file because we dont need it anymore:
file.close()
#parse the xml you downloaded
dom = parseString(data)
return dom
2015-05-22 08:32:51 +00:00
def getComic(comicid, type, issueid=None, arc=None, arcid=None, arclist=None, comicidlist=None):
if type == 'issue':
offset = 1
issue = {}
ndic = []
issuechoice = []
comicResults = []
firstdate = '2099-00-00'
#let's find out how many results we get from the query...
if comicid is None:
#if comicid is None, it's coming from the story arc search results.
id = arcid
islist = arclist
else:
id = comicid
islist = None
2015-05-22 08:32:51 +00:00
searched = pulldetails(id, 'issue', None, 0, islist)
if searched is None: return False
totalResults = searched.getElementsByTagName('number_of_total_results')[0].firstChild.wholeText
logger.fdebug("there are " + str(totalResults) + " search results...")
if not totalResults:
return False
countResults = 0
while (countResults < int(totalResults)):
logger.fdebug("querying " + str(countResults))
if countResults > 0:
#new api - have to change to page # instead of offset count
offsetcount = countResults
2015-05-22 08:32:51 +00:00
searched = pulldetails(id, 'issue', None, offsetcount, islist)
issuechoice, tmpdate = GetIssuesInfo(id, searched, arcid)
if tmpdate < firstdate:
firstdate = tmpdate
ndic = ndic + issuechoice
#search results are limited to 100 and by pagination now...let's account for this.
countResults = countResults + 100
issue['issuechoice'] = ndic
issue['firstdate'] = firstdate
return issue
elif type == 'comic':
2015-05-22 08:32:51 +00:00
dom = pulldetails(comicid, 'comic', None, 1)
return GetComicInfo(comicid, dom)
elif type == 'firstissue':
dom = pulldetails(comicid, 'firstissue', issueid, 1)
return GetFirstIssue(issueid, dom)
FIX: undefined on filter box on startup, IMP: Added Meta-Tagging options on a series / issue basis on comic details screen, IMP: Issue Information is now available per issue and is extracted currently from the cbz file to display (if no cbz is present, the option isn't available), IMP: Failed Download handling is implemented and available in GUI - required to replace existing autoProcessComics.py and ComicRN.py scripts, IMP: Added ability to specify post-processing script instead of ComicRN.py, IMP: Added abilty to edit the issue date for a given issue by simply clicking on it - this will help to avoid dates that have incorrect or 0000-00-00, IMP: Story Arc searching is working (not adding yet), IMP: Added Archived/Ignored options to Upcoming/Wanted tab, IMP: Fixed some alignment and display issues on the Upcoming section, IMP: Added better directory handling for Updating Comic Location when it needs to get changed for all existing series (locmove in config.ini), IMP: Added better handling for unicode characters in series titles when searching / filechecking, IMP: When adding a new series with no data, Mylar would error out (now will add and just retain 0 issues), FIX: When year was fuzzied, search would still attempt to do a date-check comparison and say everything failed, IMP: Better handling of nzb names when retaining for post-processing comparisons, IMP: Future Upcoming now will use actual shipping date of issue if available in order to see if issue is available, FIX: If annuals were enabled, refreshing a series would put some issues into an Archived status because the actual counts would be off, IMP: When file checking, Alternate Naming would be searched last which resulted in matching incorrectly to the series title or other alternate naming for the given series, now will check the Alternate Naming first for a match, then drop back down to the series name itself otherwise, IMP: Improved Annual detection when integrated with a given series, IMP: Improved the checking of the future Upcoming list for issues marked as Wanted but not available yet and then auto-adding, IMP: Improved upon story arc checking for missing issues / searching for wanted, IMP: Enabling Annuals support now within Configuration GUI, bunch of other things....
2014-07-28 19:28:09 +00:00
elif type == 'storyarc':
2015-05-22 08:32:51 +00:00
dom = pulldetails(arc, 'storyarc', None, 1)
return GetComicInfo(issueid, dom)
elif type == 'comicyears':
#used by the story arc searcher when adding a given arc to poll each ComicID in order to populate the Series Year.
#this grabs each issue based on issueid, and then subsets the comicid for each to be used later.
#set the offset to 0, since we're doing a filter.
2015-05-22 08:32:51 +00:00
dom = pulldetails(arcid, 'comicyears', offset=0, comicidlist=comicidlist)
return GetSeriesYears(dom)
2015-05-22 08:32:51 +00:00
def GetComicInfo(comicid, dom, safechk=None):
IMP: Cleaned up interface for StoryArcs / Story Arc Details, IMP: Cleaned up interface for Reading list Management, IMP: Added better reading list management - new status (added, downloaded, read), IMP: Added sync option for use with another device for reading list transfer (ie. tablet) Android only, IMP: Autopopulate new weekly pull releases to reading list, IMP: 'Watch' option in weekly pull list now fully functional. Will watch CV for series' that do not have any series data yet as they are new starting series. Will auto-add once available, IMP: Auto-watch check is run after every refresh/recreate of the weeklypull list, IMP: Improved the Add a Series option where it will now look for issues that are 'new' or 'wanted' during add sequence, IMP: Main page interface now has coloured have/total bars to denote series completion, IMP: New scheduler / threading locks in place in an attempt to avoid database locks, FIX: Removed some erroneous locking that was going on when importing a directory was being run, IMP: Stat counter now present when post-processing multiple issues in sequence, FIX: for issue number error when post-processing and issue number was a non-alphanumeric, FIX: for metatagging: when original file was .cbz, would try to convert and fail, FIX: for issues that were negative and were preceeded by a # in the filename (filechecker), FIX: for publisher having non-alphanumeric character in name when attempting to determine publisher, FIX: if annuals enabled, would incorrectly show as being 'already in library' when viewing search results if results constained annuals, FIX:(#944) for incorrect nzbname being used when post-processing was being performed from an nzb client (experimental mainly), IMP: Turned off logging for ComicVine API counter, FIX: Added retry attempts when connecting to ComicVine in order to avoid errors when adding a series, IMP:(#963) Added ability to add snatched to filter when viewing Wanted issues on Wanted tab, FIX: When importing and then selecting a series to import via the select screen, will now flip back to the importresults and add the selected series in the background, IMP:(#952) Main page is now sorted in ascending order by Continuing/Ended status (and subbed by whether is Active/Paused).Custom sorting is still available, FIX: Dupecheck will now automatically assume existing 0-byte files are to be overwritten when performing post-processing, FIX: If publication date for series contained a '?' (usually with brand new series) will force to 'Present' to allow for pull-list comparisons to take place, FIX: Mylar will now disallow search results which have 'covers only' or 'variant' in the filename, IMP: Better nzbname generation/retrieval (will check inside nzb for possible names) to be used when post-processing, IMP: DB Update will now perform update to all active comics in descending order by Latest Date (instead of random order), FIX: Enforce the 5hr limit rule when running DB update (will only update series that haven't been updated in >5 hours), FIX: Annuals will now have/retain the proper status upon doing DB Update, FIX: Have totals will now be updated when doing a recheck files (sometimes wouldn't get updated depending on various states of status'), FIX:(#966) Added urllib2.URLError exeception trap when attempting to check Git for updates, IMP: Removed the individual sqlite calls for weeklypull, and brought them into line with using the db module (which will minimize concurrent access, which seemed to be causing db locks), IMP: Cleaned up some code and shuffled some functions so they are in more appropriate locations
2015-03-27 17:27:59 +00:00
if safechk is None:
#safetycheck when checking comicvine. If it times out, increment the chk on retry attempts up until 5 tries then abort.
safechk = 1
elif safechk > 4:
logger.error('Unable to add / refresh the series due to inablity to retrieve data from ComicVine. You might want to try abit later and/or make sure ComicVine is up.')
return
#comicvine isn't as up-to-date with issue counts..
#so this can get really buggered, really fast.
tracks = dom.getElementsByTagName('issue')
try:
cntit = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
except:
cntit = len(tracks)
trackcnt = len(tracks)
logger.fdebug("number of issues I counted: " + str(trackcnt))
logger.fdebug("number of issues CV says it has: " + str(cntit))
# if the two don't match, use trackcnt as count_of_issues might be not upto-date for some reason
if int(trackcnt) != int(cntit):
cntit = trackcnt
vari = "yes"
else: vari = "no"
logger.fdebug("vari is set to: " + str(vari))
#if str(trackcnt) != str(int(cntit)+2):
# cntit = int(cntit) + 1
comic = {}
comicchoice = []
cntit = int(cntit)
#retrieve the first xml tag (<tag>data</tag>)
#that the parser finds with name tagName:
# to return the parent name of the <name> node : dom.getElementsByTagName('name')[0].parentNode.nodeName
# where [0] denotes the number of the name field(s)
# where nodeName denotes the parentNode : ComicName = results, publisher = publisher, issues = issue
try:
2015-05-22 08:32:51 +00:00
names = len(dom.getElementsByTagName('name'))
n = 0
2015-05-22 08:32:51 +00:00
while (n < names):
if dom.getElementsByTagName('name')[n].parentNode.nodeName == 'results':
try:
comic['ComicName'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
2015-05-22 08:32:51 +00:00
comic['ComicName'] = comic['ComicName'].rstrip()
except:
logger.error('There was a problem retrieving the given data from ComicVine. Ensure that www.comicvine.com is accessible AND that you have provided your OWN ComicVine API key.')
return
elif dom.getElementsByTagName('name')[n].parentNode.nodeName == 'publisher':
try:
comic['ComicPublisher'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
except:
comic['ComicPublisher'] = "Unknown"
2015-05-22 08:32:51 +00:00
n += 1
except:
logger.warn('Something went wrong retrieving from ComicVine. Ensure your API is up-to-date and that comicvine is accessible')
return
try:
comic['ComicYear'] = dom.getElementsByTagName('start_year')[0].firstChild.wholeText
except:
comic['ComicYear'] = '0000'
IMP: Cleaned up interface for StoryArcs / Story Arc Details, IMP: Cleaned up interface for Reading list Management, IMP: Added better reading list management - new status (added, downloaded, read), IMP: Added sync option for use with another device for reading list transfer (ie. tablet) Android only, IMP: Autopopulate new weekly pull releases to reading list, IMP: 'Watch' option in weekly pull list now fully functional. Will watch CV for series' that do not have any series data yet as they are new starting series. Will auto-add once available, IMP: Auto-watch check is run after every refresh/recreate of the weeklypull list, IMP: Improved the Add a Series option where it will now look for issues that are 'new' or 'wanted' during add sequence, IMP: Main page interface now has coloured have/total bars to denote series completion, IMP: New scheduler / threading locks in place in an attempt to avoid database locks, FIX: Removed some erroneous locking that was going on when importing a directory was being run, IMP: Stat counter now present when post-processing multiple issues in sequence, FIX: for issue number error when post-processing and issue number was a non-alphanumeric, FIX: for metatagging: when original file was .cbz, would try to convert and fail, FIX: for issues that were negative and were preceeded by a # in the filename (filechecker), FIX: for publisher having non-alphanumeric character in name when attempting to determine publisher, FIX: if annuals enabled, would incorrectly show as being 'already in library' when viewing search results if results constained annuals, FIX:(#944) for incorrect nzbname being used when post-processing was being performed from an nzb client (experimental mainly), IMP: Turned off logging for ComicVine API counter, FIX: Added retry attempts when connecting to ComicVine in order to avoid errors when adding a series, IMP:(#963) Added ability to add snatched to filter when viewing Wanted issues on Wanted tab, FIX: When importing and then selecting a series to import via the select screen, will now flip back to the importresults and add the selected series in the background, IMP:(#952) Main page is now sorted in ascending order by Continuing/Ended status (and subbed by whether is Active/Paused).Custom sorting is still available, FIX: Dupecheck will now automatically assume existing 0-byte files are to be overwritten when performing post-processing, FIX: If publication date for series contained a '?' (usually with brand new series) will force to 'Present' to allow for pull-list comparisons to take place, FIX: Mylar will now disallow search results which have 'covers only' or 'variant' in the filename, IMP: Better nzbname generation/retrieval (will check inside nzb for possible names) to be used when post-processing, IMP: DB Update will now perform update to all active comics in descending order by Latest Date (instead of random order), FIX: Enforce the 5hr limit rule when running DB update (will only update series that haven't been updated in >5 hours), FIX: Annuals will now have/retain the proper status upon doing DB Update, FIX: Have totals will now be updated when doing a recheck files (sometimes wouldn't get updated depending on various states of status'), FIX:(#966) Added urllib2.URLError exeception trap when attempting to check Git for updates, IMP: Removed the individual sqlite calls for weeklypull, and brought them into line with using the db module (which will minimize concurrent access, which seemed to be causing db locks), IMP: Cleaned up some code and shuffled some functions so they are in more appropriate locations
2015-03-27 17:27:59 +00:00
try:
comic['ComicURL'] = dom.getElementsByTagName('site_detail_url')[trackcnt].firstChild.wholeText
except:
#this should never be an exception. If it is, it's probably due to CV timing out - so let's sleep for abit then retry.
logger.warn('Unable to retrieve URL for volume. This is usually due to a timeout to CV, or going over the API. Retrying again in 10s.')
time.sleep(10)
safechk +=1
GetComicInfo(comicid, dom, safechk)
2015-05-22 08:32:51 +00:00
desdeck = 0
#the description field actually holds the Volume# - so let's grab it
try:
descchunk = dom.getElementsByTagName('description')[0].firstChild.wholeText
comic_desc = drophtml(descchunk)
desdeck +=1
except:
comic_desc = 'None'
#sometimes the deck has volume labels
try:
deckchunk = dom.getElementsByTagName('deck')[0].firstChild.wholeText
comic_deck = deckchunk
desdeck +=1
except:
comic_deck = 'None'
#comic['ComicDescription'] = comic_desc
try:
comic['Aliases'] = dom.getElementsByTagName('aliases')[0].firstChild.wholeText
#logger.fdebug('Aliases: ' + str(aliases))
except:
comic['Aliases'] = 'None'
comic['ComicVersion'] = 'noversion'
#logger.info('comic_desc:' + comic_desc)
#logger.info('comic_deck:' + comic_deck)
#logger.info('desdeck: ' + str(desdeck))
while (desdeck > 0):
if desdeck == 1:
if comic_desc == 'None':
comicDes = comic_deck[:30]
else:
#extract the first 60 characters
comicDes = comic_desc[:60].replace('New 52', '')
elif desdeck == 2:
#extract the characters from the deck
comicDes = comic_deck[:30].replace('New 52', '')
else:
break
i = 0
while (i < 2):
if 'volume' in comicDes.lower():
#found volume - let's grab it.
v_find = comicDes.lower().find('volume')
#arbitrarily grab the next 10 chars (6 for volume + 1 for space + 3 for the actual vol #)
#increased to 10 to allow for text numbering (+5 max)
#sometimes it's volume 5 and ocassionally it's fifth volume.
if i == 0:
2015-05-22 08:32:51 +00:00
vfind = comicDes[v_find:v_find +15] #if it's volume 5 format
basenums = {'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5'}
IMP: Ability to now specify search provider order (regardless of torrents or nzb) within the config.ini, IMP: (#667) Changed the db module to try to accomodate db locking errors and lowering the amount of actual write transactions that were committed along with a new scheduler system, IMP: Changed sabnzbd directory to post-processing, and included subdirs for sabnzbd & nzbget ComicRN scripts, IMP: NZBGet Post-Processing ComicRN.py script (updated for use with nzbget v11.0+)added & updated in post-processing/nzbget directory (thnx ministoat), FIX: If Issue Location was None, and status was Downloaded would cause error in GUI and break series, IMP: (#689) Minimum # of seeders added (will work with KAT), IMP: (#680) Added Boxcar 2 IO Notifications, IMP: Added PushBullet Notifications, IMP: Cleaned up some notification messages so it's not so cluttered, IMP: Added Clickable series link in History tab, IMP: Added Post-Processed as a status to History tab to show manually post-processed items, IMP: Removed log level dropdown from Logs page & added 'ThreadName' as a column, IMP: Added Force Check Availability & View Future Pull-list to Upcoming sub-tabs, IMP: Added '--safe' option to startup options which will redirect directly to Manage Comics screen incase things are broken, FIX: Added proper month conversions for manual post-processing when doing comparitive issue analysis for matches, FIX: (#613) Allow for negative issue numbers in post-processing when renaming and issue padding is enabled, FIX: File Permissions on post-processing would stop post-processing if couldn't change, now will just log the error and continue, IMP: Added Scheduler (from sickbeard) to allow for threadnaming and better scheduling, IMP: Filenames in the format of ' () ' will now get scanned in, IMP: During manual post-processing will now stop looking for matches upon a successful match, IMP: A Refresh/Weeklypull series check will now just scan in issue data, instead of series info,etc, IMP: Removed some legacy GCD code that is no longer in use, IMP: Exception/traceback handling will now be logged, FIX: Unable to grab torrents from KAT due to content-encoding detection failing, IMP: Added universal date-time conversion to allow for non-english based dates to be properly compared when checking search results against publication dates, FIX: Annuals will now get proper notification (prior was leaving out the word 'annual' from notification/logs), IMP: Improved future pull-list detection and increased retension (now ~5 months), IMP: Will now mark new issues as Wanted on a Refresh Series if autowant upcoming is enabled (was reverting to a status of None previously), IMP: Cannot change status to Downloaded if current status is Skipped or Wanted, FIX: (#704) UnSkipped will now work (X in options column on comic details page), IMP: future_check will check upcoming future issues (future pull-list) that have no series data yet (ie. #1's) and auto-add them to watchlist when the data is available and auto-want accordingly, IMP: (#706) Downloading issues to local machine (via comicdetails screen) with special characters in filename now will work, IMP: improved comparison checks during weekly pull list and improved speed abit since only refreshing issue data now instead of entire series, Other Referenced issues: (#670)(#690) and some others....
2014-05-25 18:32:11 +00:00
logger.fdebug('volume X format - ' + str(i) + ': ' + vfind)
else:
vfind = comicDes[:v_find] # if it's fifth volume format
2015-05-22 08:32:51 +00:00
basenums = {'zero': '0', 'first': '1', 'second': '2', 'third': '3', 'fourth': '4', 'fifth': '5', 'sixth': '6', 'seventh': '7', 'eighth': '8', 'nineth': '9', 'tenth': '10', 'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5'}
IMP: Ability to now specify search provider order (regardless of torrents or nzb) within the config.ini, IMP: (#667) Changed the db module to try to accomodate db locking errors and lowering the amount of actual write transactions that were committed along with a new scheduler system, IMP: Changed sabnzbd directory to post-processing, and included subdirs for sabnzbd & nzbget ComicRN scripts, IMP: NZBGet Post-Processing ComicRN.py script (updated for use with nzbget v11.0+)added & updated in post-processing/nzbget directory (thnx ministoat), FIX: If Issue Location was None, and status was Downloaded would cause error in GUI and break series, IMP: (#689) Minimum # of seeders added (will work with KAT), IMP: (#680) Added Boxcar 2 IO Notifications, IMP: Added PushBullet Notifications, IMP: Cleaned up some notification messages so it's not so cluttered, IMP: Added Clickable series link in History tab, IMP: Added Post-Processed as a status to History tab to show manually post-processed items, IMP: Removed log level dropdown from Logs page & added 'ThreadName' as a column, IMP: Added Force Check Availability & View Future Pull-list to Upcoming sub-tabs, IMP: Added '--safe' option to startup options which will redirect directly to Manage Comics screen incase things are broken, FIX: Added proper month conversions for manual post-processing when doing comparitive issue analysis for matches, FIX: (#613) Allow for negative issue numbers in post-processing when renaming and issue padding is enabled, FIX: File Permissions on post-processing would stop post-processing if couldn't change, now will just log the error and continue, IMP: Added Scheduler (from sickbeard) to allow for threadnaming and better scheduling, IMP: Filenames in the format of ' () ' will now get scanned in, IMP: During manual post-processing will now stop looking for matches upon a successful match, IMP: A Refresh/Weeklypull series check will now just scan in issue data, instead of series info,etc, IMP: Removed some legacy GCD code that is no longer in use, IMP: Exception/traceback handling will now be logged, FIX: Unable to grab torrents from KAT due to content-encoding detection failing, IMP: Added universal date-time conversion to allow for non-english based dates to be properly compared when checking search results against publication dates, FIX: Annuals will now get proper notification (prior was leaving out the word 'annual' from notification/logs), IMP: Improved future pull-list detection and increased retension (now ~5 months), IMP: Will now mark new issues as Wanted on a Refresh Series if autowant upcoming is enabled (was reverting to a status of None previously), IMP: Cannot change status to Downloaded if current status is Skipped or Wanted, FIX: (#704) UnSkipped will now work (X in options column on comic details page), IMP: future_check will check upcoming future issues (future pull-list) that have no series data yet (ie. #1's) and auto-add them to watchlist when the data is available and auto-want accordingly, IMP: (#706) Downloading issues to local machine (via comicdetails screen) with special characters in filename now will work, IMP: improved comparison checks during weekly pull list and improved speed abit since only refreshing issue data now instead of entire series, Other Referenced issues: (#670)(#690) and some others....
2014-05-25 18:32:11 +00:00
logger.fdebug('X volume format - ' + str(i) + ': ' + vfind)
volconv = ''
for nums in basenums:
if nums in vfind.lower():
sconv = basenums[nums]
vfind = re.sub(nums, sconv, vfind.lower())
2015-05-22 08:32:51 +00:00
break
#logger.info('volconv: ' + str(volconv))
#now we attempt to find the character position after the word 'volume'
if i == 0:
volthis = vfind.lower().find('volume')
2015-05-22 08:32:51 +00:00
volthis = volthis + 6 # add on the actual word to the position so that we can grab the subsequent digit
vfind = vfind[volthis:volthis + 4] # grab the next 4 characters ;)
elif i == 1:
volthis = vfind.lower().find('volume')
2015-05-22 08:32:51 +00:00
vfind = vfind[volthis - 4:volthis] # grab the next 4 characters ;)
if '(' in vfind:
#bracket detected in versioning'
vfindit = re.findall('[^()]+', vfind)
vfind = vfindit[0]
vf = re.findall('[^<>]+', vfind)
ledigit = re.sub("[^0-9]", "", vf[0])
if ledigit != '':
comic['ComicVersion'] = ledigit
logger.fdebug("Volume information found! Adding to series record : volume " + comic['ComicVersion'])
break
2015-05-22 08:32:51 +00:00
i += 1
else:
2015-05-22 08:32:51 +00:00
i += 1
if comic['ComicVersion'] == 'noversion':
IMP: Ability to now specify search provider order (regardless of torrents or nzb) within the config.ini, IMP: (#667) Changed the db module to try to accomodate db locking errors and lowering the amount of actual write transactions that were committed along with a new scheduler system, IMP: Changed sabnzbd directory to post-processing, and included subdirs for sabnzbd & nzbget ComicRN scripts, IMP: NZBGet Post-Processing ComicRN.py script (updated for use with nzbget v11.0+)added & updated in post-processing/nzbget directory (thnx ministoat), FIX: If Issue Location was None, and status was Downloaded would cause error in GUI and break series, IMP: (#689) Minimum # of seeders added (will work with KAT), IMP: (#680) Added Boxcar 2 IO Notifications, IMP: Added PushBullet Notifications, IMP: Cleaned up some notification messages so it's not so cluttered, IMP: Added Clickable series link in History tab, IMP: Added Post-Processed as a status to History tab to show manually post-processed items, IMP: Removed log level dropdown from Logs page & added 'ThreadName' as a column, IMP: Added Force Check Availability & View Future Pull-list to Upcoming sub-tabs, IMP: Added '--safe' option to startup options which will redirect directly to Manage Comics screen incase things are broken, FIX: Added proper month conversions for manual post-processing when doing comparitive issue analysis for matches, FIX: (#613) Allow for negative issue numbers in post-processing when renaming and issue padding is enabled, FIX: File Permissions on post-processing would stop post-processing if couldn't change, now will just log the error and continue, IMP: Added Scheduler (from sickbeard) to allow for threadnaming and better scheduling, IMP: Filenames in the format of ' () ' will now get scanned in, IMP: During manual post-processing will now stop looking for matches upon a successful match, IMP: A Refresh/Weeklypull series check will now just scan in issue data, instead of series info,etc, IMP: Removed some legacy GCD code that is no longer in use, IMP: Exception/traceback handling will now be logged, FIX: Unable to grab torrents from KAT due to content-encoding detection failing, IMP: Added universal date-time conversion to allow for non-english based dates to be properly compared when checking search results against publication dates, FIX: Annuals will now get proper notification (prior was leaving out the word 'annual' from notification/logs), IMP: Improved future pull-list detection and increased retension (now ~5 months), IMP: Will now mark new issues as Wanted on a Refresh Series if autowant upcoming is enabled (was reverting to a status of None previously), IMP: Cannot change status to Downloaded if current status is Skipped or Wanted, FIX: (#704) UnSkipped will now work (X in options column on comic details page), IMP: future_check will check upcoming future issues (future pull-list) that have no series data yet (ie. #1's) and auto-add them to watchlist when the data is available and auto-want accordingly, IMP: (#706) Downloading issues to local machine (via comicdetails screen) with special characters in filename now will work, IMP: improved comparison checks during weekly pull list and improved speed abit since only refreshing issue data now instead of entire series, Other Referenced issues: (#670)(#690) and some others....
2014-05-25 18:32:11 +00:00
logger.fdebug('comic[ComicVersion]:' + str(comic['ComicVersion']))
2015-05-22 08:32:51 +00:00
desdeck -= 1
else:
break
2015-05-22 08:32:51 +00:00
if vari == "yes":
comic['ComicIssues'] = str(cntit)
else:
comic['ComicIssues'] = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
comic['ComicImage'] = dom.getElementsByTagName('super_url')[0].firstChild.wholeText
comic['ComicImageALT'] = dom.getElementsByTagName('small_url')[0].firstChild.wholeText
comic['FirstIssueID'] = dom.getElementsByTagName('id')[0].firstChild.wholeText
# print ("fistIss:" + str(comic['FirstIssueID']))
# comicchoice.append({
# 'ComicName': comic['ComicName'],
# 'ComicYear': comic['ComicYear'],
# 'Comicid': comicid,
# 'ComicURL': comic['ComicURL'],
# 'ComicIssues': comic['ComicIssues'],
# 'ComicImage': comic['ComicImage'],
# 'ComicVolume': ParseVol,
# 'ComicPublisher': comic['ComicPublisher']
# })
# comic['comicchoice'] = comicchoice
return comic
2015-05-22 08:32:51 +00:00
def GetIssuesInfo(comicid, dom, arcid=None):
subtracks = dom.getElementsByTagName('issue')
if not mylar.CV_ONLY:
cntiss = dom.getElementsByTagName('count_of_issues')[0].firstChild.wholeText
logger.fdebug("issues I've counted: " + str(len(subtracks)))
logger.fdebug("issues CV says it has: " + str(int(cntiss)))
if int(len(subtracks)) != int(cntiss):
logger.fdebug("CV's count is wrong, I counted different...going with my count for physicals" + str(len(subtracks)))
cntiss = len(subtracks) # assume count of issues is wrong, go with ACTUAL physical api count
cntiss = int(cntiss)
2015-05-22 08:32:51 +00:00
n = cntiss -1
else:
n = int(len(subtracks))
tempissue = {}
issuech = []
firstdate = '2099-00-00'
for subtrack in subtracks:
if not mylar.CV_ONLY:
if (dom.getElementsByTagName('name')[n].firstChild) is not None:
issue['Issue_Name'] = dom.getElementsByTagName('name')[n].firstChild.wholeText
else:
issue['Issue_Name'] = 'None'
issue['Issue_ID'] = dom.getElementsByTagName('id')[n].firstChild.wholeText
issue['Issue_Number'] = dom.getElementsByTagName('issue_number')[n].firstChild.wholeText
2015-05-22 08:32:51 +00:00
issuech.append({
'Issue_ID': issue['Issue_ID'],
'Issue_Number': issue['Issue_Number'],
'Issue_Name': issue['Issue_Name']
})
else:
try:
2015-05-22 08:32:51 +00:00
totnames = len(subtrack.getElementsByTagName('name'))
tot = 0
while (tot < totnames):
if subtrack.getElementsByTagName('name')[tot].parentNode.nodeName == 'volume':
tempissue['ComicName'] = subtrack.getElementsByTagName('name')[tot].firstChild.wholeText
elif subtrack.getElementsByTagName('name')[tot].parentNode.nodeName == 'issue':
try:
tempissue['Issue_Name'] = subtrack.getElementsByTagName('name')[tot].firstChild.wholeText
except:
tempissue['Issue_Name'] = None
2015-05-22 08:32:51 +00:00
tot += 1
except:
tempissue['ComicName'] = 'None'
try:
2015-05-22 08:32:51 +00:00
totids = len(subtrack.getElementsByTagName('id'))
idt = 0
while (idt < totids):
if subtrack.getElementsByTagName('id')[idt].parentNode.nodeName == 'volume':
tempissue['Comic_ID'] = subtrack.getElementsByTagName('id')[idt].firstChild.wholeText
elif subtrack.getElementsByTagName('id')[idt].parentNode.nodeName == 'issue':
tempissue['Issue_ID'] = subtrack.getElementsByTagName('id')[idt].firstChild.wholeText
2015-05-22 08:32:51 +00:00
idt += 1
except:
tempissue['Issue_Name'] = 'None'
try:
tempissue['CoverDate'] = subtrack.getElementsByTagName('cover_date')[0].firstChild.wholeText
except:
tempissue['CoverDate'] = '0000-00-00'
try:
tempissue['StoreDate'] = subtrack.getElementsByTagName('store_date')[0].firstChild.wholeText
except:
tempissue['StoreDate'] = '0000-00-00'
FIX: undefined on filter box on startup, IMP: Added Meta-Tagging options on a series / issue basis on comic details screen, IMP: Issue Information is now available per issue and is extracted currently from the cbz file to display (if no cbz is present, the option isn't available), IMP: Failed Download handling is implemented and available in GUI - required to replace existing autoProcessComics.py and ComicRN.py scripts, IMP: Added ability to specify post-processing script instead of ComicRN.py, IMP: Added abilty to edit the issue date for a given issue by simply clicking on it - this will help to avoid dates that have incorrect or 0000-00-00, IMP: Story Arc searching is working (not adding yet), IMP: Added Archived/Ignored options to Upcoming/Wanted tab, IMP: Fixed some alignment and display issues on the Upcoming section, IMP: Added better directory handling for Updating Comic Location when it needs to get changed for all existing series (locmove in config.ini), IMP: Added better handling for unicode characters in series titles when searching / filechecking, IMP: When adding a new series with no data, Mylar would error out (now will add and just retain 0 issues), FIX: When year was fuzzied, search would still attempt to do a date-check comparison and say everything failed, IMP: Better handling of nzb names when retaining for post-processing comparisons, IMP: Future Upcoming now will use actual shipping date of issue if available in order to see if issue is available, FIX: If annuals were enabled, refreshing a series would put some issues into an Archived status because the actual counts would be off, IMP: When file checking, Alternate Naming would be searched last which resulted in matching incorrectly to the series title or other alternate naming for the given series, now will check the Alternate Naming first for a match, then drop back down to the series name itself otherwise, IMP: Improved Annual detection when integrated with a given series, IMP: Improved the checking of the future Upcoming list for issues marked as Wanted but not available yet and then auto-adding, IMP: Improved upon story arc checking for missing issues / searching for wanted, IMP: Enabling Annuals support now within Configuration GUI, bunch of other things....
2014-07-28 19:28:09 +00:00
try:
tempissue['Issue_Number'] = subtrack.getElementsByTagName('issue_number')[0].firstChild.wholeText
except:
logger.fdebug('No Issue Number available - Trade Paperbacks, Graphic Novels and Compendiums are not supported as of yet.')
if arcid is None:
issuech.append({
'Comic_ID': comicid,
'Issue_ID': tempissue['Issue_ID'],
'Issue_Number': tempissue['Issue_Number'],
'Issue_Date': tempissue['CoverDate'],
'Store_Date': tempissue['StoreDate'],
'Issue_Name': tempissue['Issue_Name']
})
else:
issuech.append({
'ArcID': arcid,
'ComicName': tempissue['ComicName'],
'ComicID': tempissue['Comic_ID'],
'IssueID': tempissue['Issue_ID'],
'Issue_Number': tempissue['Issue_Number'],
'Issue_Date': tempissue['CoverDate'],
'Store_Date': tempissue['StoreDate'],
'Issue_Name': tempissue['Issue_Name']
})
if tempissue['CoverDate'] < firstdate and tempissue['CoverDate'] != '0000-00-00':
firstdate = tempissue['CoverDate']
2015-05-22 08:32:51 +00:00
n-= 1
#issue['firstdate'] = firstdate
return issuech, firstdate
2015-05-22 08:32:51 +00:00
def GetFirstIssue(issueid, dom):
#if the Series Year doesn't exist, get the first issue and take the date from that
try:
first_year = dom.getElementsByTagName('cover_date')[0].firstChild.wholeText
except:
first_year = '0000'
return first_year
the_year = first_year[:4]
the_month = first_year[5:7]
the_date = the_year + '-' + the_month
return the_year
def GetSeriesYears(dom):
#used by the 'add a story arc' option to individually populate the Series Year for each series within the given arc.
#series year is required for alot of functionality.
series = dom.getElementsByTagName('volume')
tempseries = {}
serieslist = []
for dm in series:
try:
2015-05-22 08:32:51 +00:00
totids = len(dm.getElementsByTagName('id'))
idc = 0
while (idc < totids):
if dm.getElementsByTagName('id')[idc].parentNode.nodeName == 'volume':
tempseries['ComicID'] = dm.getElementsByTagName('id')[idc].firstChild.wholeText
idc+=1
except:
logger.warn('There was a problem retrieving a comicid for a series within the arc. This will have to manually corrected most likely.')
tempseries['ComicID'] = 'None'
tempseries['Series'] = 'None'
tempseries['Publisher'] = 'None'
try:
2015-05-22 08:32:51 +00:00
totnames = len(dm.getElementsByTagName('name'))
namesc = 0
while (namesc < totnames):
if dm.getElementsByTagName('name')[namesc].parentNode.nodeName == 'volume':
tempseries['Series'] = dm.getElementsByTagName('name')[namesc].firstChild.wholeText
elif dm.getElementsByTagName('name')[namesc].parentNode.nodeName == 'publisher':
tempseries['Publisher'] = dm.getElementsByTagName('name')[namesc].firstChild.wholeText
namesc+=1
except:
logger.warn('There was a problem retrieving a Series Name or Publisher for a series within the arc. This will have to manually corrected.')
try:
tempseries['SeriesYear'] = dm.getElementsByTagName('start_year')[0].firstChild.wholeText
except:
logger.warn('There was a problem retrieving the start year for a particular series within the story arc.')
tempseries['SeriesYear'] = '0000'
serieslist.append({"ComicID": tempseries['ComicID'],
"ComicName": tempseries['Series'],
"SeriesYear": tempseries['SeriesYear'],
"Publisher": tempseries['Publisher']})
return serieslist
2015-05-22 08:32:51 +00:00
def drophtml(html):
from bs4 import BeautifulSoup
soup = BeautifulSoup(html)
text_parts = soup.findAll(text=True)
#print ''.join(text_parts)
return ''.join(text_parts)