mylar/lib/comictaggerlib/comicvinetalker.py


"""
A Python class to manage communication with Comic Vine's REST API
"""
"""
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import json
from pprint import pprint
import urllib2, urllib
import math
import re
import time
import datetime
import ctversion
import os, sys
from bs4 import BeautifulSoup
try:
from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest
from PyQt4.QtCore import QUrl, pyqtSignal, QObject, QByteArray
except ImportError:
# No Qt, so define a few dummy QObjects to help us compile
class QObject():
def __init__(self,*args):
pass
class pyqtSignal():
def __init__(self,*args):
pass
def emit(self, *args):
pass
import utils
from settings import ComicTaggerSettings
from comicvinecacher import ComicVineCacher
from genericmetadata import GenericMetadata
from issuestring import IssueString
try:
lib_path = os.path.join(ComicTaggerSettings.baseDir(), '..')
sys.path.append(lib_path)
import requests
except ImportError:
print "Unable to use requests module. This is a CRITICAL error and ComicTagger cannot proceed. Exiting."
class CVTypeID:
Volume = "4050"
Issue = "4000"
class ComicVineTalkerException(Exception):
Unknown = -1
Network = -2
InvalidKey = 100
RateLimit = 107
def __init__(self, code=-1, desc=""):
self.desc = desc
self.code = code
def __str__(self):
if (self.code == ComicVineTalkerException.Unknown or
self.code == ComicVineTalkerException.Network):
return self.desc
else:
return "CV error #{0}: [{1}]. \n".format( self.code, self.desc )
class ComicVineTalker(QObject):
logo_url = "http://static.comicvine.com/bundles/comicvinesite/images/logo.png"
api_key = ""
@staticmethod
def getRateLimitMessage():
if ComicVineTalker.api_key == "":
return "Comic Vine rate limit exceeded. You should configue your own Comic Vine API key."
else:
return "Comic Vine rate limit exceeded. Please wait a bit."
def __init__(self):
QObject.__init__(self)
self.api_base_url = "http://comicvine.gamespot.com/api"
self.wait_for_rate_limit = False
# key that is registered to comictagger
default_api_key = '27431e6787042105bd3e47e169a624521f89f3a4'
if ComicVineTalker.api_key == "":
self.api_key = default_api_key
else:
self.api_key = ComicVineTalker.api_key
self.cv_headers = {'User-Agent': 'ComicTagger ' + str(ctversion.version) + ' [' + ctversion.fork + ' / ' + ctversion.fork_tag + ']'}
self.log_func = None
def setLogFunc( self , log_func ):
self.log_func = log_func
def writeLog( self , text ):
if self.log_func is None:
#sys.stdout.write(text.encode( errors='replace') )
#sys.stdout.flush()
print >> sys.stderr, text
else:
self.log_func( text )
def parseDateStr( self, date_str):
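# Cover dates come back from Comic Vine as "YYYY-MM-DD" (sometimes just "YYYY" or "YYYY-MM");
# e.g. "2014-07-30" yields ('30', '07', '2014'), with any missing parts left as None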
day = None
month = None
year = None
if date_str is not None:
parts = date_str.split('-')
year = parts[0]
if len(parts) > 1:
month = parts[1]
if len(parts) > 2:
day = parts[2]
return day, month, year
def testKey( self, key ):
test_url = self.api_base_url + "/issue/1/?api_key=" + key + "&format=json&field_list=name"
r = requests.get(test_url, headers=self.cv_headers)
#resp = urllib2.urlopen( test_url )
#content = resp.read()
cv_response = r.json() #json.loads( r.content )
# Bogus request, but if the key is wrong, you get error 100: "Invalid API Key"
return cv_response[ 'status_code' ] != 100
"""
Get the content from the CV server. If we're in "wait mode" and the status
code is a rate-limit error, sleep for a bit and retry.
"""
def getCVContent(self, url):
total_time_waited = 0
limit_wait_time = 1
counter = 0
wait_times = [1,2,3,4]
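# back-off schedule (in minutes) when the rate limit is hit: waits of 1, 1, 2, 3, then 4 minutes
# per retry, giving up once a bit over 20 minutes in total has been spent waiting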
while True:
time.sleep(2) #imposed by new CV rate limiting - 1 api request / second (maximum) - set to 2s intervals to be safe
#self.writeLog( "Self imposed rate-limiting of 1 api request / second\n" )
cv_response = self.getUrlContent(url)
#cv_response = json.loads(content)
if self.wait_for_rate_limit and cv_response[ 'status_code' ] == ComicVineTalkerException.RateLimit:
self.writeLog( "Rate limit encountered. Waiting for {0} minutes\n".format(limit_wait_time))
time.sleep(limit_wait_time * 60)
total_time_waited += limit_wait_time
limit_wait_time = wait_times[counter]
if counter < 3:
counter += 1
# don't wait much more than 20 minutes
if total_time_waited < 20:
continue
if cv_response[ 'status_code' ] != 1:
self.writeLog( "Comic Vine query failed with error #{0}: [{1}]. \n".format( cv_response[ 'status_code' ], cv_response[ 'error' ] ))
raise ComicVineTalkerException(cv_response[ 'status_code' ], cv_response[ 'error' ] )
else:
# it's all good
break
return cv_response
def getUrlContent( self, url ):
# connect to server:
# if there is a 500 error, try a few more times before giving up
# any other error, just bail
#print "ATB---", url
for tries in range(3):
try:
r = requests.get(url, headers=self.cv_headers)
if r.status_code == 500:
# server-side error; log it, pause briefly, and retry
self.writeLog( "Try #{0}: got HTTP 500 from Comic Vine\n".format(tries+1) )
time.sleep(1)
continue
return r.json()
except Exception as e:
self.writeLog( str(e) + "\n" )
raise ComicVineTalkerException(ComicVineTalkerException.Network, "Network Error!")
raise ComicVineTalkerException(ComicVineTalkerException.Unknown, "Error on Comic Vine server")
def searchForSeries( self, series_name , callback=None, refresh_cache=False ):
# remove cruft from the search string
series_name = utils.removearticles( series_name ).lower().strip()
# before we search online, look in our cache, since we might have
# done this same search recently
cvc = ComicVineCacher( )
if not refresh_cache:
cached_search_results = cvc.get_search_results( series_name )
if len (cached_search_results) > 0:
return cached_search_results
original_series_name = series_name
# We need to make the series name into an "AND"ed query list
query_word_list = series_name.split()
and_list = ['AND'] * (len(query_word_list)-1)
and_list.append('')
# zipper up the two lists
query_list = zip(query_word_list, and_list)
# flatten the list
query_list = [ item for sublist in query_list for item in sublist]
# convert back to a string
query_string = " ".join( query_list ).strip()
#print "Query string = ", query_string
query_string = urllib.quote_plus(query_string.encode("utf-8"))
search_url = self.api_base_url + "/search/?api_key=" + self.api_key + "&format=json&resources=volume&query=" + query_string + "&field_list=name,id,start_year,publisher,image,description,count_of_issues"
cv_response = self.getCVContent(search_url + "&page=1")
search_results = list()
# see http://api.comicvine.com/documentation/#handling_responses
limit = cv_response['limit']
current_result_count = cv_response['number_of_page_results']
total_result_count = cv_response['number_of_total_results']
if callback is None:
self.writeLog( "Found {0} of {1} results\n".format( cv_response['number_of_page_results'], cv_response['number_of_total_results']))
search_results.extend( cv_response['results'])
page = 1
if callback is not None:
callback( current_result_count, total_result_count )
# see if we need to keep asking for more pages...
while ( current_result_count < total_result_count ):
if callback is None:
self.writeLog("getting another page of results {0} of {1}...\n".format( current_result_count, total_result_count))
page += 1
cv_response = self.getCVContent(search_url + "&page="+str(page))
search_results.extend( cv_response['results'])
current_result_count += cv_response['number_of_page_results']
if callback is not None:
callback( current_result_count, total_result_count )
#for record in search_results:
# #print( u"{0}: {1} ({2})".format(record['id'], record['name'] , record['start_year'] ) )
# #print record
# #record['count_of_issues'] = record['count_of_isssues']
#print u"{0}: {1} ({2})".format(search_results['results'][0]['id'], search_results['results'][0]['name'] , search_results['results'][0]['start_year'] )
# cache these search results
cvc.add_search_results( original_series_name, search_results )
return search_results
def fetchVolumeData( self, series_id ):
# before we search online, look in our cache, since we might already
# have this info
cvc = ComicVineCacher( )
cached_volume_result = cvc.get_volume_info( series_id )
if cached_volume_result is not None:
return cached_volume_result
volume_url = self.api_base_url + "/volume/" + CVTypeID.Volume + "-" + str(series_id) + "/?api_key=" + self.api_key + "&field_list=name,id,start_year,publisher,count_of_issues&format=json"
cv_response = self.getCVContent(volume_url)
volume_results = cv_response['results']
cvc.add_volume_info( volume_results )
return volume_results
def fetchIssuesByVolume( self, series_id ):
# before we search online, look in our cache, since we might already
# have this info
cvc = ComicVineCacher( )
cached_volume_issues_result = cvc.get_volume_issues_info( series_id )
if cached_volume_issues_result is not None:
return cached_volume_issues_result
#---------------------------------
issues_url = self.api_base_url + "/issues/" + "?api_key=" + self.api_key + "&filter=volume:" + str(series_id) + "&field_list=id,volume,issue_number,name,image,cover_date,site_detail_url,description&format=json"
cv_response = self.getCVContent(issues_url)
#------------------------------------
limit = cv_response['limit']
current_result_count = cv_response['number_of_page_results']
total_result_count = cv_response['number_of_total_results']
#print "ATB total_result_count", total_result_count
#print "ATB Found {0} of {1} results".format( cv_response['number_of_page_results'], cv_response['number_of_total_results'])
volume_issues_result = cv_response['results']
page = 1
offset = 0
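# Comic Vine pages its results (the page size comes back in 'limit'), so keep
# requesting with an increasing offset until every issue in the volume has been fetched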
# see if we need to keep asking for more pages...
while ( current_result_count < total_result_count ):
#print "ATB getting another page of issue results {0} of {1}...".format( current_result_count, total_result_count)
page += 1
offset += cv_response['number_of_page_results']
#print issues_url+ "&offset="+str(offset)
cv_response = self.getCVContent(issues_url + "&offset="+str(offset))
volume_issues_result.extend( cv_response['results'])
current_result_count += cv_response['number_of_page_results']
self.repairUrls( volume_issues_result )
cvc.add_volume_issues_info( series_id, volume_issues_result )
return volume_issues_result
def fetchIssuesByVolumeIssueNumAndYear( self, volume_id_list, issue_number, year ):
volume_filter = "volume:"
for vid in volume_id_list:
volume_filter += str(vid) + "|"
year_filter = ""
if year is not None and str(year).isdigit():
year_filter = ",cover_date:{0}-1-1|{1}-1-1".format(year, int(year)+1)
issue_number = urllib.quote_plus(unicode(issue_number).encode("utf-8"))
filter = "&filter=" + volume_filter + year_filter + ",issue_number:" + issue_number
issues_url = self.api_base_url + "/issues/" + "?api_key=" + self.api_key + filter + "&field_list=id,volume,issue_number,name,image,cover_date,site_detail_url,description&format=json"
cv_response = self.getCVContent(issues_url)
#------------------------------------
limit = cv_response['limit']
current_result_count = cv_response['number_of_page_results']
total_result_count = cv_response['number_of_total_results']
#print "ATB total_result_count", total_result_count
#print "ATB Found {0} of {1} results\n".format( cv_response['number_of_page_results'], cv_response['number_of_total_results'])
filtered_issues_result = cv_response['results']
page = 1
offset = 0
# see if we need to keep asking for more pages...
while ( current_result_count < total_result_count ):
#print "ATB getting another page of issue results {0} of {1}...\n".format( current_result_count, total_result_count)
page += 1
offset += cv_response['number_of_page_results']
#print issues_url+ "&offset="+str(offset)
cv_response = self.getCVContent(issues_url + "&offset="+str(offset))
filtered_issues_result.extend( cv_response['results'])
current_result_count += cv_response['number_of_page_results']
self.repairUrls( filtered_issues_result )
return filtered_issues_result
def fetchIssueData( self, series_id, issue_number, settings ):
volume_results = self.fetchVolumeData( series_id )
issues_list_results = self.fetchIssuesByVolume( series_id )
found = False
for record in issues_list_results:
if IssueString(issue_number).asString() is None:
issue_number = 1
if IssueString(record['issue_number']).asString().lower() == IssueString(issue_number).asString().lower():
found = True
break
if (found):
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(record['id']) + "/?api_key=" + self.api_key + "&format=json"
cv_response = self.getCVContent(issue_url)
issue_results = cv_response['results']
else:
return None
# now, map the comicvine data to generic metadata
return self.mapCVDataToMetadata( volume_results, issue_results, settings )
def fetchIssueDataByIssueID( self, issue_id, settings ):
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id) + "/?api_key=" + self.api_key + "&format=json"
cv_response = self.getCVContent(issue_url)
issue_results = cv_response['results']
volume_results = self.fetchVolumeData( issue_results['volume']['id'] )
# now, map the comicvine data to generic metadata
md = self.mapCVDataToMetadata( volume_results, issue_results, settings )
md.isEmpty = False
return md
def mapCVDataToMetadata(self, volume_results, issue_results, settings ):
# now, map the comicvine data to generic metadata
metadata = GenericMetadata()
metadata.series = issue_results['volume']['name']
num_s = IssueString(issue_results['issue_number']).asString()
metadata.issue = num_s
metadata.title = issue_results['name']
metadata.publisher = volume_results['publisher']['name']
metadata.day, metadata.month, metadata.year = self.parseDateStr( issue_results['cover_date'] )
#metadata.issueCount = volume_results['count_of_issues']
metadata.comments = self.cleanup_html(issue_results['description'], settings.remove_html_tables)
if settings.use_series_start_as_volume:
metadata.volume = volume_results['start_year']
metadata.notes = "Tagged with the {1} fork of ComicTagger {0} using info from Comic Vine on {3}. [Issue ID {4}]".format(
ctversion.version,
ctversion.fork,
ctversion.fork_tag,
datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
issue_results['id'])
#metadata.notes += issue_results['site_detail_url']
metadata.webLink = issue_results['site_detail_url']
person_credits = issue_results['person_credits']
for person in person_credits:
if person.has_key('role'):
roles = person['role'].split(',')
for role in roles:
# can we determine 'primary' from CV??
metadata.addCredit( person['name'], role.title().strip(), False )
character_credits = issue_results['character_credits']
character_list = list()
for character in character_credits:
character_list.append( character['name'] )
metadata.characters = utils.listToString( character_list )
team_credits = issue_results['team_credits']
team_list = list()
for team in team_credits:
team_list.append( team['name'] )
metadata.teams = utils.listToString( team_list )
location_credits = issue_results['location_credits']
location_list = list()
for location in location_credits:
location_list.append( location['name'] )
metadata.locations = utils.listToString( location_list )
story_arc_credits = issue_results['story_arc_credits']
arc_list = []
for arc in story_arc_credits:
arc_list.append(arc['name'])
if len(arc_list) > 0:
metadata.storyArc = utils.listToString(arc_list)
return metadata
def cleanup_html( self, string, remove_html_tables):
"""
converter = html2text.HTML2Text()
#converter.emphasis_mark = '*'
#converter.ignore_links = True
converter.body_width = 0
print html2text.html2text(string)
return string
#return converter.handle(string)
"""
if string is None:
return ""
# find any tables
soup = BeautifulSoup(string)
tables = soup.findAll('table')
# remove all newlines first
string = string.replace("\n", "")
#put in our own
string = string.replace("<br>", "\n")
string = string.replace("</p>", "\n\n")
string = string.replace("<h4>", "*")
string = string.replace("</h4>", "*\n")
#remove the tables
p = re.compile(r'<table[^<]*?>.*?<\/table>')
if remove_html_tables:
string = p.sub('',string)
string = string.replace("*List of covers and their creators:*","")
else:
string = p.sub('{}',string)
# now strip all other tags
p = re.compile(r'<[^<]*?>')
newstring = p.sub('',string)
newstring = newstring.replace('&nbsp;',' ')
newstring = newstring.replace('&amp;','&')
newstring = newstring.strip()
if not remove_html_tables:
# now rebuild the tables into text from BSoup
try:
table_strings = []
for table in tables:
rows = []
hdrs = []
col_widths = []
for hdr in table.findAll('th'):
item = hdr.string.strip()
hdrs.append(item)
col_widths.append(len(item))
rows.append(hdrs)
for row in table.findAll('tr'):
cols = []
col = row.findAll('td')
i = 0
for c in col:
item = c.string.strip()
cols.append(item)
if len(item) > col_widths[i]:
col_widths[i] = len(item)
i += 1
if len(cols) != 0:
rows.append(cols)
# now we have the data, make it into text
fmtstr =""
for w in col_widths:
fmtstr += " {{:{}}}|".format(w+1)
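# fmtstr now looks like " {:6}| {:4}|" for column widths of [5, 3]:
# one space-padded, pipe-terminated field per column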
width = sum(col_widths) + len(col_widths)*2
table_text = ""
counter = 0
for row in rows:
table_text += fmtstr.format(*row) + "\n"
if counter == 0 and len(hdrs)!= 0:
table_text += "-" * width + "\n"
counter += 1
table_strings.append(table_text)
newstring = newstring.format(*table_strings)
except:
# we caught an error rebuilding the table.
# just bail and remove the formatting
print "table parse error"
newstring = newstring.replace("{}", "")
return newstring
def fetchIssueDate( self, issue_id ):
details = self.fetchIssueSelectDetails( issue_id )
day, month, year = self.parseDateStr( details['cover_date'] )
return month, year
def fetchIssueCoverURLs( self, issue_id ):
details = self.fetchIssueSelectDetails( issue_id )
return details['image_url'], details['thumb_image_url']
def fetchIssuePageURL( self, issue_id ):
details = self.fetchIssueSelectDetails( issue_id )
return details['site_detail_url']
def fetchIssueSelectDetails( self, issue_id ):
#cached_image_url,cached_thumb_url,cached_month,cached_year = self.fetchCachedIssueSelectDetails( issue_id )
cached_details = self.fetchCachedIssueSelectDetails( issue_id )
if cached_details['image_url'] is not None:
return cached_details
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id) + "/?api_key=" + self.api_key + "&format=json&field_list=image,cover_date,site_detail_url"
details = dict()
details['image_url'] = None
details['thumb_image_url'] = None
details['cover_date'] = None
details['site_detail_url'] = None
cv_response = self.getCVContent(issue_url)
details['image_url'] = cv_response['results']['image']['super_url']
details['thumb_image_url'] = cv_response['results']['image']['thumb_url']
details['cover_date'] = cv_response['results']['cover_date']
details['site_detail_url'] = cv_response['results']['site_detail_url']
if details['image_url'] is not None:
self.cacheIssueSelectDetails( issue_id,
details['image_url'],
details['thumb_image_url'],
details['cover_date'],
details['site_detail_url'] )
#print details['site_detail_url']
return details
def fetchCachedIssueSelectDetails( self, issue_id ):
# before we search online, look in our cache, since we might already
# have this info
cvc = ComicVineCacher( )
return cvc.get_issue_select_details( issue_id )
def cacheIssueSelectDetails( self, issue_id, image_url, thumb_url, cover_date, page_url ):
cvc = ComicVineCacher( )
cvc.add_issue_select_details( issue_id, image_url, thumb_url, cover_date, page_url )
def fetchAlternateCoverURLs(self, issue_id, issue_page_url):
url_list = self.fetchCachedAlternateCoverURLs( issue_id )
if url_list is not None:
return url_list
# scrape the CV issue page URL to get the alternate cover URLs
resp = urllib2.urlopen( issue_page_url )
content = resp.read()
alt_cover_url_list = self.parseOutAltCoverUrls(content)
# cache this alt cover URL list
self.cacheAlternateCoverURLs( issue_id, alt_cover_url_list )
return alt_cover_url_list
def parseOutAltCoverUrls( self, page_html ):
soup = BeautifulSoup( page_html )
alt_cover_url_list = []
# Using knowledge of the layout of the ComicVine issue page here:
# look for divs whose classes include 'imgboxart' and 'issue-cover';
# the first one found is the primary cover, so skip it and collect the rest as alternates
div_list = soup.find_all( 'div')
covers_found = 0
for d in div_list:
if d.has_key('class'):
c = d['class']
if 'imgboxart' in c and 'issue-cover' in c:
covers_found += 1
if covers_found != 1:
alt_cover_url_list.append( d.img['src'] )
return alt_cover_url_list
def fetchCachedAlternateCoverURLs( self, issue_id ):
# before we search online, look in our cache, since we might already
# have this info
cvc = ComicVineCacher( )
url_list = cvc.get_alt_covers( issue_id )
if url_list is not None:
return url_list
else:
return None
def cacheAlternateCoverURLs( self, issue_id, url_list ):
cvc = ComicVineCacher( )
cvc.add_alt_covers( issue_id, url_list )
#---------------------------------------------------------------------------
urlFetchComplete = pyqtSignal( str , str, int)
def asyncFetchIssueCoverURLs( self, issue_id ):
self.issue_id = issue_id
details = self.fetchCachedIssueSelectDetails( issue_id )
if details['image_url'] is not None:
self.urlFetchComplete.emit( details['image_url'],details['thumb_image_url'], self.issue_id )
return
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id) + "/?api_key=" + self.api_key + "&format=json&field_list=image,cover_date,site_detail_url"
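# keep the QNetworkAccessManager as an attribute, presumably so it isn't
# garbage-collected before the asynchronous 'finished' signal fires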
self.nam = QNetworkAccessManager()
self.nam.finished.connect( self.asyncFetchIssueCoverURLComplete )
self.nam.get(QNetworkRequest(QUrl(issue_url)))
def asyncFetchIssueCoverURLComplete( self, reply ):
# read in the response
data = reply.readAll()
try:
cv_response = json.loads(str(data))
except:
print >> sys.stderr, "Comic Vine query failed to get JSON data"
print >> sys.stderr, str(data)
return
if cv_response[ 'status_code' ] != 1:
print >> sys.stderr, "Comic Vine query failed with error: [{0}]. ".format( cv_response[ 'error' ] )
return
image_url = cv_response['results']['image']['super_url']
thumb_url = cv_response['results']['image']['thumb_url']
cover_date = cv_response['results']['cover_date']
page_url = cv_response['results']['site_detail_url']
self.cacheIssueSelectDetails( self.issue_id, image_url, thumb_url, cover_date, page_url )
self.urlFetchComplete.emit( image_url, thumb_url, self.issue_id )
altUrlListFetchComplete = pyqtSignal( list, int)
def asyncFetchAlternateCoverURLs( self, issue_id, issue_page_url ):
# This async version requires the issue page url to be provided!
self.issue_id = issue_id
url_list = self.fetchCachedAlternateCoverURLs( issue_id )
if url_list is not None:
self.altUrlListFetchComplete.emit( url_list, int(self.issue_id) )
return
self.nam = QNetworkAccessManager()
self.nam.finished.connect( self.asyncFetchAlternateCoverURLsComplete )
self.nam.get(QNetworkRequest(QUrl(str(issue_page_url))))
def asyncFetchAlternateCoverURLsComplete( self, reply ):
# read in the response
html = str(reply.readAll())
alt_cover_url_list = self.parseOutAltCoverUrls( html )
# cache this alt cover URL list
self.cacheAlternateCoverURLs( self.issue_id, alt_cover_url_list )
self.altUrlListFetchComplete.emit( alt_cover_url_list, int(self.issue_id) )
def repairUrls(self, issue_list):
#make sure there are URLs for the image fields
for issue in issue_list:
if issue['image'] is None:
issue['image'] = dict()
issue['image']['super_url'] = ComicVineTalker.logo_url
issue['image']['thumb_url'] = ComicVineTalker.logo_url