1
0
Fork 0
mirror of https://github.com/evilhero/mylar synced 2024-12-26 09:36:53 +00:00

Attempted fix for ':' and '&' in comic titles when adding

This commit is contained in:
evilhero 2012-09-13 11:43:21 -04:00
commit b290f770af

View file

@ -106,7 +106,7 @@ def GCDScraper(ComicName, ComicYear, Total, ComicID):
cnt = int(cnt1 + cnt2) cnt = int(cnt1 + cnt2)
print (str(cnt) + " results") #print (str(cnt) + " results")
global resultPublished global resultPublished
@ -127,10 +127,14 @@ def GCDScraper(ComicName, ComicYear, Total, ComicID):
resultp = soup.findAll("tr", {"class" : "listing_odd"})[n_odd] resultp = soup.findAll("tr", {"class" : "listing_odd"})[n_odd]
rtp = resultp('a')[1] rtp = resultp('a')[1]
resultName.append(helpers.cleanName(rtp.findNext(text=True))) resultName.append(helpers.cleanName(rtp.findNext(text=True)))
print ( "Comic Name: " + str(resultName[n]) ) #print ( "Comic Name: " + str(resultName[n]) )
fip = resultp('a',href=True)[1] fip = resultp('a',href=True)[1]
resultID.append(fip['href']) resultID.append(fip['href'])
print ( "ID: " + str(resultID[n]) ) #print ( "ID: " + str(resultID[n]) )
#print ( "Comic Name: " + str(resultName[n]) )
fip = resultp('a',href=True)[1]
resultID.append(fip['href'])
#print ( "ID: " + str(resultID[n]) )
subtxt3 = resultp('td')[3] subtxt3 = resultp('td')[3]
resultYear.append(subtxt3.findNext(text=True)) resultYear.append(subtxt3.findNext(text=True))
@ -141,17 +145,18 @@ def GCDScraper(ComicName, ComicYear, Total, ComicID):
resiss = int(resiss) resiss = int(resiss)
resultIssues[n] = resultIssues[n].replace('','')[:resiss] resultIssues[n] = resultIssues[n].replace('','')[:resiss]
resultIssues[n] = resultIssues[n].replace(' ','') resultIssues[n] = resultIssues[n].replace(' ','')
print ( "Year: " + str(resultYear[n]) ) #print ( "Year: " + str(resultYear[n]) )
print ( "Issues: " + str(resultIssues[n]) ) #print ( "Issues: " + str(resultIssues[n]) )
if resultName[n].lower() == str(ComicName).lower(): if resultName[n].lower() == str(ComicName).lower():
print ("n:" + str(n) + "...matched by name to Mylar!") #print ("n:" + str(n) + "...matched by name to Mylar!")
#this has been seen in a few instances already, so trying to adjust. #this has been seen in a few instances already, so trying to adjust.
#when the series year is 2011, in gcd it might be 2012 due to publication #when the series year is 2011, in gcd it might be 2012 due to publication
#dates overlapping between Dec/11 and Jan/12. Let's accept a match with a #dates overlapping between Dec/11 and Jan/12. Let's accept a match with a
#1 year grace space, and then pull in the first issue to see the actual pub #1 year grace space, and then pull in the first issue to see the actual pub
# date and if coincides with the other date..match it. # date and if coincides with the other date..match it.
if resultYear[n] == ComicYear or resultYear[n] == str(int(ComicYear)+1): if resultYear[n] == ComicYear or resultYear[n] == str(int(ComicYear)+1):
print ("n:" + str(n) + "...matched by year to Mylar!") #print ("n:" + str(n) + "...matched by year to Mylar!")
#print ( "Year: " + str(resultYear[n]) )
#Occasionally there are discrepancies in comic count between #Occasionally there are discrepancies in comic count between
#GCD and CV. 99% it's CV not updating to the newest issue as fast #GCD and CV. 99% it's CV not updating to the newest issue as fast
#as GCD does. Therefore, let's increase the CV count by 1 to get it #as GCD does. Therefore, let's increase the CV count by 1 to get it
@ -162,8 +167,8 @@ def GCDScraper(ComicName, ComicYear, Total, ComicID):
issvariation = "yes" issvariation = "yes"
else: else:
issvariation = "no" issvariation = "no"
print ("n:" + str(n) + "...matched by issues to Mylar! - GCDVariation: " + str(issvariation)) #print ("n:" + str(n) + "...matched by issues to Mylar!")
print ("complete match!...proceeding") #print ("complete match!...proceeding")
resultURL = str(resultID[n]) resultURL = str(resultID[n])
rptxt = resultp('td')[6] rptxt = resultp('td')[6]
resultPublished = rptxt.findNext(text=True) resultPublished = rptxt.findNext(text=True)
@ -176,16 +181,13 @@ def GCDScraper(ComicName, ComicYear, Total, ComicID):
# has the wrong title and won't match 100%... # has the wrong title and won't match 100%...
# (ie. The Flash-2011 on comicvine is Flash-2011 on gcd) # (ie. The Flash-2011 on comicvine is Flash-2011 on gcd)
if resultURL is None: if resultURL is None:
#print ("comicnm:" + str(ComicName))
if ComicName.startswith('The '): if ComicName.startswith('The '):
#print ("No match found - detected The in title...performing deeper analysis")
ComicName = ComicName[4:] ComicName = ComicName[4:]
return GCDScraper(ComicName, ComicYear, Total, ComicID) return GCDScraper(ComicName, ComicYear, Total, ComicID)
if 'and' in ComicName.lower(): if 'and' in ComicName.lower():
ComicName = ComicName.replace('and', '&') ComicName = ComicName.replace('and', '&')
return GCDScraper(ComicName, ComicYear, Total, ComicID) return GCDScraper(ComicName, ComicYear, Total, ComicID)
else: else:
#print ("no match found...cannot proceed.")
return 'No Match' return 'No Match'
gcdinfo = {} gcdinfo = {}
gcdchoice = [] gcdchoice = []