mirror of https://github.com/evilhero/mylar
FIX: identical issue #'s with differing dates would appear as only one date
This commit is contained in:
parent
5d23ccaff4
commit
cf4e605876
135
mylar/parseit.py
135
mylar/parseit.py
|
@ -338,18 +338,92 @@ def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariati
|
|||
# else:
|
||||
# ParseIssue = ParseIssue + isschk_decval
|
||||
|
||||
datematch="false"
|
||||
|
||||
if not any(d.get('GCDIssue', None) == str(ParseIssue) for d in gcdchoice):
|
||||
logger.fdebug("preparing to add issue to db : " + str(ParseIssue))
|
||||
else:
|
||||
logger.fdebug("2 identical issue #'s have been found...determining if it's intentional")
|
||||
#get current issue & publication date.
|
||||
logger.fdebug("Issue #:" + str(ParseIssue))
|
||||
logger.fdebug("IssueDate: " + str(gcdinfo['ComicDate']))
|
||||
#get conflicting issue from tuple
|
||||
for d in gcdchoice:
|
||||
if str(d['GCDIssue']) == str(ParseIssue):
|
||||
logger.fdebug("Issue # already in tuple - checking IssueDate:" + str(d['GCDDate']) )
|
||||
if str(d['GCDDate']) == str(gcdinfo['ComicDate']):
|
||||
logger.fdebug("Issue #'s and dates match...skipping.")
|
||||
datematch="true"
|
||||
else:
|
||||
logger.fdebug("Issue#'s match but different publication dates, not skipping.")
|
||||
datematch="false"
|
||||
|
||||
if datematch == "false":
|
||||
gcdinfo['ComicIssue'] = ParseIssue
|
||||
#--- let's use pubdate.
|
||||
#try publication date first
|
||||
ParseDate = GettheDate(parsed,PrevYRMO)
|
||||
|
||||
ParseDate = ParseDate.replace(' ','')
|
||||
PrevYRMO = ParseDate
|
||||
gcdinfo['ComicDate'] = ParseDate
|
||||
#^^ will retrieve date #
|
||||
#logger.fdebug("adding: " + str(gcdinfo['ComicIssue']))
|
||||
if ComicID[:1] == "G":
|
||||
gcdchoice.append({
|
||||
'GCDid': ComicID,
|
||||
'IssueID': resultID,
|
||||
'GCDIssue': gcdinfo['ComicIssue'],
|
||||
'GCDDate': gcdinfo['ComicDate']
|
||||
})
|
||||
gcount+=1
|
||||
else:
|
||||
gcdchoice.append({
|
||||
'GCDid': ComicID,
|
||||
'GCDIssue': gcdinfo['ComicIssue'],
|
||||
'GCDDate': gcdinfo['ComicDate']
|
||||
})
|
||||
|
||||
gcdinfo['gcdchoice'] = gcdchoice
|
||||
|
||||
# else:
|
||||
# logger.fdebug("2 identical issue #'s have been found...determining if it's intentional")
|
||||
# #get current issue & publication date.
|
||||
# logger.fdebug("Issue #:" + str(ParseIssue))
|
||||
# logger.fdebug("IssueDate: " + str(gcdinfo['ComicDate']))
|
||||
# #get conflicting issue from tuple
|
||||
# for d in gcdchoice:
|
||||
# if str(d['GCDIssue']) == str(gcdinfo['ComicIssue']):
|
||||
# logger.fdebug("Issue # already in tuple - checking IssueDate:" + str(d['GCDDate']) )
|
||||
# if str(d['GCDDate']) == str(gcdinfo['ComicDate']):
|
||||
# logger.fdebug("Issue #'s and dates match...skipping.")
|
||||
# else:
|
||||
# logger.fdebug("Issue#'s match but different publication dates, not skipping.")
|
||||
|
||||
altcount = 0
|
||||
n+=1
|
||||
i+=1
|
||||
gcdinfo['gcdvariation'] = issvariation
|
||||
if ComicID[:1] == "G":
|
||||
gcdinfo['totalissues'] = gcount
|
||||
else:
|
||||
gcdinfo['totalissues'] = TotalIssues
|
||||
gcdinfo['ComicImage'] = gcdcover
|
||||
gcdinfo['resultPublished'] = resultPublished
|
||||
return gcdinfo
|
||||
## -- end (GCD) -- ##
|
||||
|
||||
def GettheDate(parsed,PrevYRMO):
|
||||
#--- let's use pubdate.
|
||||
#try publication date first
|
||||
logger.fdebug("parsed:" + str(parsed))
|
||||
subtxt1 = parsed('td')[1]
|
||||
ParseDate = subtxt1.findNext(text=True)
|
||||
basmonths = {'january':'01','february':'02','march':'03','april':'04','may':'05','june':'06','july':'07','august':'08','september':'09','october':'10','november':'11','december':'12'}
|
||||
pdlen = len(ParseDate)
|
||||
pdfind = ParseDate.find(' ',2)
|
||||
#logger.fdebug("length: " + str(pdlen) + "....first space @ pos " + str(pdfind))
|
||||
#logger.fdebug("this should be the year: " + str(ParseDate[pdfind+1:pdlen-1]))
|
||||
logger.fdebug("length: " + str(pdlen) + "....first space @ pos " + str(pdfind))
|
||||
logger.fdebug("this should be the year: " + str(ParseDate[pdfind+1:pdlen-1]))
|
||||
if ParseDate[pdfind+1:pdlen-1].isdigit():
|
||||
#assume valid date.
|
||||
#search for number as text, and change to numeric
|
||||
|
@ -358,7 +432,7 @@ def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariati
|
|||
pconv = basmonths[numbs]
|
||||
ParseYear = re.sub('/s','',ParseDate[-5:])
|
||||
ParseDate = str(ParseYear) + "-" + str(pconv)
|
||||
#logger.fdebug("!success - Publication date: " + str(ParseDate))
|
||||
logger.fdebug("!success - Publication date: " + str(ParseDate))
|
||||
break
|
||||
else:
|
||||
# #try key date
|
||||
|
@ -385,59 +459,8 @@ def GCDdetails(comseries, resultURL, vari_loop, ComicID, TotalIssues, issvariati
|
|||
if int(PrevMO) < 10:
|
||||
PrevMO = "0" + str(PrevMO)
|
||||
ParseDate = str(PrevYR) + "-" + str(PrevMO)
|
||||
ParseDate = ParseDate.replace(' ','')
|
||||
PrevYRMO = ParseDate
|
||||
gcdinfo['ComicDate'] = ParseDate
|
||||
#^^ will retrieve date #
|
||||
#logger.fdebug("adding: " + str(gcdinfo['ComicIssue']))
|
||||
if ComicID[:1] == "G":
|
||||
gcdchoice.append({
|
||||
'GCDid': ComicID,
|
||||
'IssueID': resultID,
|
||||
'GCDIssue': gcdinfo['ComicIssue'],
|
||||
'GCDDate': gcdinfo['ComicDate']
|
||||
})
|
||||
gcount+=1
|
||||
else:
|
||||
gcdchoice.append({
|
||||
'GCDid': ComicID,
|
||||
'GCDIssue': gcdinfo['ComicIssue'],
|
||||
'GCDDate': gcdinfo['ComicDate']
|
||||
})
|
||||
|
||||
gcdinfo['gcdchoice'] = gcdchoice
|
||||
|
||||
altcount = 0
|
||||
n+=1
|
||||
# ---redundant---
|
||||
# else:
|
||||
# #--if 2 identical issue numbers legitimately exist, but have different
|
||||
# #--publication dates, try to distinguish
|
||||
# logger.fdebug("2 identical issue #'s have been found...determining if it's intentional.")
|
||||
# #get current issue & publication date.
|
||||
# logger.fdebug("Issue #:" + str(ParseIssue))
|
||||
# logger.fdebug("IssueDate: " + str(gcdinfo['ComicDate']))
|
||||
# #get conflicting issue from tuple
|
||||
# for d in gcdchoice:
|
||||
# if str(d['GCDIssue']) == str(gcdinfo['ComicIssue']):
|
||||
# logger.fdebug("Issue # already in tuple - checking IssueDate:" + str(d['GCDDate']) )
|
||||
# if str(d['GCDDate']) == str(gcdinfo['ComicDate']):
|
||||
# logger.fdebug("Issue #'s and dates match...skipping.")
|
||||
# else:
|
||||
# logger.fdebug("Issue#'s match but different publication dates, not skipping.")
|
||||
#pass
|
||||
#logger.fdebug("Duplicate issue detected in DB - ignoring subsequent issue # " + str(gcdinfo['ComicIssue']))
|
||||
|
||||
i+=1
|
||||
gcdinfo['gcdvariation'] = issvariation
|
||||
if ComicID[:1] == "G":
|
||||
gcdinfo['totalissues'] = gcount
|
||||
else:
|
||||
gcdinfo['totalissues'] = TotalIssues
|
||||
gcdinfo['ComicImage'] = gcdcover
|
||||
gcdinfo['resultPublished'] = resultPublished
|
||||
return gcdinfo
|
||||
## -- end (GCD) -- ##
|
||||
logger.fdebug("parseDAte:" + str(ParseDate))
|
||||
return ParseDate
|
||||
|
||||
def GCDAdd(gcdcomicid):
|
||||
serieschoice = []
|
||||
|
|
Loading…
Reference in New Issue