mirror of https://github.com/evilhero/mylar
124 lines
4.7 KiB
Python
Executable File
124 lines
4.7 KiB
Python
Executable File
|
|
from bs4 import BeautifulSoup, UnicodeDammit
|
|
import urllib2
|
|
import csv
|
|
import fileinput
|
|
import sys
|
|
import re
|
|
import os
|
|
import sqlite3
|
|
import datetime
|
|
import unicodedata
|
|
from decimal import Decimal
|
|
from HTMLParser import HTMLParseError
|
|
from time import strptime
|
|
|
|
import mylar
|
|
from mylar import logger
|
|
|
|
def newpull():
    """Scrape the PreviewsWorld new-releases page into a local cache file.

    Downloads http://www.previewsworld.com/Home/1/1/71/952, walks every
    ``<tr>`` on the page and keeps the rows whose first link carries a
    ``?stockItemID=`` query preceded by one of the known three-digit
    publisher ids.  The result is written to ``newreleases.txt`` inside
    ``mylar.CACHE_DIR``:

    * line 1: the headline text of the page (the release date banner);
    * then, for each kept row, ``ID<TAB>name<TAB>price``, preceded by a
      line holding the publisher name whenever the publisher differs from
      the previous row's.

    Returns:
        None.  Also returns early, without touching the cache file, when a
        matched row links to ``Home/1/1/71/920``.

    Raises:
        IndexError: if the page has no ``<div class="Headline">``.
        Errors from ``urllib2.urlopen`` and from file I/O are not caught
        here and propagate to the caller.
    """
    pagelinks = "http://www.previewsworld.com/Home/1/1/71/952"

    pageresponse = urllib2.urlopen(pagelinks)
    try:
        soup = BeautifulSoup(pageresponse)
    finally:
        # The original never released the HTTP connection.
        pageresponse.close()

    getthedate = soup.findAll("div", {"class": "Headline"})[0]
    # The date will be in the FIRST ahref; fall back to the div's own text
    # when the headline contains no link at all.
    try:
        getdate_link = getthedate('a')[0]
        newdates = getdate_link.findNext(text=True).strip()
    except IndexError:
        newdates = getthedate.findNext(text=True).strip()
    logger.fdebug('New Releases date detected as : ' + re.sub('New Releases For', '', newdates).strip())

    # Publisher id (the three characters in front of '?stockItemID=' in the
    # item link) -> section name written to the cache file.
    publishers = {'914': 'DARK HORSE COMICS',
                  '915': 'DC COMICS',
                  '916': 'IDW PUBLISHING',
                  '917': 'IMAGE COMICS',
                  '918': 'MARVEL COMICS',
                  '952': 'COMICS & GRAPHIC NOVELS'}

    pull_list = []

    # Iterating with ``for`` (rather than a manually advanced index) fixes an
    # endless loop: the original ``continue`` for a link without
    # '?stockItemID=' skipped the index increment and re-read the same row.
    for headt in soup.findAll('tr'):
        row_html = str(headt)
        if 'STK669382' in row_html:
            # This stock code is explicitly skipped.
            continue
        if '?stockItemID=' not in row_html:
            continue

        anchors = headt.findAll('a', href=True)
        if not anchors:
            # Marker text present but no usable link - nothing to record.
            continue
        findurl_link = anchors[0]
        urlID = findurl_link.findNext(text=True)
        issue_link = findurl_link['href']
        issue_lk = issue_link.find('?stockItemID=')
        if issue_lk == -1:
            continue

        publisher_id = issue_link[issue_lk - 3:issue_lk]
        isspublisher = publishers.get(publisher_id)
        if isspublisher is None:
            # Not one of the publishers we track.
            continue

        if "Home/1/1/71/920" in issue_link:
            # Menu option rather than a release row.
            # NOTE(review): this abandons the whole run without writing the
            # cache file, exactly as before - confirm that is intended.
            return

        # NOTE(review): ``in`` on a BeautifulSoup Tag tests its direct
        # children, not the rendered text, so these two checks only fire
        # when a child node equals the word exactly.  Kept as-is to avoid
        # changing which rows are recorded - confirm intent.
        if "PREVIEWS" in headt:
            continue
        if "MAGAZINES" in headt:
            # Start of the magazines section: stop scanning.
            break

        found_iss = headt.findAll('td')
        # Three cells are indexed below; the original only checked for
        # "at least one" and raised IndexError on shorter rows.
        if len(found_iss) >= 3:
            pull_list.append({"iss_url": found_iss[0],
                              "name": found_iss[1].findNext(text=True),
                              "price": found_iss[2],
                              "publisher": isspublisher,
                              "ID": urlID})

    logger.fdebug('Saving new pull-list information into local file for subsequent merge')
    except_file = os.path.join(mylar.CACHE_DIR, 'newreleases.txt')
    # Probe for a previous cache file by opening it, and drop it if present.
    try:
        csvfile = open(str(except_file), 'rb')
        csvfile.close()
    except (OSError, IOError):
        logger.fdebug('file does not exist - continuing.')
    else:
        logger.fdebug('file exists - removing.')
        os.remove(except_file)

    oldpub = None
    # Table markup left over from stringifying the price cell.
    breakhtml = ("<td>", "<tr>", "</td>", "</tr>")
    with open(str(except_file), 'wb') as f:
        f.write('%s\n' % (newdates))
        for pl in pull_list:
            exceptln = "\t".join([_newpull_bytes(pl['ID']),
                                  _newpull_bytes(pl['name']),
                                  _newpull_bytes(pl['price'])])
            if pl['publisher'] != oldpub:
                # First row of a new publisher section: emit its header.
                exceptln = pl['publisher'] + "\n" + exceptln

            for lb in breakhtml:
                exceptln = re.sub(lb, '', exceptln).strip()

            # Normalise to plain ASCII; unmappable characters become '?'.
            exceptline = exceptln.decode('utf-8', 'ignore')
            f.write('%s\n' % (exceptline.encode('ascii', 'replace').strip()))
            oldpub = pl['publisher']


def _newpull_bytes(value):
    """Return ``value`` as a byte string, tolerating non-ASCII text.

    Plain ``str()`` on unicode text holding non-ASCII characters raises
    UnicodeEncodeError under Python 2; in that case the text is encoded as
    UTF-8 instead, which is what the caller decodes afterwards.
    """
    try:
        return str(value)
    except UnicodeEncodeError:
        return value.encode('utf-8')
|
|
|
|
|
|
# Running this module directly performs one scrape of the new-releases page.
if __name__ == "__main__":
    newpull()
|