2014-11-21 17:16:28 +00:00
|
|
|
|
|
|
|
from bs4 import BeautifulSoup, UnicodeDammit
|
|
|
|
import urllib2
|
|
|
|
import csv
|
|
|
|
import fileinput
|
|
|
|
import sys
|
|
|
|
import re
|
|
|
|
import os
|
|
|
|
import sqlite3
|
|
|
|
import datetime
|
|
|
|
import unicodedata
|
|
|
|
from decimal import Decimal
|
|
|
|
from HTMLParser import HTMLParseError
|
|
|
|
from time import strptime
|
FIX: One-off Failed Download handling will now work, IMP: Some better error handling when detecting one-off's during post-processing, FIX:(#1143) When series contained a digit preceded by a dash, would incorrectly assume it as a negative issue number, FIX: Improved being able to detect corresponding annuals on CV when refreshing/importing a series when the annuals are new (ie. no data on CV), FIX: Alt_Pull method for weekly pull list retrieval working again, FIX: Fixed nzbid detection for experimental search, IMP: Mass Import button now available on Import Results screen (will attempt to import all series that are in a 'Not Imported' status), IMP: When searching for arc issues using the 'Search for Missing' option, the call is now threaded so it runs in the background, IMP: Changed IssueYear to IssueDate for Story Arc Issues on the arc details page, FIX:(#1156) Typo that caused error when attempting to view cbz comics in the series detail page, FIX:(#1145) Select All option via top checkbox (on series detail page), FIX: Auto-Want feature via weeklypull will better match to titles that contain 'the' and have hyphens in differing character positions, FIX:(#1160) Would incorrectly take the length of a decimal placed issue when searching and never complete the issue number cycling search, FIX:(#1161) When annuals not enabled, and series in watchlist and series annual in pull list, would error out trying to link series to pull list.
2015-11-18 06:32:40 +00:00
|
|
|
import lib.requests as requests
|
2014-11-21 17:16:28 +00:00
|
|
|
|
|
|
|
import mylar
|
|
|
|
from mylar import logger
|
|
|
|
|
|
|
|
def newpull():
    """Scrape the PreviewsWorld new-releases page into the cached pull-list file.

    Fetches the hard-coded PreviewsWorld URL, reads the release-date headline,
    then walks every table row collecting catalog entries under the most
    recently seen publisher heading.  The result is written to
    ``newreleases.txt`` inside ``mylar.CACHE_DIR``.

    File layout: the first line is the headline text; each publisher name then
    appears on its own line, followed by one ``ID<TAB>name<TAB>price`` line per
    issue.  Output is ASCII; non-ASCII characters are replaced with ``?``.

    Returns None.  If the page cannot be fetched, or it carries no headline
    div, a warning is logged and any existing cache file is left untouched.
    """
    pagelinks = "http://www.previewsworld.com/Home/1/1/71/952"

    try:
        r = requests.get(pagelinks, verify=False)
    except Exception as e:
        #nothing can be parsed without a response, so stop instead of falling through.
        logger.warn('Error fetching data: %s' % e)
        return

    soup = BeautifulSoup(r.content)

    headlines = soup.findAll("div", {"class": "Headline"})
    if not headlines:
        logger.warn('Unable to locate the release date headline on %s' % pagelinks)
        return
    getthedate = headlines[0]

    #the date will be in the FIRST ahref; fall back to the headline's own text.
    date_links = getthedate('a')
    date_node = date_links[0] if date_links else getthedate
    newdates = date_node.findNext(text=True).strip()
    logger.fdebug('New Releases date detected as : ' + re.sub('New Releases For', '', newdates).strip())

    publishers = {'PREVIEWS PUBLICATIONS', 'DARK HORSE COMICS', 'DC COMICS', 'IDW PUBLISHING', 'IMAGE COMICS', 'MARVEL COMICS', 'COMICS & GRAPHIC NOVELS'}
    #carried across rows: a publisher heading row applies to every row after it.
    isspublisher = None
    pull_list = []

    for headt in soup.findAll('tr'):
        found_iss = headt.findAll('td')
        if not found_iss:
            #a row without data cells holds neither a publisher heading nor an issue.
            continue

        pubcheck = found_iss[0].text.strip()
        for pub in publishers:
            #a heading is a first cell that names the publisher and has no link in it.
            if pub in pubcheck and not found_iss[0].findAll('a', href=True):
                isspublisher = pub
                break

        if isspublisher not in (None, 'PREVIEWS PUBLICATIONS') and '/Catalog/' in str(headt):
            row_links = headt.findAll('a', href=True)
            if not row_links:
                continue
            findurl_link = row_links[0]
            issue_link = findurl_link['href']
            #only catalog links are issues; the Home/1/1/71 links are menu options.
            if '/Catalog/' not in issue_link or "Home/1/1/71" in issue_link:
                continue

            #name and price are read from the second and third cells.
            if len(found_iss) > 2:
                pull_list.append({"iss_url": issue_link,
                                  "name": found_iss[1].findNext(text=True),
                                  "price": found_iss[2],
                                  "publisher": isspublisher,
                                  "ID": findurl_link.findNext(text=True)})

        #NOTE(review): membership on a bs4 Tag appears to test its direct children
        #rather than its text, so these may rarely match - confirm the intent.
        if "PREVIEWS" in headt or "MAGAZINES" in headt:
            break

    logger.fdebug('Saving new pull-list information into local file for subsequent merge')
    except_file = os.path.join(mylar.CACHE_DIR, 'newreleases.txt')
    if os.path.isfile(except_file):
        logger.fdebug('file exists - removing.')
        os.remove(except_file)
    else:
        logger.fdebug('file does not exist - continuing.')

    oldpub = None
    breakhtml = ("<td>", "<tr>", "</td>", "</tr>")
    with open(except_file, 'wb') as f:
        f.write(newdates.encode('ascii', 'replace') + b'\n')
        for pl in pull_list:
            #build the line as text first so non-ASCII titles/prices cannot raise
            #during implicit str<->unicode conversion; \xA0 becomes a plain space.
            exceptln = u"%s\t%s\t%s" % (pl['ID'], pl['name'].replace(u"\xA0", u" "), pl['price'])
            if pl['publisher'] != oldpub:
                #first issue under a new publisher: emit the publisher line above it.
                exceptln = u"%s\n%s" % (pl['publisher'], exceptln)

            #the price is a whole <td> element, so drop the table markup around it.
            for lb in breakhtml:
                exceptln = exceptln.replace(lb, u'').strip()

            f.write(exceptln.encode('ascii', 'replace') + b'\n')
            oldpub = pl['publisher']
|
|
|
|
|
|
|
|
|
|
|
|
#run the scrape directly when this module is executed as a script.
if __name__ == '__main__':
    newpull()
|