2019-01-16 19:32:37 +00:00
# -*- coding: utf-8 -*-
# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Mylar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mylar. If not, see <http://www.gnu.org/licenses/>.
from StringIO import StringIO
import urllib
from threading import Thread
import os
import sys
import re
import gzip
import time
import datetime
import json
from bs4 import BeautifulSoup
import requests
import cfscrape
FIX:(#2179) Post-processing item would fail if match would occur on story-arc check, FIX: Fixed some sub-directory problems when doing various types of scans, IMP: Added booktype to filechecker parsing results, FIX: When downloading via DDL, would not adhere to the booktype as a restraint, IMP: Pack support added for DDL (available as a per series option), IMP: Added BookType & Aliases to the arc's section which will impact how issues/series are searched/post-processed/cheked when they're an issue from an arc, IMP: Initial codebase for the a queue manager section, IMP: Write DDL-Queue data to the sql table so that stalled/broken downloads can be resumed/deleted etc eventually, FIX: If a filename didn't have a valid issue number and it is a Print Edition, will now throw a warning indicating other options to try instead of causing a traceback, IMP: Updated snatch notifications so the notification header will just say 'Issue Snatched' with a brief description, FIX: Removed multiple import db lines from the helpers module, IMP: cleanup_cache variable (true/false) added to config section which will initiate a cleanup of items in the cache directory on startup which will remove items that are no longer needed, IMP: Changed some logging string concatenation lines to try and avoid traceback errors due to logging
2019-02-01 21:25:24 +00:00
import zipfile
2019-01-17 18:22:36 +00:00
import logger
2019-01-16 19:32:37 +00:00
import mylar
FIX:(#2179) Post-processing item would fail if match would occur on story-arc check, FIX: Fixed some sub-directory problems when doing various types of scans, IMP: Added booktype to filechecker parsing results, FIX: When downloading via DDL, would not adhere to the booktype as a restraint, IMP: Pack support added for DDL (available as a per series option), IMP: Added BookType & Aliases to the arc's section which will impact how issues/series are searched/post-processed/cheked when they're an issue from an arc, IMP: Initial codebase for the a queue manager section, IMP: Write DDL-Queue data to the sql table so that stalled/broken downloads can be resumed/deleted etc eventually, FIX: If a filename didn't have a valid issue number and it is a Print Edition, will now throw a warning indicating other options to try instead of causing a traceback, IMP: Updated snatch notifications so the notification header will just say 'Issue Snatched' with a brief description, FIX: Removed multiple import db lines from the helpers module, IMP: cleanup_cache variable (true/false) added to config section which will initiate a cleanup of items in the cache directory on startup which will remove items that are no longer needed, IMP: Changed some logging string concatenation lines to try and avoid traceback errors due to logging
2019-02-01 21:25:24 +00:00
from mylar import db
2019-01-16 19:32:37 +00:00
class GC ( object ) :
2019-03-27 13:20:28 +00:00
def __init__(self, query=None, issueid=None, comicid=None, oneoff=False):
    """Set up a GetComics (DDL) session for a single search/snatch.

    query   -- search string sent to the site's search endpoint
    issueid -- mylar issue id the snatch belongs to (None for searches)
    comicid -- mylar comic id the snatch belongs to (None for searches)
    oneoff  -- True when this is a pullist one-off (no series in the db)
    """
    # identifiers carried through to the download queue later on
    self.query = query
    self.issueid = issueid
    self.comicid = comicid
    self.oneoff = oneoff
    # scratch list used by callers to pass back results
    self.valreturn = []
    # base site url and the cache location for the raw search page
    self.url = 'https://getcomics.info'
    self.local_filename = os.path.join(mylar.CONFIG.CACHE_DIR, "getcomics.html")
    # browser-like headers; the site sits behind cloudflare
    self.headers = {'Accept-encoding': 'gzip', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1', 'Referer': 'https://getcomics.info/'}
def search(self):
    """Run the site search for self.query, cache the raw HTML page,
    and return the parsed result dict from search_results()."""
    with cfscrape.create_scraper() as scraper:
        # solve the cloudflare challenge first to obtain valid cookies
        cf_cookievalue, cf_user_agent = scraper.get_tokens(self.url, headers=self.headers)
        response = scraper.get(self.url + '/', params={'s': self.query}, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True, timeout=30)

        # stream the page to the cache file in 1K chunks
        with open(self.local_filename, 'wb') as cachefile:
            for chunk in response.iter_content(chunk_size=1024):
                # skip keep-alive chunks, which arrive empty
                if chunk:
                    cachefile.write(chunk)
                    cachefile.flush()

    return self.search_results()
FIX:(#2179) Post-processing item would fail if match would occur on story-arc check, FIX: Fixed some sub-directory problems when doing various types of scans, IMP: Added booktype to filechecker parsing results, FIX: When downloading via DDL, would not adhere to the booktype as a restraint, IMP: Pack support added for DDL (available as a per series option), IMP: Added BookType & Aliases to the arc's section which will impact how issues/series are searched/post-processed/cheked when they're an issue from an arc, IMP: Initial codebase for the a queue manager section, IMP: Write DDL-Queue data to the sql table so that stalled/broken downloads can be resumed/deleted etc eventually, FIX: If a filename didn't have a valid issue number and it is a Print Edition, will now throw a warning indicating other options to try instead of causing a traceback, IMP: Updated snatch notifications so the notification header will just say 'Issue Snatched' with a brief description, FIX: Removed multiple import db lines from the helpers module, IMP: cleanup_cache variable (true/false) added to config section which will initiate a cleanup of items in the cache directory on startup which will remove items that are no longer needed, IMP: Changed some logging string concatenation lines to try and avoid traceback errors due to logging
2019-02-01 21:25:24 +00:00
def loadsite(self, id, link):
    """Fetch a single result page (`link`) and cache it to
    <cache_dir>/getcomics-<id>.html for later parsing.

    NOTE(review): `id` shadows the builtin but is kept for interface
    compatibility with existing callers.
    """
    cachebase = os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + id)
    with cfscrape.create_scraper() as scraper:
        # cookies are stored on self so follow-up requests can reuse them
        self.cf_cookievalue, cf_user_agent = scraper.get_tokens(link, headers=self.headers)
        response = scraper.get(link, verify=True, cookies=self.cf_cookievalue, headers=self.headers, stream=True, timeout=30)

        with open(cachebase + '.html', 'wb') as cachefile:
            for chunk in response.iter_content(chunk_size=1024):
                # keep-alive chunks come through empty - ignore them
                if chunk:
                    cachefile.write(chunk)
                    cachefile.flush()
def search_results(self):
    """Parse the cached search page into {'entries': [result, ...]}.

    Each result dict carries title/filename/size/pack/issues/link/year/id
    plus a fixed "site": 'DDL' marker so downstream code can route it.

    Fixes over the previous revision:
      - year/size are (re)initialized per article, so a page section that
        omits them no longer raises NameError or leaks the previous
        article's values.
      - scraped text is escaped with re.escape() before being used as a
        regex pattern.
      - None checks around soup lookups prevent AttributeError/TypeError
        on malformed pages.
    """
    results = {}
    resultlist = []
    soup = BeautifulSoup(open(self.local_filename), 'html.parser')
    countspan = soup.find("span", {"class": "cover-article-count"})
    if countspan is not None:
        resultline = countspan.get_text(strip=True)
        logger.info('There are %s results' % re.sub('Articles', '', resultline).strip())
    for f in soup.findAll("article"):
        id = f['id']
        lk = f.find('a')
        link = lk['href']
        titlefind = f.find("h1", {"class": "post-title"})
        title = titlefind.get_text(strip=True)
        title = re.sub(u'\u2013', '-', title).strip()
        filename = title
        issues = None
        pack = False
        # per-article defaults so missing Year/Size rows can't leak values
        # from a previous article or raise NameError below
        year = None
        size = '0M'
        # see if it's a pack type: a '#a - b' style range in the title
        issfind_st = title.find('#')
        issfind_en = title.find('-', issfind_st)
        if issfind_en != -1 and len(title) > issfind_en + 2:
            if all([title[issfind_en + 1] == ' ', title[issfind_en + 2].isdigit()]):
                iss_en = title.find(' ', issfind_en + 2)
                if iss_en != -1:
                    issues = title[issfind_st + 1:iss_en]
                    pack = True
            if title[issfind_en + 1].isdigit():
                iss_en = title.find(' ', issfind_en + 1)
                if iss_en != -1:
                    issues = title[issfind_st + 1:iss_en]
                    pack = True

        # if it's a pack - remove the issue-range and the possible issue
        # years (cause it most likely will span) and pass thru as separate items
        if pack is True:
            # escape: the range came from scraped text, not a regex
            title = re.sub(re.escape(issues), '', title).strip()
            if title.endswith('#'):
                title = title[:-1].strip()
        else:
            # weekly mega-packs aren't supported via DDL - skip them
            if any(['Marvel Week+' in title, 'INDIE Week+' in title, 'Image Week' in title, 'DC Week+' in title]):
                continue

        # the Year / Size metadata live in up to 3 consecutive text nodes
        option_find = f.find("p", {"style": "text-align: center;"})
        i = 0
        while i <= 2:
            if option_find is None:
                break
            option_find = option_find.findNext(text=True)
            if option_find is None:
                break
            if 'Year' in option_find:
                year = option_find.findNext(text=True)
                year = re.sub(r'\|', '', year).strip()
                if pack is True and '-' in year:
                    # spanning years belong to the pack, not the series title
                    title = re.sub(r'\(' + re.escape(year) + r'\)', '', title).strip()
            else:
                size = option_find.findNext(text=True)
                if all([re.sub(':', '', size).strip() != 'Size', len(re.sub('[^0-9]', '', size).strip()) > 0]):
                    # normalize MB/GB suffixes to single-letter form
                    if 'MB' in size:
                        size = re.sub('MB', 'M', size).strip()
                    elif 'GB' in size:
                        size = re.sub('GB', 'G', size).strip()
                    if '//' in size:
                        nwsize = size.find('//')
                        size = re.sub(r'\[', '', size[:nwsize]).strip()
                else:
                    size = '0M'
            i += 1
        dateline = f.find('time')
        datefull = dateline['datetime']
        datestamp = time.mktime(time.strptime(datefull, "%Y-%m-%d"))
        resultlist.append({"title": title,
                           "pubdate": datetime.datetime.fromtimestamp(float(datestamp)).strftime('%a, %d %b %Y %H:%M:%S'),
                           "filename": filename,
                           "size": re.sub(' ', '', size).strip(),
                           "pack": pack,
                           "issues": issues,
                           "link": link,
                           "year": year,
                           "id": re.sub('post-', '', id).strip(),
                           "site": 'DDL'})
        logger.fdebug('%s [%s]' % (title, size))
    results['entries'] = resultlist
    return results
FIX:(#2179) Post-processing item would fail if match would occur on story-arc check, FIX: Fixed some sub-directory problems when doing various types of scans, IMP: Added booktype to filechecker parsing results, FIX: When downloading via DDL, would not adhere to the booktype as a restraint, IMP: Pack support added for DDL (available as a per series option), IMP: Added BookType & Aliases to the arc's section which will impact how issues/series are searched/post-processed/cheked when they're an issue from an arc, IMP: Initial codebase for the a queue manager section, IMP: Write DDL-Queue data to the sql table so that stalled/broken downloads can be resumed/deleted etc eventually, FIX: If a filename didn't have a valid issue number and it is a Print Edition, will now throw a warning indicating other options to try instead of causing a traceback, IMP: Updated snatch notifications so the notification header will just say 'Issue Snatched' with a brief description, FIX: Removed multiple import db lines from the helpers module, IMP: cleanup_cache variable (true/false) added to config section which will initiate a cleanup of items in the cache directory on startup which will remove items that are no longer needed, IMP: Changed some logging string concatenation lines to try and avoid traceback errors due to logging
2019-02-01 21:25:24 +00:00
def parse_downloadresults(self, id, mainlink):
    """Parse a cached result page (<cache>/getcomics-<id>.html) for
    download links, persist each queued item to the ddl_info table and
    push it onto mylar.DDL_QUEUE.

    Returns {'success': True} when at least one link was queued,
    {'success': False} otherwise.

    Fixes over the previous revision:
      - guards `possible_more` against None before reading `.name`
        (malformed pages previously raised AttributeError).
      - the metadata scan loop bails out when findNext() returns None
        instead of raising TypeError / looping forever.
      - bare `except:` narrowed to `except Exception:`.
    """
    myDB = db.DBConnection()
    series = None
    year = None
    size = None
    title = os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + id)
    soup = BeautifulSoup(open(title + '.html'), 'html.parser')
    orig_find = soup.find("p", {"style": "text-align: center;"})
    i = 0
    option_find = orig_find
    possible_more = None
    # walk successive text nodes: first is the series, then Year, then Size
    while True:
        prev_option = option_find
        option_find = option_find.findNext(text=True)
        if option_find is None:
            # malformed page - stop scanning rather than crash
            break
        if i == 0 and series is None:
            series = option_find
        elif 'Year' in option_find:
            year = option_find.findNext(text=True)
            year = re.sub(r'\|', '', year).strip()
        else:
            if 'Size' in prev_option:
                size = option_find  # .findNext(text=True)
                # whatever follows the metadata block may hold extra links
                possible_more = orig_find.next_sibling
                break
        i += 1

    logger.fdebug('Now downloading: %s [%s] / %s ... this can take a while (go get some take-out)...' % (series, year, size))

    # primary 'Download Now' button, if present
    link = None
    for f in soup.findAll("div", {"class": "aio-pulse"}):
        lk = f.find('a')
        if lk['title'] == 'Download Now':
            link = {"series": series,
                    "site": lk['title'],
                    "year": year,
                    "issues": None,
                    "size": size,
                    "link": lk['href']}
            break  # get the first link just to test

    links = []

    # guard possible_more - it stays None on malformed pages
    if link is None and possible_more is not None and possible_more.name == 'ul':
        # multi-part pack: each <li> holds one sub-pack link + label
        try:
            bb = possible_more.findAll('li')
        except Exception:
            pass
        else:
            for x in bb:
                linkline = x.find('a')
                if linkline:
                    if 'go.php' in linkline['href']:
                        volume = x.findNext(text=True)
                        if u'\u2013' in volume:
                            volume = re.sub(u'\u2013', '-', volume)
                        # volume label contains series, issue(s), year(s), and size
                        series_st = volume.find('(')
                        issues_st = volume.find('#')
                        series = volume[:series_st]
                        if any([issues_st == -1, series_st == -1]):
                            issues = None
                        else:
                            series = volume[:issues_st].strip()
                            issues = volume[issues_st + 1:series_st].strip()
                        year_end = volume.find(')', series_st + 1)
                        year = re.sub(r'[\(\)]', '', volume[series_st + 1:year_end]).strip()
                        size_end = volume.find(')', year_end + 1)
                        size = re.sub(r'[\(\)]', '', volume[year_end + 1:size_end]).strip()
                        linked = linkline['href']
                        site = linkline.findNext(text=True)
                        if site == 'Main Server':
                            links.append({"series": series,
                                          "site": site,
                                          "year": year,
                                          "issues": issues,
                                          "size": size,
                                          "link": linked})
    else:
        # look for an extra 'TPBs' section listing additional volumes
        check_extras = soup.findAll("h3")
        for sb in check_extras:
            header = sb.findNext(text=True)
            if header == 'TPBs':
                nxt = sb.next_sibling
                if nxt.name == 'ul':
                    bb = nxt.findAll('li')
                    for x in bb:
                        volume = x.findNext(text=True)
                        if u'\u2013' in volume:
                            volume = re.sub(u'\u2013', '-', volume)
                        series_st = volume.find('(')
                        issues_st = volume.find('#')
                        series = volume[:issues_st].strip()
                        issues = volume[issues_st:series_st].strip()
                        year_end = volume.find(')', series_st + 1)
                        year = re.sub(r'[\(\)\|]', '', volume[series_st + 1:year_end]).strip()
                        size_end = volume.find(')', year_end + 1)
                        size = re.sub(r'[\(\)\|]', '', volume[year_end + 1:size_end]).strip()
                        linkline = x.find('a')
                        linked = linkline['href']
                        site = linkline.findNext(text=True)
                        links.append({"series": series,
                                      "volume": volume,
                                      "site": site,
                                      "year": year,
                                      "issues": issues,
                                      "size": size,
                                      "link": linked})

    if all([link is None, len(links) == 0]):
        logger.warn('Unable to retrieve any valid immediate download links. They might not exist.')
        return {'success': False}
    if all([link is not None, len(links) == 0]):
        logger.info('only one item discovered, changing queue length to accomodate: %s [%s]' % (link, type(link)))
        links = [link]
    elif len(links) > 0:
        if link is not None:
            # the primary button is a separate item from the extra packs
            links.append(link)
            logger.fdebug('[DDL-QUEUE] Making sure we download the original item in addition to the extra packs.')
        if len(links) > 1:
            logger.fdebug('[DDL-QUEUER] This pack has been broken up into %s separate packs - queueing each in sequence for your enjoyment.' % len(links))

    cnt = 1
    for x in links:
        # single item keeps the page id; multi-part items get -1, -2, ...
        if len(links) == 1:
            mod_id = id
        else:
            mod_id = id + '-' + str(cnt)
        ctrlval = {'id': mod_id}
        vals = {'series': x['series'],
                'year': x['year'],
                'size': x['size'],
                'issues': x['issues'],
                'issueid': self.issueid,
                'comicid': self.comicid,
                'link': x['link'],
                'mainlink': mainlink,
                'updated_date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M'),
                'status': 'Queued'}
        # persist first so stalled/broken downloads can be resumed later
        myDB.upsert('ddl_info', vals, ctrlval)
        mylar.DDL_QUEUE.put({'link': x['link'],
                             'mainlink': mainlink,
                             'series': x['series'],
                             'year': x['year'],
                             'size': x['size'],
                             'comicid': self.comicid,
                             'issueid': self.issueid,
                             'oneoff': self.oneoff,
                             'id': mod_id,
                             'resume': None})
        cnt += 1

    return {'success': True}
FIX: Corrected issues in an arc being set to None when initially adding an arc, IMP: Changed arc searches & one-off pull-list searches to both use the search-queue, and other queues as required, IMP: Changed 'search for missing' in the arc options to follow search-queue workflow, IMP: Allowed arcs / pullist one-offs to be searched based on one-shot inclusion (ie. no issue number for one-shot issues), IMP: formatted logging lines for arcs to avoid logging errors, IMP: added code to allow requeuing of ddl downloads that fail to resume from point of failure, IMP: added code to display current percentage of ddl progress, IMP: added 'clear status' option to arc details to clear current status of a given issue within an arc, FIX: allow for series within an arc that don't populate a year properly to still have a year designated, IMP: Removed SSL tick box from rtorrent GUI configuration. If previously enabled, will auto-correct host to use https upon initial startup if required,
IMP: When adding a series, if there's only one issue available (and it's not ongoing) - mark it as one-shot edition to allow for snatches with no issue number,
IMP: Ignore Week+/Week packs for the time being when using the DDL provider option, FIX: When weekly pull could not be retrieved, would drop to alt_pull=0 option temporarily. If config was saved, would overwrite current alt_pull setting and cause subsequent problems when retrieving the pull, FIX: Fixed some post-processing problems when post-processing story-arc issues
2019-02-19 17:02:05 +00:00
def downloadit(self, id, link, mainlink, resume=None):
    """Download one DDL item to disk, optionally resuming a partial file.

    Only one DDL download may run at a time; the global mylar.DDL_LOCK
    guards this and is released on every exit path.

    Parameters:
        id       -- ddl_info table row id used to track this download
                    (name shadows the builtin ``id`` but is kept for
                    caller compatibility)
        link     -- direct download url
        mainlink -- originating page url, used to obtain cloudflare tokens
        resume   -- byte offset to resume from, or None for a fresh download

    Returns:
        dict with keys "success" (bool), "filename", "path" -- or None when
        another download already holds the lock.
    """
    import zipfile  # local import: zipfile is not in the module-level imports

    if mylar.DDL_LOCK is True:
        logger.fdebug('[DDL] Another item is currently downloading via DDL. Only one item can be downloaded at a time using DDL. Patience.')
        return
    else:
        mylar.DDL_LOCK = True

    myDB = db.DBConnection()

    filename = None
    try:
        with cfscrape.create_scraper() as s:
            if resume is not None:
                logger.info('[DDL-RESUME] Attempting to resume from: %s bytes' % resume)
                self.headers['Range'] = 'bytes=%d-' % resume

            # clear cloudflare first, then stream the actual payload.
            cf_cookievalue, cf_user_agent = s.get_tokens(mainlink, headers=self.headers, timeout=30)
            t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True, timeout=30)

            filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
            if 'GetComics.INFO' in filename:
                # FIX: re.I was previously passed as re.sub's positional
                # ``count`` argument; pass it as flags so the strip is
                # actually case-insensitive.
                filename = re.sub('GetComics.INFO', '', filename, flags=re.I).strip()

            try:
                remote_filesize = int(t.headers['Content-length'])
                logger.fdebug('remote filesize: %s' % remote_filesize)
            except Exception as e:
                if 'go.php-urls' not in link:
                    # the size header is usually missing when the link is the
                    # older go.php-url redirect form - rewrite and retry once.
                    link = re.sub('go.php-url=', 'go.php-urls', link)
                    t = s.get(link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True, timeout=30)
                    filename = os.path.basename(urllib.unquote(t.url).decode('utf-8'))
                    if 'GetComics.INFO' in filename:
                        filename = re.sub('GetComics.INFO', '', filename, flags=re.I).strip()
                    try:
                        remote_filesize = int(t.headers['Content-length'])
                        logger.fdebug('remote filesize: %s' % remote_filesize)
                    except Exception as e:
                        logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
                        logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
                        remote_filesize = 0
                        mylar.DDL_LOCK = False
                        return ({"success": False,
                                 "filename": filename,
                                 "path": None})
                else:
                    logger.warn('[WARNING] Unable to retrieve remote file size - this is usually due to the page being behind a different click-bait/ad page. Error returned as : %s' % e)
                    logger.warn('[WARNING] Considering this particular download as invalid and will ignore this result.')
                    remote_filesize = 0
                    mylar.DDL_LOCK = False
                    return ({"success": False,
                             "filename": filename,
                             "path": None})

            # write the filename to the db for tracking purposes...
            myDB.upsert('ddl_info', {'filename': filename, 'remote_filesize': remote_filesize}, {'id': id})

            if mylar.CONFIG.DDL_LOCATION is not None and not os.path.isdir(mylar.CONFIG.DDL_LOCATION):
                checkdirectory = mylar.filechecker.validateAndCreateDirectory(mylar.CONFIG.DDL_LOCATION, True)
                if not checkdirectory:
                    logger.warn('[ABORTING] Error trying to validate/create DDL download directory: %s.' % mylar.CONFIG.DDL_LOCATION)
                    # FIX: release the lock before bailing out - previously it
                    # was left held here, blocking every subsequent DDL download.
                    mylar.DDL_LOCK = False
                    return ({"success": False,
                             "filename": filename,
                             "path": None})

            path = os.path.join(mylar.CONFIG.DDL_LOCATION, filename)

            if t.headers.get('content-encoding') == 'gzip':
                # NOTE(review): ``f`` is immediately rebound by the open()
                # below, so this gzip wrapper looks like dead code - kept
                # as-is to preserve existing behaviour; confirm intent.
                buf = StringIO(t.content)
                f = gzip.GzipFile(fileobj=buf)

            if resume is not None:
                # append to the existing partial file when resuming
                with open(path, 'ab') as f:
                    for chunk in t.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
                            f.flush()
            else:
                with open(path, 'wb') as f:
                    for chunk in t.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
                            f.flush()

    except Exception as e:
        logger.error('[ERROR] %s' % e)
        mylar.DDL_LOCK = False
        return ({"success": False,
                 "filename": filename,
                 "path": None})
    else:
        mylar.DDL_LOCK = False
        if os.path.isfile(path):
            if path.endswith('.zip'):
                # packs arrive zipped - extract into a directory named after
                # the file (FIX: anchor the pattern so only a trailing
                # literal '.zip' is stripped, not any 4-char 'xzip' match).
                new_path = os.path.join(mylar.CONFIG.DDL_LOCATION, re.sub(r'\.zip$', '', filename).strip())
                logger.info('Zip file detected. Unzipping into new modified path location: %s' % new_path)
                try:
                    zip_f = zipfile.ZipFile(path, 'r')
                    zip_f.extractall(new_path)
                    zip_f.close()
                except Exception as e:
                    logger.warn('[ERROR: %s] Unable to extract zip file: %s' % (e, new_path))
                    return ({"success": False,
                             "filename": filename,
                             "path": None})
                else:
                    try:
                        os.remove(path)
                    except Exception as e:
                        logger.warn('[ERROR: %s] Unable to remove zip file from %s after extraction.' % (e, path))
                    # extraction produced a directory; a single filename no
                    # longer applies.
                    filename = None
            else:
                new_path = path
            return ({"success": True,
                     "filename": filename,
                     "path": new_path})
        # FIX: previously fell through returning None when the downloaded
        # file was missing on disk; return an explicit failure dict so
        # callers indexing the result don't crash.
        return ({"success": False,
                 "filename": filename,
                 "path": None})
2019-01-16 19:32:37 +00:00
def issue_list(self, pack):
    """Expand a pack description string into a sorted list of issue numbers.

    Parameters:
        pack -- string such as '1-5, 8+10' or '1-3 TPBs'; '+' and ','
                both act as separators, '-' denotes an inclusive range,
                and a token containing 'TPBs' is kept verbatim.

    Returns:
        sorted list of int issue numbers (any 'TPBs' marker sorts last).
        FIX: previously the computed list was built but never returned.
    """
    # normalize separators: '+' and ',' both delimit entries
    packlist = pack.replace('+', ' ').replace(',', ' ').split()
    print(packlist)
    plist = []
    pack_issues = []
    for pl in packlist:
        if '-' in pl:
            # inclusive range like '1-5' -> [1, 2, 3, 4, 5]
            start = int(pl[:pl.find('-')])
            end = int(pl[pl.find('-') + 1:])
            plist.append(list(range(start, end + 1)))
        elif 'TPBs' not in pl:
            plist.append(int(pl))
        else:
            plist.append('TPBs')

    # flatten expanded ranges alongside single issues
    for pi in plist:
        if isinstance(pi, list):
            pack_issues.extend(pi)
        else:
            pack_issues.append(pi)

    # ints first (numerically), then strings - matches the Python 2
    # cross-type ordering the original relied on, but is py3-safe
    pack_issues.sort(key=lambda v: (isinstance(v, str), v))
    print("pack_issues: %s" % pack_issues)
    return pack_issues
#if __name__ == '__main__':
# ab = GC(sys.argv[1]) #'justice league aquaman') #sys.argv[0])
# #c = ab.search()
# b = ab.loadsite('test', sys.argv[2])
# c = ab.parse_downloadresults('test', '60MB')
# #c = ab.issue_list(sys.argv[2])