2018-03-10 08:05:56 +00:00
using System ;
using System.Collections ;
2016-10-27 07:30:03 +00:00
using System.Collections.Generic ;
2017-04-15 08:45:10 +00:00
using System.Collections.Specialized ;
2018-03-10 08:05:56 +00:00
using System.Linq ;
using System.Net ;
2017-04-15 08:45:10 +00:00
using System.Text ;
using System.Text.RegularExpressions ;
2018-03-10 08:05:56 +00:00
using System.Threading.Tasks ;
2017-04-15 08:45:10 +00:00
using AngleSharp.Dom ;
2019-01-20 00:09:27 +00:00
using AngleSharp.Html.Dom ;
using AngleSharp.Html.Parser ;
2018-03-10 08:05:56 +00:00
using Jackett.Common.Helpers ;
using Jackett.Common.Models ;
using Jackett.Common.Models.IndexerConfig ;
using static Jackett . Common . Models . IndexerConfig . ConfigurationData ;
using Jackett.Common.Services.Interfaces ;
using Jackett.Common.Utils ;
using Jackett.Common.Utils.Clients ;
2017-11-05 09:42:03 +00:00
using Microsoft.AspNetCore.WebUtilities ;
2018-03-10 08:05:56 +00:00
using Newtonsoft.Json.Linq ;
using NLog ;
2017-04-15 08:45:10 +00:00
2018-03-10 08:05:56 +00:00
namespace Jackett.Common.Indexers
2016-10-27 07:30:03 +00:00
{
2017-07-10 20:58:44 +00:00
public class CardigannIndexer : BaseWebIndexer
2016-10-27 07:30:03 +00:00
{
2017-04-15 08:45:10 +00:00
// Indexer definition backing this instance; assigned from the constructor argument of the same name.
protected IndexerDefinition Definition ;
2017-07-10 20:58:44 +00:00
/// <summary>
/// Identifier of this indexer: the definition's <c>Site</c> value when a definition has been
/// assigned, otherwise whatever <c>GetIndexerID</c> returns for the runtime type.
/// </summary>
public override string ID => Definition != null ? Definition.Site : GetIndexerID(GetType());
2017-04-15 08:45:10 +00:00
2016-11-29 18:32:50 +00:00
// Response of the most recently fetched login page; set by GetConfigurationForSetup/DoLogin and
// reset to null by DoLogin once the login form has been read.
protected WebClientStringResult landingResult ;
2017-04-15 08:45:10 +00:00
// Parsed document of landingResult.Content; null until a login page has been fetched and again
// after DoLogin has consumed it.
protected IHtmlDocument landingResultDocument ;
2018-04-01 12:56:45 +00:00
// Ids of the category mappings flagged as Default in the definition (filled by the constructor).
protected List < string > DefaultCategories = new List < string > ( ) ;
2016-10-27 07:30:03 +00:00
/// <summary>
/// Hides the inherited <c>configData</c> member and exposes it typed as
/// <see cref="ConfigurationData"/>; reads cast the base value, writes pass straight through.
/// </summary>
new ConfigurationData configData
{
    get
    {
        var inherited = base.configData;
        return (ConfigurationData)inherited;
    }
    set
    {
        base.configData = value;
    }
}
// Names treated as optional fields: "imdb", "rageid", "tvdbid" and "banner".
// NOTE(review): no reader of this array is visible in this part of the file — presumably it is
// consulted while parsing result rows; confirm against the rest of the class.
protected readonly string [ ] OptionalFileds = new string [ ] { "imdb" , "rageid" , "tvdbid" , "banner" } ;
2017-11-05 09:42:03 +00:00
/// <summary>
/// Creates an indexer driven by the given definition.
/// </summary>
/// <remarks>
/// In order, the constructor: fills in defaults missing from the definition (settings, encoding,
/// login method, search paths), copies the descriptive attributes and site links onto the
/// indexer, derives the Torznab capabilities, builds one configuration item per definition
/// setting, registers the category mappings and finally calls <c>LoadValuesFromJson(null)</c>.
/// </remarks>
/// <param name="configService">Forwarded to the base constructor.</param>
/// <param name="wc">Web client forwarded to the base constructor.</param>
/// <param name="l">Logger forwarded to the base constructor.</param>
/// <param name="ps">Protection service forwarded to the base constructor.</param>
/// <param name="Definition">Definition to run; it is mutated in place when defaults are applied.</param>
/// <exception cref="Exception">
/// A setting has an unknown type, or a "select" setting has no options.
/// </exception>
public CardigannIndexer(IIndexerConfigurationService configService, Utils.Clients.WebClient wc, Logger l, IProtectionService ps, IndexerDefinition Definition)
    : base(configService: configService,
           client: wc,
           logger: l,
           p: ps)
{
    this.Definition = Definition;

    // Add default data if necessary
    if (Definition.Settings == null)
    {
        Definition.Settings = new List<settingsField>
        {
            new settingsField { Name = "username", Label = "Username", Type = "text" },
            new settingsField { Name = "password", Label = "Password", Type = "password" }
        };
    }

    if (Definition.Encoding == null)
    {
        Definition.Encoding = "UTF-8";
    }

    if (Definition.Login != null && Definition.Login.Method == null)
    {
        Definition.Login.Method = "form";
    }

    if (Definition.Search.Paths == null)
    {
        Definition.Search.Paths = new List<searchPathBlock>();
    }

    // convert definitions with a single search Path to a Paths entry
    if (Definition.Search.Path != null)
    {
        Definition.Search.Paths.Add(new searchPathBlock
        {
            Path = Definition.Search.Path,
            Inheritinputs = true
        });
    }

    // init missing mandatory attributes
    DisplayName = Definition.Name;
    DisplayDescription = Definition.Description;
    if (Definition.Links.Count > 1)
    {
        AlternativeSiteLinks = Definition.Links.ToArray();
    }
    DefaultSiteLink = Definition.Links[0];
    if (Definition.Legacylinks != null)
    {
        LegacySiteLinks = Definition.Legacylinks.ToArray();
    }
    Encoding = Encoding.GetEncoding(Definition.Encoding);
    if (!DefaultSiteLink.EndsWith("/"))
    {
        DefaultSiteLink += "/";
    }
    Language = Definition.Language;
    Type = Definition.Type;

    TorznabCaps = new TorznabCapabilities();
    TorznabCaps.SupportsImdbMovieSearch = Definition.Caps.Modes.Any(
        mode => mode.Key == "movie-search" && mode.Value.Contains("imdbid"));
    if (Definition.Caps.Modes.ContainsKey("music-search"))
    {
        TorznabCaps.SupportedMusicSearchParamsList = Definition.Caps.Modes["music-search"];
    }

    // init config Data
    configData = new ConfigurationData();
    foreach (var setting in Definition.Settings)
    {
        Item item;
        switch (setting.Type)
        {
            case "checkbox":
                item = new BoolItem { Value = setting.Default == "true" };
                break;
            case null: // settings without a type are plain text fields
            case "password":
            case "text":
                item = new StringItem { Value = setting.Default };
                break;
            case "select":
                if (setting.Options == null)
                {
                    throw new Exception("Options must be given for the 'select' type.");
                }
                item = new SelectItem(setting.Options) { Value = setting.Default };
                break;
            case "info":
                item = new DisplayItem(setting.Default);
                break;
            default:
                throw new Exception($"Invalid setting type '{setting.Type}' specified.");
        }

        // The label is the display name; fall back to the setting name when no label is given.
        item.Name = setting.Label ?? setting.Name;
        configData.AddDynamic(setting.Name, item);
    }

    if (Definition.Caps.Categories != null)
    {
        foreach (var category in Definition.Caps.Categories)
        {
            var cat = TorznabCatType.GetCatByName(category.Value);
            if (cat != null)
            {
                AddCategoryMapping(category.Key, cat);
            }
            else
            {
                logger.Error(string.Format("CardigannIndexer ({0}): invalid Torznab category for id {1}: {2}", ID, category.Key, category.Value));
            }
        }
    }

    if (Definition.Caps.Categorymappings != null)
    {
        foreach (var mapping in Definition.Caps.Categorymappings)
        {
            TorznabCategory torznabCat = null;
            if (mapping.cat != null)
            {
                torznabCat = TorznabCatType.GetCatByName(mapping.cat);
                if (torznabCat == null)
                {
                    logger.Error(string.Format("CardigannIndexer ({0}): invalid Torznab category for id {1}: {2}", ID, mapping.id, mapping.cat));
                    continue;
                }
            }

            AddCategoryMapping(mapping.id, torznabCat, mapping.desc);
            if (mapping.Default)
            {
                DefaultCategories.Add(mapping.id);
            }
        }
    }

    LoadValuesFromJson(null);
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Loads the configuration through the base implementation and then registers every
/// certificate hash listed in the definition as trusted for the host of <c>SiteLink</c>.
/// </summary>
/// <param name="jsonConfig">Configuration passed on to the base implementation.</param>
/// <param name="useProtectionService">Passed on to the base implementation.</param>
public override void LoadValuesFromJson(JToken jsonConfig, bool useProtectionService = false)
{
    base.LoadValuesFromJson(jsonConfig, useProtectionService);

    if (Definition.Certificates == null)
    {
        return;
    }

    // add self signed cert to trusted certs
    foreach (var hash in Definition.Certificates)
    {
        var host = new Uri(SiteLink).Host;
        webclient.AddTrustedCertificate(host, hash);
    }
}
/// <summary>
/// Builds the template variable table from the current configuration.
/// </summary>
/// <returns>
/// A dictionary holding <c>.Config.sitelink</c> plus one <c>.Config.&lt;name&gt;</c> entry per
/// definition setting. Checkbox items map to "true" or the empty string, select items to their
/// selected value, and every other item is read as a <c>StringItem</c>.
/// </returns>
protected Dictionary<string, object> getTemplateVariablesFromConfigData()
{
    var result = new Dictionary<string, object>();
    result[".Config.sitelink"] = SiteLink;

    foreach (var setting in Definition.Settings)
    {
        var item = configData.GetDynamic(setting.Name);
        var itemType = item.GetType();

        string value;
        if (itemType == typeof(BoolItem))
        {
            value = ((BoolItem)item).Value == true ? "true" : "";
        }
        else if (itemType == typeof(SelectItem))
        {
            value = ((SelectItem)item).Value;
        }
        else
        {
            value = ((StringItem)item).Value;
        }

        result[".Config." + setting.Name] = value;
    }

    return result;
}
2017-04-15 08:45:10 +00:00
// applyGoTemplateText (below) is a very bad implementation of the golang text/template templating engine.
2016-10-27 07:30:03 +00:00
// But it should work for most basic constructs used by Cardigann definitions.
2017-02-15 10:41:07 +00:00
// Callback taking an expanded template value and returning the string to insert instead.
// applyGoTemplateText calls it, when supplied, on re_replace, join, range and simple-variable expansions.
protected delegate string TemplateTextModifier ( string str ) ;
2017-04-15 08:45:10 +00:00
/// <summary>
/// Expands the subset of Go template syntax handled here: <c>re_replace</c>, <c>join</c>,
/// <c>if … else … end</c>, <c>range</c> and plain <c>{{ .Variable }}</c> references, in that order.
/// </summary>
/// <param name="template">Template text to expand.</param>
/// <param name="variables">
/// Variable table keyed by the dotted name as written in the template; when null, the table is
/// built from the configuration via <c>getTemplateVariablesFromConfigData</c>.
/// </param>
/// <param name="modifier">
/// Optional transformation applied to re_replace, join, range and simple-variable expansions
/// before insertion. It is not applied to the branch chosen by an if/else expression.
/// </param>
/// <returns>The template with every recognised expression replaced.</returns>
/// <exception cref="NotImplementedException">An if-condition does not start with ".".</exception>
/// <exception cref="Exception">An if-condition variable is neither null, a string nor a collection.</exception>
protected string applyGoTemplateText(string template, Dictionary<string, object> variables = null, TemplateTextModifier modifier = null)
{
    if (variables == null)
    {
        variables = getTemplateVariablesFromConfigData();
    }

    // Each pass enumerates the matches found in the template as it was when the pass started,
    // while the replacements are applied to the evolving template string.

    // handle re_replace expression
    // Example: {{ re_replace .Query.Keywords "[^a-zA-Z0-9]+" "%" }}
    var reReplacePattern = new Regex(@"{{\s*re_replace\s+(\..+?)\s+""(.*?)""\s+""(.*?)""\s*}}");
    for (var m = reReplacePattern.Match(template); m.Success; m = m.NextMatch())
    {
        var expression = m.Groups[0].Value;
        var variableName = m.Groups[1].Value;
        var pattern = m.Groups[2].Value;
        var replacement = m.Groups[3].Value;

        var source = (string)variables[variableName];
        var expanded = new Regex(pattern).Replace(source, replacement);
        if (modifier != null)
        {
            expanded = modifier(expanded);
        }
        template = template.Replace(expression, expanded);
    }

    // handle join expression
    // Example: {{ join .Categories "," }}
    var joinPattern = new Regex(@"{{\s*join\s+(\..+?)\s+""(.*?)""\s*}}");
    for (var m = joinPattern.Match(template); m.Success; m = m.NextMatch())
    {
        var expression = m.Groups[0].Value;
        var variableName = m.Groups[1].Value;
        var separator = m.Groups[2].Value;

        var items = (ICollection<string>)variables[variableName];
        var expanded = string.Join(separator, items);
        if (modifier != null)
        {
            expanded = modifier(expanded);
        }
        template = template.Replace(expression, expanded);
    }

    // handle if ... else ... expression
    var ifElsePattern = new Regex(@"{{\s*if\s*(.+?)\s*}}(.*?){{\s*else\s*}}(.*?){{\s*end\s*}}");
    for (var m = ifElsePattern.Match(template); m.Success; m = m.NextMatch())
    {
        var expression = m.Groups[0].Value;
        var condition = m.Groups[1].Value;
        var whenTrue = m.Groups[2].Value;
        var whenFalse = m.Groups[3].Value;

        // Only bare variable references are supported as conditions.
        if (!condition.StartsWith("."))
        {
            throw new NotImplementedException("CardigannIndexer: Condition operation '" + condition + "' not implemented");
        }

        // Truthiness: null is false, strings must be non-blank, collections must be non-empty.
        var operand = variables[condition];
        bool truthy;
        if (operand == null)
        {
            truthy = false;
        }
        else if (operand is string)
        {
            truthy = !string.IsNullOrWhiteSpace((string)operand);
        }
        else if (operand is ICollection)
        {
            truthy = ((ICollection)operand).Count > 0;
        }
        else
        {
            throw new Exception(string.Format("Unexpceted type for variable {0}: {1}", condition, operand.GetType()));
        }

        template = template.Replace(expression, truthy ? whenTrue : whenFalse);
    }

    // handle range expression
    var rangePattern = new Regex(@"{{\s*range\s*(.+?)\s*}}(.*?){{\.}}(.*?){{end}}");
    for (var m = rangePattern.Match(template); m.Success; m = m.NextMatch())
    {
        var expression = m.Groups[0].Value;
        var variableName = m.Groups[1].Value;
        var before = m.Groups[2].Value;
        var after = m.Groups[3].Value;

        var expanded = new StringBuilder();
        foreach (string element in (ICollection<string>)variables[variableName])
        {
            var rendered = modifier != null ? modifier(element) : element;
            expanded.Append(before).Append(rendered).Append(after);
        }
        template = template.Replace(expression, expanded.ToString());
    }

    // handle simple variables
    var variablePattern = new Regex(@"{{\s*(\..+?)\s*}}");
    for (var m = variablePattern.Match(template); m.Success; m = m.NextMatch())
    {
        var expression = m.Groups[0].Value;
        var variableName = m.Groups[1].Value;

        var value = (string)variables[variableName];
        if (modifier != null)
        {
            value = modifier(value);
        }
        template = template.Replace(expression, value);
    }

    return template;
}
/// <summary>
/// Inspects a login response for failure indicators.
/// </summary>
/// <param name="loginResult">Response to inspect.</param>
/// <param name="errorBlocks">Error selectors from the definition; may be null.</param>
/// <returns>Always <c>true</c>; failures are reported by throwing.</returns>
/// <exception cref="ExceptionWithConfigData">
/// The response status is 401, or one of the error selectors matches the response document.
/// </exception>
protected bool checkForError(WebClientStringResult loginResult, IList<errorBlock> errorBlocks)
{
    // e.g. used by YGGtorrent
    if (loginResult.Status == HttpStatusCode.Unauthorized)
    {
        throw new ExceptionWithConfigData("401 Unauthorized, check your credentials", configData);
    }

    if (errorBlocks == null)
    {
        return true; // no error
    }

    var document = new HtmlParser().ParseDocument(loginResult.Content);
    foreach (var block in errorBlocks)
    {
        var hit = document.QuerySelector(block.Selector);
        if (hit == null)
        {
            continue;
        }

        // Prefer the message selector from the definition; otherwise use the matched element's text.
        string message;
        if (block.Message != null)
        {
            message = handleSelector(block.Message, document.FirstElementChild);
        }
        else
        {
            message = hit.TextContent;
        }

        throw new ExceptionWithConfigData(string.Format("Error: {0}", message.Trim()), configData);
    }

    return true; // no error
}
2017-07-10 20:58:44 +00:00
2017-04-15 08:45:10 +00:00
/// <summary>
/// Logs in to the site using the method named by <c>Definition.Login.Method</c>
/// ("post", "form", "cookie" or "get") and stores the resulting cookies in
/// <c>configData.CookieHeader</c>.
/// </summary>
/// <returns>
/// <c>true</c> when no login block is defined, when a captcha cookie was applied directly, or when
/// the login request finished without <c>checkForError</c> throwing; <c>false</c> when the login
/// page had to be fetched on the fly and <c>GetConfigurationForSetup(true)</c> reported a captcha.
/// </returns>
/// <exception cref="ExceptionWithConfigData">
/// The login form or one of its inputs cannot be located, the Cloudflare clearance fails, or the
/// response matches an error selector.
/// </exception>
/// <exception cref="NotImplementedException">The login method is not one of the four handled here.</exception>
/// <exception cref="Exception">A selector input cannot be evaluated (the cause is kept as inner exception).</exception>
protected async Task<bool> DoLogin()
{
    var Login = Definition.Login;
    if (Login == null)
    {
        return true;
    }

    if (Login.Method == "post")
    {
        var pairs = new Dictionary<string, string>();
        foreach (var Input in Definition.Login.Inputs)
        {
            var value = applyGoTemplateText(Input.Value);
            pairs.Add(Input.Key, value);
        }

        var LoginUrl = resolvePath(Login.Path).ToString();
        configData.CookieHeader.Value = null;
        var loginResult = await RequestLoginAndFollowRedirect(LoginUrl, pairs, null, true, null, SiteLink, true);
        configData.CookieHeader.Value = loginResult.Cookies;

        checkForError(loginResult, Definition.Login.Error);
    }
    else if (Login.Method == "form")
    {
        var LoginUrl = resolvePath(Login.Path).ToString();
        var queryCollection = new NameValueCollection();
        var pairs = new Dictionary<string, string>();

        var CaptchaConfigItem = (RecaptchaItem)configData.GetDynamic("Captcha");
        if (CaptchaConfigItem != null && !string.IsNullOrWhiteSpace(CaptchaConfigItem.Cookie))
        {
            // for remote users just set the cookie and return
            CookieHeader = CaptchaConfigItem.Cookie;
            return true;
        }

        // landingResultDocument might not be initiated if the login is caused by a relogin during a query.
        // This has to happen before the captcha handling below, which reads the document.
        if (landingResultDocument == null)
        {
            var ConfigurationResult = await GetConfigurationForSetup(true);
            if (ConfigurationResult == null) // got captcha
            {
                return false;
            }
        }

        if (CaptchaConfigItem != null)
        {
            var CloudFlareCaptchaChallenge = landingResultDocument.QuerySelector("script[src=\"/cdn-cgi/scripts/cf.challenge.js\"]");
            if (CloudFlareCaptchaChallenge != null)
            {
                var CloudFlareQueryCollection = new NameValueCollection();
                CloudFlareQueryCollection["id"] = CloudFlareCaptchaChallenge.GetAttribute("data-ray");
                CloudFlareQueryCollection["g-recaptcha-response"] = CaptchaConfigItem.Value;

                var ClearanceUrl = resolvePath("/cdn-cgi/l/chk_captcha?" + CloudFlareQueryCollection.GetQueryString());
                var ClearanceResult = await RequestStringWithCookies(ClearanceUrl.ToString(), null, SiteLink);
                if (ClearanceResult.IsRedirect) // clearance successful
                {
                    // request real login page again
                    landingResult = await RequestStringWithCookies(LoginUrl, null, SiteLink);
                    var htmlParser = new HtmlParser();
                    landingResultDocument = htmlParser.ParseDocument(landingResult.Content);
                }
                else
                {
                    throw new ExceptionWithConfigData(string.Format("Login failed: Cloudflare clearance failed using cookies {0}: {1}", CookieHeader, ClearanceResult.Content), configData);
                }
            }
            else
            {
                pairs.Add("g-recaptcha-response", CaptchaConfigItem.Value);
            }
        }

        var FormSelector = Login.Form;
        if (FormSelector == null)
        {
            FormSelector = "form";
        }

        var form = landingResultDocument.QuerySelector(FormSelector);
        if (form == null)
        {
            throw new ExceptionWithConfigData(string.Format("Login failed: No form found on {0} using form selector {1}", LoginUrl, FormSelector), configData);
        }

        var inputs = form.QuerySelectorAll("input");
        if (inputs == null)
        {
            throw new ExceptionWithConfigData(string.Format("Login failed: No inputs found on {0} using form selector {1}", LoginUrl, FormSelector), configData);
        }

        var submitUrlstr = form.GetAttribute("action");
        if (Login.Submitpath != null)
        {
            submitUrlstr = Login.Submitpath;
        }

        // Start from the values already present in the form (hidden tokens and the like) ...
        foreach (var input in inputs)
        {
            var name = input.GetAttribute("name");
            if (name == null)
            {
                continue;
            }

            var value = input.GetAttribute("value");
            if (value == null)
            {
                value = "";
            }

            pairs[name] = value;
        }

        // ... then overlay the inputs configured in the definition.
        foreach (var Input in Definition.Login.Inputs)
        {
            var value = applyGoTemplateText(Input.Value);
            var input = Input.Key;
            if (Login.Selectors)
            {
                // In selector mode the key is a CSS selector pointing at the input element.
                var inputElement = landingResultDocument.QuerySelector(Input.Key);
                if (inputElement == null)
                {
                    throw new ExceptionWithConfigData(string.Format("Login failed: No input found using selector {0}", Input.Key), configData);
                }
                input = inputElement.GetAttribute("name");
            }
            pairs[input] = value;
        }

        // selector inputs
        if (Login.Selectorinputs != null)
        {
            foreach (var Selectorinput in Login.Selectorinputs)
            {
                string value = null;
                try
                {
                    value = handleSelector(Selectorinput.Value, landingResultDocument.FirstElementChild);
                    pairs[Selectorinput.Key] = value;
                }
                catch (Exception ex)
                {
                    throw new Exception(string.Format("Error while parsing selector input={0}, selector={1}, value={2}: {3}", Selectorinput.Key, Selectorinput.Value.Selector, value, ex.Message), ex);
                }
            }
        }

        // getselector inputs
        if (Login.Getselectorinputs != null)
        {
            foreach (var Selectorinput in Login.Getselectorinputs)
            {
                string value = null;
                try
                {
                    value = handleSelector(Selectorinput.Value, landingResultDocument.FirstElementChild);
                    queryCollection[Selectorinput.Key] = value;
                }
                catch (Exception ex)
                {
                    throw new Exception(string.Format("Error while parsing get selector input={0}, selector={1}, value={2}: {3}", Selectorinput.Key, Selectorinput.Value.Selector, value, ex.Message), ex);
                }
            }
        }

        if (queryCollection.Count > 0)
        {
            submitUrlstr += "?" + queryCollection.GetQueryString();
        }
        var submitUrl = resolvePath(submitUrlstr, new Uri(LoginUrl));

        // automatically solve simpleCaptchas, if used
        var simpleCaptchaPresent = landingResultDocument.QuerySelector("script[src*=\"simpleCaptcha\"]");
        if (simpleCaptchaPresent != null)
        {
            var captchaUrl = resolvePath("simpleCaptcha.php?numImages=1");
            var simpleCaptchaResult = await RequestStringWithCookies(captchaUrl.ToString(), null, LoginUrl);
            var simpleCaptchaJSON = JObject.Parse(simpleCaptchaResult.Content);
            var captchaSelection = simpleCaptchaJSON["images"][0]["hash"].ToString();
            pairs["captchaSelection"] = captchaSelection;
            pairs["submitme"] = "X";
        }

        if (Login.Captcha != null)
        {
            var Captcha = Login.Captcha;

            // Image captchas are answered through the "CaptchaText" item, text captchas through
            // "CaptchaAnswer"; everything after the lookup is the same for both.
            string captchaItemName = null;
            if (Captcha.Type == "image")
            {
                captchaItemName = "CaptchaText";
            }
            else if (Captcha.Type == "text")
            {
                captchaItemName = "CaptchaAnswer";
            }

            var captchaAnswerItem = captchaItemName != null
                ? (StringItem)configData.GetDynamic(captchaItemName)
                : null;
            if (captchaAnswerItem != null)
            {
                var input = Captcha.Input;
                if (Login.Selectors)
                {
                    var inputElement = landingResultDocument.QuerySelector(Captcha.Input);
                    if (inputElement == null)
                    {
                        throw new ExceptionWithConfigData(string.Format("Login failed: No captcha input found using {0}", Captcha.Input), configData);
                    }
                    input = inputElement.GetAttribute("name");
                }
                pairs[input] = captchaAnswerItem.Value;
            }
        }

        // clear landingResults/Document, otherwise we might use an old version for a new relogin (if GetConfigurationForSetup() wasn't called before)
        landingResult = null;
        landingResultDocument = null;

        WebClientStringResult loginResult = null;
        var enctype = form.GetAttribute("enctype");
        if (enctype == "multipart/form-data")
        {
            var headers = new Dictionary<string, string>();

            // Format with the invariant culture: a culture using ',' as decimal separator would
            // otherwise leave a comma in the boundary, which is sent unquoted in the header below.
            var unixSeconds = DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1)).TotalSeconds;
            var boundary = "---------------------------" + unixSeconds.ToString(System.Globalization.CultureInfo.InvariantCulture).Replace(".", "");

            var bodyParts = new List<string>();
            foreach (var pair in pairs)
            {
                var part = "--" + boundary + "\r\n" +
                           "Content-Disposition: form-data; name=\"" + pair.Key + "\"\r\n" +
                           "\r\n" +
                           pair.Value;
                bodyParts.Add(part);
            }
            bodyParts.Add("--" + boundary + "--");

            headers.Add("Content-Type", "multipart/form-data; boundary=" + boundary);
            var body = string.Join("\r\n", bodyParts);
            loginResult = await PostDataWithCookies(submitUrl.ToString(), pairs, configData.CookieHeader.Value, SiteLink, headers, body);
        }
        else
        {
            loginResult = await RequestLoginAndFollowRedirect(submitUrl.ToString(), pairs, configData.CookieHeader.Value, true, null, LoginUrl, true);
        }

        configData.CookieHeader.Value = loginResult.Cookies;
        checkForError(loginResult, Definition.Login.Error);
    }
    else if (Login.Method == "cookie")
    {
        configData.CookieHeader.Value = ((StringItem)configData.GetDynamic("cookie")).Value;
    }
    else if (Login.Method == "get")
    {
        var queryCollection = new NameValueCollection();
        foreach (var Input in Definition.Login.Inputs)
        {
            var value = applyGoTemplateText(Input.Value);
            queryCollection.Add(Input.Key, value);
        }

        var LoginUrl = resolvePath(Login.Path + "?" + queryCollection.GetQueryString()).ToString();
        configData.CookieHeader.Value = null;
        var loginResult = await RequestStringWithCookies(LoginUrl, null, SiteLink);
        configData.CookieHeader.Value = loginResult.Cookies;

        checkForError(loginResult, Definition.Login.Error);
    }
    else
    {
        throw new NotImplementedException("Login method " + Definition.Login.Method + " not implemented");
    }

    logger.Debug(string.Format("CardigannIndexer ({0}): Cookies after login: {1}", ID, CookieHeader));
    return true;
}
2018-01-22 16:35:56 +00:00
/// <summary>
/// Suggests a new site link when a request made against <c>SiteLink</c> was redirected to a
/// location outside of it.
/// </summary>
/// <param name="requestUrl">URL that was requested.</param>
/// <param name="RedirectUrl">Redirect target reported for that request.</param>
/// <returns>
/// <c>scheme://host[:port]/</c> of the redirect target, or <c>null</c> when the request did not
/// start with <c>SiteLink</c>, the target still lies under <c>SiteLink</c>, or the target is
/// missing or not an absolute http(s) URL.
/// </returns>
protected string getRedirectDomainHint(string requestUrl, string RedirectUrl)
{
    if (string.IsNullOrEmpty(requestUrl) || string.IsNullOrEmpty(RedirectUrl))
    {
        return null;
    }

    // URLs are compared ordinally; culture-aware prefix matching has no place here.
    var requestedFromSite = requestUrl.StartsWith(SiteLink, StringComparison.Ordinal);
    var redirectedWithinSite = RedirectUrl.StartsWith(SiteLink, StringComparison.Ordinal);
    if (!requestedFromSite || redirectedWithinSite)
    {
        return null;
    }

    // A relative target ("/login.php") carries no domain to suggest. On Unix a rooted path can
    // parse as a file URI, so the scheme is checked as well.
    Uri uri;
    if (!Uri.TryCreate(RedirectUrl, UriKind.Absolute, out uri))
    {
        return null;
    }
    if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps)
    {
        return null;
    }

    // Keep a non-default port, otherwise the suggested link would point at the wrong endpoint.
    var authority = uri.IsDefaultPort ? uri.Host : uri.Host + ":" + uri.Port;
    return uri.Scheme + "://" + authority + "/";
}
/// <summary>
/// Convenience overload: derives the hint from the request URL and redirect target of a byte result.
/// </summary>
/// <param name="result">Result whose <c>Request.Url</c> and <c>RedirectingTo</c> are inspected.</param>
/// <returns>Whatever the string overload returns for those two values.</returns>
protected string getRedirectDomainHint(WebClientByteResult result) =>
    getRedirectDomainHint(result.Request.Url, result.RedirectingTo);
/// <summary>
/// Convenience overload: derives the hint from the request URL and redirect target of a string result.
/// </summary>
/// <param name="result">Result whose <c>Request.Url</c> and <c>RedirectingTo</c> are inspected.</param>
/// <returns>Whatever the string overload returns for those two values.</returns>
protected string getRedirectDomainHint(WebClientStringResult result) =>
    getRedirectDomainHint(result.Request.Url, result.RedirectingTo);
2017-04-15 08:45:10 +00:00
/// <summary>
/// Requests the login test path from the definition and verifies that the session is logged in.
/// </summary>
/// <returns>
/// <c>false</c> when the definition has no login block or no login test; <c>true</c> when the
/// test page was fetched without a redirect and the optional test selector matched.
/// </returns>
/// <exception cref="ExceptionWithConfigData">
/// The test request was redirected, or the test selector matched nothing. When the redirect
/// leads to another domain and the definition enables <c>Followredirect</c>, the site link is
/// updated and saved before the exception is thrown.
/// </exception>
protected async Task<bool> TestLogin()
{
    var Login = Definition.Login;
    if (Login == null || Login.Test == null)
    {
        return false;
    }

    // test if login was successful
    var testUrl = resolvePath(Login.Test.Path).ToString();
    var response = await RequestStringWithCookies(testUrl);

    if (response.IsRedirect)
    {
        var message = new StringBuilder("Login Failed, got redirected.");
        var hint = getRedirectDomainHint(response);
        if (hint != null)
        {
            message.Append(" Try changing the indexer URL to " + hint + ".");
            if (Definition.Followredirect)
            {
                configData.SiteLink.Value = hint;
                SiteLink = configData.SiteLink.Value;
                SaveConfig();
                message.Append(" Updated site link, please try again.");
            }
        }
        throw new ExceptionWithConfigData(message.ToString(), configData);
    }

    if (Login.Test.Selector == null)
    {
        return true;
    }

    var document = new HtmlParser().ParseDocument(response.Content);
    var matches = document.QuerySelectorAll(Login.Test.Selector);
    if (matches.Length == 0)
    {
        throw new ExceptionWithConfigData(string.Format("Login failed: Selector \"{0}\" didn't match", Login.Test.Selector), configData);
    }

    return true;
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Decides from a response whether the session has to log in (again).
/// </summary>
/// <param name="Result">Response that was received.</param>
/// <param name="document">Parsed document of that response, queried with the login test selector.</param>
/// <returns>
/// <c>true</c> when the response is a redirect without a domain hint, or when the login test
/// selector matches nothing in <paramref name="document"/>; <c>false</c> otherwise, including
/// when the definition has no login block, login test or test selector.
/// </returns>
/// <exception cref="ExceptionWithConfigData">
/// The response redirects to another domain. With <c>Followredirect</c> enabled the site link is
/// updated and saved before the exception is thrown.
/// </exception>
protected bool CheckIfLoginIsNeeded(WebClientStringResult Result, IHtmlDocument document)
{
    if (Result.IsRedirect)
    {
        var hint = getRedirectDomainHint(Result);
        if (hint == null)
        {
            return true;
        }

        var message = "Got redirected to another domain. Try changing the indexer URL to " + hint + ".";
        if (Definition.Followredirect)
        {
            configData.SiteLink.Value = hint;
            SiteLink = configData.SiteLink.Value;
            SaveConfig();
            message += " Updated site link, please try again.";
        }
        throw new ExceptionWithConfigData(message, configData);
    }

    var loginTest = Definition.Login?.Test;
    if (loginTest == null || loginTest.Selector == null)
    {
        return false;
    }

    // The selector is expected to match only while logged in.
    return document.QuerySelectorAll(loginTest.Selector).Length == 0;
}
2016-11-29 18:32:50 +00:00
/// <summary>
/// Returns the configuration for a manual setup; delegates to the overload taking a flag with
/// <c>automaticlogin</c> set to <c>false</c>.
/// </summary>
public override async Task<ConfigurationData> GetConfigurationForSetup() =>
    await GetConfigurationForSetup(false);
2017-01-27 12:04:12 +00:00
/// <summary>
/// Fetches the login page for "form" logins and extends the configuration with the captcha
/// items (reCAPTCHA, image captcha or text captcha) found on it.
/// </summary>
/// <param name="automaticlogin">
/// <c>true</c> when called from an unattended re-login; a captcha then aborts the operation.
/// </param>
/// <returns>
/// The configuration data; returned unchanged when there is no login block or the login method
/// is not "form". <c>null</c> when a captcha was found and <paramref name="automaticlogin"/> is set.
/// </returns>
/// <exception cref="NotImplementedException">The definition names a captcha type other than "image" or "text".</exception>
public async Task<ConfigurationData> GetConfigurationForSetup(bool automaticlogin)
{
    var Login = Definition.Login;
    if (Login == null || Login.Method != "form")
    {
        return configData;
    }

    var LoginUrl = resolvePath(Login.Path);

    // Start from a clean cookie state, optionally seeded with the cookies given in the definition.
    configData.CookieHeader.Value = null;
    if (Login.Cookies != null)
    {
        configData.CookieHeader.Value = String.Join("; ", Login.Cookies);
    }

    landingResult = await RequestStringWithCookies(LoginUrl.AbsoluteUri, null, SiteLink);
    var htmlParser = new HtmlParser();
    landingResultDocument = htmlParser.ParseDocument(landingResult.Content);

    var hasCaptcha = false;

    // A reCAPTCHA is assumed only when both the API script and the widget container are present.
    var recaptchaScript = landingResultDocument.QuerySelector("script[src*=\"/recaptcha/api.js\"]");
    var grecaptcha = landingResultDocument.QuerySelector(".g-recaptcha");
    if (recaptchaScript != null && grecaptcha != null)
    {
        hasCaptcha = true;
        var CaptchaItem = new RecaptchaItem();
        CaptchaItem.Name = "Captcha";
        CaptchaItem.Version = "2";
        CaptchaItem.SiteKey = grecaptcha.GetAttribute("data-sitekey");
        if (CaptchaItem.SiteKey == null) // some sites don't store the sitekey in the .g-recaptcha div (e.g. cloudflare captcha challenge page)
        {
            // The fallback element may be missing as well; the site key then stays null.
            var siteKeyElement = landingResultDocument.QuerySelector("[data-sitekey]");
            if (siteKeyElement != null)
            {
                CaptchaItem.SiteKey = siteKeyElement.GetAttribute("data-sitekey");
            }
        }

        configData.AddDynamic("Captcha", CaptchaItem);
    }

    if (Login.Captcha != null)
    {
        var Captcha = Login.Captcha;
        if (Captcha.Type == "image")
        {
            var captchaElement = landingResultDocument.QuerySelector(Captcha.Selector);
            if (captchaElement != null)
            {
                hasCaptcha = true;

                var CaptchaUrl = resolvePath(captchaElement.GetAttribute("src"), LoginUrl);
                var captchaImageData = await RequestBytesWithCookies(CaptchaUrl.ToString(), landingResult.Cookies, RequestType.GET, LoginUrl.AbsoluteUri);
                var CaptchaImage = new ImageItem { Name = "Captcha Image" };
                var CaptchaText = new StringItem { Name = "Captcha Text" };

                CaptchaImage.Value = captchaImageData.Content;

                configData.AddDynamic("CaptchaImage", CaptchaImage);
                configData.AddDynamic("CaptchaText", CaptchaText);
            }
            else
            {
                logger.Debug(string.Format("CardigannIndexer ({0}): No captcha image found", ID));
            }
        }
        else if (Captcha.Type == "text")
        {
            var captchaElement = landingResultDocument.QuerySelector(Captcha.Selector);
            if (captchaElement != null)
            {
                hasCaptcha = true;

                var CaptchaChallenge = new DisplayItem(captchaElement.TextContent) { Name = "Captcha Challenge" };
                var CaptchaAnswer = new StringItem { Name = "Captcha Answer" };

                configData.AddDynamic("CaptchaChallenge", CaptchaChallenge);
                configData.AddDynamic("CaptchaAnswer", CaptchaAnswer);
            }
            else
            {
                // This branch looks for a text challenge, not an image.
                logger.Debug(string.Format("CardigannIndexer ({0}): No captcha challenge found", ID));
            }
        }
        else
        {
            throw new NotImplementedException(string.Format("Captcha type \"{0}\" is not implemented", Captcha.Type));
        }
    }

    if (hasCaptcha && automaticlogin)
    {
        configData.LastError.Value = "Got captcha during automatic login, please reconfigure manually";
        logger.Error(string.Format("CardigannIndexer ({0}): Found captcha during automatic login, aborting", ID));
        return null;
    }

    return configData;
}
2017-06-28 05:31:38 +00:00
/// <summary>
/// Applies a configuration submitted for this indexer: loads the submitted values,
/// performs the login, verifies it, and finally marks the indexer as configured and
/// persists the configuration.
/// </summary>
/// <param name="configJson">The configuration values as JSON.</param>
/// <returns><see cref="IndexerConfigurationStatus.Completed"/> once every step has run.</returns>
public override async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
{
    LoadValuesFromJson(configJson);

    // The value returned by DoLogin() is not inspected here; failures are expected to
    // surface as exceptions from DoLogin()/TestLogin() (NOTE(review): confirm).
    await DoLogin();
    await TestLogin();

    // Only reached when neither login step threw.
    IsConfigured = true;
    SaveConfig();

    return IndexerConfigurationStatus.Completed;
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Runs <paramref name="Data"/> through a chain of filters, in order, feeding the output
/// of each filter into the next one.
/// </summary>
/// <param name="Data">The input string.</param>
/// <param name="Filters">Filters to apply; when <c>null</c> the input is returned unchanged.</param>
/// <param name="variables">Template variables used by filters whose arguments are templates
/// (<c>re_replace</c>, <c>replace</c>, <c>prepend</c>, <c>append</c>).</param>
/// <returns>The filtered string. Unknown filter names are ignored.</returns>
/// <exception cref="Exception">The <c>diacritics</c> filter got an argument other than <c>replace</c>.</exception>
protected string applyFilters(string Data, List<filterBlock> Filters, Dictionary<string, object> variables = null)
{
    if (Filters == null)
    {
        return Data;
    }

    // Dates are re-serialized for later machine parsing, so day and month names must not be
    // localized: always format them with the invariant culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    foreach (filterBlock Filter in Filters)
    {
        switch (Filter.Name)
        {
            case "querystring":
                var param = (string)Filter.Args;
                Data = ParseUtil.GetArgumentFromQueryString(Data, param);
                break;
            case "timeparse":
            case "dateparse":
                var layout = (string)Filter.Args;
                try
                {
                    var Date = DateTimeUtil.ParseDateTimeGoLang(Data, layout);
                    Data = Date.ToString(DateTimeUtil.RFC1123ZPattern, invariant);
                }
                catch (FormatException ex)
                {
                    // Best effort: keep the unparsed value so a later filter can still handle it.
                    logger.Debug(ex.Message);
                }
                break;
            case "regexp":
                var pattern = (string)Filter.Args;
                var Regexp = new Regex(pattern);
                var Match = Regexp.Match(Data);
                // First capture group; an empty string when the pattern does not match.
                Data = Match.Groups[1].Value;
                break;
            case "re_replace":
                var regexpreplace_pattern = (string)Filter.Args[0];
                var regexpreplace_replacement = (string)Filter.Args[1];
                regexpreplace_replacement = applyGoTemplateText(regexpreplace_replacement, variables);
                Regex regexpreplace_regex = new Regex(regexpreplace_pattern);
                Data = regexpreplace_regex.Replace(Data, regexpreplace_replacement);
                break;
            case "split":
                var sep = (string)Filter.Args[0];
                var pos = (string)Filter.Args[1];
                var posInt = int.Parse(pos);
                // Only the first character of the separator is used.
                var strParts = Data.Split(sep[0]);
                if (posInt < 0)
                {
                    // Negative positions count from the end.
                    posInt += strParts.Length;
                }
                Data = strParts[posInt];
                break;
            case "replace":
                var from = (string)Filter.Args[0];
                var to = (string)Filter.Args[1];
                to = applyGoTemplateText(to, variables);
                Data = Data.Replace(from, to);
                break;
            case "trim":
                var cutset = (string)Filter.Args;
                if (cutset != null)
                {
                    // Only the first character of the cutset is trimmed.
                    Data = Data.Trim(cutset[0]);
                }
                else
                {
                    Data = Data.Trim();
                }
                break;
            case "prepend":
                var prependstr = (string)Filter.Args;
                Data = applyGoTemplateText(prependstr, variables) + Data;
                break;
            case "append":
                var str = (string)Filter.Args;
                Data += applyGoTemplateText(str, variables);
                break;
            case "tolower":
                Data = Data.ToLower();
                break;
            case "toupper":
                Data = Data.ToUpper();
                break;
            case "urldecode":
                Data = WebUtilityHelpers.UrlDecode(Data, Encoding);
                break;
            case "urlencode":
                Data = WebUtilityHelpers.UrlEncode(Data, Encoding);
                break;
            case "timeago":
            case "reltime":
                Data = DateTimeUtil.FromTimeAgo(Data).ToString(DateTimeUtil.RFC1123ZPattern, invariant);
                break;
            case "fuzzytime":
                Data = DateTimeUtil.FromUnknown(Data).ToString(DateTimeUtil.RFC1123ZPattern, invariant);
                break;
            case "validfilename":
                Data = StringUtil.MakeValidFileName(Data, '_', false);
                break;
            case "diacritics":
                var diacriticsOp = (string)Filter.Args;
                if (diacriticsOp == "replace")
                {
                    // Should replace diacritics characters with their base character.
                    // It's not perfect, e.g. "ŠĐĆŽ - šđčćž" becomes "SĐCZ-sđccz"
                    string stFormD = Data.Normalize(NormalizationForm.FormD);
                    StringBuilder sb = new StringBuilder(stFormD.Length);
                    foreach (char ch in stFormD)
                    {
                        // Drop the combining marks that FormD split off from their base characters.
                        var uc = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(ch);
                        if (uc != System.Globalization.UnicodeCategory.NonSpacingMark)
                        {
                            sb.Append(ch);
                        }
                    }
                    Data = sb.ToString().Normalize(NormalizationForm.FormC);
                }
                else
                {
                    throw new Exception("unsupported diacritics filter argument");
                }
                break;
            case "jsonjoinarray":
                var jsonjoinarrayJSONPath = (string)Filter.Args[0];
                var jsonjoinarraySeparator = (string)Filter.Args[1];
                var jsonjoinarrayO = JObject.Parse(Data);
                var jsonjoinarrayOResult = jsonjoinarrayO.SelectToken(jsonjoinarrayJSONPath);
                var jsonjoinarrayOResultStrings = jsonjoinarrayOResult.Select(j => j.ToString());
                Data = string.Join(jsonjoinarraySeparator, jsonjoinarrayOResultStrings);
                break;
            case "hexdump":
                // this is mainly for debugging invisible special char related issues
                var HexData = string.Join("", Data.Select(c => c + "(" + ((int)c).ToString("X2") + ")"));
                logger.Debug(string.Format("CardigannIndexer ({0}): hexdump: {1}", ID, HexData));
                break;
            case "strdump":
                // for debugging
                var DebugData = Data.Replace("\r", "\\r").Replace("\n", "\\n").Replace("\xA0", "\\xA0");
                logger.Debug(string.Format("CardigannIndexer ({0}): strdump: {1}", ID, DebugData));
                break;
            default:
                break;
        }
    }

    return Data;
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Runs a CSS selector against <paramref name="Element"/>, with manual handling of a
/// leading <c>:root</c> pseudo selector.
/// </summary>
/// <param name="Element">The element to query from.</param>
/// <param name="Selector">The CSS selector, optionally prefixed with <c>:root</c>.</param>
/// <returns>
/// The first element matching the selector. For a <c>:root</c>-prefixed selector the query
/// starts at the topmost ancestor of <paramref name="Element"/>; a bare <c>:root</c> yields
/// that ancestor itself.
/// </returns>
protected IElement QuerySelector(IElement Element, string Selector)
{
    const string rootPrefix = ":root";

    // AngleSharp doesn't support the :root pseudo selector, so we check for it manually.
    // The prefix is a fixed token, hence the ordinal (culture-independent) comparison.
    if (Selector.StartsWith(rootPrefix, StringComparison.Ordinal))
    {
        Selector = Selector.Substring(rootPrefix.Length);

        // Walk up to the topmost ancestor.
        while (Element.ParentElement != null)
        {
            Element = Element.ParentElement;
        }

        // A bare ":root" leaves nothing to query for, and an empty string is not a valid
        // CSS selector, so the root element itself is the result.
        if (string.IsNullOrWhiteSpace(Selector))
        {
            return Element;
        }
    }

    return Element.QuerySelector(Selector);
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Evaluates a selector block against a DOM element and returns the resulting string
/// after whitespace normalization and the block's filters.
/// </summary>
/// <param name="Selector">The selector block (text, selector, remove, case, attribute, filters).</param>
/// <param name="Dom">The element the selector is evaluated against.</param>
/// <param name="variables">Template variables for the text template and the filters.</param>
/// <returns>The extracted and filtered value.</returns>
/// <exception cref="Exception">
/// The selector matched nothing, none of the case selectors matched, or the requested
/// attribute is not set.
/// </exception>
protected string handleSelector(selectorBlock Selector, IElement Dom, Dictionary<string, object> variables = null)
{
    // A literal text template bypasses the DOM entirely.
    if (Selector.Text != null)
    {
        var templated = applyGoTemplateText(Selector.Text, variables);
        return applyFilters(templated, Selector.Filters, variables);
    }

    // Without a selector the passed element itself is the target.
    var target = Dom;
    if (Selector.Selector != null)
    {
        target = Dom.Matches(Selector.Selector) ? Dom : QuerySelector(Dom, Selector.Selector);
        if (target == null)
        {
            throw new Exception(string.Format("Selector \"{0}\" didn't match {1}", Selector.Selector, Dom.ToHtmlPretty()));
        }
    }

    // Strip unwanted child nodes from the target before reading it (this mutates the DOM).
    if (Selector.Remove != null)
    {
        foreach (var unwanted in target.QuerySelectorAll(Selector.Remove))
        {
            unwanted.Remove();
        }
    }

    string extracted = null;
    if (Selector.Case != null)
    {
        // The first case whose selector matches the target or one of its descendants wins.
        foreach (var candidate in Selector.Case)
        {
            var isHit = target.Matches(candidate.Key) || QuerySelector(target, candidate.Key) != null;
            if (!isHit)
            {
                continue;
            }

            extracted = candidate.Value;
            break;
        }

        if (extracted == null)
        {
            throw new Exception(string.Format("None of the case selectors \"{0}\" matched {1}", string.Join(",", Selector.Case), target.ToHtmlPretty()));
        }
    }
    else if (Selector.Attribute != null)
    {
        extracted = target.GetAttribute(Selector.Attribute);
        if (extracted == null)
        {
            throw new Exception(string.Format("Attribute \"{0}\" is not set for element {1}", Selector.Attribute, target.ToHtmlPretty()));
        }
    }
    else
    {
        extracted = target.TextContent;
    }

    return applyFilters(ParseUtil.NormalizeSpace(extracted), Selector.Filters, variables);
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Resolves a (possibly relative) path against a base URL.
/// </summary>
/// <param name="path">The path or URL to resolve.</param>
/// <param name="currentUrl">The base URL; when <c>null</c>, <c>SiteLink</c> is used instead.</param>
/// <returns>The combined URI.</returns>
protected Uri resolvePath(string path, Uri currentUrl = null)
{
    var baseUrl = currentUrl ?? new Uri(SiteLink);
    return new Uri(baseUrl, path);
}
2017-07-03 05:15:47 +00:00
/// <summary>
/// Executes a Torznab query against every matching search path of the definition and
/// parses the returned documents into releases.
/// </summary>
/// <remarks>
/// For each search path the method builds the request from the path and input templates,
/// sends it, logs in again and repeats the request when the response indicates that a
/// login is needed, applies the preprocessing filters, selects the result rows and parses
/// every configured field of every row. A row that fails to parse is logged and skipped;
/// a failure affecting a whole response is reported through <c>OnParseError</c>.
/// </remarks>
/// <param name="query">The query to execute.</param>
/// <returns>The parsed releases, truncated to <c>query.Limit</c> when that is positive.</returns>
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
{
    var releases = new List<ReleaseInfo>();
    searchBlock Search = Definition.Search;

    // init template context
    var variables = getTemplateVariablesFromConfigData();
    variables[".Query.Type"] = query.QueryType;
    variables[".Query.Q"] = query.SearchTerm;
    variables[".Query.Series"] = null;
    variables[".Query.Ep"] = query.Episode;
    variables[".Query.Season"] = query.Season;
    variables[".Query.Movie"] = null;
    variables[".Query.Year"] = query.Year.ToString();
    variables[".Query.Limit"] = query.Limit.ToString();
    variables[".Query.Offset"] = query.Offset.ToString();
    variables[".Query.Extended"] = query.Extended.ToString();
    variables[".Query.Categories"] = query.Categories;
    variables[".Query.APIKey"] = query.ApiKey;
    variables[".Query.TVDBID"] = null;
    variables[".Query.TVRageID"] = query.RageID;
    variables[".Query.IMDBID"] = query.ImdbID;
    variables[".Query.IMDBIDShort"] = query.ImdbIDShort;
    variables[".Query.TVMazeID"] = null;
    variables[".Query.TraktID"] = null;
    variables[".Query.Album"] = query.Album;
    variables[".Query.Artist"] = query.Artist;
    variables[".Query.Label"] = query.Label;
    variables[".Query.Track"] = query.Track;
    //variables[".Query.Genre"] = query.Genre ?? new List<string>();
    variables[".Query.Episode"] = query.GetEpisodeSearchString();

    var mappedCategories = MapTorznabCapsToTrackers(query);
    if (mappedCategories.Count == 0)
    {
        mappedCategories = this.DefaultCategories;
    }
    variables[".Categories"] = mappedCategories;

    // Build the keyword string from the non-empty query parts.
    var KeywordTokens = new List<string>();
    var KeywordTokenKeys = new List<string> { "Q", "Series", "Movie", "Year" };
    foreach (var key in KeywordTokenKeys)
    {
        var Value = (string)variables[".Query." + key];
        if (!string.IsNullOrWhiteSpace(Value))
        {
            KeywordTokens.Add(Value);
        }
    }
    if (!string.IsNullOrWhiteSpace((string)variables[".Query.Episode"]))
    {
        KeywordTokens.Add((string)variables[".Query.Episode"]);
    }
    variables[".Query.Keywords"] = string.Join(" ", KeywordTokens);
    variables[".Keywords"] = applyFilters((string)variables[".Query.Keywords"], Search.Keywordsfilters);

    // TODO: prepare queries first and then send them parallel
    var SearchPaths = Search.Paths;
    foreach (var SearchPath in SearchPaths)
    {
        // skip path if categories don't match
        if (SearchPath.Categories != null && mappedCategories.Count > 0)
        {
            // A leading "!" entry inverts the match.
            var invertMatch = (SearchPath.Categories[0] == "!");
            var hasIntersect = mappedCategories.Intersect(SearchPath.Categories).Any();
            if (invertMatch)
            {
                hasIntersect = !hasIntersect;
            }
            if (!hasIntersect)
            {
                continue;
            }
        }

        // build search URL
        // HttpUtility.UrlPathEncode seems to only encode spaces, we use UrlEncode and replace + with %20 as a workaround
        var searchUrl = resolvePath(applyGoTemplateText(SearchPath.Path, variables, WebUtility.UrlEncode).Replace("+", "%20")).AbsoluteUri;
        var queryCollection = new List<KeyValuePair<string, string>>();
        RequestType method = RequestType.GET;
        if (String.Equals(SearchPath.Method, "post", StringComparison.OrdinalIgnoreCase))
        {
            method = RequestType.POST;
        }

        // Search-level inputs come first (when inherited), followed by the path's own inputs.
        var InputsList = new List<Dictionary<string, string>>();
        if (SearchPath.Inheritinputs)
        {
            InputsList.Add(Search.Inputs);
        }
        InputsList.Add(SearchPath.Inputs);

        foreach (var Inputs in InputsList)
        {
            if (Inputs == null)
            {
                continue;
            }

            foreach (var Input in Inputs)
            {
                if (Input.Key == "$raw")
                {
                    // "$raw" carries a pre-built query string which is split into its pairs.
                    var rawStr = applyGoTemplateText(Input.Value, variables, WebUtility.UrlEncode);
                    foreach (string part in rawStr.Split('&'))
                    {
                        var parts = part.Split(new char[] { '=' }, 2);
                        var key = parts[0];
                        if (key.Length == 0)
                        {
                            continue;
                        }
                        var value = "";
                        if (parts.Length == 2)
                        {
                            value = parts[1];
                        }
                        queryCollection.Add(key, value);
                    }
                }
                else
                {
                    queryCollection.Add(Input.Key, applyGoTemplateText(Input.Value, variables));
                }
            }
        }

        if (method == RequestType.GET)
        {
            if (queryCollection.Count > 0)
            {
                searchUrl += "?" + queryCollection.GetQueryString(Encoding);
            }
        }
        var searchUrlUri = new Uri(searchUrl);

        // send HTTP request
        WebClientStringResult response = null;
        Dictionary<string, string> headers = null;
        if (Search.Headers != null)
        {
            // FIXME: fix jackett header handling (allow it to specify the same header multiple times)
            headers = new Dictionary<string, string>();
            foreach (var header in Search.Headers)
            {
                headers.Add(header.Key, header.Value[0]);
            }
        }

        if (method == RequestType.POST)
        {
            response = await PostDataWithCookies(searchUrl, queryCollection, null, null, headers);
        }
        else
        {
            response = await RequestStringWithCookies(searchUrl, null, null, headers);
        }

        if (response.IsRedirect && SearchPath.Followredirect)
        {
            await FollowIfRedirect(response);
        }

        var results = response.Content;

        try
        {
            var SearchResultParser = new HtmlParser();
            var SearchResultDocument = SearchResultParser.ParseDocument(results);

            // check if we need to login again
            var loginNeeded = CheckIfLoginIsNeeded(response, SearchResultDocument);
            if (loginNeeded)
            {
                logger.Info(string.Format("CardigannIndexer ({0}): Relogin required", ID));
                var LoginResult = await DoLogin();
                if (!LoginResult)
                {
                    throw new Exception("Relogin failed");
                }
                await TestLogin();

                // Repeat the request exactly like the first attempt, custom headers included.
                if (method == RequestType.POST)
                {
                    response = await PostDataWithCookies(searchUrl, queryCollection, null, null, headers);
                }
                else
                {
                    response = await RequestStringWithCookies(searchUrl, null, null, headers);
                }

                if (response.IsRedirect && SearchPath.Followredirect)
                {
                    await FollowIfRedirect(response);
                }

                results = response.Content;
                SearchResultDocument = SearchResultParser.ParseDocument(results);
            }

            checkForError(response, Search.Error);

            if (Search.Preprocessingfilters != null)
            {
                results = applyFilters(results, Search.Preprocessingfilters, variables);
                SearchResultDocument = SearchResultParser.ParseDocument(results);
                logger.Debug(string.Format("CardigannIndexer ({0}): result after preprocessingfilters: {1}", ID, results));
            }

            var rowsSelector = applyGoTemplateText(Search.Rows.Selector, variables);
            var RowsDom = SearchResultDocument.QuerySelectorAll(rowsSelector);
            List<IElement> Rows = new List<IElement>();
            foreach (var RowDom in RowsDom)
            {
                Rows.Add(RowDom);
            }

            // merge following rows for After selector
            var After = Search.Rows.After;
            if (After > 0)
            {
                for (int i = 0; i < Rows.Count; i += 1)
                {
                    var CurrentRow = Rows[i];

                    // Never reach past the end of the list: a truncated result page may lack
                    // some of the trailing rows of its last group.
                    var mergeCount = Math.Min(After, Rows.Count - i - 1);
                    for (int j = 0; j < mergeCount; j += 1)
                    {
                        var MergeRow = Rows[i + j + 1];
                        // Snapshot the child nodes first; appending moves them out of MergeRow.
                        var MergeNodes = MergeRow.ChildNodes.ToArray();
                        CurrentRow.Append(MergeNodes);
                    }
                    Rows.RemoveRange(i + 1, mergeCount);
                }
            }

            foreach (var Row in Rows)
            {
                try
                {
                    var release = new ReleaseInfo();
                    release.MinimumRatio = 1;
                    release.MinimumSeedTime = 48 * 60 * 60;

                    // Parse fields
                    foreach (var Field in Search.Fields)
                    {
                        // The key has the form "name|modifier|modifier...".
                        var FieldParts = Field.Key.Split('|');
                        var FieldName = FieldParts[0];
                        var FieldModifiers = FieldParts.Skip(1).ToList();

                        string value = null;
                        var variablesKey = ".Result." + FieldName;
                        try
                        {
                            value = handleSelector(Field.Value, Row, variables);
                            switch (FieldName)
                            {
                                case "download":
                                    if (string.IsNullOrEmpty(value))
                                    {
                                        value = null;
                                        release.Link = null;
                                        break;
                                    }
                                    if (value.StartsWith("magnet:"))
                                    {
                                        release.MagnetUri = new Uri(value);
                                        //release.Link = release.MagnetUri;
                                        value = release.MagnetUri.ToString();
                                    }
                                    else
                                    {
                                        release.Link = resolvePath(value, searchUrlUri);
                                        value = release.Link.ToString();
                                    }
                                    break;
                                case "magnet":
                                    release.MagnetUri = new Uri(value);
                                    value = release.MagnetUri.ToString();
                                    break;
                                case "details":
                                    var url = resolvePath(value, searchUrlUri);
                                    release.Guid = url;
                                    release.Comments = url;
                                    value = url.ToString();
                                    break;
                                case "comments":
                                    var CommentsUrl = resolvePath(value, searchUrlUri);
                                    if (release.Comments == null)
                                    {
                                        release.Comments = CommentsUrl;
                                    }
                                    if (release.Guid == null)
                                    {
                                        release.Guid = CommentsUrl;
                                    }
                                    value = CommentsUrl.ToString();
                                    break;
                                case "title":
                                    if (FieldModifiers.Contains("append"))
                                    {
                                        release.Title += value;
                                    }
                                    else
                                    {
                                        release.Title = value;
                                    }
                                    value = release.Title;
                                    break;
                                case "description":
                                    if (FieldModifiers.Contains("append"))
                                    {
                                        release.Description += value;
                                    }
                                    else
                                    {
                                        release.Description = value;
                                    }
                                    value = release.Description;
                                    break;
                                case "category":
                                    var cats = MapTrackerCatToNewznab(value);
                                    if (release.Category == null)
                                    {
                                        release.Category = cats;
                                    }
                                    else
                                    {
                                        foreach (var cat in cats)
                                        {
                                            if (!release.Category.Contains(cat))
                                            {
                                                release.Category.Add(cat);
                                            }
                                        }
                                    }
                                    // Expose the category ids themselves; ToString() on the
                                    // collection would only yield its type name.
                                    value = string.Join(",", release.Category);
                                    break;
                                case "size":
                                    release.Size = ReleaseInfo.GetBytes(value);
                                    value = release.Size.ToString();
                                    break;
                                case "leechers":
                                    var Leechers = ParseUtil.CoerceInt(value);
                                    if (release.Peers == null)
                                    {
                                        release.Peers = Leechers;
                                    }
                                    else
                                    {
                                        release.Peers += Leechers;
                                    }
                                    value = Leechers.ToString();
                                    break;
                                case "seeders":
                                    release.Seeders = ParseUtil.CoerceInt(value);
                                    if (release.Peers == null)
                                    {
                                        release.Peers = release.Seeders;
                                    }
                                    else
                                    {
                                        release.Peers += release.Seeders;
                                    }
                                    value = release.Seeders.ToString();
                                    break;
                                case "date":
                                    release.PublishDate = DateTimeUtil.FromUnknown(value);
                                    // Invariant culture keeps day/month names unlocalized.
                                    value = release.PublishDate.ToString(DateTimeUtil.RFC1123ZPattern, System.Globalization.CultureInfo.InvariantCulture);
                                    break;
                                case "files":
                                    release.Files = ParseUtil.CoerceLong(value);
                                    value = release.Files.ToString();
                                    break;
                                case "grabs":
                                    release.Grabs = ParseUtil.CoerceLong(value);
                                    value = release.Grabs.ToString();
                                    break;
                                case "downloadvolumefactor":
                                    release.DownloadVolumeFactor = ParseUtil.CoerceDouble(value);
                                    value = release.DownloadVolumeFactor.ToString();
                                    break;
                                case "uploadvolumefactor":
                                    release.UploadVolumeFactor = ParseUtil.CoerceDouble(value);
                                    value = release.UploadVolumeFactor.ToString();
                                    break;
                                case "minimumratio":
                                    release.MinimumRatio = ParseUtil.CoerceDouble(value);
                                    value = release.MinimumRatio.ToString();
                                    break;
                                case "minimumseedtime":
                                    release.MinimumSeedTime = ParseUtil.CoerceLong(value);
                                    value = release.MinimumSeedTime.ToString();
                                    break;
                                case "imdb":
                                    release.Imdb = ParseUtil.GetLongFromString(value);
                                    value = release.Imdb.ToString();
                                    break;
                                case "rageid":
                                    // First run of digits; the static Regex API caches the pattern.
                                    var RageID = Regex.Match(value, @"(\d+)").Groups[1].Value;
                                    release.RageID = ParseUtil.CoerceLong(RageID);
                                    value = release.RageID.ToString();
                                    break;
                                case "tvdbid":
                                    var TVDBId = Regex.Match(value, @"(\d+)").Groups[1].Value;
                                    release.TVDBId = ParseUtil.CoerceLong(TVDBId);
                                    value = release.TVDBId.ToString();
                                    break;
                                case "banner":
                                    if (!string.IsNullOrWhiteSpace(value))
                                    {
                                        release.BannerUrl = resolvePath(value, searchUrlUri);
                                    }
                                    // BannerUrl stays null when the selector yielded a blank value.
                                    value = release.BannerUrl?.ToString();
                                    break;
                                default:
                                    break;
                            }
                            variables[variablesKey] = value;
                        }
                        catch (Exception ex)
                        {
                            if (!variables.ContainsKey(variablesKey))
                            {
                                variables[variablesKey] = null;
                            }
                            // Optional fields may fail without invalidating the whole row.
                            if (OptionalFileds.Contains(Field.Key) || FieldModifiers.Contains("optional") || Field.Value.Optional)
                            {
                                continue;
                            }
                            // Keep the original exception as inner exception for its stack trace.
                            throw new Exception(string.Format("Error while parsing field={0}, selector={1}, value={2}: {3}", Field.Key, Field.Value.Selector, (value == null ? "<null>" : value), ex.Message), ex);
                        }
                    }

                    var Filters = Search.Rows.Filters;
                    var SkipRelease = false;
                    if (Filters != null)
                    {
                        foreach (filterBlock Filter in Filters)
                        {
                            switch (Filter.Name)
                            {
                                case "andmatch":
                                    int CharacterLimit = -1;
                                    if (Filter.Args != null)
                                    {
                                        CharacterLimit = int.Parse(Filter.Args);
                                    }
                                    if (query.ImdbID != null && TorznabCaps.SupportsImdbMovieSearch)
                                    {
                                        break; // skip andmatch filter for imdb searches
                                    }
                                    if (!query.MatchQueryStringAND(release.Title, CharacterLimit))
                                    {
                                        logger.Debug(string.Format("CardigannIndexer ({0}): skipping {1} (andmatch filter)", ID, release.Title));
                                        SkipRelease = true;
                                    }
                                    break;
                                case "strdump":
                                    // for debugging
                                    logger.Debug(string.Format("CardigannIndexer ({0}): row strdump: {1}", ID, Row.ToHtmlPretty()));
                                    break;
                                default:
                                    logger.Error(string.Format("CardigannIndexer ({0}): Unsupported rows filter: {1}", ID, Filter.Name));
                                    break;
                            }
                        }
                    }

                    if (SkipRelease)
                    {
                        continue;
                    }

                    // if DateHeaders is set go through the previous rows and look for the header selector
                    var DateHeaders = Search.Rows.Dateheaders;
                    if (release.PublishDate == DateTime.MinValue && DateHeaders != null)
                    {
                        string value = null;

                        // Walk backwards through the preceding siblings; once they are
                        // exhausted, continue with the sibling preceding the parent.
                        var PrevRow = Row.PreviousElementSibling ?? Row.ParentElement?.PreviousElementSibling;
                        while (PrevRow != null)
                        {
                            logger.Debug(PrevRow.OuterHtml);
                            try
                            {
                                value = handleSelector(DateHeaders, PrevRow);
                                break;
                            }
                            catch (Exception)
                            {
                                // Not a date header row: keep looking further up.
                            }
                            PrevRow = PrevRow.PreviousElementSibling ?? PrevRow.ParentElement?.PreviousElementSibling;
                        }

                        if (value == null && DateHeaders.Optional == false)
                        {
                            throw new Exception(string.Format("No date header row found for {0}", release.ToString()));
                        }
                        if (value != null)
                        {
                            release.PublishDate = DateTimeUtil.FromUnknown(value);
                        }
                    }

                    releases.Add(release);
                }
                catch (Exception ex)
                {
                    // A broken row must not discard the remaining rows.
                    logger.Error(string.Format("CardigannIndexer ({0}): Error while parsing row '{1}':\n\n{2}", ID, Row.ToHtmlPretty(), ex));
                }
            }
        }
        catch (Exception ex)
        {
            OnParseError(results, ex);
        }
    }

    if (query.Limit > 0)
    {
        releases = releases.Take(query.Limit).ToList();
    }
    return releases;
}
2016-12-15 08:12:28 +00:00
2017-04-15 08:45:10 +00:00
/// <summary>
/// Sends the HTTP request described by a request block and returns the raw response.
/// </summary>
/// <param name="request">The request block (path template, method, inputs).</param>
/// <param name="variables">Template variables for the path and the input values.</param>
/// <param name="referer">Referer passed on to the web client.</param>
/// <returns>The response of the request.</returns>
/// <remarks>
/// With method "post" (case-insensitive) the inputs are sent as POST data; otherwise the
/// request is a GET and the inputs are appended to the URL as query string.
/// </remarks>
protected async Task<WebClientByteResult> handleRequest(requestBlock request, Dictionary<string, object> variables = null, string referer = null)
{
    var requestLinkStr = resolvePath(applyGoTemplateText(request.Path, variables)).ToString();

    Dictionary<string, string> pairs = null;
    var queryCollection = new NameValueCollection();
    RequestType method = RequestType.GET;
    if (String.Equals(request.Method, "post", StringComparison.OrdinalIgnoreCase))
    {
        method = RequestType.POST;
        pairs = new Dictionary<string, string>();
    }

    // A request block without inputs is valid; treat a missing map like an empty one.
    if (request.Inputs != null)
    {
        foreach (var Input in request.Inputs)
        {
            var value = applyGoTemplateText(Input.Value, variables);
            if (method == RequestType.GET)
            {
                queryCollection.Add(Input.Key, value);
            }
            else if (method == RequestType.POST)
            {
                pairs.Add(Input.Key, value);
            }
        }
    }

    if (queryCollection.Count > 0)
    {
        // NOTE(review): Substring(1) assumes GetQueryString() returns a string with a leading
        // separator character for a NameValueCollection - confirm against its implementation.
        if (!requestLinkStr.Contains("?"))
        {
            requestLinkStr += "?" + queryCollection.GetQueryString(Encoding).Substring(1);
        }
        else
        {
            requestLinkStr += queryCollection.GetQueryString(Encoding);
        }
    }

    var response = await RequestBytesWithCookiesAndRetry(requestLinkStr, null, method, referer, pairs);
    logger.Debug($"CardigannIndexer ({ID}): handleRequest() remote server returned {response.Status.ToString()}" + (response.IsRedirect ? " => " + response.RedirectingTo : ""));
    return response;
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Publishes the components of a URI as template variables named
/// <c>{prefix}.AbsoluteUri</c>, <c>{prefix}.Host</c>, and so on, plus one
/// <c>{prefix}.Query.{name}</c> variable per query string parameter.
/// </summary>
/// <param name="variables">The variable map to add to; it is modified in place.</param>
/// <param name="uri">The URI to take the values from.</param>
/// <param name="prefix">Prefix put in front of every variable name.</param>
/// <returns>The same <paramref name="variables"/> instance.</returns>
protected IDictionary<string, object> AddTemplateVariablesFromUri(IDictionary<string, object> variables, Uri uri, string prefix = "")
{
    var rawQuery = uri.Query;

    variables[$"{prefix}.AbsoluteUri"] = uri.AbsoluteUri;
    variables[$"{prefix}.AbsolutePath"] = uri.AbsolutePath;
    variables[$"{prefix}.Scheme"] = uri.Scheme;
    variables[$"{prefix}.Host"] = uri.Host;
    variables[$"{prefix}.Port"] = uri.Port.ToString();
    variables[$"{prefix}.PathAndQuery"] = uri.PathAndQuery;
    variables[$"{prefix}.Query"] = rawQuery;

    foreach (var parameter in QueryHelpers.ParseQuery(rawQuery))
    {
        // If the same parameter was supplied multiple times, just take the first value.
        variables[$"{prefix}.Query.{parameter.Key}"] = parameter.Value.First();
    }

    return variables;
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Downloads the file behind <paramref name="link"/>, honouring the optional download
/// block of the definition.
/// </summary>
/// <remarks>
/// When the definition has a download block: an optional "before" request is sent first,
/// the HTTP method may be switched to POST, and with a selector configured the page
/// behind <paramref name="link"/> is fetched and the <c>href</c> of the matching element,
/// after the download filters, replaces the link that is downloaded.
/// </remarks>
/// <param name="link">The link of the release to download.</param>
/// <returns>The downloaded bytes.</returns>
/// <exception cref="Exception">The download selector did not match anything.</exception>
public override async Task<byte[]> Download(Uri link)
{
    var method = RequestType.GET;
    var downloadBlock = Definition.Download;

    if (downloadBlock != null)
    {
        var variables = getTemplateVariablesFromConfigData();
        AddTemplateVariablesFromUri(variables, link, ".DownloadUri");

        if (downloadBlock.Before != null)
        {
            // The response is not needed; the request is sent for its side effects only.
            await handleRequest(downloadBlock.Before, variables, link.ToString());
        }

        // Case-insensitive, like the method checks of the search and request blocks.
        if (String.Equals(downloadBlock.Method, "post", StringComparison.OrdinalIgnoreCase))
        {
            method = RequestType.POST;
        }

        if (downloadBlock.Selector != null)
        {
            var selector = applyGoTemplateText(downloadBlock.Selector, variables);
            var response = await RequestStringWithCookies(link.ToString());
            if (response.IsRedirect)
            {
                response = await RequestStringWithCookies(response.RedirectingTo);
            }

            var results = response.Content;
            var SearchResultParser = new HtmlParser();
            var SearchResultDocument = SearchResultParser.ParseDocument(results);
            var DlUri = SearchResultDocument.QuerySelector(selector);
            if (DlUri != null)
            {
                logger.Debug(string.Format("CardigannIndexer ({0}): Download selector {1} matched:{2}", ID, selector, DlUri.ToHtmlPretty()));
                var href = DlUri.GetAttribute("href");
                href = applyFilters(href, downloadBlock.Filters, variables);
                // The href may be relative to the page it was found on.
                link = resolvePath(href, link);
            }
            else
            {
                logger.Error(string.Format("CardigannIndexer ({0}): Download selector {1} didn't match:\n{2}", ID, downloadBlock.Selector, results));
                throw new Exception(string.Format("Download selector {0} didn't match", downloadBlock.Selector));
            }
        }
    }

    return await base.Download(link, method);
}
2016-10-27 07:30:03 +00:00
}
}