2020-02-09 02:35:16 +00:00
using System ;
2018-03-10 08:05:56 +00:00
using System.Collections ;
2016-10-27 07:30:03 +00:00
using System.Collections.Generic ;
2017-04-15 08:45:10 +00:00
using System.Collections.Specialized ;
2018-03-10 08:05:56 +00:00
using System.Linq ;
using System.Net ;
2017-04-15 08:45:10 +00:00
using System.Text ;
using System.Text.RegularExpressions ;
2018-03-10 08:05:56 +00:00
using System.Threading.Tasks ;
2017-04-15 08:45:10 +00:00
using AngleSharp.Dom ;
2019-01-20 00:09:27 +00:00
using AngleSharp.Html.Dom ;
using AngleSharp.Html.Parser ;
2018-03-10 08:05:56 +00:00
using Jackett.Common.Helpers ;
using Jackett.Common.Models ;
using Jackett.Common.Models.IndexerConfig ;
using Jackett.Common.Services.Interfaces ;
using Jackett.Common.Utils ;
using Jackett.Common.Utils.Clients ;
2017-11-05 09:42:03 +00:00
using Microsoft.AspNetCore.WebUtilities ;
2018-03-10 08:05:56 +00:00
using Newtonsoft.Json.Linq ;
using NLog ;
2020-02-09 18:08:34 +00:00
using static Jackett . Common . Models . IndexerConfig . ConfigurationData ;
2017-04-15 08:45:10 +00:00
2018-03-10 08:05:56 +00:00
namespace Jackett.Common.Indexers
2016-10-27 07:30:03 +00:00
{
2017-07-10 20:58:44 +00:00
public class CardigannIndexer : BaseWebIndexer
2016-10-27 07:30:03 +00:00
{
2017-04-15 08:45:10 +00:00
// Cardigann definition this indexer instance was constructed from.
protected IndexerDefinition Definition;

// Landing page response and its parsed document used by the form login.
// DoLogin resets both to null once the login form data has been collected.
protected WebResult landingResult;
protected IHtmlDocument landingResultDocument;

// Ids of the category mappings whose definition entry is flagged as Default.
protected List<string> DefaultCategories = new List<string>();

// Shadows the inherited configData member and forwards reads and writes to it.
private new ConfigurationData configData
{
    get
    {
        return base.configData;
    }
    set
    {
        base.configData = value;
    }
}

// NOTE(review): presumably the result fields that may be missing without failing a row — confirm against the parser code.
protected readonly string[] OptionalFields = { "imdb", "rageid", "tmdbid", "tvdbid", "poster" };
2017-04-15 08:45:10 +00:00
2020-04-16 20:52:54 +00:00
// Template logic functions that applyGoTemplateText evaluates before it resolves if/else conditions.
private static readonly string [ ] _SupportedLogicFunctions =
{
"and" ,
"or" ,
"eq" ,
"ne"
} ;
// Subset of the functions above that accept quoted string literals as parameters;
// for every other function applyGoTemplateText drops literal parameters before evaluating.
private static readonly string [ ] _LogicFunctionsUsingStringLiterals =
{
"eq" ,
"ne"
} ;
// Matches a logic function above and 2 or more of (.varname) or .varname or "string literal" in any combination
// Group 1 captures the function name; group 2 is a repeating group with one capture per parameter.
// NOTE(review): the spacing inside this literal (" | " as join separator, " " around the quoted-literal branch)
// looks like an extraction artifact of an interpolated verbatim string; presumably the separator is "|" and the
// quotes are doubled ("") — confirm against the repository copy before relying on this text.
private static readonly Regex _LogicFunctionRegex = new Regex (
@ $"\b({string.Join(" | ", _SupportedLogicFunctions.Select(Regex.Escape))})(?:\s+(\(?\.[^\)\s]+\)?|" "[^" "]+" ")){{2,}}" ) ;
2020-05-11 19:59:28 +00:00
/// <summary>
/// Builds an indexer from a Cardigann definition: fills in missing definition defaults,
/// copies the descriptive attributes onto the indexer, creates one dynamic configuration
/// item per definition setting, registers the category mappings and finally loads the
/// (empty) configuration via <c>LoadValuesFromJson(null)</c>.
/// </summary>
/// <param name="configService">Forwarded to the base constructor.</param>
/// <param name="wc">Web client, forwarded to the base constructor.</param>
/// <param name="l">Logger, forwarded to the base constructor.</param>
/// <param name="ps">Protection service, forwarded to the base constructor.</param>
/// <param name="cs">Cache service, forwarded to the base constructor.</param>
/// <param name="Definition">Definition to build the indexer from; it is modified in place when defaults are missing.</param>
/// <exception cref="Exception">A setting has an unknown type, or a select type without options.</exception>
public CardigannIndexer(IIndexerConfigurationService configService, Utils.Clients.WebClient wc, Logger l,
                        IProtectionService ps, ICacheService cs, IndexerDefinition Definition)
    : base(configService: configService,
           client: wc,
           logger: l,
           p: ps,
           cacheService: cs)
{
    this.Definition = Definition;
    Id = Definition.Id;

    // Definitions without a settings section get the classic username/password pair.
    if (Definition.Settings == null)
    {
        var fallbackSettings = new List<settingsField>();
        fallbackSettings.Add(new settingsField { Name = "username", Label = "Username", Type = "text" });
        fallbackSettings.Add(new settingsField { Name = "password", Label = "Password", Type = "password" });
        Definition.Settings = fallbackSettings;
    }

    if (Definition.Encoding == null)
    {
        Definition.Encoding = "UTF-8";
    }

    // A login section without an explicit method is treated as a form login.
    if (Definition.Login != null && Definition.Login.Method == null)
    {
        Definition.Login.Method = "form";
    }

    if (Definition.Search.Paths == null)
    {
        Definition.Search.Paths = new List<searchPathBlock>();
    }

    // A definition with a single search Path is converted into one more Paths entry.
    if (Definition.Search.Path != null)
    {
        var singlePath = new searchPathBlock
        {
            Path = Definition.Search.Path,
            Inheritinputs = true
        };
        Definition.Search.Paths.Add(singlePath);
    }

    // Copy the mandatory attributes from the definition.
    DisplayName = Definition.Name;
    DisplayDescription = Definition.Description;
    if (Definition.Links.Count > 1)
    {
        AlternativeSiteLinks = Definition.Links.ToArray();
    }
    DefaultSiteLink = Definition.Links[0];
    if (Definition.Legacylinks != null)
    {
        LegacySiteLinks = Definition.Legacylinks.ToArray();
    }
    Encoding = Encoding.GetEncoding(Definition.Encoding);
    if (!DefaultSiteLink.EndsWith("/"))
    {
        DefaultSiteLink += "/";
    }
    Language = Definition.Language;
    Type = Definition.Type;

    TorznabCaps = new TorznabCapabilities();
    TorznabCaps.ParseCardigannSearchModes(Definition.Caps.Modes);

    // One dynamic configuration item per definition setting, keyed by the setting name.
    configData = new ConfigurationData();
    foreach (var setting in Definition.Settings)
    {
        configData.AddDynamic(setting.Name, CreateConfigurationItem(setting));
    }

    // Simple "id: category name" mappings; unknown names are logged and skipped.
    if (Definition.Caps.Categories != null)
    {
        foreach (var entry in Definition.Caps.Categories)
        {
            var torznabCategory = TorznabCatType.GetCatByName(entry.Value);
            if (torznabCategory != null)
            {
                AddCategoryMapping(entry.Key, torznabCategory);
            }
            else
            {
                logger.Error(string.Format("CardigannIndexer ({0}): invalid Torznab category for id {1}: {2}", Id, entry.Key, entry.Value));
            }
        }
    }

    // Extended mappings: the Torznab category is optional here, a description may be given,
    // and mappings flagged as Default are remembered in DefaultCategories.
    if (Definition.Caps.Categorymappings != null)
    {
        foreach (var mapping in Definition.Caps.Categorymappings)
        {
            TorznabCategory mappedCategory = null;
            if (mapping.cat != null)
            {
                mappedCategory = TorznabCatType.GetCatByName(mapping.cat);
                if (mappedCategory == null)
                {
                    logger.Error(string.Format("CardigannIndexer ({0}): invalid Torznab category for id {1}: {2}", Id, mapping.id, mapping.cat));
                    continue;
                }
            }

            AddCategoryMapping(mapping.id, mappedCategory, mapping.desc);
            if (mapping.Default)
            {
                DefaultCategories.Add(mapping.id);
            }
        }
    }

    LoadValuesFromJson(null);
}

// Creates the configuration item for one definition setting. The item is labelled with the
// setting's Label (falling back to its Name); a setting without a type becomes a string item.
private static ConfigurationItem CreateConfigurationItem(settingsField setting)
{
    var label = setting.Label ?? setting.Name;

    if (setting.Type == null)
    {
        return new StringConfigurationItem(label) { Value = setting.Default };
    }

    switch (setting.Type)
    {
        case "checkbox":
            // Checked only when the default is exactly "true".
            return new BoolConfigurationItem(label) { Value = setting.Default == "true" };

        case "password":
        case "text":
            return new StringConfigurationItem(label) { Value = setting.Default };

        case "multi-select":
            if (setting.Options == null)
            {
                throw new Exception("Options must be given for the 'multi-select' type.");
            }
            return new MultiSelectConfigurationItem(label, setting.Options) { Values = setting.Defaults };

        case "select":
            if (setting.Options == null)
            {
                throw new Exception("Options must be given for the 'select' type.");
            }
            return new SingleSelectConfigurationItem(label, setting.Options) { Value = setting.Default };

        case "info":
            return new DisplayInfoConfigurationItem(label, setting.Default);

        default:
            throw new Exception($"Invalid setting type '{setting.Type}' specified.");
    }
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Loads the configuration through the base implementation and then registers every
/// certificate hash listed in the definition as trusted for the host of the current SiteLink.
/// </summary>
/// <param name="jsonConfig">Configuration to load; passed through to the base implementation.</param>
/// <param name="useProtectionService">Passed through to the base implementation.</param>
public override void LoadValuesFromJson(JToken jsonConfig, bool useProtectionService = false)
{
    base.LoadValuesFromJson(jsonConfig, useProtectionService);

    var certificateHashes = Definition.Certificates;
    if (certificateHashes == null)
    {
        return;
    }

    // add self signed cert to trusted certs
    foreach (var hash in certificateHashes)
    {
        var siteHost = new Uri(SiteLink).Host;
        webclient.AddTrustedCertificate(siteHost, hash);
    }
}
2020-04-20 00:16:19 +00:00
/// <summary>
/// Builds the variable table every template starts from: the site link, the
/// <c>.True</c>/<c>.False</c> markers, the current year and one <c>.Config.&lt;name&gt;</c>
/// entry per definition setting that has a configuration item of a known type.
/// </summary>
/// <returns>A new dictionary; <c>.False</c> is present with a null value.</returns>
protected Dictionary<string, object> GetBaseTemplateVariables()
{
    var variables = new Dictionary<string, object>();
    variables[".Config.sitelink"] = SiteLink;
    variables[".True"] = "True";
    variables[".False"] = null;
    variables[".Today.Year"] = DateTime.Today.Year.ToString();

    foreach (var setting in Definition.Settings)
    {
        var item = configData.GetDynamic(setting.Name);
        if (item == null)
        {
            continue;
        }

        var key = ".Config." + setting.Name;

        if (item is BoolConfigurationItem flag)
        {
            // Booleans reuse the .True/.False marker values instead of a bool.
            variables[key] = variables[flag.Value ? ".True" : ".False"];
        }
        else if (item is StringConfigurationItem text)
        {
            variables[key] = text.Value;
        }
        else if (item is PasswordConfigurationItem password)
        {
            variables[key] = password.Value;
        }
        else if (item is SingleSelectConfigurationItem singleSelect)
        {
            variables[key] = singleSelect.Value;
        }
        else if (item is MultiSelectConfigurationItem multiSelect)
        {
            // The only entry that stores the item's Values rather than a single Value.
            variables[key] = multiSelect.Values;
        }
        else if (item is DisplayImageConfigurationItem image)
        {
            variables[key] = image.Value;
        }
        else if (item is DisplayInfoConfigurationItem info)
        {
            variables[key] = info.Value;
        }
        else if (item is HiddenStringConfigurationItem hidden)
        {
            variables[key] = hidden.Value;
        }
        // Any other item type gets no variable.
        //TODO Should this throw a NotSupportedException, as it used to?
    }

    return variables;
}
2017-04-15 08:45:10 +00:00
// A very bad implementation of the golang template/text templating engine.
// But it should work for most basic constructs used by Cardigann definitions.
protected delegate string TemplateTextModifier(string str);

// Template expression patterns, built once instead of on every applyGoTemplateText call.
// Example: {{ re_replace .Query.Keywords "[^a-zA-Z0-9]+" "%" }}
private static readonly Regex _ReReplaceTemplateRegex = new Regex(@"{{\s*re_replace\s+(\..+?)\s+""(.*?)""\s+""(.*?)""\s*}}");
// Example: {{ join .Categories "," }}
private static readonly Regex _JoinTemplateRegex = new Regex(@"{{\s*join\s+(\..+?)\s+""(.*?)""\s*}}");
// {{ if <condition> }}...{{ else }}...{{ end }}
private static readonly Regex _IfElseTemplateRegex = new Regex(@"{{\s*if\s*(.+?)\s*}}(.*?){{\s*else\s*}}(.*?){{\s*end\s*}}");
// {{ range <variable> }}prefix{{.}}postfix{{end}}
private static readonly Regex _RangeTemplateRegex = new Regex(@"{{\s*range\s*(.+?)\s*}}(.*?){{\.}}(.*?){{end}}");
// {{ .Variable }}
private static readonly Regex _VariableTemplateRegex = new Regex(@"{{\s*(\..+?)\s*}}");

/// <summary>
/// Expands the supported subset of Go template syntax in <paramref name="template"/>.
/// Expressions are processed in a fixed order: re_replace, join, the logic functions
/// (and/or/eq/ne), if/else, range and finally plain variables.
/// </summary>
/// <param name="template">Template text to expand.</param>
/// <param name="variables">Variable table keyed by the dotted name including the leading dot;
/// when null, <c>GetBaseTemplateVariables()</c> is used.</param>
/// <param name="modifier">Optional transformation applied to each expanded value (re_replace and
/// join results, every range element, every plain variable). It is not applied to if/else branches.</param>
/// <returns>The expanded template.</returns>
/// <exception cref="KeyNotFoundException">A referenced variable is not in the table.</exception>
/// <exception cref="NotImplementedException">An if condition does not start with a dot after the logic functions were evaluated.</exception>
/// <exception cref="Exception">An if condition variable is neither null, a string nor an ICollection.</exception>
protected string applyGoTemplateText(string template, Dictionary<string, object> variables = null, TemplateTextModifier modifier = null)
{
    if (variables == null)
    {
        variables = GetBaseTemplateVariables();
    }

    // handle re_replace expression
    var reReplaceMatch = _ReReplaceTemplateRegex.Match(template);
    while (reReplaceMatch.Success)
    {
        var all = reReplaceMatch.Groups[0].Value;
        var variable = reReplaceMatch.Groups[1].Value;
        var regexp = reReplaceMatch.Groups[2].Value;
        var newvalue = reReplaceMatch.Groups[3].Value;

        // The pattern comes from the template itself, so it cannot be precompiled.
        var replaceRegex = new Regex(regexp);
        var input = (string)variables[variable];
        var expanded = replaceRegex.Replace(input, newvalue);

        if (modifier != null)
        {
            expanded = modifier(expanded);
        }

        // Matches were computed on the text as it was when Match() ran; Replace substitutes
        // every occurrence of the matched expression in the current text.
        template = template.Replace(all, expanded);
        reReplaceMatch = reReplaceMatch.NextMatch();
    }

    // handle join expression
    var joinMatch = _JoinTemplateRegex.Match(template);
    while (joinMatch.Success)
    {
        var all = joinMatch.Groups[0].Value;
        var variable = joinMatch.Groups[1].Value;
        var delimiter = joinMatch.Groups[2].Value;

        var input = (ICollection<string>)variables[variable];
        var expanded = string.Join(delimiter, input);

        if (modifier != null)
        {
            expanded = modifier(expanded);
        }

        template = template.Replace(all, expanded);
        joinMatch = joinMatch.NextMatch();
    }

    // handle logic functions; each one is replaced in the text by a variable name
    // (one of its parameters, or .True/.False), which the later stages then resolve
    var logicMatch = _LogicFunctionRegex.Match(template);
    while (logicMatch.Success)
    {
        var functionStartIndex = logicMatch.Groups[0].Index;
        var functionLength = logicMatch.Groups[0].Length;
        var functionName = logicMatch.Groups[1].Value;
        // Use Group.Captures to get each matching string in a repeating Match.Group
        // Strip () around variable names here, as they are optional. Use quotes to differentiate variables and literals
        var parameters = logicMatch.Groups[2].Captures.Cast<Capture>().Select(c => c.Value.Trim('(', ')')).ToList();
        var functionResult = "";

        // If the function can't use string literals, fail silently by removing the literals.
        if (!_LogicFunctionsUsingStringLiterals.Contains(functionName))
        {
            parameters.RemoveAll(param => param.StartsWith("\"", StringComparison.Ordinal));
        }

        switch (functionName)
        {
            case "and": // returns first null or empty, else last variable
            case "or": // returns first not null or empty, else last variable
            {
                var isAnd = functionName == "and";
                foreach (var parameter in parameters)
                {
                    functionResult = parameter;
                    // (null as string) == null
                    // (if null or empty) break if and, continue if or
                    // (if neither null nor empty) continue if and, break if or
                    if (string.IsNullOrWhiteSpace(variables[parameter] as string) == isAnd)
                    {
                        break;
                    }
                }
                break;
            }
            case "eq": // Returns .True if equal
            case "ne": // Returns .False if equal
            {
                var wantEqual = functionName == "eq";
                // eq/ne take exactly 2 params. Update the length to match
                // This removes the whitespace between params 2 and 3.
                // It shouldn't matter because the match starts at a word boundary
                if (parameters.Count > 2)
                {
                    functionLength = logicMatch.Groups[2].Captures[2].Index - functionStartIndex;
                }

                // Take first two parameters, convert vars to values and strip quotes on string literals
                // Counting distinct gives us 1 if equal and 2 if not.
                var isEqual = parameters
                    .Take(2)
                    .Select(param => param.StartsWith("\"", StringComparison.Ordinal) ? param.Trim('"') : variables[param] as string)
                    .Distinct()
                    .Count() == 1;

                functionResult = isEqual == wantEqual ? ".True" : ".False";
                break;
            }
        }

        template = template.Remove(functionStartIndex, functionLength)
                           .Insert(functionStartIndex, functionResult);

        // Rerunning match instead of using nextMatch allows us to support nested functions
        // like {{if and eq (.Var1) "string1" eq (.Var2) "string2"}}
        // No performance is lost because Match/NextMatch are lazy evaluated and pause execution after first match
        logicMatch = _LogicFunctionRegex.Match(template);
    }

    // handle if ... else ... expression
    var ifElseMatch = _IfElseTemplateRegex.Match(template);
    while (ifElseMatch.Success)
    {
        var all = ifElseMatch.Groups[0].Value;
        var condition = ifElseMatch.Groups[1].Value;
        var onTrue = ifElseMatch.Groups[2].Value;
        var onFalse = ifElseMatch.Groups[3].Value;

        // Only a bare variable reference is supported as a condition at this point.
        if (!condition.StartsWith(".", StringComparison.Ordinal))
        {
            throw new NotImplementedException("CardigannIndexer: Condition operation '" + condition + "' not implemented");
        }

        // Truthiness: null is false, strings are true unless blank, collections are true unless empty.
        bool conditionResultState;
        var value = variables[condition];
        if (value == null)
        {
            conditionResultState = false;
        }
        else if (value is string text)
        {
            conditionResultState = !string.IsNullOrWhiteSpace(text);
        }
        else if (value is ICollection collection)
        {
            conditionResultState = collection.Count > 0;
        }
        else
        {
            throw new Exception(string.Format("Unexpceted type for variable {0}: {1}", condition, value.GetType()));
        }

        template = template.Replace(all, conditionResultState ? onTrue : onFalse);
        ifElseMatch = ifElseMatch.NextMatch();
    }

    // handle range expression
    var rangeMatch = _RangeTemplateRegex.Match(template);
    while (rangeMatch.Success)
    {
        var all = rangeMatch.Groups[0].Value;
        var variable = rangeMatch.Groups[1].Value;
        var prefix = rangeMatch.Groups[2].Value;
        var postfix = rangeMatch.Groups[3].Value;

        // Emits prefix + element + postfix once per element of the collection.
        var expanded = new StringBuilder();
        foreach (var value in (ICollection<string>)variables[variable])
        {
            var newvalue = value;
            if (modifier != null)
            {
                newvalue = modifier(newvalue);
            }
            expanded.Append(prefix).Append(newvalue).Append(postfix);
        }

        template = template.Replace(all, expanded.ToString());
        rangeMatch = rangeMatch.NextMatch();
    }

    // handle simple variables
    var variableMatch = _VariableTemplateRegex.Match(template);
    while (variableMatch.Success)
    {
        var all = variableMatch.Groups[0].Value;
        var variable = variableMatch.Groups[1].Value;

        var value = (string)variables[variable];
        if (modifier != null)
        {
            value = modifier(value);
        }

        // A null value removes the expression: string.Replace treats a null replacement as empty.
        template = template.Replace(all, value);
        variableMatch = variableMatch.NextMatch();
    }

    return template;
}
2020-06-10 21:22:29 +00:00
/// <summary>
/// Inspects a login response for failure indicators and throws when one is found.
/// </summary>
/// <param name="loginResult">Response to inspect.</param>
/// <param name="errorBlocks">Error selectors from the definition; null means nothing to check beyond the status code.</param>
/// <returns>Always true; failures are reported by exception.</returns>
/// <exception cref="ExceptionWithConfigData">The response status is 401, or one of the error selectors matched.</exception>
protected bool checkForError(WebResult loginResult, IList<errorBlock> errorBlocks)
{
    // e.g. used by YGGtorrent
    if (loginResult.Status == HttpStatusCode.Unauthorized)
    {
        throw new ExceptionWithConfigData("401 Unauthorized, check your credentials", configData);
    }

    if (errorBlocks == null)
    {
        return true; // no error
    }

    var document = new HtmlParser().ParseDocument(loginResult.ContentString);

    foreach (var block in errorBlocks)
    {
        var matchedElement = document.QuerySelector(block.Selector);
        if (matchedElement == null)
        {
            continue;
        }

        // The first matching selector wins. Its message comes from the block's own message
        // selector when one is defined, otherwise from the text of the matched element.
        string message;
        if (block.Message != null)
        {
            message = handleSelector(block.Message, document.FirstElementChild);
        }
        else
        {
            message = matchedElement.TextContent;
        }

        throw new ExceptionWithConfigData(string.Format("Error: {0}", message.Trim()), configData);
    }

    return true; // no error
}
2017-07-10 20:58:44 +00:00
2017-04-15 08:45:10 +00:00
/// <summary>
/// Performs the login described by the definition's login block and stores the resulting
/// cookies in the configuration. Supported methods: post, form, cookie, get and oneurl.
/// </summary>
/// <returns>
/// True when there is no login block or the login request was carried out; false when a form
/// login needs the landing page and <c>GetConfigurationForSetup</c> returned null.
/// </returns>
/// <exception cref="ExceptionWithConfigData">The login form or one of its inputs was not found, or <c>checkForError</c> detected a failure.</exception>
/// <exception cref="Exception">A selector input could not be evaluated; the original error is attached as inner exception.</exception>
/// <exception cref="NotImplementedException">The login method is not one of the supported ones.</exception>
protected async Task<bool> DoLogin()
{
    var Login = Definition.Login;
    if (Login == null)
    {
        return true;
    }

    if (Login.Method == "post")
    {
        var pairs = new Dictionary<string, string>();
        foreach (var Input in Definition.Login.Inputs)
        {
            var value = applyGoTemplateText(Input.Value);
            pairs.Add(Input.Key, value);
        }

        var LoginUrl = resolvePath(Login.Path).ToString();
        configData.CookieHeader.Value = null;
        var loginResult = await RequestLoginAndFollowRedirect(LoginUrl, pairs, null, true, null, SiteLink, true);
        configData.CookieHeader.Value = loginResult.Cookies;

        checkForError(loginResult, Definition.Login.Error);
    }
    else if (Login.Method == "form")
    {
        var LoginUrl = resolvePath(Login.Path).ToString();

        var queryCollection = new NameValueCollection();
        var pairs = new Dictionary<string, string>();

        var FormSelector = Login.Form;
        if (FormSelector == null)
        {
            FormSelector = "form";
        }

        // landingResultDocument might not be initiated if the login is caused by a relogin during a query
        if (landingResultDocument == null)
        {
            var ConfigurationResult = await GetConfigurationForSetup(true);
            if (ConfigurationResult == null) // got captcha
            {
                return false;
            }
        }

        var form = landingResultDocument.QuerySelector(FormSelector);
        if (form == null)
        {
            throw new ExceptionWithConfigData(string.Format("Login failed: No form found on {0} using form selector {1}", LoginUrl, FormSelector), configData);
        }

        var inputs = form.QuerySelectorAll("input");
        if (inputs == null)
        {
            throw new ExceptionWithConfigData(string.Format("Login failed: No inputs found on {0} using form selector {1}", LoginUrl, FormSelector), configData);
        }

        // The definition's submit path, when given, overrides the form's action attribute.
        var submitUrlstr = form.GetAttribute("action");
        if (Login.Submitpath != null)
        {
            submitUrlstr = Login.Submitpath;
        }

        // Start from the values already present in the form (hidden fields, tokens, ...).
        foreach (var input in inputs)
        {
            var name = input.GetAttribute("name");
            if (name == null)
            {
                continue;
            }

            var value = input.GetAttribute("value");
            if (value == null)
            {
                value = "";
            }

            pairs[name] = value;
        }

        // Then apply the definition's inputs. With Login.Selectors the key is a CSS selector
        // and the field name is taken from the selected element's name attribute.
        foreach (var Input in Definition.Login.Inputs)
        {
            var value = applyGoTemplateText(Input.Value);
            var input = Input.Key;
            if (Login.Selectors)
            {
                var inputElement = landingResultDocument.QuerySelector(Input.Key);
                if (inputElement == null)
                {
                    throw new ExceptionWithConfigData(string.Format("Login failed: No input found using selector {0}", Input.Key), configData);
                }
                input = inputElement.GetAttribute("name");
            }
            pairs[input] = value;
        }

        // selector inputs
        if (Login.Selectorinputs != null)
        {
            foreach (var Selectorinput in Login.Selectorinputs)
            {
                string value = null;
                try
                {
                    value = handleSelector(Selectorinput.Value, landingResultDocument.FirstElementChild);
                    pairs[Selectorinput.Key] = value;
                }
                catch (Exception ex)
                {
                    // Keep the original exception attached so its stack trace is not lost.
                    throw new Exception(string.Format("Error while parsing selector input={0}, selector={1}, value={2}: {3}", Selectorinput.Key, Selectorinput.Value.Selector, value, ex.Message), ex);
                }
            }
        }

        // getselector inputs
        if (Login.Getselectorinputs != null)
        {
            foreach (var Selectorinput in Login.Getselectorinputs)
            {
                string value = null;
                try
                {
                    value = handleSelector(Selectorinput.Value, landingResultDocument.FirstElementChild);
                    queryCollection[Selectorinput.Key] = value;
                }
                catch (Exception ex)
                {
                    // Keep the original exception attached so its stack trace is not lost.
                    throw new Exception(string.Format("Error while parsing get selector input={0}, selector={1}, value={2}: {3}", Selectorinput.Key, Selectorinput.Value.Selector, value, ex.Message), ex);
                }
            }
        }

        if (queryCollection.Count > 0)
        {
            submitUrlstr += "?" + queryCollection.GetQueryString();
        }
        var submitUrl = resolvePath(submitUrlstr, new Uri(LoginUrl));

        // automatically solve simpleCaptchas, if used
        var simpleCaptchaPresent = landingResultDocument.QuerySelector("script[src*=\"simpleCaptcha\"]");
        if (simpleCaptchaPresent != null)
        {
            var captchaUrl = resolvePath("simpleCaptcha.php?numImages=1");
            var simpleCaptchaResult = await RequestWithCookiesAsync(captchaUrl.ToString(), referer: LoginUrl);
            var simpleCaptchaJSON = JObject.Parse(simpleCaptchaResult.ContentString);
            var captchaSelection = simpleCaptchaJSON["images"][0]["hash"].ToString();
            pairs["captchaSelection"] = captchaSelection;
            pairs["submitme"] = "X";
        }

        if (Login.Captcha != null)
        {
            var Captcha = Login.Captcha;

            // Image captchas are answered from the "CaptchaText" item, text captchas from
            // "CaptchaAnswer"; any other captcha type is ignored here.
            string captchaItemName = null;
            if (Captcha.Type == "image")
            {
                captchaItemName = "CaptchaText";
            }
            else if (Captcha.Type == "text")
            {
                captchaItemName = "CaptchaAnswer";
            }

            if (captchaItemName != null)
            {
                var captchaItem = (StringConfigurationItem)configData.GetDynamic(captchaItemName);
                if (captchaItem != null)
                {
                    var input = Captcha.Input;
                    if (Login.Selectors)
                    {
                        var inputElement = landingResultDocument.QuerySelector(Captcha.Input);
                        if (inputElement == null)
                        {
                            throw new ExceptionWithConfigData(string.Format("Login failed: No captcha input found using {0}", Captcha.Input), configData);
                        }
                        input = inputElement.GetAttribute("name");
                    }
                    pairs[input] = captchaItem.Value;
                }
            }
        }

        // clear landingResults/Document, otherwise we might use an old version for a new relogin (if GetConfigurationForSetup() wasn't called before)
        landingResult = null;
        landingResultDocument = null;

        WebResult loginResult = null;
        var enctype = form.GetAttribute("enctype");
        if (enctype == "multipart/form-data")
        {
            var headers = new Dictionary<string, string>();

            // Format with the invariant culture: a culture-specific decimal separator (e.g. ",")
            // would survive the Replace below and end up in the unquoted boundary parameter.
            var secondsSinceEpoch = DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1)).TotalSeconds;
            var boundary = "---------------------------" + secondsSinceEpoch.ToString(System.Globalization.CultureInfo.InvariantCulture).Replace(".", "");

            var bodyParts = new List<string>();
            foreach (var pair in pairs)
            {
                var part = "--" + boundary + "\r\n" +
                           "Content-Disposition: form-data; name=\"" + pair.Key + "\"\r\n" +
                           "\r\n" +
                           pair.Value;
                bodyParts.Add(part);
            }
            bodyParts.Add("--" + boundary + "--");

            headers.Add("Content-Type", "multipart/form-data; boundary=" + boundary);
            var body = string.Join("\r\n", bodyParts);
            loginResult = await RequestWithCookiesAsync(
                submitUrl.ToString(), configData.CookieHeader.Value, RequestType.POST, SiteLink, pairs, headers,
                body);
        }
        else
        {
            loginResult = await RequestLoginAndFollowRedirect(submitUrl.ToString(), pairs, configData.CookieHeader.Value, true, null, LoginUrl, true);
        }

        configData.CookieHeader.Value = loginResult.Cookies;
        checkForError(loginResult, Definition.Login.Error);
    }
    else if (Login.Method == "cookie")
    {
        // No request is made: the cookie entered in the configuration is used as is.
        configData.CookieHeader.Value = ((StringConfigurationItem)configData.GetDynamic("cookie")).Value;
    }
    else if (Login.Method == "get")
    {
        var queryCollection = new NameValueCollection();
        foreach (var Input in Definition.Login.Inputs)
        {
            var value = applyGoTemplateText(Input.Value);
            queryCollection.Add(Input.Key, value);
        }

        var LoginUrl = resolvePath(Login.Path + "?" + queryCollection.GetQueryString()).ToString();
        configData.CookieHeader.Value = null;
        var loginResult = await RequestWithCookiesAsync(LoginUrl, referer: SiteLink);
        configData.CookieHeader.Value = loginResult.Cookies;

        checkForError(loginResult, Definition.Login.Error);
    }
    else if (Login.Method == "oneurl")
    {
        // The expanded "oneurl" input is appended to the login path to form the request URL.
        var OneUrl = applyGoTemplateText(Definition.Login.Inputs["oneurl"]);
        var LoginUrl = resolvePath(Login.Path + OneUrl).ToString();
        configData.CookieHeader.Value = null;
        var loginResult = await RequestWithCookiesAsync(LoginUrl, referer: SiteLink);
        configData.CookieHeader.Value = loginResult.Cookies;

        checkForError(loginResult, Definition.Login.Error);
    }
    else
    {
        throw new NotImplementedException("Login method " + Definition.Login.Method + " not implemented");
    }

    logger.Debug(string.Format("CardigannIndexer ({0}): Cookies after login: {1}", Id, CookieHeader));
    return true;
}
2018-01-22 16:35:56 +00:00
/// <summary>
/// Derives the base URL of the site a request was redirected to, when a request under the
/// configured SiteLink was redirected to a location outside of it.
/// </summary>
/// <param name="requestUrl">URL that was requested.</param>
/// <param name="RedirectUrl">Redirect target reported by the response.</param>
/// <returns>
/// "scheme://host/" (with ":port" when the port is not the scheme's default) of the redirect
/// target, or null when the request was not under SiteLink, the redirect stays under SiteLink,
/// or the target is missing or not an absolute http/https URL.
/// </returns>
protected string getRedirectDomainHint(string requestUrl, string RedirectUrl)
{
    if (string.IsNullOrEmpty(requestUrl) || string.IsNullOrEmpty(RedirectUrl))
    {
        return null;
    }

    // URLs are compared ordinally; a culture-sensitive prefix test can ignore characters.
    var leftConfiguredSite = requestUrl.StartsWith(SiteLink, StringComparison.Ordinal)
                             && !RedirectUrl.StartsWith(SiteLink, StringComparison.Ordinal);
    if (!leftConfiguredSite)
    {
        return null;
    }

    // A relative target carries no domain information, and only web URLs make sense as a site link.
    if (!Uri.TryCreate(RedirectUrl, UriKind.Absolute, out var uri)
        || (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps))
    {
        return null;
    }

    // Keep a non-default port; without it the hint would point at the wrong endpoint.
    var authority = uri.IsDefaultPort ? uri.Host : uri.Host + ":" + uri.Port;
    return uri.Scheme + "://" + authority + "/";
}

/// <summary>
/// Convenience overload that takes the requested URL and the redirect target from a response.
/// </summary>
protected string getRedirectDomainHint(WebResult result) => getRedirectDomainHint(result.Request.Url, result.RedirectingTo);
2018-01-22 16:35:56 +00:00
2017-04-15 08:45:10 +00:00
/// <summary>
/// Requests the definition's login test path and verifies that the session is logged in.
/// </summary>
/// <returns>
/// False when the definition has no login block or no login test; true when the test page
/// was served without a redirect and the optional test selector matched.
/// </returns>
/// <exception cref="ExceptionWithConfigData">
/// The test request was redirected, or the test selector matched nothing. On a redirect to
/// another domain with Followredirect enabled, the site link is updated and saved before throwing.
/// </exception>
protected async Task<bool> TestLogin()
{
    var Login = Definition.Login;
    if (Login?.Test == null)
    {
        return false;
    }

    // test if login was successful
    var LoginTestUrl = resolvePath(Login.Test.Path).ToString();
    var headers = ParseCustomHeaders(Definition.Search?.Headers, GetBaseTemplateVariables());
    var testResult = await RequestWithCookiesAsync(LoginTestUrl, headers: headers);

    if (testResult.IsRedirect)
    {
        var message = new StringBuilder("Login Failed, got redirected.");
        var domainHint = getRedirectDomainHint(testResult);
        if (domainHint != null)
        {
            message.Append(" Try changing the indexer URL to ").Append(domainHint).Append(".");
            if (Definition.Followredirect)
            {
                // Adopt the new domain right away so the next attempt uses it.
                configData.SiteLink.Value = domainHint;
                SiteLink = configData.SiteLink.Value;
                SaveConfig();
                message.Append(" Updated site link, please try again.");
            }
        }

        throw new ExceptionWithConfigData(message.ToString(), configData);
    }

    if (Login.Test.Selector == null)
    {
        return true;
    }

    // The selector must match at least one element of the test page.
    var testDocument = new HtmlParser().ParseDocument(testResult.ContentString);
    var matches = testDocument.QuerySelectorAll(Login.Test.Selector);
    if (matches.Length == 0)
    {
        throw new ExceptionWithConfigData(string.Format("Login failed: Selector \"{0}\" didn't match", Login.Test.Selector), configData);
    }

    return true;
}
2020-06-10 21:22:29 +00:00
/// <summary>
/// Decides from a response whether the session has expired and a new login is required.
/// </summary>
/// <param name="Result">The web result to inspect for a redirect.</param>
/// <param name="document">Parsed response body, queried with the definition's login test selector.</param>
/// <returns>
/// <c>true</c> when the response is a redirect without a domain hint, or when the login test selector
/// matches nothing in <paramref name="document"/>; otherwise <c>false</c>.
/// </returns>
/// <exception cref="ExceptionWithConfigData">The response redirected to a different domain.</exception>
protected bool CheckIfLoginIsNeeded(WebResult Result, IHtmlDocument document)
{
    if (Result.IsRedirect)
    {
        var hint = getRedirectDomainHint(Result);
        if (hint == null)
            return true;

        var message = $"Got redirected to another domain. Try changing the indexer URL to {hint}.";
        if (Definition.Followredirect)
        {
            // Persist the new site link so the next attempt targets the redirect domain.
            configData.SiteLink.Value = hint;
            SiteLink = configData.SiteLink.Value;
            SaveConfig();
            message += " Updated site link, please try again.";
        }

        throw new ExceptionWithConfigData(message, configData);
    }

    // No login block, no test, or no selector: nothing to check against, so no login is requested.
    var testSelector = Definition.Login?.Test?.Selector;
    if (testSelector == null)
        return false;

    return document.QuerySelectorAll(testSelector).Length == 0;
}
2016-11-29 18:32:50 +00:00
/// <summary>
/// Returns the configuration to show during setup, delegating to the
/// <c>GetConfigurationForSetup(bool)</c> overload with <c>automaticlogin</c> set to false.
/// </summary>
/// <returns>
/// The result of the overload, or the current configuration when the overload throws
/// (the exception is logged as an error and not propagated).
/// </returns>
public override async Task<ConfigurationData> GetConfigurationForSetup()
{
    ConfigurationData setupConfig;
    try
    {
        setupConfig = await GetConfigurationForSetup(false);
    }
    catch (Exception e)
    {
        logger.Error($"Exception in GetConfigurationForSetup ({Id}): {e}");
        setupConfig = configData;
    }

    return setupConfig;
}
2017-01-27 12:04:12 +00:00
/// <summary>
/// For "form" logins, loads the login landing page and exposes any captcha found on it as
/// dynamic configuration items ("CaptchaImage"/"CaptchaText" or "CaptchaChallenge"/"CaptchaAnswer").
/// </summary>
/// <param name="automaticlogin">
/// When true and a captcha is found, the attempt is aborted: the last error is set and null is returned.
/// </param>
/// <returns>
/// The indexer configuration; returned untouched when there is no login block or the method is not "form".
/// Null when a captcha was found while <paramref name="automaticlogin"/> is true.
/// </returns>
/// <exception cref="NotImplementedException">The definition declares a captcha type other than "image" or "text".</exception>
public async Task<ConfigurationData> GetConfigurationForSetup(bool automaticlogin)
{
    var Login = Definition.Login;
    if (Login == null || Login.Method != "form")
        return configData;

    var LoginUrl = resolvePath(Login.Path);

    // Reset the cookie header, then seed it with the cookies the definition prescribes (if any).
    configData.CookieHeader.Value = null;
    if (Login.Cookies != null)
        configData.CookieHeader.Value = string.Join("; ", Login.Cookies);

    landingResult = await RequestWithCookiesAsync(LoginUrl.AbsoluteUri, referer: SiteLink);

    // Some sites have a temporary redirect before the login page, we need to process it.
    if (Definition.Followredirect)
    {
        await FollowIfRedirect(landingResult, LoginUrl.AbsoluteUri, overrideCookies: landingResult.Cookies, accumulateCookies: true);
    }

    var hasCaptcha = false;
    var htmlParser = new HtmlParser();
    landingResultDocument = htmlParser.ParseDocument(landingResult.ContentString);

    if (Login.Captcha != null)
    {
        var Captcha = Login.Captcha;
        if (Captcha.Type == "image")
        {
            var captchaElement = landingResultDocument.QuerySelector(Captcha.Selector);
            if (captchaElement != null)
            {
                hasCaptcha = true;

                // Download the image with the landing page's cookies and the login page as referer.
                var CaptchaUrl = resolvePath(captchaElement.GetAttribute("src"), LoginUrl);
                var captchaImageData = await RequestWithCookiesAsync(
                    CaptchaUrl.ToString(), landingResult.Cookies, referer: LoginUrl.AbsoluteUri);
                var CaptchaImage = new DisplayImageConfigurationItem("Captcha Image");
                var CaptchaText = new StringConfigurationItem("Captcha Text");

                CaptchaImage.Value = captchaImageData.ContentBytes;

                configData.AddDynamic("CaptchaImage", CaptchaImage);
                configData.AddDynamic("CaptchaText", CaptchaText);
            }
            else
            {
                logger.Debug(string.Format("CardigannIndexer ({0}): No captcha image found", Id));
            }
        }
        else if (Captcha.Type == "text")
        {
            var captchaElement = landingResultDocument.QuerySelector(Captcha.Selector);
            if (captchaElement != null)
            {
                hasCaptcha = true;

                // The challenge is the element's text; the answer is typed in by the user.
                var CaptchaChallenge = new DisplayInfoConfigurationItem("Captcha Challenge", captchaElement.TextContent);
                var CaptchaAnswer = new StringConfigurationItem("Captcha Answer");

                configData.AddDynamic("CaptchaChallenge", CaptchaChallenge);
                configData.AddDynamic("CaptchaAnswer", CaptchaAnswer);
            }
            else
            {
                // Distinct wording from the image branch so the log shows which captcha type was looked for.
                logger.Debug(string.Format("CardigannIndexer ({0}): No captcha challenge found", Id));
            }
        }
        else
        {
            throw new NotImplementedException(string.Format("Captcha type \"{0}\" is not implemented", Captcha.Type));
        }
    }

    if (hasCaptcha && automaticlogin)
    {
        configData.LastError.Value = "Got captcha during automatic login, please reconfigure manually";
        logger.Error(string.Format("CardigannIndexer ({0}): Found captcha during automatic login, aborting", Id));
        return null;
    }

    return configData;
}
2017-06-28 05:31:38 +00:00
/// <summary>
/// Loads the submitted configuration values, logs in, runs the login test, then marks the
/// indexer as configured and saves the configuration.
/// </summary>
/// <param name="configJson">Configuration values to load.</param>
/// <returns><c>IndexerConfigurationStatus.Completed</c> once every step has finished without throwing.</returns>
public override async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
{
    LoadValuesFromJson(configJson);

    // The boolean results are not inspected here; only an exception stops the sequence.
    _ = await DoLogin();
    _ = await TestLogin();

    IsConfigured = true;
    SaveConfig();

    return IndexerConfigurationStatus.Completed;
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Runs <paramref name="Data"/> through a chain of definition filters, in order, and returns the result.
/// </summary>
/// <param name="Data">Input string to transform.</param>
/// <param name="Filters">Filters to apply; when null, <paramref name="Data"/> is returned unchanged.</param>
/// <param name="variables">
/// Template variables handed to the Go-template expansion of filter arguments
/// (used by re_replace, replace, prepend and append).
/// </param>
/// <returns>The transformed string. Filters with an unrecognised name are skipped.</returns>
/// <exception cref="Exception">The "diacritics" filter was given an argument other than "replace".</exception>
protected string applyFilters(string Data, List<filterBlock> Filters, Dictionary<string, object> variables = null)
{
    if (Filters == null)
        return Data;

    foreach (var Filter in Filters)
    {
        switch (Filter.Name)
        {
            case "querystring":
                var param = (string)Filter.Args;
                Data = ParseUtil.GetArgumentFromQueryString(Data, param);
                break;
            case "timeparse":
            case "dateparse":
                var layout = (string)Filter.Args;
                try
                {
                    var Date = DateTimeUtil.ParseDateTimeGoLang(Data, layout);
                    Data = Date.ToString(DateTimeUtil.Rfc1123ZPattern);
                }
                catch (FormatException ex)
                {
                    // Best effort: on a parse failure the value is left as it was.
                    logger.Debug(ex.Message);
                }
                break;
            case "regexp":
                // Keeps the first capture group of the first match (empty string when nothing matched).
                var pattern = (string)Filter.Args;
                var Regexp = new Regex(pattern);
                var Match = Regexp.Match(Data);
                Data = Match.Groups[1].Value;
                break;
            case "re_replace":
                var regexpreplace_pattern = (string)Filter.Args[0];
                var regexpreplace_replacement = (string)Filter.Args[1];
                regexpreplace_replacement = applyGoTemplateText(regexpreplace_replacement, variables);
                var regexpreplace_regex = new Regex(regexpreplace_pattern);
                Data = regexpreplace_regex.Replace(Data, regexpreplace_replacement);
                break;
            case "split":
                // Splits on the first character of the separator only; a negative position counts from the end.
                var sep = (string)Filter.Args[0];
                var pos = (string)Filter.Args[1];
                var posInt = int.Parse(pos);
                var strParts = Data.Split(sep[0]);
                if (posInt < 0)
                {
                    posInt += strParts.Length;
                }
                Data = strParts[posInt];
                break;
            case "replace":
                var from = (string)Filter.Args[0];
                var to = (string)Filter.Args[1];
                to = applyGoTemplateText(to, variables);
                Data = Data.Replace(from, to);
                break;
            case "trim":
                // With an argument, trims its first character; without one, trims whitespace.
                var cutset = (string)Filter.Args;
                if (cutset != null)
                    Data = Data.Trim(cutset[0]);
                else
                    Data = Data.Trim();
                break;
            case "prepend":
                var prependstr = (string)Filter.Args;
                Data = applyGoTemplateText(prependstr, variables) + Data;
                break;
            case "append":
                var str = (string)Filter.Args;
                Data += applyGoTemplateText(str, variables);
                break;
            case "tolower":
                Data = Data.ToLower();
                break;
            case "toupper":
                Data = Data.ToUpper();
                break;
            case "urldecode":
                Data = WebUtilityHelpers.UrlDecode(Data, Encoding);
                break;
            case "urlencode":
                Data = WebUtilityHelpers.UrlEncode(Data, Encoding);
                break;
            case "timeago":
            case "reltime":
                Data = DateTimeUtil.FromTimeAgo(Data).ToString(DateTimeUtil.Rfc1123ZPattern);
                break;
            case "fuzzytime":
                Data = DateTimeUtil.FromUnknown(Data).ToString(DateTimeUtil.Rfc1123ZPattern);
                break;
            case "validfilename":
                Data = StringUtil.MakeValidFileName(Data, '_', false);
                break;
            case "diacritics":
                var diacriticsOp = (string)Filter.Args;
                if (diacriticsOp == "replace")
                {
                    // Should replace diacritics characters with their base character
                    // It's not perfect, e.g. "ŠĐĆŽ - šđčćž" becomes "SĐCZ-sđccz"
                    var stFormD = Data.Normalize(NormalizationForm.FormD);
                    var len = stFormD.Length;
                    var sb = new StringBuilder();
                    for (var i = 0; i < len; i++)
                    {
                        // After decomposition the accents are separate non-spacing marks; drop those.
                        var uc = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(stFormD[i]);
                        if (uc != System.Globalization.UnicodeCategory.NonSpacingMark)
                        {
                            sb.Append(stFormD[i]);
                        }
                    }
                    Data = (sb.ToString().Normalize(NormalizationForm.FormC));
                }
                else
                    throw new Exception("unsupported diacritics filter argument");
                break;
            case "jsonjoinarray":
                var jsonjoinarrayJSONPath = (string)Filter.Args[0];
                var jsonjoinarraySeparator = (string)Filter.Args[1];
                var jsonjoinarrayO = JObject.Parse(Data);
                var jsonjoinarrayOResult = jsonjoinarrayO.SelectToken(jsonjoinarrayJSONPath);
                var jsonjoinarrayOResultStrings = jsonjoinarrayOResult.Select(j => j.ToString());
                Data = string.Join(jsonjoinarraySeparator, jsonjoinarrayOResultStrings);
                break;
            case "hexdump":
                // this is mainly for debugging invisible special char related issues
                // Logs every character followed by its hex code; Data itself is not modified.
                var HexData = string.Join("", Data.Select(c => c + "(" + ((int)c).ToString("X2") + ")"));
                logger.Debug(string.Format("CardigannIndexer ({0}): hexdump: {1}", Id, HexData));
                break;
            case "strdump":
                // for debugging
                // Logs Data with CR, LF and non-breaking space made visible; Data itself is not modified.
                var DebugData = Data.Replace("\r", "\\r").Replace("\n", "\\n").Replace("\xA0", "\\xA0");
                var strTag = (string)Filter.Args;
                if (strTag != null)
                    strTag = string.Format("({0}):", strTag);
                else
                    strTag = ":";
                logger.Debug(string.Format("CardigannIndexer ({0}): strdump{1} {2}", Id, strTag, DebugData));
                break;
            default:
                break;
        }
    }

    return Data;
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Runs a CSS selector against an element, with manual handling of a leading ":root".
/// </summary>
/// <param name="Element">Element to query from.</param>
/// <param name="Selector">CSS selector; may start with ":root".</param>
/// <returns>The result of <c>QuerySelector</c> on the chosen start element.</returns>
protected IElement QuerySelector(IElement Element, string Selector)
{
    const string rootPrefix = ":root";

    if (!Selector.StartsWith(rootPrefix))
        return Element.QuerySelector(Selector);

    // AngleSharp doesn't support the :root pseudo selector, so we check for it manually:
    // climb to the topmost ancestor and run the remainder of the selector from there.
    var topmost = Element;
    while (topmost.ParentElement != null)
    {
        topmost = topmost.ParentElement;
    }

    return topmost.QuerySelector(Selector.Substring(rootPrefix.Length));
}
2021-05-03 19:59:37 +00:00
/// <summary>
/// Evaluates a selector block against a DOM element and returns the filtered string value.
/// </summary>
/// <param name="Selector">
/// Selector block. A non-null <c>Text</c> is expanded as a template and filtered without touching the DOM.
/// Otherwise <c>Selector</c> picks the element, <c>Remove</c> strips matching descendants from it, and the
/// value comes from <c>Case</c>, else <c>Attribute</c>, else the element's text content.
/// </param>
/// <param name="Dom">Element to evaluate against. Elements matched by <c>Remove</c> are removed from it.</param>
/// <param name="variables">Template variables passed to template expansion and to the filters.</param>
/// <param name="required">When true a missing value throws; when false it yields null.</param>
/// <returns>The whitespace-normalised, filtered value, or null when nothing was found and the selector is not required.</returns>
/// <exception cref="Exception">
/// The selector, every case selector, or the attribute did not yield a value while <paramref name="required"/> is true.
/// </exception>
protected string handleSelector(selectorBlock Selector, IElement Dom, Dictionary<string, object> variables = null, bool required = true)
{
    // A literal text template bypasses the DOM entirely.
    if (Selector.Text != null)
    {
        var renderedText = applyGoTemplateText(Selector.Text, variables);
        return applyFilters(renderedText, Selector.Filters, variables);
    }

    // Pick the target element: the row itself when it matches, otherwise its first matching descendant.
    var target = Dom;
    if (Selector.Selector != null)
    {
        target = Dom.Matches(Selector.Selector) ? Dom : QuerySelector(Dom, Selector.Selector);
        if (target == null)
        {
            if (!required)
                return null;
            throw new Exception($"Selector \"{Selector.Selector}\" didn't match {Dom.ToHtmlPretty()}");
        }
    }

    if (Selector.Remove != null)
    {
        foreach (var unwanted in target.QuerySelectorAll(Selector.Remove))
        {
            unwanted.Remove();
        }
    }

    string extracted = null;
    if (Selector.Case != null)
    {
        // The first case whose selector matches the target (or one of its descendants) supplies the value.
        foreach (var candidate in Selector.Case)
        {
            var caseApplies = target.Matches(candidate.Key) || QuerySelector(target, candidate.Key) != null;
            if (!caseApplies)
                continue;

            extracted = candidate.Value;
            break;
        }

        if (extracted == null)
        {
            if (!required)
                return null;
            var caseList = string.Join(",", Selector.Case);
            throw new Exception($"None of the case selectors \"{caseList}\" matched {target.ToHtmlPretty()}");
        }
    }
    else if (Selector.Attribute != null)
    {
        extracted = target.GetAttribute(Selector.Attribute);
        if (extracted == null)
        {
            if (!required)
                return null;
            throw new Exception($"Attribute \"{Selector.Attribute}\" is not set for element {target.ToHtmlPretty()}");
        }
    }
    else
    {
        extracted = target.TextContent;
    }

    return applyFilters(ParseUtil.NormalizeSpace(extracted), Selector.Filters, variables);
}
2020-02-25 16:08:03 +00:00
/// <summary>
/// Resolves a path against a base URL.
/// </summary>
/// <param name="path">Path or URL to resolve.</param>
/// <param name="currentUrl">Base URL; when null, <c>SiteLink</c> is used as the base.</param>
/// <returns>The URI produced by combining the base with <paramref name="path"/>.</returns>
protected Uri resolvePath(string path, Uri currentUrl = null)
{
    var baseUri = currentUrl ?? new Uri(SiteLink);
    return new Uri(baseUri, path);
}
2016-10-27 07:30:03 +00:00
2017-07-03 05:15:47 +00:00
protected override async Task < IEnumerable < ReleaseInfo > > PerformQuery ( TorznabQuery query )
2016-10-27 07:30:03 +00:00
{
var releases = new List < ReleaseInfo > ( ) ;
2020-02-10 22:16:19 +00:00
var Search = Definition . Search ;
2016-10-27 07:30:03 +00:00
// init template context
2020-04-20 00:16:19 +00:00
var variables = GetBaseTemplateVariables ( ) ;
2017-04-15 08:45:10 +00:00
variables [ ".Query.Type" ] = query . QueryType ;
variables [ ".Query.Q" ] = query . SearchTerm ;
variables [ ".Query.Series" ] = null ;
variables [ ".Query.Ep" ] = query . Episode ;
variables [ ".Query.Season" ] = query . Season ;
variables [ ".Query.Movie" ] = null ;
2017-10-19 14:46:34 +00:00
variables [ ".Query.Year" ] = query . Year . ToString ( ) ;
2019-05-19 23:22:06 +00:00
variables [ ".Query.Limit" ] = query . Limit . ToString ( ) ;
variables [ ".Query.Offset" ] = query . Offset . ToString ( ) ;
variables [ ".Query.Extended" ] = query . Extended . ToString ( ) ;
2017-04-15 08:45:10 +00:00
variables [ ".Query.Categories" ] = query . Categories ;
variables [ ".Query.APIKey" ] = query . ApiKey ;
2020-08-16 22:07:04 +00:00
variables [ ".Query.TVDBID" ] = query . TvdbID . ToString ( ) ;
2017-04-15 08:45:10 +00:00
variables [ ".Query.TVRageID" ] = query . RageID ;
variables [ ".Query.IMDBID" ] = query . ImdbID ;
variables [ ".Query.IMDBIDShort" ] = query . ImdbIDShort ;
2020-08-16 22:07:04 +00:00
variables [ ".Query.TMDBID" ] = query . TmdbID . ToString ( ) ;
2017-04-15 08:45:10 +00:00
variables [ ".Query.TVMazeID" ] = null ;
variables [ ".Query.TraktID" ] = null ;
2017-10-19 14:46:34 +00:00
variables [ ".Query.Album" ] = query . Album ;
variables [ ".Query.Artist" ] = query . Artist ;
variables [ ".Query.Label" ] = query . Label ;
variables [ ".Query.Track" ] = query . Track ;
//variables[".Query.Genre"] = query.Genre ?? new List<string>();
2017-04-15 08:45:10 +00:00
variables [ ".Query.Episode" ] = query . GetEpisodeSearchString ( ) ;
2020-08-16 21:44:12 +00:00
variables [ ".Query.Author" ] = query . Author ;
variables [ ".Query.Title" ] = query . Title ;
2017-04-15 08:45:10 +00:00
var mappedCategories = MapTorznabCapsToTrackers ( query ) ;
2018-04-01 12:56:45 +00:00
if ( mappedCategories . Count = = 0 )
{
2020-02-10 22:16:19 +00:00
mappedCategories = DefaultCategories ;
2018-04-01 12:56:45 +00:00
}
2017-04-15 08:45:10 +00:00
variables [ ".Categories" ] = mappedCategories ;
var KeywordTokens = new List < string > ( ) ;
var KeywordTokenKeys = new List < string > { "Q" , "Series" , "Movie" , "Year" } ;
foreach ( var key in KeywordTokenKeys )
{
var Value = ( string ) variables [ ".Query." + key ] ;
if ( ! string . IsNullOrWhiteSpace ( Value ) )
KeywordTokens . Add ( Value ) ;
}
if ( ! string . IsNullOrWhiteSpace ( ( string ) variables [ ".Query.Episode" ] ) )
KeywordTokens . Add ( ( string ) variables [ ".Query.Episode" ] ) ;
variables [ ".Query.Keywords" ] = string . Join ( " " , KeywordTokens ) ;
variables [ ".Keywords" ] = applyFilters ( ( string ) variables [ ".Query.Keywords" ] , Search . Keywordsfilters ) ;
2020-02-19 20:23:55 +00:00
// TODO: prepare queries first and then send them parallel
2017-04-15 08:45:10 +00:00
var SearchPaths = Search . Paths ;
foreach ( var SearchPath in SearchPaths )
{
// skip path if categories don't match
2021-06-19 20:30:08 +00:00
if ( SearchPath . Categories . Count > 0 )
2017-04-15 08:45:10 +00:00
{
var invertMatch = ( SearchPath . Categories [ 0 ] = = "!" ) ;
var hasIntersect = mappedCategories . Intersect ( SearchPath . Categories ) . Any ( ) ;
if ( invertMatch )
hasIntersect = ! hasIntersect ;
if ( ! hasIntersect )
continue ;
}
// build search URL
// HttpUtility.UrlPathEncode seems to only encode spaces, we use UrlEncode and replace + with %20 as a workaround
2017-11-05 09:42:03 +00:00
var searchUrl = resolvePath ( applyGoTemplateText ( SearchPath . Path , variables , WebUtility . UrlEncode ) . Replace ( "+" , "%20" ) ) . AbsoluteUri ;
2017-04-15 08:45:10 +00:00
var queryCollection = new List < KeyValuePair < string , string > > ( ) ;
2020-02-10 22:16:19 +00:00
var method = RequestType . GET ;
2017-04-15 08:45:10 +00:00
2020-02-10 22:16:19 +00:00
if ( string . Equals ( SearchPath . Method , "post" , StringComparison . OrdinalIgnoreCase ) )
2017-04-15 08:45:10 +00:00
{
method = RequestType . POST ;
}
var InputsList = new List < Dictionary < string , string > > ( ) ;
if ( SearchPath . Inheritinputs )
InputsList . Add ( Search . Inputs ) ;
InputsList . Add ( SearchPath . Inputs ) ;
foreach ( var Inputs in InputsList )
2017-07-10 20:58:44 +00:00
{
2017-04-15 08:45:10 +00:00
if ( Inputs ! = null )
{
foreach ( var Input in Inputs )
{
if ( Input . Key = = "$raw" )
{
2017-11-05 09:42:03 +00:00
var rawStr = applyGoTemplateText ( Input . Value , variables , WebUtility . UrlEncode ) ;
2020-02-10 22:16:19 +00:00
foreach ( var part in rawStr . Split ( '&' ) )
2017-04-15 08:45:10 +00:00
{
2020-10-19 21:19:10 +00:00
var parts = part . Split ( new [ ] { '=' } , 2 ) ;
2017-04-15 08:45:10 +00:00
var key = parts [ 0 ] ;
if ( key . Length = = 0 )
continue ;
var value = "" ;
if ( parts . Count ( ) = = 2 )
value = parts [ 1 ] ;
queryCollection . Add ( key , value ) ;
}
}
else
queryCollection . Add ( Input . Key , applyGoTemplateText ( Input . Value , variables ) ) ;
}
}
}
if ( method = = RequestType . GET )
{
if ( queryCollection . Count > 0 )
searchUrl + = "?" + queryCollection . GetQueryString ( Encoding ) ;
}
2017-04-21 18:22:47 +00:00
var searchUrlUri = new Uri ( searchUrl ) ;
2017-04-15 08:45:10 +00:00
2020-12-20 18:56:19 +00:00
// send HTTP request
var headers = ParseCustomHeaders ( Search . Headers , variables ) ;
2020-09-21 16:39:47 +00:00
var response = await RequestWithCookiesAsync (
2020-06-11 15:09:27 +00:00
searchUrl , method : method , headers : headers , data : queryCollection ) ;
2018-12-01 13:19:50 +00:00
if ( response . IsRedirect & & SearchPath . Followredirect )
await FollowIfRedirect ( response ) ;
2020-06-09 17:36:57 +00:00
var results = response . ContentString ;
2017-10-17 16:23:13 +00:00
2018-12-01 13:19:50 +00:00
2017-02-17 18:48:13 +00:00
try
{
2017-04-15 08:45:10 +00:00
var SearchResultParser = new HtmlParser ( ) ;
2019-01-20 00:09:27 +00:00
var SearchResultDocument = SearchResultParser . ParseDocument ( results ) ;
2017-04-15 08:45:10 +00:00
// check if we need to login again
var loginNeeded = CheckIfLoginIsNeeded ( response , SearchResultDocument ) ;
if ( loginNeeded )
{
2020-05-11 19:59:28 +00:00
logger . Info ( string . Format ( "CardigannIndexer ({0}): Relogin required" , Id ) ) ;
2017-04-15 08:45:10 +00:00
var LoginResult = await DoLogin ( ) ;
if ( ! LoginResult )
throw new Exception ( string . Format ( "Relogin failed" ) ) ;
await TestLogin ( ) ;
2020-09-21 16:39:47 +00:00
response = await RequestWithCookiesAsync ( searchUrl , method : method , data : queryCollection ) ;
2018-12-01 13:19:50 +00:00
if ( response . IsRedirect & & SearchPath . Followredirect )
await FollowIfRedirect ( response ) ;
2020-06-09 17:36:57 +00:00
results = response . ContentString ;
2019-01-20 00:09:27 +00:00
SearchResultDocument = SearchResultParser . ParseDocument ( results ) ;
2017-04-15 08:45:10 +00:00
}
checkForError ( response , Definition . Search . Error ) ;
2017-10-24 09:51:54 +00:00
if ( Search . Preprocessingfilters ! = null )
{
results = applyFilters ( results , Search . Preprocessingfilters , variables ) ;
2019-01-20 00:09:27 +00:00
SearchResultDocument = SearchResultParser . ParseDocument ( results ) ;
2020-05-11 19:59:28 +00:00
logger . Debug ( string . Format ( "CardigannIndexer ({0}): result after preprocessingfilters: {1}" , Id , results ) ) ;
2017-10-24 09:51:54 +00:00
}
2017-04-15 08:45:10 +00:00
2018-12-01 11:12:27 +00:00
var rowsSelector = applyGoTemplateText ( Search . Rows . Selector , variables ) ;
var RowsDom = SearchResultDocument . QuerySelectorAll ( rowsSelector ) ;
2020-02-10 22:16:19 +00:00
var Rows = new List < IElement > ( ) ;
2017-04-15 08:45:10 +00:00
foreach ( var RowDom in RowsDom )
{
Rows . Add ( RowDom ) ;
}
// merge following rows for After selector
var After = Definition . Search . Rows . After ;
if ( After > 0 )
{
2020-02-10 22:16:19 +00:00
for ( var i = 0 ; i < Rows . Count ; i + = 1 )
2017-04-15 08:45:10 +00:00
{
var CurrentRow = Rows [ i ] ;
2020-02-10 22:16:19 +00:00
for ( var j = 0 ; j < After ; j + = 1 )
2017-04-15 08:45:10 +00:00
{
var MergeRowIndex = i + j + 1 ;
var MergeRow = Rows [ MergeRowIndex ] ;
2020-02-10 22:16:19 +00:00
var MergeNodes = new List < INode > ( ) ;
2017-04-15 08:45:10 +00:00
foreach ( var node in MergeRow . ChildNodes )
{
MergeNodes . Add ( node ) ;
}
CurrentRow . Append ( MergeNodes . ToArray ( ) ) ;
}
Rows . RemoveRange ( i + 1 , After ) ;
}
}
foreach ( var Row in Rows )
2017-02-17 18:48:13 +00:00
{
2017-04-15 08:45:10 +00:00
try
{
2020-09-26 22:19:54 +00:00
var release = new ReleaseInfo ( ) ;
2017-02-17 18:48:13 +00:00
// Parse fields
2017-04-15 08:45:10 +00:00
foreach ( var Field in Search . Fields )
{
var FieldParts = Field . Key . Split ( '|' ) ;
var FieldName = FieldParts [ 0 ] ;
var FieldModifiers = new List < string > ( ) ;
for ( var i = 1 ; i < FieldParts . Length ; i + + )
FieldModifiers . Add ( FieldParts [ i ] ) ;
string value = null ;
var variablesKey = ".Result." + FieldName ;
2021-05-03 19:59:37 +00:00
var isOptional = OptionalFields . Contains ( Field . Key ) | | FieldModifiers . Contains ( "optional" ) | | Field . Value . Optional ;
2017-04-15 08:45:10 +00:00
try
{
2021-05-03 19:59:37 +00:00
value = handleSelector ( Field . Value , Row , variables , ! isOptional ) ;
if ( isOptional & & value = = null )
{
variables [ variablesKey ] = null ;
continue ;
}
2017-04-15 08:45:10 +00:00
switch ( FieldName )
{
2017-02-17 18:48:13 +00:00
case "download" :
2017-04-15 08:45:10 +00:00
if ( string . IsNullOrEmpty ( value ) )
{
value = null ;
release . Link = null ;
break ;
}
if ( value . StartsWith ( "magnet:" ) )
{
release . MagnetUri = new Uri ( value ) ;
value = release . MagnetUri . ToString ( ) ;
}
else
{
2017-04-21 18:22:47 +00:00
release . Link = resolvePath ( value , searchUrlUri ) ;
2017-04-15 08:45:10 +00:00
value = release . Link . ToString ( ) ;
2017-02-17 18:48:13 +00:00
}
2017-04-15 08:45:10 +00:00
break ;
case "magnet" :
2019-12-11 04:57:10 +00:00
var magnetUri = new Uri ( value ) ;
release . MagnetUri = magnetUri ;
value = magnetUri . ToString ( ) ;
2017-04-15 08:45:10 +00:00
break ;
2020-11-03 20:45:02 +00:00
case "infohash" :
release . InfoHash = value ;
break ;
2017-02-17 18:48:13 +00:00
case "details" :
2017-04-21 18:22:47 +00:00
var url = resolvePath ( value , searchUrlUri ) ;
2020-11-08 02:11:27 +00:00
release . Details = url ;
2017-04-15 08:45:10 +00:00
value = url . ToString ( ) ;
break ;
2017-02-17 18:48:13 +00:00
case "title" :
if ( FieldModifiers . Contains ( "append" ) )
release . Title + = value ;
else
2017-04-15 08:45:10 +00:00
release . Title = value ;
value = release . Title ;
break ;
2017-02-17 18:48:13 +00:00
case "description" :
if ( FieldModifiers . Contains ( "append" ) )
release . Description + = value ;
else
release . Description = value ;
value = release . Description ;
2017-04-15 08:45:10 +00:00
break ;
2017-02-17 18:48:13 +00:00
case "category" :
2018-04-10 12:48:46 +00:00
var cats = MapTrackerCatToNewznab ( value ) ;
2020-03-23 02:07:33 +00:00
if ( cats . Any ( ) )
2018-04-10 12:48:46 +00:00
{
2020-03-23 02:07:33 +00:00
if ( release . Category = = null | | FieldModifiers . Contains ( "noappend" ) )
release . Category = cats ;
else
release . Category = release . Category . Union ( cats ) . ToList ( ) ;
2018-04-10 12:48:46 +00:00
}
2017-04-15 08:45:10 +00:00
value = release . Category . ToString ( ) ;
break ;
2017-02-17 18:48:13 +00:00
case "size" :
2017-04-15 08:45:10 +00:00
release . Size = ReleaseInfo . GetBytes ( value ) ;
value = release . Size . ToString ( ) ;
break ;
2017-02-17 18:48:13 +00:00
case "leechers" :
2020-09-12 14:32:38 +00:00
var leechers = ParseUtil . CoerceLong ( value ) ;
leechers = leechers < 5000000L ? leechers : 0 ; // to fix #6558
2017-04-15 08:45:10 +00:00
if ( release . Peers = = null )
2020-09-12 14:32:38 +00:00
release . Peers = leechers ;
2017-04-15 08:45:10 +00:00
else
2020-09-12 14:32:38 +00:00
release . Peers + = leechers ;
value = leechers . ToString ( ) ;
2017-04-15 08:45:10 +00:00
break ;
2017-02-17 18:48:13 +00:00
case "seeders" :
2020-09-12 14:32:38 +00:00
release . Seeders = ParseUtil . CoerceLong ( value ) ;
release . Seeders = release . Seeders < 5000000L ? release . Seeders : 0 ; // to fix #6558
2017-04-15 08:45:10 +00:00
if ( release . Peers = = null )
release . Peers = release . Seeders ;
else
release . Peers + = release . Seeders ;
value = release . Seeders . ToString ( ) ;
break ;
2017-02-17 18:48:13 +00:00
case "date" :
2017-04-15 08:45:10 +00:00
release . PublishDate = DateTimeUtil . FromUnknown ( value ) ;
2020-03-14 16:05:10 +00:00
value = release . PublishDate . ToString ( DateTimeUtil . Rfc1123ZPattern ) ;
2017-04-15 08:45:10 +00:00
break ;
2017-02-17 18:48:13 +00:00
case "files" :
2017-04-15 08:45:10 +00:00
release . Files = ParseUtil . CoerceLong ( value ) ;
value = release . Files . ToString ( ) ;
break ;
2017-02-17 18:48:13 +00:00
case "grabs" :
2017-04-15 08:45:10 +00:00
release . Grabs = ParseUtil . CoerceLong ( value ) ;
value = release . Grabs . ToString ( ) ;
break ;
2017-02-17 18:48:13 +00:00
case "downloadvolumefactor" :
2017-04-15 08:45:10 +00:00
release . DownloadVolumeFactor = ParseUtil . CoerceDouble ( value ) ;
value = release . DownloadVolumeFactor . ToString ( ) ;
break ;
2017-02-17 18:48:13 +00:00
case "uploadvolumefactor" :
2017-04-15 08:45:10 +00:00
release . UploadVolumeFactor = ParseUtil . CoerceDouble ( value ) ;
value = release . UploadVolumeFactor . ToString ( ) ;
break ;
2017-02-17 18:48:13 +00:00
case "minimumratio" :
2017-04-15 08:45:10 +00:00
release . MinimumRatio = ParseUtil . CoerceDouble ( value ) ;
value = release . MinimumRatio . ToString ( ) ;
break ;
2017-02-17 18:48:13 +00:00
case "minimumseedtime" :
2017-04-15 08:45:10 +00:00
release . MinimumSeedTime = ParseUtil . CoerceLong ( value ) ;
value = release . MinimumSeedTime . ToString ( ) ;
break ;
2017-02-17 18:48:13 +00:00
case "imdb" :
2017-04-15 08:45:10 +00:00
release . Imdb = ParseUtil . GetLongFromString ( value ) ;
value = release . Imdb . ToString ( ) ;
break ;
2020-08-16 22:07:04 +00:00
case "tmdbid" :
var TmdbIDRegEx = new Regex ( @"(\d+)" , RegexOptions . Compiled ) ;
var TmdbIDMatch = TmdbIDRegEx . Match ( value ) ;
var TmdbID = TmdbIDMatch . Groups [ 1 ] . Value ;
release . TMDb = ParseUtil . CoerceLong ( TmdbID ) ;
value = release . TMDb . ToString ( ) ;
break ;
2017-02-17 18:48:13 +00:00
case "rageid" :
2020-02-10 22:16:19 +00:00
var RageIDRegEx = new Regex ( @"(\d+)" , RegexOptions . Compiled ) ;
2017-04-15 08:45:10 +00:00
var RageIDMatch = RageIDRegEx . Match ( value ) ;
var RageID = RageIDMatch . Groups [ 1 ] . Value ;
release . RageID = ParseUtil . CoerceLong ( RageID ) ;
value = release . RageID . ToString ( ) ;
break ;
2017-02-17 18:48:13 +00:00
case "tvdbid" :
2020-02-10 22:16:19 +00:00
var TVDBIdRegEx = new Regex ( @"(\d+)" , RegexOptions . Compiled ) ;
2017-04-15 08:45:10 +00:00
var TVDBIdMatch = TVDBIdRegEx . Match ( value ) ;
var TVDBId = TVDBIdMatch . Groups [ 1 ] . Value ;
release . TVDBId = ParseUtil . CoerceLong ( TVDBId ) ;
value = release . TVDBId . ToString ( ) ;
break ;
2020-08-16 21:44:12 +00:00
case "author" :
release . Author = value ;
break ;
case "booktitle" :
release . BookTitle = value ;
break ;
2020-11-07 23:43:33 +00:00
case "poster" :
2017-07-10 20:58:44 +00:00
if ( ! string . IsNullOrWhiteSpace ( value ) )
{
2020-11-07 23:43:33 +00:00
var poster = resolvePath ( value , searchUrlUri ) ;
release . Poster = poster ;
2017-04-15 08:45:10 +00:00
}
2020-11-07 23:43:33 +00:00
value = release . Poster . ToString ( ) ;
2017-04-15 08:45:10 +00:00
break ;
default :
break ;
}
variables [ variablesKey ] = value ;
}
catch ( Exception ex )
{
if ( ! variables . ContainsKey ( variablesKey ) )
variables [ variablesKey ] = null ;
2021-05-03 19:59:37 +00:00
if ( isOptional )
2020-08-12 06:01:59 +00:00
{
variables [ variablesKey ] = null ;
2017-04-15 08:45:10 +00:00
continue ;
2020-08-12 06:01:59 +00:00
}
2017-04-15 08:45:10 +00:00
throw new Exception ( string . Format ( "Error while parsing field={0}, selector={1}, value={2}: {3}" , Field . Key , Field . Value . Selector , ( value = = null ? "<null>" : value ) , ex . Message ) ) ;
}
}
var Filters = Definition . Search . Rows . Filters ;
var SkipRelease = false ;
if ( Filters ! = null )
{
2020-02-10 22:16:19 +00:00
foreach ( var Filter in Filters )
2017-04-15 08:45:10 +00:00
{
switch ( Filter . Name )
{
case "andmatch" :
2020-02-10 22:16:19 +00:00
var CharacterLimit = - 1 ;
2017-04-15 08:45:10 +00:00
if ( Filter . Args ! = null )
CharacterLimit = int . Parse ( Filter . Args ) ;
2020-10-18 17:26:22 +00:00
if ( query . ImdbID ! = null & & TorznabCaps . MovieSearchImdbAvailable )
2017-04-15 08:45:10 +00:00
break ; // skip andmatch filter for imdb searches
2020-10-18 17:26:22 +00:00
if ( query . TmdbID ! = null & & TorznabCaps . MovieSearchTmdbAvailable )
2020-08-16 22:07:04 +00:00
break ; // skip andmatch filter for tmdb searches
2020-10-18 20:47:36 +00:00
if ( query . TvdbID ! = null & & TorznabCaps . TvSearchTvdbAvailable )
2020-08-16 22:07:04 +00:00
break ; // skip andmatch filter for tvdb searches
2020-04-11 19:02:00 +00:00
var queryKeywords = variables [ ".Keywords" ] as string ;
if ( ! query . MatchQueryStringAND ( release . Title , CharacterLimit , queryKeywords ) )
2017-04-15 08:45:10 +00:00
{
2020-05-11 19:59:28 +00:00
logger . Debug ( string . Format ( "CardigannIndexer ({0}): skipping {1} (andmatch filter)" , Id , release . Title ) ) ;
2017-04-15 08:45:10 +00:00
SkipRelease = true ;
}
break ;
case "strdump" :
// for debugging
2020-05-11 19:59:28 +00:00
logger . Debug ( string . Format ( "CardigannIndexer ({0}): row strdump: {1}" , Id , Row . ToHtmlPretty ( ) ) ) ;
2017-04-15 08:45:10 +00:00
break ;
default :
2020-05-11 19:59:28 +00:00
logger . Error ( string . Format ( "CardigannIndexer ({0}): Unsupported rows filter: {1}" , Id , Filter . Name ) ) ;
2017-04-15 08:45:10 +00:00
break ;
}
}
}
if ( SkipRelease )
continue ;
// if DateHeaders is set go through the previous rows and look for the header selector
var DateHeaders = Definition . Search . Rows . Dateheaders ;
if ( release . PublishDate = = DateTime . MinValue & & DateHeaders ! = null )
{
var PrevRow = Row . PreviousElementSibling ;
string value = null ;
2017-05-07 12:05:39 +00:00
if ( PrevRow = = null ) // continue with parent
2017-07-10 20:58:44 +00:00
{
2017-05-07 12:05:39 +00:00
var Parent = Row . ParentElement ;
if ( Parent ! = null )
PrevRow = Parent . PreviousElementSibling ;
}
2017-04-15 08:45:10 +00:00
while ( PrevRow ! = null )
{
2017-05-07 12:05:39 +00:00
var CurRow = PrevRow ;
2019-11-17 06:54:33 +00:00
logger . Debug ( PrevRow . OuterHtml ) ;
2017-04-15 08:45:10 +00:00
try
{
2017-05-07 12:05:39 +00:00
value = handleSelector ( DateHeaders , CurRow ) ;
2017-04-15 08:45:10 +00:00
break ;
}
catch ( Exception )
{
// do nothing
}
2017-05-07 12:05:39 +00:00
PrevRow = CurRow . PreviousElementSibling ;
if ( PrevRow = = null ) // continue with parent
{
var Parent = CurRow . ParentElement ;
if ( Parent ! = null )
PrevRow = Parent . PreviousElementSibling ;
}
2017-04-15 08:45:10 +00:00
}
2017-07-10 20:58:44 +00:00
2017-04-15 08:45:10 +00:00
if ( value = = null & & DateHeaders . Optional = = false )
throw new Exception ( string . Format ( "No date header row found for {0}" , release . ToString ( ) ) ) ;
if ( value ! = null )
release . PublishDate = DateTimeUtil . FromUnknown ( value ) ;
}
releases . Add ( release ) ;
}
catch ( Exception ex )
{
2020-05-11 19:59:28 +00:00
logger . Error ( string . Format ( "CardigannIndexer ({0}): Error while parsing row '{1}':\n\n{2}" , Id , Row . ToHtmlPretty ( ) , ex ) ) ;
2017-04-15 08:45:10 +00:00
}
2017-02-17 18:48:13 +00:00
}
2016-10-27 07:30:03 +00:00
}
2017-02-17 18:48:13 +00:00
catch ( Exception ex )
{
OnParseError ( results , ex ) ;
2017-04-15 08:45:10 +00:00
}
2016-10-27 07:30:03 +00:00
}
2019-03-18 04:41:23 +00:00
if ( query . Limit > 0 )
releases = releases . Take ( query . Limit ) . ToList ( ) ;
2016-10-27 07:30:03 +00:00
return releases ;
}
2016-12-15 08:12:28 +00:00
2020-06-10 21:22:29 +00:00
/// <summary>
/// Builds and sends the HTTP request described by a definition request block.
/// </summary>
/// <remarks>
/// The path and every input value are expanded with <c>applyGoTemplateText</c>. For a "post"
/// request (compared case-insensitively) the inputs are sent as form pairs; otherwise they are
/// appended to the URL as a query string.
/// </remarks>
/// <param name="request">Request block providing the path, method, inputs and query separator.</param>
/// <param name="variables">Template variables used when expanding the path and the input values.</param>
/// <param name="referer">Referer handed to the underlying request; may be null.</param>
/// <returns>The response returned by <c>RequestWithCookiesAndRetryAsync</c>.</returns>
protected async Task<WebResult> handleRequest(requestBlock request, Dictionary<string, object> variables = null, string referer = null)
{
    var requestLinkStr = resolvePath(applyGoTemplateText(request.Path, variables)).ToString();
    logger.Debug($"CardigannIndexer ({Id}): handleRequest() requestLinkStr= {requestLinkStr}");

    var isPost = string.Equals(request.Method, "post", StringComparison.OrdinalIgnoreCase);
    var method = isPost ? RequestType.POST : RequestType.GET;
    // Form pairs are only allocated for POST; GET requests pass null to the client.
    var pairs = isPost ? new Dictionary<string, string>() : null;
    var queryCollection = new NameValueCollection();

    // Guard against a null Inputs collection so that a request block without inputs
    // is sent as-is instead of failing with a NullReferenceException.
    if (request.Inputs != null)
    {
        foreach (var input in request.Inputs)
        {
            var value = applyGoTemplateText(input.Value, variables);
            if (isPost)
                pairs.Add(input.Key, value);
            else
                queryCollection.Add(input.Key, value);
        }
    }

    if (queryCollection.Count > 0)
    {
        var queryString = queryCollection.GetQueryString(Encoding, separator: request.Queryseparator);
        // NOTE(review): the first character is dropped when a new query string is started;
        // presumably GetQueryString emits a leading separator - confirm against the helper.
        requestLinkStr += requestLinkStr.Contains("?")
            ? queryString
            : "?" + queryString.Substring(1);
    }

    var response = await RequestWithCookiesAndRetryAsync(requestLinkStr, null, method, referer, pairs);
    logger.Debug($"CardigannIndexer ({Id}): handleRequest() remote server returned {response.Status.ToString()}" + (response.IsRedirect ? " => " + response.RedirectingTo : ""));
    return response;
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Stores the components of <paramref name="uri"/> in <paramref name="variables"/> under keys
/// built from <paramref name="prefix"/> followed by the component name (".AbsoluteUri", ".Host", ...).
/// Each parsed query parameter is additionally stored under prefix + ".Query." + parameter name.
/// </summary>
/// <param name="variables">Dictionary that receives the entries; existing keys are overwritten.</param>
/// <param name="uri">URI whose parts are published.</param>
/// <param name="prefix">Text prepended to every key.</param>
/// <returns>The same <paramref name="variables"/> instance that was passed in.</returns>
protected IDictionary<string, object> AddTemplateVariablesFromUri(IDictionary<string, object> variables, Uri uri, string prefix = "")
{
    void Publish(string suffix, string text) => variables[prefix + suffix] = text;

    Publish(".AbsoluteUri", uri.AbsoluteUri);
    Publish(".AbsolutePath", uri.AbsolutePath);
    Publish(".Scheme", uri.Scheme);
    Publish(".Host", uri.Host);
    Publish(".Port", uri.Port.ToString());
    Publish(".PathAndQuery", uri.PathAndQuery);
    Publish(".Query", uri.Query);

    foreach (var parameter in QueryHelpers.ParseQuery(uri.Query))
    {
        // A parameter supplied several times contributes only its first value.
        Publish(".Query." + parameter.Key, parameter.Value.First());
    }

    return variables;
}
2017-04-15 08:45:10 +00:00
/// <summary>
/// Downloads the content behind <paramref name="link"/>, applying the definition's download block if present.
/// </summary>
/// <remarks>
/// When the definition has a download block, its optional "before" request is sent first. If download
/// selectors are defined, they are tried in order against the page at <paramref name="link"/>: the first
/// selector that yields a magnet link, or a link whose content passes the torrent check, is handed to the
/// base download. A selector that matches nothing, fails the check or throws is skipped.
/// Without a download block or selectors, <paramref name="link"/> is downloaded directly.
/// </remarks>
/// <param name="link">Link to download, or the page the download selectors are applied to.</param>
/// <returns>The bytes returned by the base <c>Download</c> implementation.</returns>
/// <exception cref="Exception">Download selectors are defined but none of them produced a usable link.</exception>
public override async Task<byte[]> Download(Uri link)
{
    var method = RequestType.GET;
    if (Definition.Download != null)
    {
        var download = Definition.Download;
        var variables = GetBaseTemplateVariables();
        AddTemplateVariablesFromUri(variables, link, ".DownloadUri");

        if (download.Before != null)
            await handleRequest(download.Before, variables, link.ToString());

        // Compare case-insensitively, the same way handleRequest interprets a request method,
        // so that "POST" and "post" in a definition behave alike.
        if (string.Equals(download.Method, "post", StringComparison.OrdinalIgnoreCase))
            method = RequestType.POST;

        if (download.Selectors != null)
        {
            var headers = ParseCustomHeaders(Definition.Search?.Headers, variables);
            // Content of the last fetched page, kept for the error log below.
            var results = "";
            var searchResultParser = new HtmlParser();

            foreach (var selector in download.Selectors)
            {
                var querySelector = applyGoTemplateText(selector.Selector, variables);
                try
                {
                    var response = await RequestWithCookiesAsync(link.ToString(), headers: headers);
                    if (response.IsRedirect)
                        response = await RequestWithCookiesAsync(response.RedirectingTo, headers: headers);
                    results = response.ContentString;

                    var searchResultDocument = searchResultParser.ParseDocument(results);
                    var downloadElement = searchResultDocument.QuerySelector(querySelector);
                    if (downloadElement == null)
                    {
                        logger.Debug(
                            $"CardigannIndexer ({Id}): Download selector {querySelector} could not match any elements, retrying with next available selector.");
                        continue;
                    }

                    logger.Debug(
                        $"CardigannIndexer ({Id}): Download selector {querySelector} matched:{downloadElement.ToHtmlPretty()}");

                    // The link is read from the configured attribute, or from the element text if none is set.
                    string href;
                    if (selector.Attribute != null)
                    {
                        href = downloadElement.GetAttribute(selector.Attribute);
                        if (href == null)
                            throw new Exception(
                                $"Attribute \"{selector.Attribute}\" is not set for element {downloadElement.ToHtmlPretty()}");
                    }
                    else
                    {
                        href = downloadElement.TextContent;
                    }

                    href = applyFilters(href, selector.Filters, variables);
                    var torrentLink = resolvePath(href, link);

                    if (torrentLink.Scheme != "magnet")
                    {
                        // Test link
                        response = await base.RequestWithCookiesAsync(
                            torrentLink.ToString(), null, RequestType.GET, headers: headers);
                        if (response.IsRedirect)
                            await FollowIfRedirect(response);

                        // Non-empty content must start with 'd' (presumably the opening of a
                        // bencoded dictionary); anything else is treated as an invalid torrent.
                        var content = response.ContentBytes;
                        if (content.Length >= 1 && content[0] != 'd')
                        {
                            logger.Debug(
                                $"CardigannIndexer ({Id}): Download selector {querySelector}'s torrent file is invalid, retrying with next available selector");
                            continue;
                        }
                    }

                    link = torrentLink;
                    return await base.Download(link, method, link.ToString());
                }
                catch (Exception e)
                {
                    // Best effort: a failing selector is logged and the next one is tried.
                    logger.Error(e,
                        $"CardigannIndexer ({Id}): An exception occurred while trying selector {querySelector}, retrying with next available selector"
                    );
                }
            }

            logger.Error(
                $"CardigannIndexer ({Id}): Download selectors didn't match:\n{results}");
            throw new Exception("Download selectors didn't match");
        }
    }

    return await base.Download(link, method, link.ToString());
}
2020-12-20 18:56:19 +00:00
/// <summary>
/// Converts the definition's custom headers into a single-valued header dictionary,
/// expanding each value with <c>applyGoTemplateText</c>.
/// </summary>
/// <param name="customHeaders">Header name mapped to its list of values; may be null.</param>
/// <param name="variables">Template variables used when expanding the header values.</param>
/// <returns>One entry per header name, or null when <paramref name="customHeaders"/> is null.</returns>
private Dictionary<string, string> ParseCustomHeaders(Dictionary<string, List<string>> customHeaders,
                                                      Dictionary<string, object> variables)
{
    // FIXME: fix Jackett header handling (allow it to specify the same header multiple times).
    // Until then only the first value listed for each header is used.
    return customHeaders?.ToDictionary(
        header => header.Key,
        header => applyGoTemplateText(header.Value[0], variables));
}
2016-10-27 07:30:03 +00:00
}
}