mirror of
https://github.com/Jackett/Jackett
synced 2024-12-25 01:07:38 +00:00
shazbat: refactor search and parsing (#13979)
This commit is contained in:
parent
8a35175d31
commit
fe93e54ac1
3 changed files with 215 additions and 92 deletions
|
@ -5,9 +5,10 @@ using System.Linq;
|
|||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using AngleSharp.Dom;
|
||||
using AngleSharp.Html.Parser;
|
||||
using Jackett.Common.Models;
|
||||
using Jackett.Common.Models.IndexerConfig;
|
||||
using Jackett.Common.Models.IndexerConfig.Bespoke;
|
||||
using Jackett.Common.Services.Interfaces;
|
||||
using Jackett.Common.Utils;
|
||||
using Jackett.Common.Utils.Clients;
|
||||
|
@ -22,20 +23,16 @@ namespace Jackett.Common.Indexers
|
|||
private string LoginUrl => SiteLink + "login";
|
||||
private string SearchUrl => SiteLink + "search";
|
||||
private string TorrentsUrl => SiteLink + "torrents";
|
||||
private string ShowUrl => SiteLink + "show?id=";
|
||||
private string ShowUrl => SiteLink + "show";
|
||||
private string RSSProfile => SiteLink + "rss_feeds";
|
||||
|
||||
private new ConfigurationDataBasicLoginWithRSS configData
|
||||
{
|
||||
get => (ConfigurationDataBasicLoginWithRSS)base.configData;
|
||||
set => base.configData = value;
|
||||
}
|
||||
private new ConfigurationDataShazbat configData => (ConfigurationDataShazbat)base.configData;
|
||||
|
||||
public Shazbat(IIndexerConfigurationService configService, WebClient c, Logger l, IProtectionService ps,
|
||||
ICacheService cs)
|
||||
: base(id: "shazbat",
|
||||
name: "Shazbat",
|
||||
description: "Modern indexer",
|
||||
description: "Shazbat is a PRIVATE Torrent Tracker with highly curated TV content",
|
||||
link: "https://www.shazbat.tv/",
|
||||
caps: new TorznabCapabilities
|
||||
{
|
||||
|
@ -49,141 +46,236 @@ namespace Jackett.Common.Indexers
|
|||
logger: l,
|
||||
p: ps,
|
||||
cacheService: cs,
|
||||
configData: new ConfigurationDataBasicLoginWithRSS())
|
||||
configData: new ConfigurationDataShazbat())
|
||||
{
|
||||
Encoding = Encoding.UTF8;
|
||||
Language = "en-US";
|
||||
Type = "private";
|
||||
|
||||
webclient.requestDelay = 5.1;
|
||||
|
||||
AddCategoryMapping(1, TorznabCatType.TV);
|
||||
AddCategoryMapping(2, TorznabCatType.TVSD);
|
||||
AddCategoryMapping(3, TorznabCatType.TVHD);
|
||||
}
|
||||
|
||||
/// <summary>
/// Maximum number of show pages requested per search, read from the indexer configuration.
/// Falls back to 2 when the configured value is not an integer between 1 and 5.
/// </summary>
private int ShowPagesFetchLimit
{
    get
    {
        const int fallbackLimit = 2;

        if (!int.TryParse(configData.ShowPagesFetchLimit.Value, out var parsedLimit))
            return fallbackLimit;

        return parsedLimit >= 1 && parsedLimit <= 5 ? parsedLimit : fallbackLimit;
    }
}
|
||||
|
||||
public override async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
|
||||
{
|
||||
LoadValuesFromJson(configJson);
|
||||
|
||||
var pairs = new Dictionary<string, string>
|
||||
{
|
||||
{"referer", "login"},
|
||||
{"query", ""},
|
||||
{"tv_login", configData.Username.Value},
|
||||
{"tv_password", configData.Password.Value},
|
||||
{"email", ""}
|
||||
{ "referer", "" },
|
||||
{ "query", "" },
|
||||
{ "tv_timezone", "0" },
|
||||
{ "tv_login", configData.Username.Value },
|
||||
{ "tv_password", configData.Password.Value }
|
||||
};
|
||||
|
||||
// Get cookie
|
||||
var result = await RequestLoginAndFollowRedirect(LoginUrl, pairs, null, true, null, LoginUrl);
|
||||
await ConfigureIfOK(result.Cookies, result.ContentString?.Contains("glyphicon-log-out") == true,
|
||||
() => throw new ExceptionWithConfigData("The username and password entered do not match.", configData));
|
||||
await ConfigureIfOK(result.Cookies, result.ContentString?.Contains("glyphicon-log-out") == true, () =>
|
||||
{
|
||||
throw new ExceptionWithConfigData("The username and password entered do not match.", configData);
|
||||
});
|
||||
|
||||
var rssProfile = await RequestWithCookiesAndRetryAsync(RSSProfile);
|
||||
var parser = new HtmlParser();
|
||||
var rssDom = parser.ParseDocument(rssProfile.ContentString);
|
||||
configData.RSSKey.Value = rssDom.QuerySelector(".col-sm-9:nth-of-type(1)").TextContent.Trim();
|
||||
|
||||
configData.RSSKey.Value = rssDom.QuerySelector(".col-sm-9:nth-of-type(1)")?.TextContent.Trim();
|
||||
if (string.IsNullOrWhiteSpace(configData.RSSKey.Value))
|
||||
throw new ExceptionWithConfigData("Failed to find RSS key.", configData);
|
||||
|
||||
SaveConfig();
|
||||
|
||||
return IndexerConfigurationStatus.RequiresTesting;
|
||||
}
|
||||
|
||||
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
|
||||
{
|
||||
WebResult response;
|
||||
|
||||
var releases = new List<ReleaseInfo>();
|
||||
var queryString = query.GetQueryString();
|
||||
WebResult results = null;
|
||||
var searchUrls = new List<string>();
|
||||
if (!string.IsNullOrWhiteSpace(query.SanitizedSearchTerm))
|
||||
var searchUrls = new List<WebRequest>();
|
||||
|
||||
var hasGlobalFreeleech = false;
|
||||
|
||||
var searchTerm = query.SanitizedSearchTerm;
|
||||
var term = FixSearchTerm(searchTerm);
|
||||
|
||||
var showTorrentsHeaders = new Dictionary<string, string>
|
||||
{
|
||||
var pairs = new Dictionary<string, string>
|
||||
{ "Content-Type", "application/x-www-form-urlencoded" },
|
||||
{ "X-Requested-With", "XMLHttpRequest" },
|
||||
};
|
||||
|
||||
var showTorrentsBody = new Dictionary<string, string>
|
||||
{
|
||||
{ "portlet", "true" },
|
||||
{ "tab", "true" }
|
||||
};
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(term))
|
||||
{
|
||||
var searchBody = new Dictionary<string, string>
|
||||
{
|
||||
{"search", query.SanitizedSearchTerm}
|
||||
{ "search", term }
|
||||
};
|
||||
results = await RequestWithCookiesAndRetryAsync(
|
||||
SearchUrl, null, RequestType.POST, TorrentsUrl, pairs);
|
||||
results = await ReloginIfNecessary(results);
|
||||
|
||||
response = await RequestWithCookiesAndRetryAsync(SearchUrl, method: RequestType.POST, referer: TorrentsUrl, data: searchBody);
|
||||
response = await ReloginIfNecessaryAsync(response);
|
||||
|
||||
var parser = new HtmlParser();
|
||||
var dom = parser.ParseDocument(results.ContentString);
|
||||
var dom = parser.ParseDocument(response.ContentString);
|
||||
|
||||
hasGlobalFreeleech = dom.QuerySelector("span:contains(\"Freeleech until:\"):has(span.datetime)") != null;
|
||||
|
||||
releases.AddRange(ParseResults(response, query, searchTerm, hasGlobalFreeleech));
|
||||
|
||||
var shows = dom.QuerySelectorAll("div.show[data-id]");
|
||||
foreach (var show in shows)
|
||||
if (shows.Any())
|
||||
{
|
||||
var showUrl = ShowUrl + show.GetAttribute("data-id");
|
||||
searchUrls.Add(showUrl);
|
||||
}
|
||||
}
|
||||
else
|
||||
searchUrls.Add(TorrentsUrl);
|
||||
var showPagesFetchLimit = ShowPagesFetchLimit;
|
||||
|
||||
try
|
||||
{
|
||||
foreach (var searchUrl in searchUrls)
|
||||
{
|
||||
results = await RequestWithCookiesAsync(searchUrl);
|
||||
results = await ReloginIfNecessary(results);
|
||||
var parser = new HtmlParser();
|
||||
var dom = parser.ParseDocument(results.ContentString);
|
||||
var rows = dom.QuerySelectorAll(
|
||||
string.IsNullOrWhiteSpace(queryString) ? "#torrent-table tr" : "table tr");
|
||||
var globalFreeleech =
|
||||
dom.QuerySelector("span:contains(\"Freeleech until:\"):has(span.datetime)") != null;
|
||||
foreach (var row in rows.Skip(1))
|
||||
if (showPagesFetchLimit < 1 || showPagesFetchLimit > 5)
|
||||
throw new Exception($"Value for Show Pages Fetch Limit should be between 1 and 5. Current value: {showPagesFetchLimit}.");
|
||||
|
||||
if (shows.Length > showPagesFetchLimit)
|
||||
logger.Debug($"Your search returned {shows.Length} shows. Use a more specific search term for more relevant results.");
|
||||
|
||||
foreach (var show in shows.Take(showPagesFetchLimit))
|
||||
{
|
||||
// TODO switch to initializer
|
||||
var release = new ReleaseInfo();
|
||||
var titleRow = row.QuerySelector("td:nth-of-type(3)");
|
||||
foreach (var child in titleRow.Children)
|
||||
child.Remove();
|
||||
release.Title = titleRow.TextContent.Trim();
|
||||
if ((query.ImdbID == null || !TorznabCaps.MovieSearchImdbAvailable) &&
|
||||
!query.MatchQueryStringAND(release.Title))
|
||||
continue;
|
||||
var posterStyle = row.QuerySelector("div[style^=\"cursor: pointer; background-image:url\"]")
|
||||
?.GetAttribute("style");
|
||||
if (!string.IsNullOrEmpty(posterStyle))
|
||||
var showTorrentsQueryParams = new Dictionary<string, string>
|
||||
{
|
||||
var posterStr = Regex.Match(posterStyle, @"url\('(.*?)'\);").Groups[1].Value;
|
||||
release.Poster = new Uri(SiteLink + posterStr);
|
||||
}
|
||||
{ "id", show.GetAttribute("data-id") },
|
||||
{ "show_mode", "torrents" }
|
||||
};
|
||||
|
||||
var qLink = row.QuerySelector("td:nth-of-type(5) a");
|
||||
release.Link = new Uri(SiteLink + qLink.GetAttribute("href"));
|
||||
release.Guid = release.Link;
|
||||
var qLinkComm = row.QuerySelector("td:nth-of-type(5) a.internal");
|
||||
release.Details = new Uri(SiteLink + qLinkComm.GetAttribute("href"));
|
||||
var dateString = row.QuerySelector(".datetime")?.GetAttribute("data-timestamp");
|
||||
if (dateString != null)
|
||||
release.PublishDate = DateTimeUtil.UnixTimestampToDateTime(ParseUtil.CoerceDouble(dateString));
|
||||
var infoString = row.QuerySelector("td:nth-of-type(4)").TextContent;
|
||||
release.Size = ParseUtil.CoerceLong(
|
||||
Regex.Match(infoString, "\\((\\d+)\\)").Value.Replace("(", "").Replace(")", ""));
|
||||
var infosplit = infoString.Replace("/", string.Empty).Split(":".ToCharArray());
|
||||
release.Seeders = ParseUtil.CoerceInt(infosplit[1]);
|
||||
release.Peers = release.Seeders + ParseUtil.CoerceInt(infosplit[2]);
|
||||
release.DownloadVolumeFactor = globalFreeleech ? 0 : 1;
|
||||
release.UploadVolumeFactor = 1;
|
||||
release.MinimumRatio = 1;
|
||||
release.MinimumSeedTime = 172800; // 48 hours
|
||||
|
||||
// var tags = row.QuerySelector(".label-tag").TextContent; These don't see to parse - bad tags?
|
||||
releases.Add(release);
|
||||
searchUrls.Add(new WebRequest
|
||||
{
|
||||
Url = $"{ShowUrl}?{showTorrentsQueryParams.GetQueryString()}",
|
||||
Type = RequestType.POST,
|
||||
PostData = showTorrentsBody,
|
||||
Headers = showTorrentsHeaders
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
else
|
||||
searchUrls.Add(new WebRequest { Url = TorrentsUrl, Type = RequestType.GET });
|
||||
|
||||
foreach (var searchUrl in searchUrls)
|
||||
{
|
||||
OnParseError(results.ContentString, ex);
|
||||
response = await RequestWithCookiesAsync(url: searchUrl.Url, method: searchUrl.Type, data: searchUrl.PostData, headers: searchUrl.Headers);
|
||||
response = await ReloginIfNecessaryAsync(response);
|
||||
|
||||
try
|
||||
{
|
||||
releases.AddRange(ParseResults(response, query, searchTerm, hasGlobalFreeleech));
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
OnParseError(response.ContentString, ex);
|
||||
}
|
||||
}
|
||||
foreach (var release in releases)
|
||||
release.Category = release.Title.Contains("1080p") || release.Title.Contains("720p")
|
||||
? new List<int> { TorznabCatType.TVHD.ID }
|
||||
: new List<int> { TorznabCatType.TVSD.ID };
|
||||
|
||||
return releases;
|
||||
}
|
||||
|
||||
private async Task<WebResult> ReloginIfNecessary(WebResult response)
|
||||
// Extracts size, seeders and leechers from the info cell text, e.g. "(123456):10 / :2".
// Kept in a static field so the pattern is built once instead of once per table row.
private static readonly Regex TorrentInfoRegex = new Regex(@"\((?<size>\d+)\):(?<seeders>\d+) \/ :(?<leechers>\d+)$", RegexOptions.Compiled);

/// <summary>
/// Parses the torrent rows ("tr.eprow") of a listing page into releases.
/// </summary>
/// <param name="response">Web response whose ContentString holds the HTML to parse.</param>
/// <param name="query">Query used to filter rows by title.</param>
/// <param name="searchTerm">Search term handed to the title match as its query string override.</param>
/// <param name="hasGlobalFreeleech">
/// True when freeleech was already detected by the caller; otherwise the page itself is
/// checked for the "Freeleech until:" banner.
/// </param>
/// <returns>One release per row that passes the title filter and has a download link.</returns>
private IList<ReleaseInfo> ParseResults(WebResult response, TorznabQuery query, string searchTerm, bool hasGlobalFreeleech = false)
{
    var releases = new List<ReleaseInfo>();

    var parser = new HtmlParser();
    var dom = parser.ParseDocument(response.ContentString);

    if (!hasGlobalFreeleech)
        hasGlobalFreeleech = dom.QuerySelector("span:contains(\"Freeleech until:\"):has(span.datetime)") != null;

    // Rows without a usable timestamp get a date one minute older than the previous row,
    // starting from the current time.
    var publishDate = DateTime.Now;

    var rows = dom.QuerySelectorAll("#torrent-table tr.eprow, table tr.eprow");
    foreach (var row in rows)
    {
        var title = ParseTitle(row.QuerySelector("td:nth-of-type(3)"));

        if ((query.ImdbID == null || !TorznabCaps.MovieSearchImdbAvailable) && !query.MatchQueryStringAND(title, queryStringOverride: searchTerm))
            continue;

        // Without a download anchor the link would collapse to the bare site URL and every such
        // row would share the same Guid, so these rows are skipped.
        var downloadHref = row.QuerySelector("td:nth-of-type(5) a[href^=\"load_torrent?\"]")?.GetAttribute("href");
        if (string.IsNullOrWhiteSpace(downloadHref))
            continue;

        var link = new Uri(SiteLink + downloadHref);
        var details = new Uri(SiteLink + row.QuerySelector("td:nth-of-type(5) [href^=\"torrent_info?\"]")?.GetAttribute("href"));

        // Size, seeders and leechers default to 0 when the info cell does not match the pattern.
        var infoString = row.QuerySelector("td:nth-of-type(4)")?.TextContent.Trim() ?? string.Empty;
        var matchInfo = TorrentInfoRegex.Match(infoString);
        var size = matchInfo.Groups["size"].Success && long.TryParse(matchInfo.Groups["size"].Value, out var outSize) ? outSize : 0;
        var seeders = matchInfo.Groups["seeders"].Success && int.TryParse(matchInfo.Groups["seeders"].Value, out var outSeeders) ? outSeeders : 0;
        var leechers = matchInfo.Groups["leechers"].Success && int.TryParse(matchInfo.Groups["leechers"].Value, out var outLeechers) ? outLeechers : 0;

        var dateTimestamp = row.QuerySelector(".datetime[data-timestamp]")?.GetAttribute("data-timestamp");
        publishDate = dateTimestamp != null && ParseUtil.TryCoerceDouble(dateTimestamp, out var timestamp) ? DateTimeUtil.UnixTimestampToDateTime(timestamp) : publishDate.AddMinutes(-1);

        var release = new ReleaseInfo
        {
            Guid = link,
            Link = link,
            Details = details,
            Title = title,
            Category = ParseCategories(title),
            Size = size,
            Seeders = seeders,
            Peers = seeders + leechers,
            PublishDate = publishDate,
            Genres = row.QuerySelectorAll("label.label-tag").Select(t => t.TextContent.Trim()).ToList(),
            DownloadVolumeFactor = hasGlobalFreeleech ? 0 : 1,
            UploadVolumeFactor = 1,
            MinimumRatio = 1,
            MinimumSeedTime = 172800 // 48 hours
        };

        // The poster path is taken from the inline background-image style, when present.
        var posterStyle = row.QuerySelector("div[style^=\"cursor: pointer; background-image:url\"]")?.GetAttribute("style");
        if (!string.IsNullOrEmpty(posterStyle))
        {
            var posterStr = Regex.Match(posterStyle, @"url\('(?<poster>.*)'\);").Groups["poster"].Value;
            release.Poster = new Uri(SiteLink + posterStr);
        }

        releases.Add(release);
    }

    return releases;
}
|
||||
|
||||
/// <summary>
/// Returns the trimmed text of the first non-blank text node directly under the title cell,
/// or null when no cell was supplied.
/// </summary>
/// <param name="titleRow">Title cell of a torrent row; may be null.</param>
private static string ParseTitle(IElement titleRow)
{
    if (titleRow == null)
        return null;

    // Only direct text nodes count; nested elements (labels, links) are ignored.
    bool IsNonBlankText(INode node) => node.NodeType == NodeType.Text && node.TextContent.Trim() != string.Empty;

    var titleNode = titleRow.ChildNodes.First(IsNonBlankText);

    return titleNode.TextContent.Trim();
}
|
||||
|
||||
/// <summary>
/// Normalizes a search term before it is sent to the site's search form.
/// </summary>
/// <param name="term">Raw search term; may be null or blank.</param>
/// <returns>
/// The lower-cased term with stand-alone season/episode tokens and the year/air date removed
/// and punctuation collapsed to single spaces; an empty string for null or blank input.
/// </returns>
private static string FixSearchTerm(string term)
{
    // Nothing to normalize; this also keeps Regex.Replace from throwing on null input.
    if (string.IsNullOrWhiteSpace(term))
        return string.Empty;

    // Drop stand-alone season/episode tokens such as "S01" or "E05".
    // A '|' inside a character class is a literal, so "[S|E]" also matched the pipe character.
    term = Regex.Replace(term, @"\b[SE]\d+\b", string.Empty, RegexOptions.IgnoreCase);

    // Drop the last four-digit year ("2014") or air date ("2014.05.21") but keep the text in
    // front of it; replacing the whole match with nothing would also erase the show name.
    term = Regex.Replace(term, @"(.+)\b\d{4}(?:\.\d{2}\.\d{2})?\b", "$1");

    // Collapse dots, whitespace, parentheses and square brackets into single spaces.
    term = Regex.Replace(term, @"[\.\s\(\)\[\]]+", " ");

    // Invariant casing so the result does not depend on the host culture.
    return term.ToLowerInvariant().Trim();
}
|
||||
|
||||
/// <summary>
/// Maps a release title to a single category: TV HD when the title contains
/// "1080p", "1080i" or "720p", TV SD otherwise.
/// </summary>
/// <param name="title">Release title to inspect.</param>
protected virtual List<int> ParseCategories(string title)
{
    var isHighDefinition = title.Contains("1080p") || title.Contains("1080i") || title.Contains("720p");

    if (isHighDefinition)
        return new List<int> { TorznabCatType.TVHD.ID };

    return new List<int> { TorznabCatType.TVSD.ID };
}
|
||||
|
||||
private async Task<WebResult> ReloginIfNecessaryAsync(WebResult response)
|
||||
{
|
||||
if (response.ContentString.Contains("onclick=\"document.location='logout'\"") ||
|
||||
response.ContentString.Contains("show_id") || response.ContentString.Contains("Filename") ||
|
||||
response.ContentString.Contains("Peers") || response.ContentString.Contains("Download"))
|
||||
return response;
|
||||
|
||||
logger.Warn("Session expired. Relogin.");
|
||||
|
||||
await ApplyConfiguration(null);
|
||||
response.Request.Cookies = CookieHeader;
|
||||
return await webclient.GetResultAsync(response.Request);
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using Newtonsoft.Json;
|
||||
|
||||
namespace Jackett.Common.Models.IndexerConfig.Bespoke
{
    /// <summary>
    /// Shazbat indexer configuration: the basic login with RSS settings plus a selectable
    /// limit on how many show pages are fetched per search, and a warning shown next to it.
    /// </summary>
    [ExcludeFromCodeCoverage]
    internal class ConfigurationDataShazbat : ConfigurationDataBasicLoginWithRSS
    {
        /// <summary>Single-select item offering "1" through "5"; initial value "2".</summary>
        public SingleSelectConfigurationItem ShowPagesFetchLimit { get; private set; }

        /// <summary>Informational text warning about higher fetch limits.</summary>
        public DisplayInfoConfigurationItem ShowPagesFetchLimitInstructions { get; private set; }

        public ConfigurationDataShazbat()
        {
            // Option key and display text are the same for every choice.
            var fetchLimitChoices = new Dictionary<string, string>
            {
                ["1"] = "1",
                ["2"] = "2",
                ["3"] = "3",
                ["4"] = "4",
                ["5"] = "5"
            };

            ShowPagesFetchLimit = new SingleSelectConfigurationItem("Show Pages Fetch Limit (sub-requests when searching)", fetchLimitChoices);
            ShowPagesFetchLimit.Value = "2";

            ShowPagesFetchLimitInstructions = new DisplayInfoConfigurationItem(
                "Show Pages Fetch Limit Warning",
                "Higher values may risk your account being flagged for bot activity when used with automation software such as Sonarr.");
        }
    }
}
|
|
@ -3,7 +3,7 @@ using System.Diagnostics.CodeAnalysis;
|
|||
namespace Jackett.Common.Models.IndexerConfig.Bespoke
|
||||
{
|
||||
[ExcludeFromCodeCoverage]
|
||||
public class ConfigurationDataSpeedCD : ConfigurationDataBasicLogin
|
||||
internal class ConfigurationDataSpeedCD : ConfigurationDataBasicLogin
|
||||
{
|
||||
public BoolConfigurationItem Freeleech { get; set; }
|
||||
public BoolConfigurationItem ExcludeArchives { get; set; }
|
||||
|
|
Loading…
Reference in a new issue