1
0
Fork 0
mirror of https://github.com/Jackett/Jackett synced 2025-03-09 13:52:03 +00:00

pornolab: refactor parsing (#13956)

This commit is contained in:
Bogdan 2023-02-01 10:11:00 +02:00 committed by GitHub
parent 0ba4d305b0
commit 2ca375c33f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -24,7 +24,7 @@ namespace Jackett.Common.Indexers
protected string cap_sid = null; protected string cap_sid = null;
protected string cap_code_field = null; protected string cap_code_field = null;
private static readonly Regex s_StripRussianRegex = new Regex(@"(\([А-Яа-яЁё\W]+\))|(^[А-Яа-яЁё\W\d]+\/ )|([а-яА-ЯЁё \-]+,+)|([а-яА-ЯЁё]+)"); private static readonly Regex s_StripRussianRegex = new Regex(@"(\([\p{IsCyrillic}\W]+\))|(^[\p{IsCyrillic}\W\d]+\/ )|([\p{IsCyrillic} \-]+,+)|([\p{IsCyrillic}]+)");
private new ConfigurationDataPornolab configData private new ConfigurationDataPornolab configData
{ {
@ -288,36 +288,34 @@ namespace Jackett.Common.Indexers
} }
try try
{ {
var RowsSelector = "table#tor-tbl > tbody > tr"; var searchResultParser = new HtmlParser();
var searchResultDocument = searchResultParser.ParseDocument(results.ContentString);
var SearchResultParser = new HtmlParser(); var rows = searchResultDocument.QuerySelectorAll("table#tor-tbl > tbody > tr");
var SearchResultDocument = SearchResultParser.ParseDocument(results.ContentString); foreach (var row in rows)
var Rows = SearchResultDocument.QuerySelectorAll(RowsSelector);
foreach (var Row in Rows)
{ {
try try
{ {
var qDownloadLink = Row.QuerySelector("a.tr-dl"); var qDownloadLink = row.QuerySelector("a.tr-dl");
if (qDownloadLink == null) // Expects moderation if (qDownloadLink == null) // Expects moderation
continue; continue;
var qForumLink = Row.QuerySelector("a.f"); var qForumLink = row.QuerySelector("a.f");
var qDetailsLink = Row.QuerySelector("a.tLink"); var qDetailsLink = row.QuerySelector("a.tLink");
var qSize = Row.QuerySelector("td:nth-child(6) u"); var qSize = row.QuerySelector("td:nth-child(6) u");
var link = new Uri(SiteLink + "forum/" + qDetailsLink.GetAttribute("href")); var link = new Uri(SiteLink + "forum/" + qDetailsLink.GetAttribute("href"));
var seederString = Row.QuerySelector("td:nth-child(7) b").TextContent; var seederString = row.QuerySelector("td:nth-child(7) b").TextContent;
var seeders = string.IsNullOrWhiteSpace(seederString) ? 0 : ParseUtil.CoerceInt(seederString); var seeders = string.IsNullOrWhiteSpace(seederString) ? 0 : ParseUtil.CoerceInt(seederString);
var timestr = Row.QuerySelector("td:nth-child(11) u").TextContent; var forumid = ParseUtil.GetArgumentFromQueryString(qForumLink?.GetAttribute("href"), "f");
var forum = qForumLink;
var forumid = forum.GetAttribute("href").Split('=')[1];
var title = configData.StripRussianLetters.Value var title = configData.StripRussianLetters.Value
? s_StripRussianRegex.Replace(qDetailsLink.TextContent, "") ? s_StripRussianRegex.Replace(qDetailsLink.TextContent, "")
: qDetailsLink.TextContent; : qDetailsLink.TextContent;
var size = ReleaseInfo.GetBytes(qSize.TextContent); var size = ReleaseInfo.GetBytes(qSize.TextContent);
var leechers = ParseUtil.CoerceInt(Row.QuerySelector("td:nth-child(8)").TextContent); var leechers = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(8)").TextContent);
var grabs = ParseUtil.CoerceLong(Row.QuerySelector("td:nth-child(9)").TextContent); var grabs = ParseUtil.CoerceLong(row.QuerySelector("td:nth-child(9)").TextContent);
var publishDate = DateTimeUtil.UnixTimestampToDateTime(long.Parse(timestr)); var publishDate = DateTimeUtil.UnixTimestampToDateTime(long.Parse(row.QuerySelector("td:nth-child(11) u").TextContent));
var release = new ReleaseInfo var release = new ReleaseInfo
{ {
MinimumRatio = 1, MinimumRatio = 1,
@ -340,7 +338,7 @@ namespace Jackett.Common.Indexers
} }
catch (Exception ex) catch (Exception ex)
{ {
logger.Error(string.Format("{0}: Error while parsing row '{1}':\n\n{2}", Id, Row.OuterHtml, ex)); logger.Error($"{Id}: Error while parsing row '{row.OuterHtml}':\n\n{ex}");
} }
} }
} }