1
0
Fork 0
mirror of https://github.com/Jackett/Jackett synced 2025-02-23 23:00:51 +00:00

Bj share: improve search results (#3126)

* Improved anime search and speed-share resolution detection

* - Code Refactored to new standards
- Removed publish date from search mode, since the tracker does not provide that information; it was based on the series year (the tracker does provide it on the last-24h page, where it is still present)
- Code cleanup
- Added season to all anime except One Piece (every anime that I searched for on this tracker has the correct season and episode numbering, except One Piece, which has an incorrect season set and an episode number in absolute format; it is added automatically on every new release, so the source from which they get that info must be wrong; since it's a popular show, added this as a workaround and explained it in a code comment)
This commit is contained in:
DarkSupremo 2018-05-20 16:07:00 -03:00 committed by kaso17
parent f67fda3bf4
commit 6293c787e7

View file

@ -20,19 +20,23 @@ namespace Jackett.Common.Indexers
{ {
public class BJShare : BaseWebIndexer public class BJShare : BaseWebIndexer
{ {
private string LoginUrl { get { return SiteLink + "login.php"; } } private string LoginUrl => SiteLink + "login.php";
private string BrowseUrl { get { return SiteLink + "torrents.php"; } } private string BrowseUrl => SiteLink + "torrents.php";
private string TodayUrl { get { return SiteLink + "torrents.php?action=today"; } } private string TodayUrl => SiteLink + "torrents.php?action=today";
private char[] digits = new[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }; private readonly char[] _digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
private readonly Dictionary<string, string> _commonSearchTerms = new Dictionary<string, string>
private new ConfigurationDataBasicLoginWithRSSAndDisplay configData
{ {
get { return (ConfigurationDataBasicLoginWithRSSAndDisplay)base.configData; } { "agents of shield", "Agents of S.H.I.E.L.D."}
set { base.configData = value; } };
private ConfigurationDataBasicLoginWithRSSAndDisplay ConfigData
{
get => (ConfigurationDataBasicLoginWithRSSAndDisplay)configData;
set => configData = value;
} }
public BJShare(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps) public BJShare(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps)
: base(name: "BJ-Share", : base("BJ-Share",
description: "A brazilian tracker.", description: "A brazilian tracker.",
link: "https://bj-share.me/", link: "https://bj-share.me/",
caps: TorznabUtil.CreateDefaultTorznabTVCaps(), caps: TorznabUtil.CreateDefaultTorznabTVCaps(),
@ -76,8 +80,8 @@ namespace Jackett.Common.Indexers
var pairs = new Dictionary<string, string> var pairs = new Dictionary<string, string>
{ {
{ "username", configData.Username.Value }, { "username", ConfigData.Username.Value },
{ "password", configData.Password.Value }, { "password", ConfigData.Password.Value },
{ "keeplogged", "1" } { "keeplogged", "1" }
}; };
@ -85,7 +89,7 @@ namespace Jackett.Common.Indexers
await ConfigureIfOK(result.Cookies, result.Content != null && result.Content.Contains("logout.php"), () => await ConfigureIfOK(result.Cookies, result.Content != null && result.Content.Contains("logout.php"), () =>
{ {
var errorMessage = result.Content; var errorMessage = result.Content;
throw new ExceptionWithConfigData(errorMessage, configData); throw new ExceptionWithConfigData(errorMessage, ConfigData);
}); });
return IndexerConfigurationStatus.RequiresTesting; return IndexerConfigurationStatus.RequiresTesting;
} }
@ -94,13 +98,24 @@ namespace Jackett.Common.Indexers
{ {
// Search does not support searching with episode numbers so strip it if we have one // Search does not support searching with episode numbers so strip it if we have one
// Ww AND filter the result later to archive the proper result // Ww AND filter the result later to archive the proper result
if (isAnime) return isAnime ? term.TrimEnd(_digits) : Regex.Replace(term, @"[S|E]\d\d", string.Empty).Trim();
{ }
return term.TrimEnd(digits);
}
var ret = Regex.Replace(term, @"[S|E]\d\d", string.Empty).Trim(); private static string FixAbsoluteNumbering(string title)
return ret.Replace("Agents of SHIELD", "Agents of S.H.I.E.L.D."); {
// if result is One piece, convert title from SXXEXX to EXX
// One piece is the only anime that i'm aware that is in "absolute" numbering, the problem is that they include
// the season (wrong season) and episode as absolute, eg: One Piece - S08E836
// 836 is the latest episode in absolute numbering, that is correct, but S08 is not the current season...
// So for this show, i don't see a other way to make it work...
//
// All others animes that i tested is with correct season and episode set, so i can't remove the season from all
// or will break everything else
//
// In this indexer, it looks that it is added "automatically", so all current and new releases will be broken
// until they or the source from where they get that info fix it...
return title.Contains("One Piece") ? Regex.Replace(title, @"(Ep[\.]?[ ]?)|([S]\d\d[Ee])", "E") : title;
} }
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query) protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
@ -113,51 +128,53 @@ namespace Jackett.Common.Indexers
var results = await RequestStringWithCookies(TodayUrl); var results = await RequestStringWithCookies(TodayUrl);
try try
{ {
string RowsSelector = "table.torrent_table > tbody > tr:not(tr.colhead)"; const string rowsSelector = "table.torrent_table > tbody > tr:not(tr.colhead)";
var SearchResultParser = new HtmlParser(); var searchResultParser = new HtmlParser();
var SearchResultDocument = SearchResultParser.Parse(results.Content); var searchResultDocument = searchResultParser.Parse(results.Content);
var Rows = SearchResultDocument.QuerySelectorAll(RowsSelector); var rows = searchResultDocument.QuerySelectorAll(rowsSelector);
foreach (var Row in Rows) foreach (var row in rows)
{ {
try try
{ {
var release = new ReleaseInfo(); var release = new ReleaseInfo
{
MinimumRatio = 1,
MinimumSeedTime = 0
};
release.MinimumRatio = 1;
release.MinimumSeedTime = 0;
var qDetailsLink = Row.QuerySelector("a.BJinfoBox");
var qTitle = qDetailsLink.QuerySelector("font");
release.Title = qTitle.TextContent;
var qDetailsLink = row.QuerySelector("a.BJinfoBox");
var qBJinfoBox = qDetailsLink.QuerySelector("span"); var qBJinfoBox = qDetailsLink.QuerySelector("span");
var qCatLink = Row.QuerySelector("a[href^=\"/torrents.php?filter_cat\"]"); var qCatLink = row.QuerySelector("a[href^=\"/torrents.php?filter_cat\"]");
var qDLLink = Row.QuerySelector("a[href^=\"torrents.php?action=download\"]"); var qDlLink = row.QuerySelector("a[href^=\"torrents.php?action=download\"]");
var qSeeders = Row.QuerySelector("td:nth-child(4)"); var qSeeders = row.QuerySelector("td:nth-child(4)");
var qLeechers = Row.QuerySelector("td:nth-child(5)"); var qLeechers = row.QuerySelector("td:nth-child(5)");
var qQuality = Row.QuerySelector("font[color=\"red\"]"); var qQuality = row.QuerySelector("font[color=\"red\"]");
var qFreeLeech = Row.QuerySelector("font[color=\"green\"]:contains(Free)"); var qFreeLeech = row.QuerySelector("font[color=\"green\"]:contains(Free)");
var qTitle = qDetailsLink.QuerySelector("font");
// Get international title if available, or use the full title if not
release.Title = Regex.Replace(qTitle.TextContent, @".* \[(.*?)\](.*)", "$1$2");
release.Description = ""; release.Description = "";
foreach (var Child in qBJinfoBox.ChildNodes) foreach (var child in qBJinfoBox.ChildNodes)
{ {
var type = Child.NodeType; var type = child.NodeType;
if (type != NodeType.Text) if (type != NodeType.Text)
continue; continue;
var line = Child.TextContent; var line = child.TextContent;
if (line.StartsWith("Tamanho:")) if (line.StartsWith("Tamanho:"))
{ {
string Size = line.Substring("Tamanho: ".Length); ; var size = line.Substring("Tamanho: ".Length); ;
release.Size = ReleaseInfo.GetBytes(Size); release.Size = ReleaseInfo.GetBytes(size);
} }
else if (line.StartsWith("Lançado em: ")) else if (line.StartsWith("Lançado em: "))
{ {
string PublishDateStr = line.Substring("Lançado em: ".Length).Replace("às ", ""); var publishDateStr = line.Substring("Lançado em: ".Length).Replace("às ", "");
PublishDateStr += " +0"; publishDateStr += " +0";
var PublishDate = DateTime.SpecifyKind(DateTime.ParseExact(PublishDateStr, "dd/MM/yyyy HH:mm z", CultureInfo.InvariantCulture), DateTimeKind.Unspecified); var publishDate = DateTime.SpecifyKind(DateTime.ParseExact(publishDateStr, "dd/MM/yyyy HH:mm z", CultureInfo.InvariantCulture), DateTimeKind.Unspecified);
release.PublishDate = PublishDate.ToLocalTime(); release.PublishDate = publishDate.ToLocalTime();
} }
else else
{ {
@ -166,39 +183,36 @@ namespace Jackett.Common.Indexers
} }
var catStr = qCatLink.GetAttribute("href").Split('=')[1]; var catStr = qCatLink.GetAttribute("href").Split('=')[1];
// if result is an anime, convert title from SXXEXX to EXX release.Title = FixAbsoluteNumbering(release.Title);
if (catStr == "14")
var quality = qQuality.TextContent;
switch (quality)
{ {
release.Title = Regex.Replace(release.Title, @"(Ep[\.]?[ ]?)|([S]\d\d[Ee])", "E"); case "Full HD":
release.Title += " 1080p";
break;
case "HD":
release.Title += " 720p";
break;
default:
release.Title += " 480p";
break;
} }
var Quality = qQuality.TextContent;
if (Quality == "Full HD")
release.Title += " 1080p";
else if(Quality == "HD")
release.Title += " 720p";
release.Category = MapTrackerCatToNewznab(catStr); release.Category = MapTrackerCatToNewznab(catStr);
release.Link = new Uri(SiteLink + qDlLink.GetAttribute("href"));
release.Link = new Uri(SiteLink + qDLLink.GetAttribute("href"));
release.Comments = new Uri(SiteLink + qDetailsLink.GetAttribute("href")); release.Comments = new Uri(SiteLink + qDetailsLink.GetAttribute("href"));
release.Guid = release.Link; release.Guid = release.Link;
release.Seeders = ParseUtil.CoerceInt(qSeeders.TextContent); release.Seeders = ParseUtil.CoerceInt(qSeeders.TextContent);
release.Peers = ParseUtil.CoerceInt(qLeechers.TextContent) + release.Seeders; release.Peers = ParseUtil.CoerceInt(qLeechers.TextContent) + release.Seeders;
release.DownloadVolumeFactor = qFreeLeech != null ? 0 : 1;
if (qFreeLeech != null)
release.DownloadVolumeFactor = 0;
else
release.DownloadVolumeFactor = 1;
release.UploadVolumeFactor = 1; release.UploadVolumeFactor = 1;
releases.Add(release); releases.Add(release);
} }
catch (Exception ex) catch (Exception ex)
{ {
logger.Error(string.Format("{0}: Error while parsing row '{1}': {2}", ID, Row.OuterHtml, ex.Message)); logger.Error($"{ID}: Error while parsing row '{row.OuterHtml}': {ex.Message}");
} }
} }
} }
@ -212,9 +226,12 @@ namespace Jackett.Common.Indexers
var searchUrl = BrowseUrl; var searchUrl = BrowseUrl;
var isSearchAnime = query.Categories.Any(s => s == TorznabCatType.TVAnime.ID); var isSearchAnime = query.Categories.Any(s => s == TorznabCatType.TVAnime.ID);
query.SearchTerm = query.SearchTerm.Replace("Agents of SHIELD", "Agents of S.H.I.E.L.D."); foreach (var searchTerm in _commonSearchTerms)
var searchString = query.GetQueryString(); {
query.SearchTerm = query.SearchTerm.ToLower().Replace(searchTerm.Key.ToLower(), searchTerm.Value);
}
var searchString = query.GetQueryString();
var queryCollection = new NameValueCollection var queryCollection = new NameValueCollection
{ {
{"searchstr", StripSearchString(searchString, isSearchAnime)}, {"searchstr", StripSearchString(searchString, isSearchAnime)},
@ -235,132 +252,115 @@ namespace Jackett.Common.Indexers
var results = await RequestStringWithCookies(searchUrl); var results = await RequestStringWithCookies(searchUrl);
try try
{ {
string RowsSelector = "table.torrent_table > tbody > tr:not(tr.colhead)"; const string rowsSelector = "table.torrent_table > tbody > tr:not(tr.colhead)";
var SearchResultParser = new HtmlParser(); var searchResultParser = new HtmlParser();
var SearchResultDocument = SearchResultParser.Parse(results.Content); var searchResultDocument = searchResultParser.Parse(results.Content);
var Rows = SearchResultDocument.QuerySelectorAll(RowsSelector); var rows = searchResultDocument.QuerySelectorAll(rowsSelector);
ICollection<int> GroupCategory = null; ICollection<int> groupCategory = null;
string GroupTitle = null; string groupTitle = null;
string GroupYearStr = null; string groupYearStr = null;
Nullable<DateTime> GroupPublishDate = null; DateTime? groupPublishDate = null;
foreach (var Row in Rows) foreach (var row in rows)
{ {
try try
{ {
var qDetailsLink = Row.QuerySelector("a[href^=\"torrents.php?id=\"]"); var qDetailsLink = row.QuerySelector("a[href^=\"torrents.php?id=\"]");
string Title = qDetailsLink.TextContent; var title = qDetailsLink.TextContent;
ICollection<int> Category = null; ICollection<int> category = null;
string YearStr = null; string yearStr = null;
Nullable<DateTime> YearPublishDate = null; var categoryStr = "";
string CategoryStr = "";
if (Row.ClassList.Contains("group") || Row.ClassList.Contains("torrent")) // group/ungrouped headers if (row.ClassList.Contains("group") || row.ClassList.Contains("torrent")) // group/ungrouped headers
{ {
var qCatLink = Row.QuerySelector("a[href^=\"/torrents.php?filter_cat\"]"); var qCatLink = row.QuerySelector("a[href^=\"/torrents.php?filter_cat\"]");
CategoryStr = qCatLink.GetAttribute("href").Split('=')[1].Split('&')[0]; categoryStr = qCatLink.GetAttribute("href").Split('=')[1].Split('&')[0];
Category = MapTrackerCatToNewznab(CategoryStr); category = MapTrackerCatToNewznab(categoryStr);
YearStr = qDetailsLink.NextSibling.TextContent.Trim().TrimStart('[').TrimEnd(']'); yearStr = qDetailsLink.NextSibling.TextContent.Trim().TrimStart('[').TrimEnd(']');
YearPublishDate = DateTime.SpecifyKind(DateTime.ParseExact(YearStr, "yyyy", CultureInfo.InvariantCulture), DateTimeKind.Unspecified);
// if result is an anime, convert title from SXXEXX to EXX title = FixAbsoluteNumbering(title);
if (CategoryStr == "14")
if (row.ClassList.Contains("group")) // group headers
{ {
Title = Regex.Replace(Title, @"(Ep[\.]?[ ]?)|([S]\d\d[Ee])", "E"); groupCategory = category;
} groupTitle = title;
groupYearStr = yearStr;
if (Row.ClassList.Contains("group")) // group headers
{
GroupCategory = Category;
GroupTitle = Title;
GroupYearStr = YearStr;
GroupPublishDate = YearPublishDate;
continue; continue;
} }
} }
var release = new ReleaseInfo(); var release = new ReleaseInfo
{
MinimumRatio = 1,
MinimumSeedTime = 0
};
release.MinimumRatio = 1; var qDlLink = row.QuerySelector("a[href^=\"torrents.php?action=download\"]");
release.MinimumSeedTime = 0; var qSize = row.QuerySelector("td:nth-last-child(4)");
var qGrabs = row.QuerySelector("td:nth-last-child(3)");
var qSeeders = row.QuerySelector("td:nth-last-child(2)");
var qLeechers = row.QuerySelector("td:nth-last-child(1)");
var qFreeLeech = row.QuerySelector("strong[title=\"Free\"]");
var qDLLink = Row.QuerySelector("a[href^=\"torrents.php?action=download\"]"); if (row.ClassList.Contains("group_torrent")) // torrents belonging to a group
var qSize = Row.QuerySelector("td:nth-last-child(4)");
var qGrabs = Row.QuerySelector("td:nth-last-child(3)");
var qSeeders = Row.QuerySelector("td:nth-last-child(2)");
var qLeechers = Row.QuerySelector("td:nth-last-child(1)");
var qFreeLeech = Row.QuerySelector("strong[title=\"Free\"]");
if (Row.ClassList.Contains("group_torrent")) // torrents belonging to a group
{ {
release.Description = qDetailsLink.TextContent; release.Description = qDetailsLink.TextContent;
string cleanTitle = Regex.Replace(GroupTitle, @" - S?(?<season>\d{1,2})?E?(?<episode>\d{1,4})?", ""); var cleanTitle = Regex.Replace(groupTitle, @" - S?(?<season>\d{1,2})?E?(?<episode>\d{1,4})?", "");
string seasonEp = Regex.Replace(GroupTitle, @"^(.*?) - (S?(\d{1,2})?E?(\d{1,4})?)?", "$2"); var seasonEp = Regex.Replace(groupTitle, @"^(.*?) - (S?(\d{1,2})?E?(\d{1,4})?)?", "$2");
release.Title = CategoryStr == "14" ? GroupTitle : cleanTitle + " " + GroupYearStr + " " + seasonEp;
release.PublishDate = GroupPublishDate.Value; release.Title = categoryStr == "14" ? groupTitle : cleanTitle + " " + groupYearStr + " " + seasonEp;
release.Category = GroupCategory; release.PublishDate = groupPublishDate.Value;
release.Category = groupCategory;
} }
else if (Row.ClassList.Contains("torrent")) // standalone/un grouped torrents else if (row.ClassList.Contains("torrent")) // standalone/un grouped torrents
{ {
var qDescription = Row.QuerySelector("div.torrent_info"); var qDescription = row.QuerySelector("div.torrent_info");
release.Description = qDescription.TextContent; release.Description = qDescription.TextContent;
string cleanTitle = Regex.Replace(Title, @" - ((S(\d{1,2}))?E(\d{1,4}))", ""); var cleanTitle = Regex.Replace(title, @" - ((S(\d{1,2}))?E(\d{1,4}))", "");
string seasonEp = Regex.Replace(Title, @"^(.*?) - ((S(\d{1,2}))?E(\d{1,4}))", "$2"); var seasonEp = Regex.Replace(title, @"^(.*?) - ((S(\d{1,2}))?E(\d{1,4}))", "$2");
release.Title = CategoryStr == "14" ? Title : cleanTitle + " " + YearStr + " " + seasonEp;
release.PublishDate = YearPublishDate.Value; release.Title = categoryStr == "14" ? title : cleanTitle + " " + yearStr + " " + seasonEp;
release.Category = Category; release.Category = category;
} }
release.Description = release.Description.Replace(" / Free", ""); // Remove Free Tag release.Description = release.Description.Replace(" / Free", ""); // Remove Free Tag
release.Description = release.Description.Replace("Full HD", "1080p"); release.Description = release.Description.Replace("Full HD", "1080p");
release.Description = release.Description.Replace("/ HD / ", "/ 720p /"); release.Description = release.Description.Replace("/ HD / ", "/ 720p /");
release.Description = release.Description.Replace(" / HD]", " / 720p]"); release.Description = release.Description.Replace(" / HD]", " / 720p]");
release.Description = release.Description.Replace("4K", "2160p"); release.Description = release.Description.Replace("4K", "2160p");
int nBarra = release.Title.IndexOf("["); // Get international title if available, or use the full title if not
if (nBarra != -1) release.Title = Regex.Replace(title, @".* \[(.*?)\](.*)", "$1$2");
{
release.Title = release.Title.Substring(nBarra + 1);
release.Title = release.Title.Replace("]", "");
}
release.Title += " " + release.Description; // add year and Description to the release Title to add some meaning to it release.Title += " " + release.Description; // add year and Description to the release Title to add some meaning to it
// This tracker does not provide an publish date to search terms (only on last 24h page)
release.PublishDate = DateTime.Today;
// check for previously stripped search terms // check for previously stripped search terms
if (!query.MatchQueryStringAND(release.Title)) if (!query.MatchQueryStringAND(release.Title))
continue; continue;
var Size = qSize.TextContent; var size = qSize.TextContent;
release.Size = ReleaseInfo.GetBytes(Size); release.Size = ReleaseInfo.GetBytes(size);
release.Link = new Uri(SiteLink + qDlLink.GetAttribute("href"));
release.Link = new Uri(SiteLink + qDLLink.GetAttribute("href"));
release.Comments = new Uri(SiteLink + qDetailsLink.GetAttribute("href")); release.Comments = new Uri(SiteLink + qDetailsLink.GetAttribute("href"));
release.Guid = release.Link; release.Guid = release.Link;
release.Grabs = ParseUtil.CoerceLong(qGrabs.TextContent); release.Grabs = ParseUtil.CoerceLong(qGrabs.TextContent);
release.Seeders = ParseUtil.CoerceInt(qSeeders.TextContent); release.Seeders = ParseUtil.CoerceInt(qSeeders.TextContent);
release.Peers = ParseUtil.CoerceInt(qLeechers.TextContent) + release.Seeders; release.Peers = ParseUtil.CoerceInt(qLeechers.TextContent) + release.Seeders;
release.DownloadVolumeFactor = qFreeLeech != null ? 0 : 1;
if (qFreeLeech != null)
release.DownloadVolumeFactor = 0;
else
release.DownloadVolumeFactor = 1;
release.UploadVolumeFactor = 1; release.UploadVolumeFactor = 1;
releases.Add(release); releases.Add(release);
} }
catch (Exception ex) catch (Exception ex)
{ {
logger.Error(string.Format("{0}: Error while parsing row '{1}': {2}", ID, Row.OuterHtml, ex.Message)); logger.Error($"{ID}: Error while parsing row '{row.OuterHtml}': {ex.Message}");
} }
} }
} }