Bj share: improve search results (#3126)

* Improved anime search and speed-share resolution detection

* - Code Refactored to new standards
- Removed the publish date from search mode: the tracker does not provide that information there, so it was being derived from the series year (the tracker does provide it on the last-24h page, where it is still present)
- Code clean
- Added the season to all anime except One Piece. Every anime I searched for on this tracker has the correct season and episode numbering, except One Piece, which has an incorrect season set and an episode number in absolute format. This numbering is added automatically on every new release, so the source they get that info from must be wrong. Since it is a popular show, it is handled with a workaround, which is explained in a code comment.
This commit is contained in:
DarkSupremo 2018-05-20 16:07:00 -03:00 committed by kaso17
parent f67fda3bf4
commit 6293c787e7
1 changed files with 144 additions and 144 deletions

View File

@ -20,19 +20,23 @@ namespace Jackett.Common.Indexers
{
public class BJShare : BaseWebIndexer
{
private string LoginUrl { get { return SiteLink + "login.php"; } }
private string BrowseUrl { get { return SiteLink + "torrents.php"; } }
private string TodayUrl { get { return SiteLink + "torrents.php?action=today"; } }
private char[] digits = new[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
private new ConfigurationDataBasicLoginWithRSSAndDisplay configData
private string LoginUrl => SiteLink + "login.php";
private string BrowseUrl => SiteLink + "torrents.php";
private string TodayUrl => SiteLink + "torrents.php?action=today";
private readonly char[] _digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
private readonly Dictionary<string, string> _commonSearchTerms = new Dictionary<string, string>
{
get { return (ConfigurationDataBasicLoginWithRSSAndDisplay)base.configData; }
set { base.configData = value; }
{ "agents of shield", "Agents of S.H.I.E.L.D."}
};
private ConfigurationDataBasicLoginWithRSSAndDisplay ConfigData
{
get => (ConfigurationDataBasicLoginWithRSSAndDisplay)configData;
set => configData = value;
}
public BJShare(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps)
: base(name: "BJ-Share",
: base("BJ-Share",
description: "A brazilian tracker.",
link: "https://bj-share.me/",
caps: TorznabUtil.CreateDefaultTorznabTVCaps(),
@ -76,8 +80,8 @@ namespace Jackett.Common.Indexers
var pairs = new Dictionary<string, string>
{
{ "username", configData.Username.Value },
{ "password", configData.Password.Value },
{ "username", ConfigData.Username.Value },
{ "password", ConfigData.Password.Value },
{ "keeplogged", "1" }
};
@ -85,7 +89,7 @@ namespace Jackett.Common.Indexers
await ConfigureIfOK(result.Cookies, result.Content != null && result.Content.Contains("logout.php"), () =>
{
var errorMessage = result.Content;
throw new ExceptionWithConfigData(errorMessage, configData);
throw new ExceptionWithConfigData(errorMessage, ConfigData);
});
return IndexerConfigurationStatus.RequiresTesting;
}
@ -94,13 +98,24 @@ namespace Jackett.Common.Indexers
{
// Search does not support searching with episode numbers so strip it if we have one
// We AND-filter the results later to achieve the proper result
if (isAnime)
{
return term.TrimEnd(digits);
}
var ret = Regex.Replace(term, @"[S|E]\d\d", string.Empty).Trim();
return ret.Replace("Agents of SHIELD", "Agents of S.H.I.E.L.D.");
return isAnime ? term.TrimEnd(_digits) : Regex.Replace(term, @"[S|E]\d\d", string.Empty).Trim();
}
/// <summary>
/// Rewrites the episode marker of "One Piece" titles so that only the absolute episode number remains.
/// </summary>
/// <remarks>
/// Per the original author's notes: One Piece is the only anime known to be listed on this tracker with
/// absolute episode numbers combined with a (wrong) season, e.g. "One Piece - S08E836". 836 is the correct
/// absolute episode, but S08 is not the current season. The other anime that were checked carry a correct
/// season and episode, so the season cannot be stripped from every title without breaking them. The tracker
/// appears to add this numbering automatically, so releases stay affected until it is fixed at the source.
/// </remarks>
/// <param name="title">Release title as parsed from the tracker.</param>
/// <returns>
/// When <paramref name="title"/> contains "One Piece": the title with every "Ep", "Ep.", "Ep " or "SxxE"
/// prefix that directly precedes a digit replaced by "E". Otherwise the title unchanged.
/// </returns>
private static string FixAbsoluteNumbering(string title)
{
    // The marker must be directly followed by a digit. Without that lookahead the "Ep" alternative also
    // matches the start of ordinary words (e.g. "Episode" would be turned into "Eisode").
    const string episodeMarkerPattern = @"(Ep\.? ?|S\d\d[Ee])(?=\d)";

    if (!title.Contains("One Piece"))
    {
        return title;
    }

    return Regex.Replace(title, episodeMarkerPattern, "E");
}
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
@ -113,51 +128,53 @@ namespace Jackett.Common.Indexers
var results = await RequestStringWithCookies(TodayUrl);
try
{
string RowsSelector = "table.torrent_table > tbody > tr:not(tr.colhead)";
const string rowsSelector = "table.torrent_table > tbody > tr:not(tr.colhead)";
var SearchResultParser = new HtmlParser();
var SearchResultDocument = SearchResultParser.Parse(results.Content);
var Rows = SearchResultDocument.QuerySelectorAll(RowsSelector);
foreach (var Row in Rows)
var searchResultParser = new HtmlParser();
var searchResultDocument = searchResultParser.Parse(results.Content);
var rows = searchResultDocument.QuerySelectorAll(rowsSelector);
foreach (var row in rows)
{
try
{
var release = new ReleaseInfo();
var release = new ReleaseInfo
{
MinimumRatio = 1,
MinimumSeedTime = 0
};
release.MinimumRatio = 1;
release.MinimumSeedTime = 0;
var qDetailsLink = Row.QuerySelector("a.BJinfoBox");
var qTitle = qDetailsLink.QuerySelector("font");
release.Title = qTitle.TextContent;
var qDetailsLink = row.QuerySelector("a.BJinfoBox");
var qBJinfoBox = qDetailsLink.QuerySelector("span");
var qCatLink = Row.QuerySelector("a[href^=\"/torrents.php?filter_cat\"]");
var qDLLink = Row.QuerySelector("a[href^=\"torrents.php?action=download\"]");
var qSeeders = Row.QuerySelector("td:nth-child(4)");
var qLeechers = Row.QuerySelector("td:nth-child(5)");
var qQuality = Row.QuerySelector("font[color=\"red\"]");
var qFreeLeech = Row.QuerySelector("font[color=\"green\"]:contains(Free)");
var qCatLink = row.QuerySelector("a[href^=\"/torrents.php?filter_cat\"]");
var qDlLink = row.QuerySelector("a[href^=\"torrents.php?action=download\"]");
var qSeeders = row.QuerySelector("td:nth-child(4)");
var qLeechers = row.QuerySelector("td:nth-child(5)");
var qQuality = row.QuerySelector("font[color=\"red\"]");
var qFreeLeech = row.QuerySelector("font[color=\"green\"]:contains(Free)");
var qTitle = qDetailsLink.QuerySelector("font");
// Get international title if available, or use the full title if not
release.Title = Regex.Replace(qTitle.TextContent, @".* \[(.*?)\](.*)", "$1$2");
release.Description = "";
foreach (var Child in qBJinfoBox.ChildNodes)
foreach (var child in qBJinfoBox.ChildNodes)
{
var type = Child.NodeType;
var type = child.NodeType;
if (type != NodeType.Text)
continue;
var line = Child.TextContent;
var line = child.TextContent;
if (line.StartsWith("Tamanho:"))
{
string Size = line.Substring("Tamanho: ".Length); ;
release.Size = ReleaseInfo.GetBytes(Size);
var size = line.Substring("Tamanho: ".Length); ;
release.Size = ReleaseInfo.GetBytes(size);
}
else if (line.StartsWith("Lançado em: "))
{
string PublishDateStr = line.Substring("Lançado em: ".Length).Replace("às ", "");
PublishDateStr += " +0";
var PublishDate = DateTime.SpecifyKind(DateTime.ParseExact(PublishDateStr, "dd/MM/yyyy HH:mm z", CultureInfo.InvariantCulture), DateTimeKind.Unspecified);
release.PublishDate = PublishDate.ToLocalTime();
var publishDateStr = line.Substring("Lançado em: ".Length).Replace("às ", "");
publishDateStr += " +0";
var publishDate = DateTime.SpecifyKind(DateTime.ParseExact(publishDateStr, "dd/MM/yyyy HH:mm z", CultureInfo.InvariantCulture), DateTimeKind.Unspecified);
release.PublishDate = publishDate.ToLocalTime();
}
else
{
@ -166,39 +183,36 @@ namespace Jackett.Common.Indexers
}
var catStr = qCatLink.GetAttribute("href").Split('=')[1];
// if result is an anime, convert title from SXXEXX to EXX
if (catStr == "14")
release.Title = FixAbsoluteNumbering(release.Title);
var quality = qQuality.TextContent;
switch (quality)
{
release.Title = Regex.Replace(release.Title, @"(Ep[\.]?[ ]?)|([S]\d\d[Ee])", "E");
case "Full HD":
release.Title += " 1080p";
break;
case "HD":
release.Title += " 720p";
break;
default:
release.Title += " 480p";
break;
}
var Quality = qQuality.TextContent;
if (Quality == "Full HD")
release.Title += " 1080p";
else if(Quality == "HD")
release.Title += " 720p";
release.Category = MapTrackerCatToNewznab(catStr);
release.Link = new Uri(SiteLink + qDLLink.GetAttribute("href"));
release.Link = new Uri(SiteLink + qDlLink.GetAttribute("href"));
release.Comments = new Uri(SiteLink + qDetailsLink.GetAttribute("href"));
release.Guid = release.Link;
release.Seeders = ParseUtil.CoerceInt(qSeeders.TextContent);
release.Peers = ParseUtil.CoerceInt(qLeechers.TextContent) + release.Seeders;
if (qFreeLeech != null)
release.DownloadVolumeFactor = 0;
else
release.DownloadVolumeFactor = 1;
release.DownloadVolumeFactor = qFreeLeech != null ? 0 : 1;
release.UploadVolumeFactor = 1;
releases.Add(release);
}
catch (Exception ex)
{
logger.Error(string.Format("{0}: Error while parsing row '{1}': {2}", ID, Row.OuterHtml, ex.Message));
logger.Error($"{ID}: Error while parsing row '{row.OuterHtml}': {ex.Message}");
}
}
}
@ -211,10 +225,13 @@ namespace Jackett.Common.Indexers
{
var searchUrl = BrowseUrl;
var isSearchAnime = query.Categories.Any(s => s == TorznabCatType.TVAnime.ID);
query.SearchTerm = query.SearchTerm.Replace("Agents of SHIELD", "Agents of S.H.I.E.L.D.");
var searchString = query.GetQueryString();
foreach (var searchTerm in _commonSearchTerms)
{
query.SearchTerm = query.SearchTerm.ToLower().Replace(searchTerm.Key.ToLower(), searchTerm.Value);
}
var searchString = query.GetQueryString();
var queryCollection = new NameValueCollection
{
{"searchstr", StripSearchString(searchString, isSearchAnime)},
@ -235,132 +252,115 @@ namespace Jackett.Common.Indexers
var results = await RequestStringWithCookies(searchUrl);
try
{
string RowsSelector = "table.torrent_table > tbody > tr:not(tr.colhead)";
const string rowsSelector = "table.torrent_table > tbody > tr:not(tr.colhead)";
var SearchResultParser = new HtmlParser();
var SearchResultDocument = SearchResultParser.Parse(results.Content);
var Rows = SearchResultDocument.QuerySelectorAll(RowsSelector);
var searchResultParser = new HtmlParser();
var searchResultDocument = searchResultParser.Parse(results.Content);
var rows = searchResultDocument.QuerySelectorAll(rowsSelector);
ICollection<int> GroupCategory = null;
string GroupTitle = null;
string GroupYearStr = null;
Nullable<DateTime> GroupPublishDate = null;
ICollection<int> groupCategory = null;
string groupTitle = null;
string groupYearStr = null;
DateTime? groupPublishDate = null;
foreach (var Row in Rows)
foreach (var row in rows)
{
try
{
var qDetailsLink = Row.QuerySelector("a[href^=\"torrents.php?id=\"]");
string Title = qDetailsLink.TextContent;
ICollection<int> Category = null;
string YearStr = null;
Nullable<DateTime> YearPublishDate = null;
string CategoryStr = "";
var qDetailsLink = row.QuerySelector("a[href^=\"torrents.php?id=\"]");
var title = qDetailsLink.TextContent;
ICollection<int> category = null;
string yearStr = null;
var categoryStr = "";
if (Row.ClassList.Contains("group") || Row.ClassList.Contains("torrent")) // group/ungrouped headers
if (row.ClassList.Contains("group") || row.ClassList.Contains("torrent")) // group/ungrouped headers
{
var qCatLink = Row.QuerySelector("a[href^=\"/torrents.php?filter_cat\"]");
CategoryStr = qCatLink.GetAttribute("href").Split('=')[1].Split('&')[0];
Category = MapTrackerCatToNewznab(CategoryStr);
var qCatLink = row.QuerySelector("a[href^=\"/torrents.php?filter_cat\"]");
categoryStr = qCatLink.GetAttribute("href").Split('=')[1].Split('&')[0];
category = MapTrackerCatToNewznab(categoryStr);
YearStr = qDetailsLink.NextSibling.TextContent.Trim().TrimStart('[').TrimEnd(']');
YearPublishDate = DateTime.SpecifyKind(DateTime.ParseExact(YearStr, "yyyy", CultureInfo.InvariantCulture), DateTimeKind.Unspecified);
yearStr = qDetailsLink.NextSibling.TextContent.Trim().TrimStart('[').TrimEnd(']');
// if result is an anime, convert title from SXXEXX to EXX
if (CategoryStr == "14")
{
Title = Regex.Replace(Title, @"(Ep[\.]?[ ]?)|([S]\d\d[Ee])", "E");
}
title = FixAbsoluteNumbering(title);
if (Row.ClassList.Contains("group")) // group headers
if (row.ClassList.Contains("group")) // group headers
{
GroupCategory = Category;
GroupTitle = Title;
GroupYearStr = YearStr;
GroupPublishDate = YearPublishDate;
groupCategory = category;
groupTitle = title;
groupYearStr = yearStr;
continue;
}
}
var release = new ReleaseInfo();
var release = new ReleaseInfo
{
MinimumRatio = 1,
MinimumSeedTime = 0
};
release.MinimumRatio = 1;
release.MinimumSeedTime = 0;
var qDlLink = row.QuerySelector("a[href^=\"torrents.php?action=download\"]");
var qSize = row.QuerySelector("td:nth-last-child(4)");
var qGrabs = row.QuerySelector("td:nth-last-child(3)");
var qSeeders = row.QuerySelector("td:nth-last-child(2)");
var qLeechers = row.QuerySelector("td:nth-last-child(1)");
var qFreeLeech = row.QuerySelector("strong[title=\"Free\"]");
var qDLLink = Row.QuerySelector("a[href^=\"torrents.php?action=download\"]");
var qSize = Row.QuerySelector("td:nth-last-child(4)");
var qGrabs = Row.QuerySelector("td:nth-last-child(3)");
var qSeeders = Row.QuerySelector("td:nth-last-child(2)");
var qLeechers = Row.QuerySelector("td:nth-last-child(1)");
var qFreeLeech = Row.QuerySelector("strong[title=\"Free\"]");
if (Row.ClassList.Contains("group_torrent")) // torrents belonging to a group
if (row.ClassList.Contains("group_torrent")) // torrents belonging to a group
{
release.Description = qDetailsLink.TextContent;
string cleanTitle = Regex.Replace(GroupTitle, @" - S?(?<season>\d{1,2})?E?(?<episode>\d{1,4})?", "");
string seasonEp = Regex.Replace(GroupTitle, @"^(.*?) - (S?(\d{1,2})?E?(\d{1,4})?)?", "$2");
release.Title = CategoryStr == "14" ? GroupTitle : cleanTitle + " " + GroupYearStr + " " + seasonEp;
release.PublishDate = GroupPublishDate.Value;
release.Category = GroupCategory;
var cleanTitle = Regex.Replace(groupTitle, @" - S?(?<season>\d{1,2})?E?(?<episode>\d{1,4})?", "");
var seasonEp = Regex.Replace(groupTitle, @"^(.*?) - (S?(\d{1,2})?E?(\d{1,4})?)?", "$2");
release.Title = categoryStr == "14" ? groupTitle : cleanTitle + " " + groupYearStr + " " + seasonEp;
release.PublishDate = groupPublishDate.Value;
release.Category = groupCategory;
}
else if (Row.ClassList.Contains("torrent")) // standalone/un grouped torrents
else if (row.ClassList.Contains("torrent")) // standalone/un grouped torrents
{
var qDescription = Row.QuerySelector("div.torrent_info");
var qDescription = row.QuerySelector("div.torrent_info");
release.Description = qDescription.TextContent;
string cleanTitle = Regex.Replace(Title, @" - ((S(\d{1,2}))?E(\d{1,4}))", "");
string seasonEp = Regex.Replace(Title, @"^(.*?) - ((S(\d{1,2}))?E(\d{1,4}))", "$2");
release.Title = CategoryStr == "14" ? Title : cleanTitle + " " + YearStr + " " + seasonEp;
release.PublishDate = YearPublishDate.Value;
release.Category = Category;
var cleanTitle = Regex.Replace(title, @" - ((S(\d{1,2}))?E(\d{1,4}))", "");
var seasonEp = Regex.Replace(title, @"^(.*?) - ((S(\d{1,2}))?E(\d{1,4}))", "$2");
release.Title = categoryStr == "14" ? title : cleanTitle + " " + yearStr + " " + seasonEp;
release.Category = category;
}
release.Description = release.Description.Replace(" / Free", ""); // Remove Free Tag
release.Description = release.Description.Replace("Full HD", "1080p");
release.Description = release.Description.Replace("/ HD / ", "/ 720p /");
release.Description = release.Description.Replace(" / HD]", " / 720p]");
release.Description = release.Description.Replace("4K", "2160p");
int nBarra = release.Title.IndexOf("[");
if (nBarra != -1)
{
release.Title = release.Title.Substring(nBarra + 1);
release.Title = release.Title.Replace("]", "");
}
// Get international title if available, or use the full title if not
release.Title = Regex.Replace(title, @".* \[(.*?)\](.*)", "$1$2");
release.Title += " " + release.Description; // add year and Description to the release Title to add some meaning to it
// This tracker does not provide a publish date in search results (only on the last-24h page)
release.PublishDate = DateTime.Today;
// check for previously stripped search terms
if (!query.MatchQueryStringAND(release.Title))
continue;
var Size = qSize.TextContent;
release.Size = ReleaseInfo.GetBytes(Size);
release.Link = new Uri(SiteLink + qDLLink.GetAttribute("href"));
var size = qSize.TextContent;
release.Size = ReleaseInfo.GetBytes(size);
release.Link = new Uri(SiteLink + qDlLink.GetAttribute("href"));
release.Comments = new Uri(SiteLink + qDetailsLink.GetAttribute("href"));
release.Guid = release.Link;
release.Grabs = ParseUtil.CoerceLong(qGrabs.TextContent);
release.Seeders = ParseUtil.CoerceInt(qSeeders.TextContent);
release.Peers = ParseUtil.CoerceInt(qLeechers.TextContent) + release.Seeders;
if (qFreeLeech != null)
release.DownloadVolumeFactor = 0;
else
release.DownloadVolumeFactor = 1;
release.DownloadVolumeFactor = qFreeLeech != null ? 0 : 1;
release.UploadVolumeFactor = 1;
releases.Add(release);
}
catch (Exception ex)
{
logger.Error(string.Format("{0}: Error while parsing row '{1}': {2}", ID, Row.OuterHtml, ex.Message));
logger.Error($"{ID}: Error while parsing row '{row.OuterHtml}': {ex.Message}");
}
}
}