mirror of https://github.com/Jackett/Jackett
Norbits: duplicate search without diacritics
This commit is contained in:
parent
34cdedae12
commit
a34a3fb4b6
|
@ -226,14 +226,14 @@ namespace Jackett.Indexers
|
||||||
{
|
{
|
||||||
var releases = new List<ReleaseInfo>();
|
var releases = new List<ReleaseInfo>();
|
||||||
var torrentRowList = new List<CQ>();
|
var torrentRowList = new List<CQ>();
|
||||||
var searchTerm = query.GetQueryString();
|
var exactSearchTerm = query.GetQueryString();
|
||||||
var searchUrl = SearchUrl;
|
var searchUrl = SearchUrl;
|
||||||
|
|
||||||
// Check login before performing a query
|
// Check login before performing a query
|
||||||
await CheckLogin();
|
await CheckLogin();
|
||||||
|
|
||||||
// Check cache first so we don't query the server (if search term used or not in dev mode)
|
// Check cache first so we don't query the server (if search term used or not in dev mode)
|
||||||
if (!DevMode && !string.IsNullOrEmpty(searchTerm))
|
if (!DevMode && !string.IsNullOrEmpty(exactSearchTerm))
|
||||||
{
|
{
|
||||||
lock (cache)
|
lock (cache)
|
||||||
{
|
{
|
||||||
|
@ -241,31 +241,40 @@ namespace Jackett.Indexers
|
||||||
CleanCache();
|
CleanCache();
|
||||||
|
|
||||||
// Search in cache
|
// Search in cache
|
||||||
var cachedResult = cache.FirstOrDefault(i => i.Query == searchTerm);
|
var cachedResult = cache.FirstOrDefault(i => i.Query == exactSearchTerm);
|
||||||
if (cachedResult != null)
|
if (cachedResult != null)
|
||||||
return cachedResult.Results.Select(s => (ReleaseInfo)s.Clone()).ToArray();
|
return cachedResult.Results.Select(s => (ReleaseInfo)s.Clone()).ToArray();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build our query
|
var SearchTerms = new List<string> { exactSearchTerm };
|
||||||
var request = BuildQuery(searchTerm, query, searchUrl);
|
|
||||||
|
|
||||||
// Getting results & Store content
|
// duplicate search without diacritics
|
||||||
var response = await RequestStringWithCookiesAndRetry(request, ConfigData.CookieHeader.Value);
|
var baseSearchTerm = StringUtil.RemoveDiacritics(exactSearchTerm);
|
||||||
_fDom = response.Content;
|
if (baseSearchTerm != exactSearchTerm)
|
||||||
|
SearchTerms.Add(baseSearchTerm);
|
||||||
|
|
||||||
try
|
foreach (var searchTerm in SearchTerms)
|
||||||
{
|
{
|
||||||
var firstPageRows = FindTorrentRows();
|
// Build our query
|
||||||
|
var request = BuildQuery(searchTerm, query, searchUrl);
|
||||||
|
|
||||||
|
// Getting results & Store content
|
||||||
|
var response = await RequestStringWithCookiesAndRetry(request, ConfigData.CookieHeader.Value);
|
||||||
|
_fDom = response.Content;
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
var firstPageRows = FindTorrentRows();
|
||||||
|
|
||||||
// Add them to torrents list
|
// Add them to torrents list
|
||||||
torrentRowList.AddRange(firstPageRows.Select(fRow => fRow.Cq()));
|
torrentRowList.AddRange(firstPageRows.Select(fRow => fRow.Cq()));
|
||||||
|
|
||||||
// If pagination available
|
// If pagination available
|
||||||
int nbResults;
|
int nbResults;
|
||||||
int pageLinkCount;
|
int pageLinkCount;
|
||||||
nbResults = 1;
|
nbResults = 1;
|
||||||
pageLinkCount = 1;
|
pageLinkCount = 1;
|
||||||
|
|
||||||
// Check if we have a minimum of one result
|
// Check if we have a minimum of one result
|
||||||
if (firstPageRows.Length > 1)
|
if (firstPageRows.Length > 1)
|
||||||
|
@ -282,145 +291,145 @@ namespace Jackett.Indexers
|
||||||
Output("\nNo result found for your query, please try another search term ...\n", "info");
|
Output("\nNo result found for your query, please try another search term ...\n", "info");
|
||||||
|
|
||||||
// No result found for this query
|
// No result found for this query
|
||||||
return releases;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Output("\nFound " + nbResults + " result(s) (+/- " + firstPageRows.Length + ") in " + pageLinkCount + " page(s) for this query !");
|
Output("\nFound " + nbResults + " result(s) (+/- " + firstPageRows.Length + ") in " + pageLinkCount + " page(s) for this query !");
|
||||||
Output("\nThere are " + firstPageRows.Length + " results on the first page !");
|
Output("\nThere are " + firstPageRows.Length + " results on the first page !");
|
||||||
|
|
||||||
// Loop on results
|
// Loop on results
|
||||||
|
|
||||||
foreach (var tRow in torrentRowList)
|
foreach (var tRow in torrentRowList)
|
||||||
{
|
|
||||||
Output("Torrent #" + (releases.Count + 1));
|
|
||||||
|
|
||||||
// ID
|
|
||||||
var id = tRow.Find("td:eq(1) > a:eq(0)").Attr("href").Split('=').Last();
|
|
||||||
Output("ID: " + id);
|
|
||||||
|
|
||||||
// Release Name
|
|
||||||
var name = tRow.Find("td:eq(1) > a:eq(0)").Attr("title");
|
|
||||||
|
|
||||||
// Category
|
|
||||||
var categoryId = tRow.Find("td:eq(0) > div > a:eq(0)").Attr("href").Split('?').Last();
|
|
||||||
var categoryName = tRow.Find("td:eq(0) > div > a:eq(0)").Attr("title");
|
|
||||||
|
|
||||||
var MainCat = tRow.Find("td:eq(0) > div > a:eq(0)").Attr("href").Split('?').Last();
|
|
||||||
var SubCat1 = "none";
|
|
||||||
var SubCat2 = "none";
|
|
||||||
|
|
||||||
var testcat = MainCat;
|
|
||||||
|
|
||||||
if (tRow.Find("td:eq(0) > div > a:eq(1)").Length == 1)
|
|
||||||
{
|
{
|
||||||
SubCat1 = tRow.Find("td:eq(0) > div > a:eq(1)").Attr("href").Split('?').Last();
|
Output("Torrent #" + (releases.Count + 1));
|
||||||
}
|
|
||||||
if (tRow.Find("td:eq(0) > div > a[href^=\"/browse.php?sub2_cat[]=\"]").Length == 1)
|
// ID
|
||||||
{
|
var id = tRow.Find("td:eq(1) > a:eq(0)").Attr("href").Split('=').Last();
|
||||||
SubCat2 = tRow.Find("td:eq(0) > div > a[href^=\"/browse.php?sub2_cat[]=\"]").Attr("href").Split('?').Last();
|
Output("ID: " + id);
|
||||||
testcat = MainCat + '&' + SubCat2;
|
|
||||||
|
// Release Name
|
||||||
|
var name = tRow.Find("td:eq(1) > a:eq(0)").Attr("title");
|
||||||
|
|
||||||
|
// Category
|
||||||
|
var categoryId = tRow.Find("td:eq(0) > div > a:eq(0)").Attr("href").Split('?').Last();
|
||||||
|
var categoryName = tRow.Find("td:eq(0) > div > a:eq(0)").Attr("title");
|
||||||
|
|
||||||
|
var MainCat = tRow.Find("td:eq(0) > div > a:eq(0)").Attr("href").Split('?').Last();
|
||||||
|
var SubCat1 = "none";
|
||||||
|
var SubCat2 = "none";
|
||||||
|
|
||||||
|
var testcat = MainCat;
|
||||||
|
|
||||||
|
if (tRow.Find("td:eq(0) > div > a:eq(1)").Length == 1)
|
||||||
|
{
|
||||||
|
SubCat1 = tRow.Find("td:eq(0) > div > a:eq(1)").Attr("href").Split('?').Last();
|
||||||
|
}
|
||||||
|
if (tRow.Find("td:eq(0) > div > a[href^=\"/browse.php?sub2_cat[]=\"]").Length == 1)
|
||||||
|
{
|
||||||
|
SubCat2 = tRow.Find("td:eq(0) > div > a[href^=\"/browse.php?sub2_cat[]=\"]").Attr("href").Split('?').Last();
|
||||||
|
testcat = MainCat + '&' + SubCat2;
|
||||||
|
}
|
||||||
|
|
||||||
|
Output("Category: " + testcat + " - " + categoryName);
|
||||||
|
|
||||||
|
// Seeders
|
||||||
|
var seeders = ParseUtil.CoerceInt(tRow.Find("td:eq(9)").Text());
|
||||||
|
Output("Seeders: " + seeders);
|
||||||
|
|
||||||
|
// Leechers
|
||||||
|
var leechers = ParseUtil.CoerceInt(tRow.Find("td:eq(10)").Text());
|
||||||
|
Output("Leechers: " + leechers);
|
||||||
|
|
||||||
|
// Completed
|
||||||
|
Regex regexObj = new Regex(@"[^\d]");
|
||||||
|
var completed2 = tRow.Find("td:eq(7)").Text();
|
||||||
|
var completed = ParseUtil.CoerceLong(regexObj.Replace(completed2, ""));
|
||||||
|
Output("Completed: " + completed);
|
||||||
|
|
||||||
|
// Files
|
||||||
|
var files = 1;
|
||||||
|
if (tRow.Find("td:eq(2) > a").Length == 1)
|
||||||
|
{
|
||||||
|
files = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(2) > a").Text(), @"\d+").Value);
|
||||||
|
}
|
||||||
|
Output("Files: " + files);
|
||||||
|
|
||||||
|
// Health
|
||||||
|
var percent = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(8)").Text(), @"\d+").Value.Trim());
|
||||||
|
Output("Health: " + percent + "%");
|
||||||
|
|
||||||
|
// Size
|
||||||
|
var humanSize = tRow.Find("td:eq(6)").Text().ToLowerInvariant();
|
||||||
|
var size = ReleaseInfo.GetBytes(humanSize);
|
||||||
|
Output("Size: " + humanSize + " (" + size + " bytes)");
|
||||||
|
|
||||||
|
// --> Date
|
||||||
|
var dateTimeOrig = tRow.Find("td:eq(4)").Text();
|
||||||
|
var dateTime = Regex.Replace(dateTimeOrig, @"<[^>]+>| ", "").Trim();
|
||||||
|
var date = DateTime.ParseExact(dateTime, "yyyy-MM-ddHH:mm:ss", CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal).ToLocalTime();
|
||||||
|
Output("Released on: " + date);
|
||||||
|
|
||||||
|
// Torrent Details URL
|
||||||
|
var detailsLink = new Uri(TorrentDescriptionUrl.Replace("{id}", id.ToString()));
|
||||||
|
Output("Details: " + detailsLink.AbsoluteUri);
|
||||||
|
|
||||||
|
// Torrent Comments URL
|
||||||
|
var commentsLink = new Uri(TorrentCommentUrl.Replace("{id}", id.ToString()));
|
||||||
|
Output("Comments Link: " + commentsLink.AbsoluteUri);
|
||||||
|
|
||||||
|
// Torrent Download URL
|
||||||
|
var passkey = tRow.Find("td:eq(1) > a:eq(1)").Attr("href");
|
||||||
|
var key = Regex.Match(passkey, "(?<=passkey\\=)([a-zA-z0-9]*)");
|
||||||
|
Uri downloadLink = new Uri(TorrentDownloadUrl.Replace("{id}", id.ToString()).Replace("{passkey}", key.ToString()));
|
||||||
|
Output("Download Link: " + downloadLink.AbsoluteUri);
|
||||||
|
|
||||||
|
// Building release infos
|
||||||
|
var release = new ReleaseInfo
|
||||||
|
{
|
||||||
|
Category = MapTrackerCatToNewznab(testcat.ToString()),
|
||||||
|
Title = name,
|
||||||
|
Seeders = seeders,
|
||||||
|
Peers = seeders + leechers,
|
||||||
|
MinimumRatio = 1,
|
||||||
|
MinimumSeedTime = 172800,
|
||||||
|
PublishDate = date,
|
||||||
|
Size = size,
|
||||||
|
Files = files,
|
||||||
|
Grabs = completed,
|
||||||
|
Guid = detailsLink,
|
||||||
|
Comments = commentsLink,
|
||||||
|
Link = downloadLink
|
||||||
|
};
|
||||||
|
|
||||||
|
var genres = tRow.Find("span.genres").Text();
|
||||||
|
if (!string.IsNullOrEmpty(genres))
|
||||||
|
release.Description = genres;
|
||||||
|
|
||||||
|
// IMDB
|
||||||
|
var imdbLink = tRow.Find("a[href*=\"http://imdb.com/title/\"]").First().Attr("href");
|
||||||
|
release.Imdb = ParseUtil.GetLongFromString(imdbLink);
|
||||||
|
|
||||||
|
if (tRow.Find("img[title=\"100% freeleech\"]").Length >= 1)
|
||||||
|
release.DownloadVolumeFactor = 0;
|
||||||
|
else if (tRow.Find("img[title=\"Halfleech\"]").Length >= 1)
|
||||||
|
release.DownloadVolumeFactor = 0.5;
|
||||||
|
else if (tRow.Find("img[title=\"90% Freeleech\"]").Length >= 1)
|
||||||
|
release.DownloadVolumeFactor = 0.1;
|
||||||
|
else
|
||||||
|
release.DownloadVolumeFactor = 1;
|
||||||
|
|
||||||
|
release.UploadVolumeFactor = 1;
|
||||||
|
|
||||||
|
releases.Add(release);
|
||||||
}
|
}
|
||||||
|
|
||||||
Output("Category: " + testcat + " - " + categoryName);
|
|
||||||
|
|
||||||
// Seeders
|
|
||||||
var seeders = ParseUtil.CoerceInt(tRow.Find("td:eq(9)").Text());
|
|
||||||
Output("Seeders: " + seeders);
|
|
||||||
|
|
||||||
// Leechers
|
|
||||||
var leechers = ParseUtil.CoerceInt(tRow.Find("td:eq(10)").Text());
|
|
||||||
Output("Leechers: " + leechers);
|
|
||||||
|
|
||||||
// Completed
|
|
||||||
Regex regexObj = new Regex(@"[^\d]");
|
|
||||||
var completed2 = tRow.Find("td:eq(7)").Text();
|
|
||||||
var completed = ParseUtil.CoerceLong(regexObj.Replace(completed2, ""));
|
|
||||||
Output("Completed: " + completed);
|
|
||||||
|
|
||||||
// Files
|
|
||||||
var files = 1;
|
|
||||||
if (tRow.Find("td:eq(2) > a").Length == 1)
|
|
||||||
{
|
|
||||||
files = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(2) > a").Text(), @"\d+").Value);
|
|
||||||
}
|
|
||||||
Output("Files: " + files);
|
|
||||||
|
|
||||||
// Health
|
|
||||||
var percent = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(8)").Text(), @"\d+").Value.Trim());
|
|
||||||
Output("Health: " + percent + "%");
|
|
||||||
|
|
||||||
// Size
|
|
||||||
var humanSize = tRow.Find("td:eq(6)").Text().ToLowerInvariant();
|
|
||||||
var size = ReleaseInfo.GetBytes(humanSize);
|
|
||||||
Output("Size: " + humanSize + " (" + size + " bytes)");
|
|
||||||
|
|
||||||
// --> Date
|
|
||||||
var dateTimeOrig = tRow.Find("td:eq(4)").Text();
|
|
||||||
var dateTime = Regex.Replace(dateTimeOrig, @"<[^>]+>| ", "").Trim();
|
|
||||||
var date = DateTime.ParseExact(dateTime, "yyyy-MM-ddHH:mm:ss", CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal).ToLocalTime();
|
|
||||||
Output("Released on: " + date);
|
|
||||||
|
|
||||||
// Torrent Details URL
|
|
||||||
var detailsLink = new Uri(TorrentDescriptionUrl.Replace("{id}", id.ToString()));
|
|
||||||
Output("Details: " + detailsLink.AbsoluteUri);
|
|
||||||
|
|
||||||
// Torrent Comments URL
|
|
||||||
var commentsLink = new Uri(TorrentCommentUrl.Replace("{id}", id.ToString()));
|
|
||||||
Output("Comments Link: " + commentsLink.AbsoluteUri);
|
|
||||||
|
|
||||||
// Torrent Download URL
|
|
||||||
var passkey = tRow.Find("td:eq(1) > a:eq(1)").Attr("href");
|
|
||||||
var key = Regex.Match(passkey, "(?<=passkey\\=)([a-zA-z0-9]*)");
|
|
||||||
Uri downloadLink = new Uri(TorrentDownloadUrl.Replace("{id}", id.ToString()).Replace("{passkey}", key.ToString()));
|
|
||||||
Output("Download Link: " + downloadLink.AbsoluteUri);
|
|
||||||
|
|
||||||
// Building release infos
|
|
||||||
var release = new ReleaseInfo
|
|
||||||
{
|
|
||||||
Category = MapTrackerCatToNewznab(testcat.ToString()),
|
|
||||||
Title = name,
|
|
||||||
Seeders = seeders,
|
|
||||||
Peers = seeders + leechers,
|
|
||||||
MinimumRatio = 1,
|
|
||||||
MinimumSeedTime = 172800,
|
|
||||||
PublishDate = date,
|
|
||||||
Size = size,
|
|
||||||
Files = files,
|
|
||||||
Grabs = completed,
|
|
||||||
Guid = detailsLink,
|
|
||||||
Comments = commentsLink,
|
|
||||||
Link = downloadLink
|
|
||||||
};
|
|
||||||
|
|
||||||
var genres = tRow.Find("span.genres").Text();
|
|
||||||
if (!string.IsNullOrEmpty(genres))
|
|
||||||
release.Description = genres;
|
|
||||||
|
|
||||||
// IMDB
|
|
||||||
var imdbLink = tRow.Find("a[href*=\"http://imdb.com/title/\"]").First().Attr("href");
|
|
||||||
release.Imdb = ParseUtil.GetLongFromString(imdbLink);
|
|
||||||
|
|
||||||
if (tRow.Find("img[title=\"100% freeleech\"]").Length >= 1)
|
|
||||||
release.DownloadVolumeFactor = 0;
|
|
||||||
else if (tRow.Find("img[title=\"Halfleech\"]").Length >= 1)
|
|
||||||
release.DownloadVolumeFactor = 0.5;
|
|
||||||
else if (tRow.Find("img[title=\"90% Freeleech\"]").Length >= 1)
|
|
||||||
release.DownloadVolumeFactor = 0.1;
|
|
||||||
else
|
|
||||||
release.DownloadVolumeFactor = 1;
|
|
||||||
|
|
||||||
release.UploadVolumeFactor = 1;
|
|
||||||
|
|
||||||
releases.Add(release);
|
|
||||||
}
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
OnParseError("Error, unable to parse result \n" + ex.StackTrace, ex);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
|
||||||
{
|
|
||||||
OnParseError("Error, unable to parse result \n" + ex.StackTrace, ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return found releases
|
// Return found releases
|
||||||
return releases;
|
return releases;
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ using AngleSharp.Html;
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Collections.Specialized;
|
using System.Collections.Specialized;
|
||||||
|
using System.Globalization;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Net.Http;
|
using System.Net.Http;
|
||||||
|
@ -28,6 +29,22 @@ namespace Jackett.Utils
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Replaces culture specific characters with the corresponding base characters (e.g. è becomes e).
/// </summary>
/// <param name="s">The text to strip. May be null or empty.</param>
/// <returns>
/// <paramref name="s"/> with every non-spacing combining mark removed, in composed
/// (Form C) normalization. Null or empty input is returned unchanged.
/// </returns>
public static String RemoveDiacritics(String s)
{
    // Nothing to strip; this also avoids a NullReferenceException on null input.
    if (String.IsNullOrEmpty(s))
        return s;

    // Canonical decomposition (Form D) splits each accented character into its
    // base character followed by one or more combining marks.
    String normalizedString = s.Normalize(NormalizationForm.FormD);
    StringBuilder stringBuilder = new StringBuilder(normalizedString.Length);

    foreach (Char c in normalizedString)
    {
        // Keep everything except the combining marks (the diacritics themselves).
        if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
            stringBuilder.Append(c);
    }

    // Recompose to Form C. Form D also decomposes characters that carry no
    // diacritics (e.g. Hangul syllables); without recomposing, such text would
    // come back altered and compare unequal to the input.
    return stringBuilder.ToString().Normalize(NormalizationForm.FormC);
}
|
||||||
|
|
||||||
public static string FromBase64(string str)
|
public static string FromBase64(string str)
|
||||||
{
|
{
|
||||||
return Encoding.UTF8.GetString(Convert.FromBase64String(str));
|
return Encoding.UTF8.GetString(Convert.FromBase64String(str));
|
||||||
|
|
Loading…
Reference in New Issue