Add MejorTorrent (#13320) resolves #12559

This commit is contained in:
Alberto Díaz 2022-06-26 20:14:41 +02:00 committed by GitHub
parent d08f569213
commit c6e6eea192
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 631 additions and 0 deletions

View File

@ -92,6 +92,7 @@ A third-party Golang SDK for Jackett is available from [webtor-io/go-jackett](ht
* LinuxTracker
* Mac Torrents Download
* MegaPeer
* MejorTorrent
* Mikan
* MioBT
* MixTapeTorrent

View File

@ -0,0 +1,630 @@
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AngleSharp.Html.Parser;
using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig;
using Jackett.Common.Services.Interfaces;
using Jackett.Common.Utils;
using Newtonsoft.Json.Linq;
using NLog;
using static Jackett.Common.Models.IndexerConfig.ConfigurationData;
using WebClient = Jackett.Common.Utils.Clients.WebClient;
namespace Jackett.Common.Indexers
{
[ExcludeFromCodeCoverage]
public class MejorTorrent : BaseWebIndexer
{
private static class MejorTorrentCatType
{
public static string Pelicula => "Película";
public static string Serie => "Serie";
public static string SerieHd => "SerieHD"; // this category is created, doesn't exist in the site
public static string Musica => "Música";
public static string Otro => "Otro";
}
private const string NewTorrentsUrl = "torrents";
private const string SearchUrl = "busqueda/page/";
private const int PagesToSearch = 3;
// uncomment when there are more than one domain available
// public override string[] AlternativeSiteLinks { get; protected set; } = {
// "https://mejortorrent.wtf/"
// };
public override string[] LegacySiteLinks { get; protected set; } = {
"https://www.mejortorrentt.net/",
"http://www.mejortorrent.org/",
"http://www.mejortorrent.tv/",
"http://www.mejortorrentt.com/",
"https://www.mejortorrentt.org/",
"http://www.mejortorrentt.org/",
"https://www.mejortorrents.net/",
"https://www.mejortorrents1.com/",
"https://www.mejortorrents1.net/",
"https://www.mejortorrento.com/",
"https://www.mejortorrento.org/",
"https://www.mejortorrento.net/",
"https://www.mejortorrento.info/",
"https://mejortorrent.nocensor.space/",
"https://www.mejortorrentes.com/",
"https://www.mejortorrento.info/",
"https://mejortorrent.nocensor.work/",
"https://www.mejortorrentes.net/",
"https://mejortorrent.unblockit.tv/",
"https://mejortorrent.unblockit.how/",
"https://mejortorrent.unblockit.cam/",
"https://mejortorrent.nocensor.biz/",
"https://mejortorrent.unblockit.day/",
"https://mejortorrent.unblockit.llc/",
"https://www.mejortorrentes.org/",
"https://mejortorrent.unblockit.blue/",
"https://mejortorrent.nocensor.sbs/",
"https://mejortorrent.unblockit.name/"
};
public MejorTorrent(IIndexerConfigurationService configService, WebClient w, Logger l, IProtectionService ps,
ICacheService cs)
: base(id: "mejortorrent",
name: "MejorTorrent",
description: "MejorTorrent - Hay veces que un torrent viene mejor! :)",
link: "https://mejortorrent.wtf/",
caps: new TorznabCapabilities
{
TvSearchParams = new List<TvSearchParam>
{
TvSearchParam.Q, TvSearchParam.Season, TvSearchParam.Ep
},
MovieSearchParams = new List<MovieSearchParam>
{
MovieSearchParam.Q
},
MusicSearchParams = new List<MusicSearchParam>
{
MusicSearchParam.Q
}
},
configService: configService,
client: w,
logger: l,
p: ps,
cacheService: cs,
configData: new ConfigurationData())
{
Encoding = Encoding.UTF8;
Language = "es-ES";
Type = "public";
var matchWords = new BoolConfigurationItem("Match words in title") { Value = true };
configData.AddDynamic("MatchWords", matchWords);
// Uncomment to enable FlareSolverr in the future
//configData.AddDynamic("flaresolverr", new DisplayInfoConfigurationItem("FlareSolverr", "This site may use Cloudflare DDoS Protection, therefore Jackett requires <a href=\"https://github.com/Jackett/Jackett#configuring-flaresolverr\" target=\"_blank\">FlareSolverr</a> to access it."));
AddCategoryMapping(MejorTorrentCatType.Pelicula, TorznabCatType.Movies, "Pelicula");
AddCategoryMapping(MejorTorrentCatType.Serie, TorznabCatType.TVSD, "Serie");
AddCategoryMapping(MejorTorrentCatType.SerieHd, TorznabCatType.TVHD, "Serie HD");
AddCategoryMapping(MejorTorrentCatType.Musica, TorznabCatType.Audio, "Musica");
// Other category is disabled because we have problems parsing documentaries
//AddCategoryMapping(MejorTorrentCatType.Otro, TorznabCatType.Other, "Otro");
}
public override async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
{
LoadValuesFromJson(configJson);
var releases = await PerformQuery(new TorznabQuery());
await ConfigureIfOK(string.Empty, releases.Any(), () =>
throw new Exception("Could not find releases from this URL"));
return IndexerConfigurationStatus.Completed;
}
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
{
var matchWords = ((BoolConfigurationItem)configData.GetDynamic("MatchWords")).Value;
matchWords = query.SearchTerm != "" && matchWords;
// we remove parts from the original query
query = ParseQuery(query);
var releases = string.IsNullOrEmpty(query.SearchTerm) ?
await PerformQueryNewest(query) :
await PerformQuerySearch(query, matchWords);
return releases;
}
public override async Task<byte[]> Download(Uri link)
{
var downloadUrl = link.ToString();
var content = await base.Download(new Uri(downloadUrl));
return content;
}
private async Task<List<ReleaseInfo>> PerformQueryNewest(TorznabQuery query)
{
var releases = new List<ReleaseInfo>();
var url = SiteLink + NewTorrentsUrl;
var result = await RequestWithCookiesAsync(url);
if (result.Status != HttpStatusCode.OK)
throw new ExceptionWithConfigData(result.ContentString, configData);
try
{
var searchResultParser = new HtmlParser();
var doc = searchResultParser.ParseDocument(result.ContentString);
var container = doc.QuerySelector(".gap-y-3 > div:nth-child(1) > div:nth-child(1)");
var parsedDetailsLink = new List<string>();
string rowTitle = null;
string rowDetailsLink = null;
string rowPublishDate = null;
string rowQuality = null;
foreach (var row in container.Children)
{
rowPublishDate = row.Children[0].TextContent;
rowQuality = row.Children[1].Children[0].Children[0].TextContent;
rowTitle = row.Children[1].Children[0].TextContent.Replace(rowQuality, String.Empty).Trim();
rowDetailsLink = row.Children[1].GetAttribute("href");
// we add parsed items to rowDetailsLink to avoid duplicates in newest torrents
// list results
if (!parsedDetailsLink.Contains(rowDetailsLink))
{
await ParseRelease(releases, rowTitle, rowDetailsLink, null,
rowPublishDate, rowQuality, query, false);
parsedDetailsLink.Add(rowDetailsLink);
}
// clean the current row
rowTitle = null;
rowDetailsLink = null;
rowPublishDate = null;
rowQuality = null;
}
}
catch (Exception ex)
{
OnParseError(result.ContentString, ex);
throw ex;
}
return releases;
}
private async Task<List<ReleaseInfo>> PerformQuerySearch(TorznabQuery query, bool matchWords)
{
var releases = new List<ReleaseInfo>();
var qc = new NameValueCollection { { "q", query.SearchTerm } };
// We search in the first "PagesToSearch" pages
for (int i = 1; i <= PagesToSearch; i++)
{
var url = SiteLink + SearchUrl + i + "?" + qc.GetQueryString();
var result = await RequestWithCookiesAsync(url);
if (result.Status != HttpStatusCode.OK)
throw new ExceptionWithConfigData(result.ContentString, configData);
try
{
var searchResultParser = new HtmlParser();
var doc = searchResultParser.ParseDocument(result.ContentString);
var table = doc.QuerySelector(".w-11\\/12");
// check the search term is valid
if (table?.QuerySelector("div.flex-row:nth-child(1)") != null)
{
// check there are results
var rows = table.Children;
if (rows != null && rows.Length > 0)
foreach (var row in rows)
{
var rowQuality = row.Children[0].Children[0].Children[0].TextContent;
var rowTitle = row.Children[0].Children[0].TextContent.Replace(rowQuality, String.Empty).Trim();
var rowDetailsLink = row.Children[0].GetAttribute("href");
var rowMejortorrentCat = row.Children[1].TextContent;
await ParseRelease(releases, rowTitle, rowDetailsLink, rowMejortorrentCat,
null, rowQuality, query, matchWords);
}
}
else
{
i = PagesToSearch;
}
}
catch (Exception ex)
{
OnParseError(result.ContentString, ex);
}
}
return releases;
}
private async Task ParseRelease(ICollection<ReleaseInfo> releases, string title, string detailsStr,
string mejortorrentCat, string publishStr, string quality, TorznabQuery query, bool matchWords)
{
// Remove trailing dot. Eg Harry Potter Y La Orden Del Fénix.
title = title.Trim();
if (title.EndsWith("."))
title = title.Remove(title.Length - 1).Trim();
var cat = GetMejortorrentCategory(mejortorrentCat, detailsStr, title, quality);
if (cat == MejorTorrentCatType.Otro)
return; // skip releases from this category
var categories = MapTrackerCatToNewznab(cat);
var publishDate = TryToParseDate(publishStr, DateTime.Now);
// return results only for requested categories
if (query.Categories.Any() && !query.Categories.Contains(categories.First()))
return;
// match the words in the query with the titles
if (matchWords && !CheckTitleMatchWords(query.SearchTerm, title))
return;
// parsing is different for each category
if (cat == MejorTorrentCatType.Serie || cat == MejorTorrentCatType.SerieHd)
await ParseSeriesRelease(releases, query, title, detailsStr, cat, publishDate, quality);
else if (query.Episode == null) // if it's scene series, we don't return other categories
{
if (cat == MejorTorrentCatType.Pelicula)
await ParseMovieRelease(releases, query, title, detailsStr, cat, publishDate, quality);
else
{
const long size = 104857600L; // 100 MB
var release = GenerateRelease(title, detailsStr, detailsStr, cat, publishDate, size);
releases.Add(release);
}
}
}
private async Task ParseSeriesRelease(ICollection<ReleaseInfo> releases, TorznabQuery query, string title,
string detailsStr, string cat, DateTime publishDate, string quality)
{
var result = await RequestWithCookiesAsync(detailsStr);
if (result.Status != HttpStatusCode.OK)
throw new ExceptionWithConfigData(result.ContentString, configData);
var searchResultParser = new HtmlParser();
var doc = searchResultParser.ParseDocument(result.ContentString);
var rows = doc.QuerySelectorAll("tr.border");
quality = CleanQuality(quality);
ParseTags(title, quality);
foreach (var row in rows)
{
var episodeTitle = row.Children[1].TextContent.Replace("\n", String.Empty);
var downloadLink = row.Children.Last().Children[0].GetAttribute("href");
var episodePublishStr = row.Children[2].TextContent.Replace("\n", String.Empty);
var episodePublish = TryToParseDate(episodePublishStr, publishDate);
// Convert the title to Scene format
episodeTitle = ParseMejorTorrentSeriesTitle(title, episodeTitle, quality, query);
// if the original query was in scene format, we filter the results to match episode
// query.Episode != null means scene title
if (query.Episode != null && !episodeTitle.Contains(query.GetEpisodeSearchString()))
continue;
// guess size
var size = 536870912L; // 512 MB
if (title.ToLower().Contains("720p"))
size = 1073741824L; // 1 GB
var release = GenerateRelease(episodeTitle, detailsStr, downloadLink, cat, episodePublish, size);
releases.Add(release);
}
}
private async Task ParseMovieRelease(ICollection<ReleaseInfo> releases, TorznabQuery query, string title,
string detailsStr, string cat, DateTime publishDate, string quality)
{
var result = await RequestWithCookiesAsync(detailsStr);
if (result.Status != HttpStatusCode.OK)
throw new ExceptionWithConfigData(result.ContentString, configData);
var searchResultParser = new HtmlParser();
var doc = searchResultParser.ParseDocument(result.ContentString);
var downloadLink = doc.QuerySelector(".ml-2").GetAttribute("href");
// clean quality
quality = CleanQuality(quality);
// add the year
var detailsYear = doc.QuerySelector("div.py-4:nth-child(2) > p:nth-child(2) > a:nth-child(2)").TextContent;
if (detailsYear != null)
{
title = title + " " + detailsYear;
}
else
{
title = query.Year != null ? title + " " + query.Year : title;
}
ParseTags(title, quality);
// add spanish
title += " SPANISH";
// add quality
if (quality != null)
title += " " + quality;
// guess size 1.5 GB
var size = GuessSize(title, 1610612736L);
var release = GenerateRelease(title, detailsStr, downloadLink, cat, publishDate, size);
releases.Add(release);
}
private ReleaseInfo GenerateRelease(string title, string detailsStr, string downloadLink, string cat,
DateTime publishDate, long size)
{
var link = new Uri(downloadLink);
var details = new Uri(detailsStr);
var release = new ReleaseInfo
{
Title = title,
Details = details,
Link = link,
Guid = link,
Category = MapTrackerCatToNewznab(cat),
PublishDate = publishDate,
Size = size,
Files = 1,
Seeders = 1,
Peers = 2,
DownloadVolumeFactor = 0,
UploadVolumeFactor = 1
};
return release;
}
private static bool CheckTitleMatchWords(string queryStr, string title)
{
// this code split the words, remove words with 2 letters or less, remove accents and lowercase
var queryMatches = Regex.Matches(queryStr, @"\b[\w']*\b");
var queryWords = from m in queryMatches.Cast<Match>()
where !string.IsNullOrEmpty(m.Value) && m.Value.Length > 2
select Encoding.UTF8.GetString(Encoding.GetEncoding("ISO-8859-8").GetBytes(m.Value.ToLower()));
var titleMatches = Regex.Matches(title, @"\b[\w']*\b");
var titleWords = from m in titleMatches.Cast<Match>()
where !string.IsNullOrEmpty(m.Value) && m.Value.Length > 2
select Encoding.UTF8.GetString(Encoding.GetEncoding("ISO-8859-8").GetBytes(m.Value.ToLower()));
titleWords = titleWords.ToArray();
return queryWords.All(word => titleWords.Contains(word));
}
private static TorznabQuery ParseQuery(TorznabQuery query)
{
// Eg. Marco.Polo.2014.S02E08
// the season/episode part is already parsed by Jackett
// query.SanitizedSearchTerm = Marco.Polo.2014.
// query.Season = 2
// query.Episode = 8
var searchTerm = query.SanitizedSearchTerm;
// replace punctuation symbols with spaces
// searchTerm = Marco Polo 2014
searchTerm = Regex.Replace(searchTerm, @"[-._\(\)@/\\\[\]\+\%]", " ");
searchTerm = Regex.Replace(searchTerm, @"\s+", " ");
searchTerm = searchTerm.Trim();
// we parse the year and remove it from search
// searchTerm = Marco Polo
// query.Year = 2014
var r = new Regex("([ ]+([0-9]{4}))$", RegexOptions.IgnoreCase);
var m = r.Match(searchTerm);
if (m.Success)
{
query.Year = int.Parse(m.Groups[2].Value);
searchTerm = searchTerm.Replace(m.Groups[1].Value, "");
}
// remove some words
searchTerm = Regex.Replace(searchTerm, @"\b(espa[ñn]ol|spanish|castellano|spa)\b", "", RegexOptions.IgnoreCase);
query.SearchTerm = searchTerm;
return query;
}
private static string ParseMejorTorrentSeriesTitle(string title, string episodeTitle, string quality, TorznabQuery query)
{
// parse title
// title = The Mandalorian - 1ª Temporada
// title = The Mandalorian - 1ª Temporada [720p]
// title = Grace and Frankie - 5ª Temporada [720p]: 5x08 al 5x13.
var newTitle = title.Split(new[] { " - " }, StringSplitOptions.RemoveEmptyEntries)[0].Trim();
// newTitle = The Mandalorian
// parse episode title
var newEpisodeTitle = episodeTitle.Trim();
// episodeTitle = 5x08 al 5x13.
// episodeTitle = 2x01 - 2x02 - 2x03.
var matches = Regex.Matches(newEpisodeTitle, "([0-9]+)x([0-9]+)", RegexOptions.IgnoreCase);
if (matches.Count > 1)
{
newEpisodeTitle = "";
foreach (Match m in matches)
if (newEpisodeTitle.Equals(""))
newEpisodeTitle += "S" + m.Groups[1].Value.PadLeft(2, '0')
+ "E" + m.Groups[2].Value.PadLeft(2, '0');
else
newEpisodeTitle += "-E" + m.Groups[2].Value.PadLeft(2, '0');
// newEpisodeTitle = S05E08-E13
// newEpisodeTitle = S02E01-E02-E03
}
else
{
// episodeTitle = 1x04 - 05.
var m = Regex.Match(newEpisodeTitle, "^([0-9]+)x([0-9]+)[^0-9]+([0-9]+)[.]?$", RegexOptions.IgnoreCase);
if (m.Success)
newEpisodeTitle = "S" + m.Groups[1].Value.PadLeft(2, '0')
+ "E" + m.Groups[2].Value.PadLeft(2, '0') + "-"
+ "E" + m.Groups[3].Value.PadLeft(2, '0');
// newEpisodeTitle = S01E04-E05
else
{
// episodeTitle = 1x02
// episodeTitle = 1x02 -
// episodeTitle = 1x08 - CONTRASEÑA: WWW.PCTNEW ORG bebe
m = Regex.Match(newEpisodeTitle, "^([0-9]+)x([0-9]+)(.*)$", RegexOptions.IgnoreCase);
if (m.Success)
{
newEpisodeTitle = "S" + m.Groups[1].Value.PadLeft(2, '0')
+ "E" + m.Groups[2].Value.PadLeft(2, '0');
// newEpisodeTitle = S01E02
if (!m.Groups[3].Value.Equals(""))
newEpisodeTitle += " " + m.Groups[3].Value.Replace(" -", "").Trim();
// newEpisodeTitle = S01E08 CONTRASEÑA: WWW.PCTNEW ORG bebe
}
}
}
// if the original query was in scene format, we have to put the year back
// query.Episode != null means scene title
var year = query.Episode != null && query.Year != null ? " " + query.Year : "";
newTitle += year + " " + newEpisodeTitle;
// add quality
if (quality != null)
newTitle += " SPANISH " + quality;
else if (title.ToLower().Contains("[720p]"))
newTitle += " SPANISH 720p HDTV x264";
else
newTitle += " SPANISH SDTV XviD";
// return The Mandalorian S01E04 SPANISH 720p HDTV x264
return newTitle;
}
private static string GetMejortorrentCategory(string mejortorrentCat, string detailsStr, string title, string quality)
{
// get root category
var cat = MejorTorrentCatType.Otro;
if (mejortorrentCat == null)
{
if (detailsStr.Contains("peliculas_extend"))
cat = MejorTorrentCatType.Pelicula;
else if (detailsStr.Contains("series_extend"))
cat = MejorTorrentCatType.Serie;
else if (detailsStr.Contains("musica_extend"))
cat = MejorTorrentCatType.Musica;
else if (detailsStr.Contains("pelicula"))
cat = MejorTorrentCatType.Pelicula;
}
else if (mejortorrentCat.Equals(MejorTorrentCatType.Pelicula) ||
mejortorrentCat.Equals(MejorTorrentCatType.Serie) ||
mejortorrentCat.Equals(MejorTorrentCatType.Musica))
cat = mejortorrentCat;
else if (mejortorrentCat.Equals("peliculas"))
cat = MejorTorrentCatType.Pelicula;
else if (mejortorrentCat.Equals("series") || mejortorrentCat.Equals("documentales"))
cat = MejorTorrentCatType.Serie;
// hack to separate SD & HD series
if (cat.Equals(MejorTorrentCatType.Serie))
{
if (title.ToLower().Contains("720p") ||
title.ToLower().Contains("1080p") ||
quality.ToLower().Contains("720p") ||
quality.ToLower().Contains("1080p"))
cat = MejorTorrentCatType.SerieHd;
}
return cat;
}
private void ParseTags(string title, string quality)
{
title = title.Trim();
// parse tags in title, we need to put the year after the real title (before the tags)
// Harry Potter And The Deathly Hallows: Part 1 [subs. Integrados]
var tags = "";
var queryMatches = Regex.Matches(title, @"[\[\(]([^\]\)]+)[\]\)]", RegexOptions.IgnoreCase);
foreach (Match m in queryMatches)
{
var tag = m.Groups[1].Value.Trim().ToUpper();
if (tag.Equals("4K")) // Fix 4K quality. Eg Harry Potter Y La Orden Del Fénix [4k]
quality = "(UHD 4K 2160p)";
else if (tag.Equals("FULLBLURAY")) // Fix 4K quality. Eg Harry Potter Y El Cáliz De Fuego (fullbluray)
quality = "(COMPLETE BLURAY)";
else // Add the tag to the title
tags += " " + tag;
title = title.Replace(m.Groups[0].Value, "");
}
title += tags;
}
private long GuessSize(string title, long initialQuality)
{
var size = initialQuality;
if (title.ToLower().Contains("microhd"))
size = 7516192768L; // 7 GB
else if (title.ToLower().Contains("complete bluray") || title.ToLower().Contains("2160p"))
size = 53687091200L; // 50 GB
else if (title.ToLower().Contains("bluray"))
size = 17179869184L; // 16 GB
else if (title.ToLower().Contains("bdremux"))
size = 21474836480L; // 20 GB
return size;
}
private static string CleanQuality(string quality)
{
if (quality != null)
{
var queryMatch = Regex.Match(quality, @"[\[\(]([^\]\)]+)[\]\)]", RegexOptions.IgnoreCase);
if (queryMatch.Success)
quality = queryMatch.Groups[1].Value;
quality = quality.Trim().Replace("-", " ");
quality = Regex.Replace(quality, "HDRip", "BDRip", RegexOptions.IgnoreCase); // fix for Radarr
}
return quality;
}
private static DateTime TryToParseDate(string dateToParse, DateTime dateDefault)
{
try
{
return DateTime.ParseExact(dateToParse, "yyyy-MM-dd", CultureInfo.InvariantCulture);
}
catch
{
// ignored
}
return dateDefault;
}
}
}