Jackett/src/Jackett.Common/Indexers/EpubLibre.cs

212 lines
9.1 KiB
C#

using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AngleSharp.Html.Parser;
using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig;
using Jackett.Common.Services.Interfaces;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using NLog;
using WebClient = Jackett.Common.Utils.Clients.WebClient;
namespace Jackett.Common.Indexers
{
[ExcludeFromCodeCoverage]
public class EpubLibre : BaseWebIndexer
{
private string SearchUrl => SiteLink + "catalogo/index/{0}/nuevo/todos/sin/todos/{1}/ajax";
private string SobrecargaUrl => SiteLink + "inicio/sobrecarga";
private const int MaxItemsPerPage = 18;
private const int MaxSearchPageLimit = 6; // 18 items per page * 6 pages = 108
private readonly Dictionary<string, string> _apiHeaders = new Dictionary<string, string>
{
{"X-Requested-With", "XMLHttpRequest"}
};
private readonly Dictionary<string, string> _languages = new Dictionary<string, string>
{
{"1", "español"},
{"2", "catalán"},
{"3", "euskera"},
{"4", "gallego"},
{"5", "inglés"},
{"6", "francés"},
{"7", "alemán"},
{"8", "sueco"},
{"9", "mandarín"},
{"10", "italiano"},
{"11", "portugués"},
{"12", "esperanto"}
};
public override string[] AlternativeSiteLinks { get; protected set; } = {
"https://www.epublibre.org/",
"https://epublibre.unblockit.onl/"
};
public override string[] LegacySiteLinks { get; protected set; } = {
"https://epublibre.unblockit.lat/",
"https://epublibre.unblockit.app/",
"https://epublibre.unblockit.dev/",
"https://epublibre.unblockit.ltd/",
"https://epublibre.unblockit.link/",
"https://epublibre.unblockit.buzz/",
"https://epublibre.unblockit.club/",
"https://epublibre.org/"
};
public EpubLibre(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps,
ICacheService cs)
: base(id: "epublibre",
name: "EpubLibre",
description: "Más libros, Más libres",
link: "https://www.epublibre.org/",
caps: new TorznabCapabilities
{
BookSearchParams = new List<BookSearchParam>
{
BookSearchParam.Q // TODO: add more book parameters
}
},
configService: configService,
client: wc,
logger: l,
p: ps,
cacheService: cs,
configData: new ConfigurationData())
{
Encoding = Encoding.UTF8;
Language = "es-es";
Type = "public";
AddCategoryMapping(1, TorznabCatType.BooksEBook);
}
public override async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
{
base.LoadValuesFromJson(configJson);
var releases = await PerformQuery(new TorznabQuery());
await ConfigureIfOK(string.Empty, releases.Any(), () =>
throw new Exception("Could not find any release from this URL"));
return IndexerConfigurationStatus.Completed;
}
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
{
var releases = new List<ReleaseInfo>();
var searchString = "--";
var maxPages = 2; // we scrape only 2 pages for recent torrents
if (!string.IsNullOrWhiteSpace(query.GetQueryString()))
{
searchString = Uri.EscapeUriString(query.GetQueryString());
maxPages = MaxSearchPageLimit;
}
var lastPublishDate = DateTime.Now;
for (var page = 0; page < maxPages; page++)
{
var searchUrl = string.Format(SearchUrl, page * MaxItemsPerPage, searchString);
var result = await RequestWithCookiesAsync(searchUrl, headers: _apiHeaders);
try
{
var json = JsonConvert.DeserializeObject<dynamic>(result.ContentString);
var parser = new HtmlParser();
var doc = parser.ParseDocument((string)json["contenido"]);
var rows = doc.QuerySelectorAll("div.span2");
foreach (var row in rows)
{
var title = row.QuerySelector("h2").TextContent + " - " +
row.QuerySelector("h1").TextContent;
if (!CheckTitleMatchWords(query.GetQueryString(), title))
continue; // skip if it doesn't contain all words
var poster = new Uri(row.QuerySelector("img[id=catalog]").GetAttribute("src"));
var qLink = row.QuerySelector("a");
var details = new Uri(qLink.GetAttribute("href"));
var qTooltip = parser.ParseDocument(qLink.GetAttribute("data-content"));
// we get the language from the last class tag => class="pull-right sprite idioma_5"
var languageId = qTooltip.QuerySelector("div.pull-right").GetAttribute("class").Split('_')[1];
title += $" [{_languages[languageId]}] [epub]";
var qDesc = qTooltip.QuerySelectorAll("div.row-fluid > div");
var description = $"Rev: {qDesc[0].TextContent} Páginas: {qDesc[1].TextContent} Puntación: {qDesc[2].TextContent} Likes: {qDesc[3].TextContent}";
// publish date is not available in the torrent list, but we add a relative date so we can sort
lastPublishDate = lastPublishDate.AddMinutes(-1);
var release = new ReleaseInfo
{
Title = title,
Details = details,
Link = details,
Guid = details,
PublishDate = lastPublishDate,
Poster = poster,
Description = description,
Category = new List<int> { TorznabCatType.BooksEBook.ID },
Size = 5242880, // 5 MB
Seeders = 1,
Peers = 2,
DownloadVolumeFactor = 0,
UploadVolumeFactor = 1
};
releases.Add(release);
}
if (rows.Length < MaxItemsPerPage)
break; // this is the last page
}
catch (Exception ex)
{
OnParseError(result.ContentString, ex);
}
}
return releases;
}
public override async Task<byte[]> Download(Uri link)
{
var result = await RequestWithCookiesAndRetryAsync(link.AbsoluteUri);
if (SobrecargaUrl.Equals(result.RedirectingTo))
throw new Exception("El servidor se encuentra sobrecargado en estos momentos. / The server is currently overloaded.");
try
{
var parser = new HtmlParser();
var doc = parser.ParseDocument(result.ContentString);
var magnetLink = doc.QuerySelector("a[id=en_desc]").GetAttribute("href");
return Encoding.UTF8.GetBytes(magnetLink);
}
catch (Exception ex)
{
OnParseError(result.ContentString, ex);
}
return null;
}
// TODO: merge this method with query.MatchQueryStringAND
private static bool CheckTitleMatchWords(string queryStr, string title)
{
// this code split the words, remove words with 2 letters or less, remove accents and lowercase
var queryMatches = Regex.Matches(queryStr, @"\b[\w']*\b");
var queryWords = from m in queryMatches.Cast<Match>()
where !string.IsNullOrEmpty(m.Value) && m.Value.Length > 2
select Encoding.UTF8.GetString(Encoding.GetEncoding("ISO-8859-8").GetBytes(m.Value.ToLower()));
var titleMatches = Regex.Matches(title, @"\b[\w']*\b");
var titleWords = from m in titleMatches.Cast<Match>()
where !string.IsNullOrEmpty(m.Value) && m.Value.Length > 2
select Encoding.UTF8.GetString(Encoding.GetEncoding("ISO-8859-8").GetBytes(m.Value.ToLower()));
titleWords = titleWords.ToArray();
return queryWords.All(word => titleWords.Contains(word));
}
}
}